InfiniTensor · GordonYang1 · Jun 29, 2026 · Jun 24, 2026 · Jun 24, 2026 · Jun 25, 2026
diff --git a/README.md b/README.md
@@ -173,18 +173,20 @@ After having a successful build and a complete `cluster.yaml`, we are ready for
 ### 1. Run Internal Examples
 
 #### 1.1. Run a Single Example
-To run an internal example program (e.g., `examples/all_reduce.cc`), just run: 
+Examples are organized under `examples/` into one sub-directory per collective paradigm (e.g., `examples/mpi/`).
+
+To run an internal example program (e.g., `examples/mpi/all_reduce.cc`), just run: 
 
 ```bash
 # Build and run the executable across the cluster based on the config specified in your `cluster.yaml`
-icclrun --config [path-to-your-cluster.yaml] --build all_reduce [program args]
+icclrun --config [path-to-your-cluster.yaml] --build mpi/all_reduce [program args]
 ```
 
 - `--config / -c` : Path to the cluster YAML file.
 - `--build` : Instructs `icclrun` to compile the library on each node before execution. If omitted, `icclrun` assumes the library is already installed at a consistent location.
 - `--launcher` : Specify the backend launcher to be used. Default to `ompi`.
 
-The executable (e.g., `all_reduce`) and its arguments follow the options.
+The executable (e.g., `mpi/all_reduce`) and its arguments follow the options.
 
 For more details, run:
 
@@ -207,7 +209,7 @@ chmod +x ./scripts/run_examples.sh
 For all the options of the script, see:
 
 ```bash
-./run_examples --help
+./scripts/run_examples.py --help
 ```
 
 ### 2. Run a Custom User Program

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -1,47 +1,61 @@
 file(GLOB_RECURSE EXAMPLE_SOURCES "*.cc")
 
 foreach(source_file ${EXAMPLE_SOURCES})
-    get_filename_component(example_name ${source_file} NAME_WE)
+    file(RELATIVE_PATH rel_file "${CMAKE_CURRENT_SOURCE_DIR}" "${source_file}")
 
-    add_executable(${example_name} ${source_file})
+    get_filename_component(file_dir ${rel_file} DIRECTORY)
+    get_filename_component(file_we ${rel_file} NAME_WE)
 
-    target_link_libraries(${example_name} PRIVATE infiniccl)
+    if(file_dir)
+        string(REPLACE "/" "_" target_name "${file_dir}_${file_we}")
+    else()
+        set(target_name ${file_we})
+    endif()
+
+    add_executable(${target_name} ${source_file})
+
+    set_target_properties(${target_name} PROPERTIES
+        OUTPUT_NAME "${file_we}"
+        RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${file_dir}"
+    )
+
+    target_link_libraries(${target_name} PRIVATE infiniccl)
 
     # Add runtime and backend dependencies for direct runtime/backend usage.
     if(WITH_NVIDIA)
-        target_link_libraries(${example_name} PRIVATE CUDA::cudart)
+        target_link_libraries(${target_name} PRIVATE CUDA::cudart)
     endif()
 
     if(WITH_ILUVATAR)
         set_source_files_properties(${source_file} PROPERTIES LANGUAGE CXX)
-        set_target_properties(${example_name} PROPERTIES
+        set_target_properties(${target_name} PROPERTIES
             RULE_LAUNCH_COMPILE "${ILUVATAR_CUDA_COMPILER} "
         )
-        target_compile_options(${example_name} PRIVATE ${ILUVATAR_CUDA_FLAGS} "-Wno-unused-command-line-argument")
-        target_link_libraries(${example_name} PRIVATE CUDA::cudart CUDA::cuda_driver)
+        target_compile_options(${target_name} PRIVATE ${ILUVATAR_CUDA_FLAGS} "-Wno-unused-command-line-argument")
+        target_link_libraries(${target_name} PRIVATE CUDA::cudart CUDA::cuda_driver)
     endif()
 
     if(WITH_METAX)
-        target_link_libraries(${example_name} PRIVATE ${MACA_RUNTIME_LIB})
-        target_compile_options(${example_name} PRIVATE "-x" "maca")
+        target_link_libraries(${target_name} PRIVATE ${MACA_RUNTIME_LIB})
+        target_compile_options(${target_name} PRIVATE "-x" "maca")
     endif()
 
     if(WITH_MOORE)
-        target_link_libraries(${example_name} PRIVATE ${MUSART_LIB})
-        target_compile_options(${example_name} PRIVATE "-x" "musa")
+        target_link_libraries(${target_name} PRIVATE ${MUSART_LIB})
+        target_compile_options(${target_name} PRIVATE "-x" "musa")
     endif()
 
     if(WITH_CAMBRICON)
-        target_link_libraries(${example_name} PRIVATE ${CAMBRICON_RUNTIME_LIB})
+        target_link_libraries(${target_name} PRIVATE ${CAMBRICON_RUNTIME_LIB})
     endif()
 
     if(WITH_OMPI OR WITH_MPICH)
-        target_link_libraries(${example_name} PRIVATE MPI::MPI_CXX)
+        target_link_libraries(${target_name} PRIVATE MPI::MPI_CXX)
     endif()
 
     # Explicitly allow examples to "peek" into the internal `src` and binary dirs.
     # This is necessary because these were marked `PRIVATE` in the library's CMake.
-    target_include_directories(${example_name} PRIVATE 
+    target_include_directories(${target_name} PRIVATE 
         ${CMAKE_CURRENT_SOURCE_DIR}
         "${PROJECT_SOURCE_DIR}/src"         # For internal templates like `runtime.h`.
         "${CMAKE_BINARY_DIR}/src"           # For the generated `backend_manifest.h`.

diff --git a/examples/all_gather.cc → examples/mpi/all_gather.cc b/examples/all_gather.cc → examples/mpi/all_gather.cc
diff --git a/examples/all_reduce.cc → examples/mpi/all_reduce.cc b/examples/all_reduce.cc → examples/mpi/all_reduce.cc
diff --git a/examples/all_to_all.cc → examples/mpi/all_to_all.cc b/examples/all_to_all.cc → examples/mpi/all_to_all.cc
diff --git a/examples/broadcast.cc → examples/mpi/broadcast.cc b/examples/broadcast.cc → examples/mpi/broadcast.cc
diff --git a/examples/gather.cc → examples/mpi/gather.cc b/examples/gather.cc → examples/mpi/gather.cc
diff --git a/examples/reduce.cc → examples/mpi/reduce.cc b/examples/reduce.cc → examples/mpi/reduce.cc
diff --git a/examples/reduce_scatter.cc → examples/mpi/reduce_scatter.cc b/examples/reduce_scatter.cc → examples/mpi/reduce_scatter.cc
diff --git a/examples/scatter.cc → examples/mpi/scatter.cc b/examples/scatter.cc → examples/mpi/scatter.cc
diff --git a/examples/send_recv.cc → examples/mpi/send_recv.cc b/examples/send_recv.cc → examples/mpi/send_recv.cc
diff --git a/scripts/run_examples.py b/scripts/run_examples.py
@@ -4,7 +4,75 @@
 import subprocess
 import sys
 from datetime import datetime
-from typing import Optional
+from pathlib import Path
+from typing import List, Optional
+
+
+# ==============================================================================
+# DISCOVERY UTILITIES
+# ==============================================================================
+def discover_available_examples(examples_root: Path) -> List[dict]:
+    """
+    Recursively collects every `*.cc` file under `examples_root`, sorted by path.
+
+    Each entry holds `absolute_path`, `relative_path`, `category` (the parent
+    directory in POSIX form, "" at the root), `name_we`, and `binary_path`.
+    """
+    found = []
+    # `rglob` yields in filesystem order; sort so runs and logs are reproducible.
+    for path in sorted(examples_root.rglob("*.cc")):
+        rel_path = path.relative_to(examples_root)
+        parent = rel_path.parent.as_posix()
+        category = "" if parent == "." else parent
+        # Cut at the first dot, like CMake's `NAME_WE` that names the binary.
+        file_we = rel_path.name.split(".", 1)[0]
+        found.append(
+            {
+                "absolute_path": path,
+                "relative_path": rel_path.as_posix(),
+                "category": category,
+                "name_we": file_we,
+                "binary_path": f"{category}/{file_we}" if category else file_we,
+            }
+        )
+    return found
+
+
+def resolve_targets(input_strings: List[str], available: List[dict]) -> List[dict]:
+    """
+    Resolves user-provided strings into a de-duplicated list of targets.
+
+    After separator normalization, a query selects every entry whose:
+      1. Category is the query or nested beneath it (e.g. 'mpi' or 'mpi/')
+      2. Relative or binary path equals it (e.g. 'mpi/all_reduce[.cc]')
+      3. Short program name equals it (e.g. 'all_reduce') -> all instances
+
+    Entries keep first-match order; an unmatched query only prints a warning.
+    """
+    resolved = []
+    seen_binaries = set()
+    exact_keys = ("relative_path", "binary_path", "name_we")
+
+    for raw_query in input_strings:
+        # Drop trailing slashes so a tab-completed 'mpi/' still names the category.
+        query = raw_query.strip().replace(os.sep, "/").rstrip("/")
+        if not query:
+            continue
+        matches = [
+            item
+            for item in available
+            if any(query == item[key] for key in exact_keys)
+            or f"{item['category']}/".startswith(f"{query}/")
+        ]
+        for item in matches:
+            if item["binary_path"] not in seen_binaries:
+                seen_binaries.add(item["binary_path"])
+                resolved.append(item)
+        if not matches:
+            print(
+                f"⚠️  Warning: Input pattern '{query}' did not match any discovered examples."
+            )
+    return resolved
 
 
 # ==============================================================================
@@ -19,7 +87,7 @@ def setup_log_directory(directory: str) -> str:
 
 
 def run_iccl_example(
-    example_name: str,
+    target_info: dict,
     config_path: str,
     launcher_opt: Optional[str],
     log_dir: str,
@@ -28,41 +96,39 @@ def run_iccl_example(
     timeout_duration: int,
 ) -> bool:
     """Executes an example via `icclrun` orchestration framework."""
-    log_file_path = os.path.join(log_dir, f"{example_name}.log")
+    binary_path = target_info["binary_path"]
+    safe_log_name = binary_path.replace("/", "_")
+    log_file_path = os.path.join(log_dir, f"{safe_log_name}.log")
 
     status_msg = (
         "🚀 Running via `icclrun` (with build):"
         if trigger_build
         else "🚀 Running via `icclrun`:             "
     )
-    print(f"{status_msg:<35} {example_name:<20}", end="", flush=True)
+    print(f"{status_msg:<35} {binary_path:<30}", end="", flush=True)
 
     # Base Command Assembly
     cmd = ["icclrun", "--config", config_path]
     if launcher_opt and launcher_opt.strip():
         cmd.extend(["--launcher", launcher_opt.strip()])
 
     if trigger_build:
-        cmd.extend(["--build", example_name])
+        cmd.extend(["--build", binary_path])
     else:
-        cmd.append(example_name)
-
-    # Format the exact command as a clean string for log documentation.
-    exact_command_str = " ".join(cmd)
+        cmd.append(binary_path)
 
     # Force environment unbuffered stream states for sub-python instances.
     custom_env = os.environ.copy()
     custom_env["PYTHONUNBUFFERED"] = "1"
 
     try:
         with open(log_file_path, "w", buffering=1) as log_file:
-            # Write the exact underlying command as the absolute first line of the log.
-            log_file.write(f"[COMMAND]: {exact_command_str}\n")
+            log_file.write(f"[COMMAND]: {' '.join(cmd)}\n")
             log_file.write("=" * 80 + "\n\n")
             log_file.flush()
 
             if verbose:
-                print(f"\n--- [VERBOSE OUTPUT START: `{example_name}`] ---")
+                print(f"\n--- [VERBOSE OUTPUT START: `{binary_path}`] ---")
 
                 # Execute with `Popen` to stream `stdout`/`stderr` live to both terminal and file.
                 process = subprocess.Popen(
@@ -80,7 +146,7 @@ def run_iccl_example(
                 process.wait(timeout=timeout_duration)
                 return_code = process.returncode
                 print(
-                    f"--- [VERBOSE OUTPUT END: `{example_name}`] ---\n" + " " * 56,
+                    f"--- [VERBOSE OUTPUT END: `{binary_path}`] ---\n" + " " * 66,
                     end="",
                 )
             else:
@@ -106,7 +172,7 @@ def run_iccl_example(
         print(f" ❌ TIMEOUT (Exceeded {timeout_duration} seconds)")
         with open(log_file_path, "a") as f:
             f.write(
-                f"\n[RUNNER ERROR]: Distributed `icclrun` harness timed out after {timeout_duration} seconds.\n"
+                f"\n[RUNNER ERROR]: Harness timed out after {timeout_duration} seconds.\n"
             )
         return False
     except FileNotFoundError:
@@ -142,8 +208,8 @@ def main():
         "-e",
         "--examples",
         type=str,
-        default="all_reduce",
-        help="Comma-separated list of example target names to execute.",
+        default="mpi",
+        help="Comma-separated paths, categories, or short names (fuzzy match). E.g. 'mpi', 'mpi/all_reduce', or 'all_reduce'.",
     )
     parser.add_argument(
         "-o",
@@ -168,17 +234,27 @@ def main():
 
     args = parser.parse_args()
 
-    # Parse and clean the comma-separated examples list.
-    examples_to_run = [ex.strip() for ex in args.examples.split(",") if ex.strip()]
+    script_dir = Path(__file__).parent.resolve()
+    examples_root = script_dir / "../examples"
+    if not examples_root.exists():
+        examples_root = Path("examples").resolve()
+
+    if not examples_root.exists():
+        print("❌ Error: Could not locate 'examples/' directory tree.")
+        sys.exit(1)
+
+    all_available = discover_available_examples(examples_root)
+
+    input_queries = [ex.strip() for ex in args.examples.split(",") if ex.strip()]
+    targets_to_run = resolve_targets(input_queries, all_available)
 
-    # Sanity Checks
     if not os.path.exists(args.config):
         print(f"❌ Error: Config file matching '{args.config}' could not be located.")
         sys.exit(1)
 
-    if not examples_to_run:
+    if not targets_to_run:
         print(
-            "❌ Error: No target programs defined. Please check your `--examples` list."
+            "❌ Error: No valid targets resolved. Please check your `--examples` query configurations."
         )
         sys.exit(1)
 
@@ -199,9 +275,9 @@ def main():
     failed_programs = []
     is_first_run = True
 
-    for example in examples_to_run:
+    for target in targets_to_run:
         success = run_iccl_example(
-            example_name=example,
+            target_info=target,
             config_path=args.config,
             launcher_opt=args.launcher,
             log_dir=current_run_log_dir,
@@ -216,12 +292,9 @@ def main():
             passed_count += 1
         else:
             failed_count += 1
-            failed_programs.append(example)
+            failed_programs.append(target["binary_path"])
 
-    # ==============================================================================
-    # METRICS REPORTING
-    # ==============================================================================
-    total_programs = len(examples_to_run)
+    total_programs = len(targets_to_run)
     success_rate = (passed_count / total_programs) * 100
 
     print("\n==================================================================")