Commit d2ea116

Revert "feat: Add MoE support for VLLM (#118)"
This reverts commit b17ba13.
1 parent: b17ba13

13 files changed: +33 −524 lines

collector/.gitignore

Lines changed: 0 additions & 3 deletions
This file was deleted.

collector/collect.py

Lines changed: 2 additions & 15 deletions

@@ -128,12 +128,6 @@ def worker(queue, device_id: int, func, progress_value, lock, error_queue=None,
         for handler in worker_logger.handlers:
             handler.flush()
 
-        # This error could be fatal and require a process restart.
-        if isinstance(e, torch.AcceleratorError):
-            # Exiting with non-zero code will add an additional error to the summary,
-            # which we don't want.
-            exit(0)
-
 
 def parallel_run(tasks, func, num_processes, module_name="unknown"):
     """parallel runner with error collection"""
@@ -433,7 +427,7 @@ def collect_sglang(num_processes: int, ops: list[str] | None = None):
 
 def collect_vllm(num_processes: int, ops: list[str] | None = None):
     """
-    Collect performance data for VLLM
+    Collect performance data for VLLM v1.
     """
 
     try:
@@ -447,7 +441,7 @@ def collect_vllm(num_processes: int, ops: list[str] | None = None):
 
     collections = [
         # GEMM collections
-        # vllm GEMM collection for fp16, fp8, fp8_block, nvfp4, awq, and gptq
+        # vllm v1 GEMM collection for fp16, fp8, fp8_block, nvfp4, awq, and gptq
         {
             "name": "vllm",
             "type": "gemm",
@@ -470,13 +464,6 @@
             "get_func": "get_generation_attention_test_cases",
             "run_func": "run_attention_torch",
         },
-        {
-            "name": "vllm",
-            "type": "moe",
-            "module": "collector.vllm.collect_moe",
-            "get_func": "get_moe_test_cases",
-            "run_func": "run_moe_torch",
-        },
     ]
 
     all_errors = collect_ops(num_processes, collections, ops, version)
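
Each entry in `collections` is a plain dict naming a module plus a pair of functions that generate and run test cases; the removed MoE entry had the same shape as the surviving GEMM and attention ones. A minimal sketch of how such an entry could be resolved at run time (`collect_ops` itself is not shown in this diff, so the dispatcher below is an assumption, not its actual code):

import importlib

def resolve_collection(entry: dict):
    # Import the collector module named by the entry and look up its
    # case-generator and runner functions by name.
    module = importlib.import_module(entry["module"])
    return getattr(module, entry["get_func"]), getattr(module, entry["run_func"])

# For the removed MoE entry this would have resolved to
# collector.vllm.collect_moe.get_moe_test_cases / run_moe_torch.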

collector/common_test_cases.py

Lines changed: 0 additions & 132 deletions
This file was deleted.

collector/helper.py

Lines changed: 17 additions & 33 deletions

@@ -10,6 +10,11 @@
 import signal
 import sys
 import traceback
+
+try:
+    from cuda import cuda
+except:
+    from cuda.bindings import driver as cuda
 from datetime import datetime
 from pathlib import Path
 
@@ -213,42 +218,21 @@ def save_error_report(errors, filename):
 
 
 def get_sm_version():
-    """Get CUDA compute capability (SM version)"""
-    try:
-        import torch
+    # Init
+    (err,) = cuda.cuInit(0)
 
-        if torch.cuda.is_available():
-            device = torch.cuda.current_device()
-            capability = torch.cuda.get_device_capability(device)
-            return capability[0] * 10 + capability[1]
-    except Exception:
-        pass
-
-    # fallback to cuda-python
-    try:
-        from cuda import cuda
+    # Device
+    err, cu_device = cuda.cuDeviceGet(0)
 
-        # Init
-        (err,) = cuda.cuInit(0)
-        if err != 0:
-            raise RuntimeError(f"cuInit failed with error code: {err}")
-
-        # Device
-        err, cu_device = cuda.cuDeviceGet(0)
-        if err != 0:
-            raise RuntimeError(f"cuDeviceGet failed with error code: {err}")
-
-        # Get target architecture
-        err, sm_major = cuda.cuDeviceGetAttribute(
-            cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cu_device
-        )
-        err, sm_minor = cuda.cuDeviceGetAttribute(
-            cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cu_device
-        )
+    # Get target architecture
+    err, sm_major = cuda.cuDeviceGetAttribute(
+        cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cu_device
+    )
+    err, sm_minor = cuda.cuDeviceGetAttribute(
+        cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cu_device
+    )
 
-        return sm_major * 10 + sm_minor
-    except Exception as e:
-        raise RuntimeError(f"Cannot get SM version: both PyTorch and cuda-python failed. Error: {e}") from e
+    return sm_major * 10 + sm_minor
 
 
 def create_test_case_id(test_case, test_type, module_name):
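
The restored `get_sm_version` encodes the compute capability as major * 10 + minor, so capability 9.0 reports as 90 and 8.0 as 80, and it relies on the module-level cuda import fallback added at the top of the file. A self-contained sketch of the same computation with the per-call error checks that the pre-revert version carried; the checks are re-added here for illustration only, since the restored helper ignores `err`:

try:
    from cuda import cuda  # older cuda-python layout
except ImportError:
    from cuda.bindings import driver as cuda  # newer cuda-python layout


def get_sm_version_checked() -> int:
    # Initialize the driver and take device 0, surfacing any failure.
    (err,) = cuda.cuInit(0)
    if err != cuda.CUresult.CUDA_SUCCESS:
        raise RuntimeError(f"cuInit failed: {err}")
    err, dev = cuda.cuDeviceGet(0)
    if err != cuda.CUresult.CUDA_SUCCESS:
        raise RuntimeError(f"cuDeviceGet failed: {err}")
    # Read the compute capability and pack it as major*10 + minor.
    err, major = cuda.cuDeviceGetAttribute(
        cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev
    )
    err, minor = cuda.cuDeviceGetAttribute(
        cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev
    )
    return major * 10 + minor  # e.g. 90 on SM90 (Hopper)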
