 import argparse
 import asyncio
 import gc
+import platform
 import sys
 import time
 from pathlib import Path
@@ -58,6 +59,7 @@ def __init__(self):
         self.nvml_initialized = False
         self.gpu_handle = None
         self.baseline_gpu_memory_mb = 0
+        self.platform = platform.system()
 
         if NVML_AVAILABLE and nvml:
             try:
@@ -68,8 +70,12 @@ def __init__(self):
                 # Store baseline GPU memory usage
                 mem_info = nvml.nvmlDeviceGetMemoryInfo(self.gpu_handle)
                 self.baseline_gpu_memory_mb = mem_info.used / 1024 / 1024
+                print(
+                    f"NVML initialized on {self.platform}. "
+                    f"Initial GPU memory: {self.baseline_gpu_memory_mb:.1f} MB"
+                )
             except Exception as e:
-                print(f"Failed to initialize NVML: {e}")
+                print(f"Failed to initialize NVML on {self.platform}: {e}")
                 self.nvml_initialized = False
 
     def get_process_tree_pids(self) -> list[int]:
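For reference, the baseline/delta bookkeeping that the new platform-aware logging reports boils down to a handful of NVML calls. A minimal standalone sketch, assuming the nvidia-ml-py bindings are importable as `pynvml` (this script imports them under the `nvml` alias) and that GPU index 0 is the device being tracked:

import pynvml  # assumption: nvidia-ml-py / nvidia-ml-py3 bindings

pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)  # assumption: first GPU

# Record the baseline once, before the workload allocates anything.
baseline_mb = pynvml.nvmlDeviceGetMemoryInfo(handle).used / 1024 / 1024

# ... run the workload ...

# Later samples are reported as growth over that baseline, clamped at zero.
current_mb = pynvml.nvmlDeviceGetMemoryInfo(handle).used / 1024 / 1024
print(f"host_vram_mb: {max(0, current_mb - baseline_mb):.1f}")

pynvml.nvmlShutdown()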
@@ -132,6 +138,15 @@ def get_memory_usage(self) -> dict[str, float]:
                 vram_delta = current_used_mb - self.baseline_gpu_memory_mb
                 memory_info["host_vram_mb"] = max(0, vram_delta)
 
+                # Debug output for Windows
+                if self.platform == "Windows":
+                    print(
+                        f"[DEBUG Windows] Current GPU: {current_used_mb:.1f} MB, "
+                        f"Baseline: {self.baseline_gpu_memory_mb:.1f} MB, "
+                        f"Delta: {vram_delta:.1f} MB",
+                        file=sys.stderr,
+                    )
+
             except Exception as e:
                 print(f"Error getting GPU memory usage: {e}")
 
@@ -162,9 +177,15 @@ def reset_baseline(self):
         if self.nvml_initialized and self.gpu_handle:
             try:
                 mem_info = nvml.nvmlDeviceGetMemoryInfo(self.gpu_handle)
+                old_baseline = self.baseline_gpu_memory_mb
                 self.baseline_gpu_memory_mb = mem_info.used / 1024 / 1024
+                print(
+                    f"[DEBUG {self.platform}] Reset baseline from {old_baseline:.1f} MB "
+                    f"to {self.baseline_gpu_memory_mb:.1f} MB",
+                    file=sys.stderr,
+                )
             except Exception as e:
-                print(f"Error resetting GPU memory baseline: {e}")
+                print(f"Error resetting GPU memory baseline on {self.platform}: {e}")
 
     def __del__(self):
         """Cleanup NVML on deletion."""
@@ -297,11 +318,23 @@ async def run_scaling_test(
             ExtensionManagerConfig(venv_root_path=str(self.test_base.test_root / "extension-venvs")),
         )
 
-        # Measure memory before creating extensions
+        # Clean up and reset baseline before measuring
         gc.collect()
         if CUDA_AVAILABLE:
             torch.cuda.empty_cache()
+            torch.cuda.synchronize()  # Ensure all operations complete
+
+        # Reset GPU memory baseline for this test
+        self.memory_tracker.reset_baseline()
+
+        # Wait a moment for memory to settle
+        await asyncio.sleep(1)
+
         before_memory = self.memory_tracker.get_memory_usage()
+        print(
+            f"Baseline GPU memory: {before_memory.get('gpu_used_mb', 0):.1f} MB "
+            f"(baseline: {self.memory_tracker.baseline_gpu_memory_mb:.1f} MB)"
+        )
 
         # Create and load extensions
         print(f"Creating {num_extensions} extensions...")
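The reordering above (collect garbage, empty the CUDA cache, synchronize, reset the NVML baseline, then pause before sampling) is the measurement discipline the rest of the test depends on. A minimal sketch of that sequence in isolation, assuming CUDA is available and `tracker` is any object exposing the `reset_baseline()` and `get_memory_usage()` methods seen in this diff:

import asyncio
import gc

import torch

async def settle_and_measure(tracker):
    """Flush Python and CUDA state, then take a clean 'before' sample."""
    gc.collect()                   # drop unreferenced Python objects
    torch.cuda.empty_cache()       # return cached allocator blocks to the driver
    torch.cuda.synchronize()       # wait for queued GPU work to finish
    tracker.reset_baseline()       # exclude pre-test VRAM from later deltas
    await asyncio.sleep(1)         # give the driver a moment to settle
    return tracker.get_memory_usage()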
@@ -337,12 +370,21 @@ async def run_scaling_test(
         with torch.inference_mode():
             if use_cuda and CUDA_AVAILABLE:
                 test_tensor = torch.randn(*test_tensor_size, device="cuda")
+                torch.cuda.synchronize()  # Ensure tensor creation completes
             else:
                 test_tensor = torch.randn(*test_tensor_size)
 
         tensor_size_mb = test_tensor.element_size() * test_tensor.numel() / (1024 * 1024)
         print(f"Tensor size: {tensor_size_mb:.1f} MB on {test_tensor.device}")
 
+        # Check memory after tensor creation
+        if use_cuda and CUDA_AVAILABLE:
+            post_tensor_memory = self.memory_tracker.get_memory_usage()
+            print(
+                f"GPU memory after tensor creation: {post_tensor_memory.get('gpu_used_mb', 0):.1f} MB "
+                f"(delta: {post_tensor_memory.get('host_vram_mb', 0):.1f} MB)"
+            )
+
         # Send tensor to all extensions
         print(f"Sending tensor to {num_extensions} extensions...")
         send_start = time.time()
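As a sanity check on the size formula a few lines above: torch.randn defaults to float32, so a (1024, 1024) tensor has element_size() == 4 and numel() == 1_048_576, and the reported size is 4 * 1_048_576 / (1024 * 1024) = 4.0 MB.

import torch

t = torch.randn(1024, 1024)                        # float32 by default
mb = t.element_size() * t.numel() / (1024 * 1024)  # bytes -> MiB
assert t.element_size() == 4 and t.numel() == 1_048_576
print(f"{mb:.1f} MB")                              # prints: 4.0 MB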
@@ -352,12 +394,19 @@ async def run_scaling_test(
                 info = await ext.store_tensor(f"test_tensor_{i}", test_tensor)
                 if i == 0:
                     print(f" First extension stored: {info}")
+                # Force GPU sync after each send for accurate memory tracking
+                if use_cuda and CUDA_AVAILABLE:
+                    torch.cuda.synchronize()
             except Exception as e:
                 print(f" Failed to send to {ext_name}: {e}")
 
         send_time = time.time() - send_start
         print(f"Send completed in {send_time:.2f}s")
 
+        # Force final sync before measuring
+        if use_cuda and CUDA_AVAILABLE:
+            torch.cuda.synchronize()
+
         # Wait for memory to settle
         await asyncio.sleep(2)
 
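The repeated torch.cuda.synchronize() calls in this hunk exist because CUDA launches return as soon as the work is queued; as the diff's comments note, the syncs force the transfers to finish so the memory sample that follows reflects completed work. A minimal sketch of that ordering, with an arbitrary tensor size standing in for the real payload:

import torch

# Queue the work; the Python call returns before the GPU is done.
payload = torch.randn(4096, 4096, device="cuda")  # assumption: arbitrary demo size

# Block until the queue drains so a subsequent NVML / get_memory_usage() sample
# is taken against settled, not in-flight, GPU state.
torch.cuda.synchronize()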
@@ -413,6 +462,13 @@ async def run_scaling_test(
         print(f" RAM per extension: {result['ram_per_extension_mb']:.1f} MB")
         print(f" RAM for tensor transfer: {result['send_ram_delta_mb']:.1f} MB")
 
+        # Debug GPU memory tracking
+        print("\nGPU Memory Details:")
+        print(f" Before: {before_memory.get('gpu_used_mb', 0):.1f} MB")
+        print(f" After Load: {after_load_memory.get('gpu_used_mb', 0):.1f} MB")
+        print(f" After Send: {after_send_memory.get('gpu_used_mb', 0):.1f} MB")
+        print(f" Baseline: {self.memory_tracker.baseline_gpu_memory_mb:.1f} MB")
+
         # Show GPU memory if this is a GPU test
         if use_cuda and result["load_gpu_delta_mb"] > 0:
             print(f" GPU memory for tensor creation: {result['load_gpu_delta_mb']:.1f} MB")
@@ -426,6 +482,11 @@ async def run_scaling_test(
         # Cleanup
         print("\nCleaning up extensions...")
         manager.stop_all_extensions()
+        del test_tensor
+        gc.collect()
+        if CUDA_AVAILABLE:
+            torch.cuda.empty_cache()
+            torch.cuda.synchronize()
 
         # Wait for cleanup
         await asyncio.sleep(2)
@@ -850,12 +911,16 @@ def main():
         print("PyTorch not available. Install with: pip install torch")
         return 1
 
+    print(f"Running on: {platform.system()} {platform.release()}")
+
     if not CUDA_AVAILABLE:
         print("CUDA not available. GPU memory tests will be skipped.")
 
     if not NVML_AVAILABLE:
         print("nvidia-ml-py3 not installed. Install with: pip install nvidia-ml-py3")
         print("VRAM tracking will not be available.")
+    else:
+        print("NVML available for GPU memory tracking")
 
     # Determine what to test
     test_small = not args.large_only