
Commit f711f6a

Improve VRAM tracking
1 parent cf26aba

2 files changed: 67 additions & 22 deletions

benchmarks/benchmark.py

Lines changed: 26 additions & 6 deletions
@@ -243,16 +243,12 @@ def example_entrypoint():
         ("medium_tensor", (512, 512)),  # 262K elements, ~1MB
         ("large_tensor", (1024, 1024)),  # 1M elements, ~4MB
         ("image_8k", (3, 8192, 8192)),  # 201M elements, ~800MB (8K RGB image)
-        ("model_6gb", (40132, 40132)),  # 1.6B elements, ~6GB (modern LLM/diffusion model)
     ]
 
     # Create CPU tensors and add to test data
     for name, size in tensor_specs:
         try:
-            if name == "model_6gb":
-                print(f" Creating {name} tensor {size} (WARNING: This will use ~6GB RAM)...")
-            else:
-                print(f" Creating {name} tensor {size}...")
+            print(f" Creating {name} tensor {size}...")
 
             with torch.inference_mode():
                 tensor = torch.randn(*size)
@@ -265,7 +261,7 @@ def example_entrypoint():
             if not no_gpu and torch.cuda.is_available():
                 try:
                     # Skip GPU for very large tensors to avoid OOM
-                    if name == "image_8k" or name == "model_6gb":
+                    if name == "image_8k":
                         print(f" Creating GPU version of {name} (may use significant VRAM)...")
                     with torch.inference_mode():
                         gpu_tensor = tensor.cuda()
@@ -303,6 +299,30 @@ def example_entrypoint():
         except MemoryError as e:
             print(f" Skipping {name}: {e}")
 
+    # Add the 6GB model test at the very end if torch is available
+    if torch_available and not no_torch:
+        try:
+            print(" Creating model_6gb tensor (40132, 40132) (WARNING: This will use ~6GB RAM)...")
+            with torch.inference_mode():
+                model_6gb_tensor = torch.randn(40132, 40132)
+            test_data.append(("model_6gb_cpu", model_6gb_tensor))
+
+            size_gb = (model_6gb_tensor.numel() * 4) / (1024**3)
+            print(f" CPU tensor created successfully ({size_gb:.2f}GB)")
+
+            # Try GPU version if available
+            if not no_gpu and torch.cuda.is_available():
+                try:
+                    print(" Creating GPU version of model_6gb (may use significant VRAM)...")
+                    with torch.inference_mode():
+                        gpu_tensor = model_6gb_tensor.cuda()
+                    test_data.append(("model_6gb_gpu", gpu_tensor))
+                    print(" GPU tensor created successfully")
+                except RuntimeError as gpu_e:
+                    print(f" GPU tensor failed: {gpu_e}")
+        except RuntimeError as e:
+            print(f" Skipping model_6gb: {e}")
+
     from tests.test_benchmarks import BenchmarkRunner
 
     runner = BenchmarkRunner(warmup_runs=2 if quick else 5, benchmark_runs=100 if quick else 1000)
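
A quick sanity check on the "~6GB" figure, separate from the commit itself: a float32 tensor of shape (40132, 40132) holds about 1.61 billion elements at 4 bytes each, which is what the size_gb calculation in the diff reports. A minimal standalone estimate:

# Back-of-the-envelope footprint of the model_6gb tensor (float32 = 4 bytes per element).
import math

shape = (40132, 40132)
n_elements = math.prod(shape)            # 1,610,577,424 elements
size_gib = n_elements * 4 / (1024 ** 3)  # ~6.0 GiB
print(f"model_6gb needs ~{size_gib:.2f} GiB of RAM (plus a second copy in VRAM if moved to GPU)")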

benchmarks/memory_benchmark.py

Lines changed: 41 additions & 16 deletions
@@ -57,13 +57,17 @@ def __init__(self):
         self.process = psutil.Process()
         self.nvml_initialized = False
         self.gpu_handle = None
+        self.baseline_gpu_memory_mb = 0
 
         if NVML_AVAILABLE and nvml:
             try:
                 nvml.nvmlInit()
                 self.nvml_initialized = True
                 # Get the first GPU
                 self.gpu_handle = nvml.nvmlDeviceGetHandleByIndex(0)
+                # Store baseline GPU memory usage
+                mem_info = nvml.nvmlDeviceGetMemoryInfo(self.gpu_handle)
+                self.baseline_gpu_memory_mb = mem_info.used / 1024 / 1024
             except Exception as e:
                 print(f"Failed to initialize NVML: {e}")
                 self.nvml_initialized = False
@@ -113,34 +117,55 @@ def get_memory_usage(self) -> dict[str, float]:
         except Exception as e:
             print(f"Error getting RAM usage: {e}")
 
-        # Get total GPU memory usage (not per-process)
+        # Get GPU memory usage - use total system VRAM since extensions run in separate processes
        if self.nvml_initialized and self.gpu_handle:
             try:
                 # Get total GPU memory info
                 mem_info = nvml.nvmlDeviceGetMemoryInfo(self.gpu_handle)
-                memory_info["gpu_used_mb"] = mem_info.used / 1024 / 1024
+                current_used_mb = mem_info.used / 1024 / 1024
+                memory_info["gpu_used_mb"] = current_used_mb
                 memory_info["gpu_total_mb"] = mem_info.total / 1024 / 1024
-                memory_info["total_vram_mb"] = memory_info["gpu_used_mb"]
+                memory_info["total_vram_mb"] = current_used_mb
 
-                # Try to get per-process info (might return 0)
-                try:
-                    processes = nvml.nvmlDeviceGetComputeRunningProcesses(self.gpu_handle)
-                    our_pids = set(self.get_process_tree_pids())
-
-                    for proc in processes:
-                        if proc.pid in our_pids and proc.usedGpuMemory is not None:
-                            vram_mb = proc.usedGpuMemory / 1024 / 1024
-                            if proc.pid == self.process.pid:
-                                memory_info["host_vram_mb"] = vram_mb
-                except Exception as e:
-                    # Per-process tracking failed, use total GPU memory instead
-                    print(f"Warning: Per-process GPU tracking failed: {e}", file=sys.stderr)
+                # Calculate VRAM usage relative to baseline (captures all processes)
+                # This is more reliable than per-process tracking, especially on Windows
+                vram_delta = current_used_mb - self.baseline_gpu_memory_mb
+                memory_info["host_vram_mb"] = max(0, vram_delta)
 
             except Exception as e:
                 print(f"Error getting GPU memory usage: {e}")
 
+        # Fallback: try PyTorch CUDA memory for current process if NVML failed
+        elif CUDA_AVAILABLE and torch.cuda.is_available():
+            try:
+                # This only captures current process, but better than nothing
+                allocated_mb = torch.cuda.memory_allocated() / 1024 / 1024
+                reserved_mb = torch.cuda.memory_reserved() / 1024 / 1024
+
+                memory_info["host_vram_mb"] = allocated_mb
+                memory_info["total_vram_mb"] = allocated_mb
+                memory_info["pytorch_reserved_mb"] = reserved_mb
+
+                print(
+                    "Warning: Using PyTorch CUDA memory (current process only): "
+                    + f"{allocated_mb:.1f} MB allocated",
+                    file=sys.stderr,
+                )
+
+            except Exception as e:
+                print(f"Error getting PyTorch CUDA memory: {e}")
+
         return memory_info
 
+    def reset_baseline(self):
+        """Reset the baseline GPU memory measurement."""
+        if self.nvml_initialized and self.gpu_handle:
+            try:
+                mem_info = nvml.nvmlDeviceGetMemoryInfo(self.gpu_handle)
+                self.baseline_gpu_memory_mb = mem_info.used / 1024 / 1024
+            except Exception as e:
+                print(f"Error resetting GPU memory baseline: {e}")
+
     def __del__(self):
         """Cleanup NVML on deletion."""
         if self.nvml_initialized:
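
The measurement strategy above boils down to a device-wide baseline delta: record total VRAM in use when the tracker starts, then attribute any growth to the benchmarked workload, even when the allocation happens in a child process that per-process NVML queries may miss. A minimal standalone sketch of the same idea, assuming the pynvml package (the NVML binding the benchmark imports as nvml):

import pynvml

pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)  # first GPU, as in the benchmark

# Record the baseline before the workload starts.
baseline_mb = pynvml.nvmlDeviceGetMemoryInfo(handle).used / 1024 / 1024

# ... run the GPU workload here, possibly in a separate process ...

# Device-wide usage minus the baseline approximates VRAM attributable to the run.
current_mb = pynvml.nvmlDeviceGetMemoryInfo(handle).used / 1024 / 1024
vram_delta_mb = max(0, current_mb - baseline_mb)
print(f"VRAM attributable to this run: ~{vram_delta_mb:.1f} MB")

pynvml.nvmlShutdown()

The trade-off is that any unrelated process allocating or freeing VRAM during the run skews the delta, which is why the tracker clamps negative deltas to zero and exposes reset_baseline() to re-zero between benchmark runs.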
