Commit b6e1085

Add one-click benchmarks for Linux and Windows
1 parent f711f6a commit b6e1085

5 files changed: +1093 / -3 lines


BENCHMARK_INSTRUCTIONS.md

Lines changed: 117 additions & 0 deletions
@@ -0,0 +1,117 @@
# PyIsolate Benchmark Instructions

Thank you for helping collect benchmark data! This document explains how to run the benchmarks on your system.

## Overview

The benchmark scripts will:
1. Install necessary tools and dependencies
2. Run performance benchmarks to measure RPC overhead
3. Run memory benchmarks to measure RAM and VRAM usage
4. Collect system information
5. Save all results to a single file

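For context, "RPC overhead" in step 2 is the per-call cost of a host-to-extension round trip. The sketch below only illustrates what is being measured, not the code the scripts actually run; `ext` and its `echo()` method are hypothetical stand-ins for an extension proxy.

```python
# Illustration only: average the cost of N round trips through an async proxy.
# `ext` and `ext.echo()` are hypothetical placeholders, not pyisolate APIs.
import time


async def measure_rpc_overhead(ext, n_calls: int = 1000) -> float:
    start = time.perf_counter()
    for _ in range(n_calls):
        await ext.echo("ping")  # one host -> extension -> host round trip
    return (time.perf_counter() - start) / n_calls  # mean seconds per call
```
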
## Prerequisites

- Python 3.9 or higher
- Internet connection (for downloading dependencies)
- At least 16GB of RAM recommended (8GB minimum)
- For GPU benchmarks: NVIDIA GPU with CUDA support (optional)

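If you want to confirm the Python and GPU prerequisites before starting, a quick check along these lines is enough (an optional convenience, not something the scripts require; the GPU part assumes PyTorch is already installed):

```python
# Optional prerequisite check. The GPU portion needs PyTorch; if it is not
# installed yet, the benchmark script will install it for you later.
import sys

assert sys.version_info >= (3, 9), f"Python 3.9+ required, found {sys.version.split()[0]}"
print(f"Python OK: {sys.version.split()[0]}")

try:
    import torch
    print(f"PyTorch {torch.__version__}, CUDA available: {torch.cuda.is_available()}")
except ImportError:
    print("PyTorch not installed yet (the benchmark script handles this)")
```
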
## Instructions

### Windows Users

1. Download or clone this repository to your local machine
2. Open Command Prompt (cmd) or PowerShell
3. Navigate to the pyisolate directory:
   ```
   cd path\to\pyisolate
   ```
4. Run the benchmark script:
   ```
   run_benchmarks_windows.bat
   ```
5. Follow the on-screen instructions
6. When complete, send back the file named `benchmark_results_COMPUTERNAME_TIMESTAMP.txt`

### Linux/macOS Users

1. Download or clone this repository to your local machine
2. Open Terminal
3. Navigate to the pyisolate directory:
   ```
   cd /path/to/pyisolate
   ```
4. Run the benchmark script:
   ```
   ./run_benchmarks_linux.sh
   ```
5. Follow the on-screen instructions
6. When complete, send back the file named `benchmark_results_hostname_timestamp.txt`

## What to Expect

- **First Run**: The script will prompt you to install `uv` if it's not already installed
- **Installation**: The script will automatically install PyTorch with appropriate CUDA support
- **Duration**: The full benchmark suite takes approximately 10-20 minutes
- **Memory Usage**: Some tests may use significant RAM (up to 6GB) and VRAM
- **Errors**: If tests fail due to out-of-memory errors, this is expected and will be noted in the results

## Troubleshooting

### "uv not found" Error

The script requires `uv` for fast package management. Install it using:

**Windows (PowerShell)**:
```powershell
powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
```

**Linux/macOS**:
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
```

### PyTorch Installation Issues

If PyTorch installation fails:
1. The script will try to install a CPU-only version automatically
2. You can manually install PyTorch from https://pytorch.org/get-started/locally/
3. The benchmarks will still run (with some GPU tests skipped)

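If you do install PyTorch manually, a quick way to confirm which build you ended up with (CPU-only vs. CUDA) before re-running the benchmarks is:

```python
# Optional check after a manual PyTorch install.
# torch.version.cuda is None for CPU-only builds.
import torch

print(f"PyTorch version: {torch.__version__}")
print(f"Built with CUDA: {torch.version.cuda}")
print(f"CUDA available at runtime: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
```
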
### Out of Memory Errors

If you see "CUDA out of memory" or similar errors:
- This is expected for systems with limited VRAM
- The script will continue and note which tests failed
- Results are still valuable!

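For the curious, the way a benchmark typically keeps going after an out-of-memory failure is to catch the error and record it instead of crashing. The snippet below sketches that general pattern; it is not the exact code in this repository, and `run_gpu_test` is a hypothetical placeholder for a single test case.

```python
# General pattern for recording OOM failures instead of aborting the whole run.
# `run_gpu_test` is a hypothetical placeholder for one benchmark case.
import torch

def run_with_oom_guard(name, run_gpu_test, results):
    try:
        results[name] = run_gpu_test()
    except RuntimeError as e:
        if "out of memory" in str(e).lower():
            torch.cuda.empty_cache()  # release what we can and continue
            results[name] = f"skipped (OOM): {e}"
        else:
            raise
```
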
### Permission Denied (Linux/macOS)

If you get "permission denied" when running the script:
```bash
chmod +x run_benchmarks_linux.sh
./run_benchmarks_linux.sh
```

## What Data is Collected

The benchmark results file contains:
- System specifications (OS, CPU, RAM, GPU)
- Python and package versions
- Performance benchmark results (RPC call timings)
- Memory usage measurements
- Any errors encountered during testing

No personal data is collected.

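The system details are the sort of thing you could print yourself from the standard library and PyTorch; roughly, the file records information like the following (a sketch of what is captured, not the exact collection code):

```python
# Roughly the kind of system information that ends up in the results file.
import platform
import sys

print(f"OS: {platform.system()} {platform.release()} ({platform.machine()})")
print(f"Python: {sys.version.split()[0]}")

try:
    import torch
    print(f"PyTorch: {torch.__version__}")
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
except ImportError:
    print("PyTorch: not installed")
```
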
## Questions?

If you encounter any issues not covered here, please include:
1. The complete error message
2. Your operating system and version
3. Any steps you tried to resolve the issue

Thank you for your help in benchmarking PyIsolate!

benchmarks/memory_benchmark.py

Lines changed: 68 additions & 3 deletions
@@ -9,6 +9,7 @@
 import argparse
 import asyncio
 import gc
+import platform
 import sys
 import time
 from pathlib import Path
@@ -58,6 +59,7 @@ def __init__(self):
         self.nvml_initialized = False
         self.gpu_handle = None
         self.baseline_gpu_memory_mb = 0
+        self.platform = platform.system()
 
         if NVML_AVAILABLE and nvml:
             try:
@@ -68,8 +70,12 @@ def __init__(self):
                 # Store baseline GPU memory usage
                 mem_info = nvml.nvmlDeviceGetMemoryInfo(self.gpu_handle)
                 self.baseline_gpu_memory_mb = mem_info.used / 1024 / 1024
+                print(
+                    f"NVML initialized on {self.platform}. "
+                    f"Initial GPU memory: {self.baseline_gpu_memory_mb:.1f} MB"
+                )
             except Exception as e:
-                print(f"Failed to initialize NVML: {e}")
+                print(f"Failed to initialize NVML on {self.platform}: {e}")
                 self.nvml_initialized = False
 
     def get_process_tree_pids(self) -> list[int]:
@@ -132,6 +138,15 @@ def get_memory_usage(self) -> dict[str, float]:
                 vram_delta = current_used_mb - self.baseline_gpu_memory_mb
                 memory_info["host_vram_mb"] = max(0, vram_delta)
 
+                # Debug output for Windows
+                if self.platform == "Windows":
+                    print(
+                        f"[DEBUG Windows] Current GPU: {current_used_mb:.1f} MB, "
+                        f"Baseline: {self.baseline_gpu_memory_mb:.1f} MB, "
+                        f"Delta: {vram_delta:.1f} MB",
+                        file=sys.stderr,
+                    )
+
             except Exception as e:
                 print(f"Error getting GPU memory usage: {e}")
 
@@ -162,9 +177,15 @@ def reset_baseline(self):
         if self.nvml_initialized and self.gpu_handle:
             try:
                 mem_info = nvml.nvmlDeviceGetMemoryInfo(self.gpu_handle)
+                old_baseline = self.baseline_gpu_memory_mb
                 self.baseline_gpu_memory_mb = mem_info.used / 1024 / 1024
+                print(
+                    f"[DEBUG {self.platform}] Reset baseline from {old_baseline:.1f} MB "
+                    f"to {self.baseline_gpu_memory_mb:.1f} MB",
+                    file=sys.stderr,
+                )
             except Exception as e:
-                print(f"Error resetting GPU memory baseline: {e}")
+                print(f"Error resetting GPU memory baseline on {self.platform}: {e}")
 
     def __del__(self):
         """Cleanup NVML on deletion."""
@@ -297,11 +318,23 @@ async def run_scaling_test(
             ExtensionManagerConfig(venv_root_path=str(self.test_base.test_root / "extension-venvs")),
         )
 
-        # Measure memory before creating extensions
+        # Clean up and reset baseline before measuring
         gc.collect()
         if CUDA_AVAILABLE:
             torch.cuda.empty_cache()
+            torch.cuda.synchronize()  # Ensure all operations complete
+
+        # Reset GPU memory baseline for this test
+        self.memory_tracker.reset_baseline()
+
+        # Wait a moment for memory to settle
+        await asyncio.sleep(1)
+
         before_memory = self.memory_tracker.get_memory_usage()
+        print(
+            f"Baseline GPU memory: {before_memory.get('gpu_used_mb', 0):.1f} MB "
+            f"(baseline: {self.memory_tracker.baseline_gpu_memory_mb:.1f} MB)"
+        )
 
         # Create and load extensions
         print(f"Creating {num_extensions} extensions...")
@@ -337,12 +370,21 @@ async def run_scaling_test(
         with torch.inference_mode():
             if use_cuda and CUDA_AVAILABLE:
                 test_tensor = torch.randn(*test_tensor_size, device="cuda")
+                torch.cuda.synchronize()  # Ensure tensor creation completes
             else:
                 test_tensor = torch.randn(*test_tensor_size)
 
             tensor_size_mb = test_tensor.element_size() * test_tensor.numel() / (1024 * 1024)
             print(f"Tensor size: {tensor_size_mb:.1f} MB on {test_tensor.device}")
 
+            # Check memory after tensor creation
+            if use_cuda and CUDA_AVAILABLE:
+                post_tensor_memory = self.memory_tracker.get_memory_usage()
+                print(
+                    f"GPU memory after tensor creation: {post_tensor_memory.get('gpu_used_mb', 0):.1f} MB "
+                    f"(delta: {post_tensor_memory.get('host_vram_mb', 0):.1f} MB)"
+                )
+
             # Send tensor to all extensions
             print(f"Sending tensor to {num_extensions} extensions...")
             send_start = time.time()
@@ -352,12 +394,19 @@ async def run_scaling_test(
                     info = await ext.store_tensor(f"test_tensor_{i}", test_tensor)
                     if i == 0:
                         print(f" First extension stored: {info}")
+                    # Force GPU sync after each send for accurate memory tracking
+                    if use_cuda and CUDA_AVAILABLE:
+                        torch.cuda.synchronize()
                 except Exception as e:
                     print(f" Failed to send to {ext_name}: {e}")
 
             send_time = time.time() - send_start
             print(f"Send completed in {send_time:.2f}s")
 
+            # Force final sync before measuring
+            if use_cuda and CUDA_AVAILABLE:
+                torch.cuda.synchronize()
+
             # Wait for memory to settle
             await asyncio.sleep(2)
 
@@ -413,6 +462,13 @@ async def run_scaling_test(
         print(f" RAM per extension: {result['ram_per_extension_mb']:.1f} MB")
         print(f" RAM for tensor transfer: {result['send_ram_delta_mb']:.1f} MB")
 
+        # Debug GPU memory tracking
+        print("\nGPU Memory Details:")
+        print(f" Before: {before_memory.get('gpu_used_mb', 0):.1f} MB")
+        print(f" After Load: {after_load_memory.get('gpu_used_mb', 0):.1f} MB")
+        print(f" After Send: {after_send_memory.get('gpu_used_mb', 0):.1f} MB")
+        print(f" Baseline: {self.memory_tracker.baseline_gpu_memory_mb:.1f} MB")
+
         # Show GPU memory if this is a GPU test
         if use_cuda and result["load_gpu_delta_mb"] > 0:
             print(f" GPU memory for tensor creation: {result['load_gpu_delta_mb']:.1f} MB")
@@ -426,6 +482,11 @@ async def run_scaling_test(
         # Cleanup
         print("\nCleaning up extensions...")
         manager.stop_all_extensions()
+        del test_tensor
+        gc.collect()
+        if CUDA_AVAILABLE:
+            torch.cuda.empty_cache()
+            torch.cuda.synchronize()
 
         # Wait for cleanup
         await asyncio.sleep(2)
@@ -850,12 +911,16 @@ def main():
         print("PyTorch not available. Install with: pip install torch")
         return 1
 
+    print(f"Running on: {platform.system()} {platform.release()}")
+
     if not CUDA_AVAILABLE:
         print("CUDA not available. GPU memory tests will be skipped.")
 
     if not NVML_AVAILABLE:
         print("nvidia-ml-py3 not installed. Install with: pip install nvidia-ml-py3")
         print("VRAM tracking will not be available.")
+    else:
+        print("NVML available for GPU memory tracking")
 
     # Determine what to test
     test_small = not args.large_only

0 commit comments
