
Commit cf26aba

Make sure test tensors are in inference mode

1 parent f5f9f70
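For context, tensors created with torch.randn() or moved with .cuda() normally carry autograd metadata; torch.inference_mode() suppresses that tracking for everything allocated inside the block, which is why each test-tensor creation below is wrapped in it. A minimal sketch of the behavior (the shape here is illustrative, not taken from the diff):

import torch

# Tensors allocated under inference_mode carry no autograd state.
with torch.inference_mode():
    t = torch.randn(100, 100)

assert t.requires_grad is False
assert t.is_inference()  # flagged as an inference tensor

One caveat: inference tensors reject in-place updates outside an inference_mode block, so any later test code that mutates these tensors would need to clone() them first.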

5 files changed: +49, -30 lines


benchmarks/benchmark.py (6 additions, 3 deletions)

@@ -254,7 +254,8 @@ def example_entrypoint():
         else:
             print(f" Creating {name} tensor {size}...")

-            tensor = torch.randn(*size)
+            with torch.inference_mode():
+                tensor = torch.randn(*size)
             test_data.append((f"{name}_cpu", tensor))

             size_gb = (tensor.numel() * 4) / (1024**3)
@@ -266,11 +267,13 @@ def example_entrypoint():
             # Skip GPU for very large tensors to avoid OOM
             if name == "image_8k" or name == "model_6gb":
                 print(f" Creating GPU version of {name} (may use significant VRAM)...")
-                gpu_tensor = tensor.cuda()
+                with torch.inference_mode():
+                    gpu_tensor = tensor.cuda()
                 test_data.append((f"{name}_gpu", gpu_tensor))
                 print(" GPU tensor created successfully")
             else:
-                gpu_tensor = tensor.cuda()
+                with torch.inference_mode():
+                    gpu_tensor = tensor.cuda()
                 test_data.append((f"{name}_gpu", gpu_tensor))
                 print(" GPU tensor created successfully")
         except RuntimeError as gpu_e:

benchmarks/memory_benchmark.py (9 additions, 5 deletions)

@@ -309,10 +309,11 @@ async def run_scaling_test(

     # Create test tensor
     print(f"Creating test tensor {test_tensor_size}...")
-    if use_cuda and CUDA_AVAILABLE:
-        test_tensor = torch.randn(*test_tensor_size, device="cuda")
-    else:
-        test_tensor = torch.randn(*test_tensor_size)
+    with torch.inference_mode():
+        if use_cuda and CUDA_AVAILABLE:
+            test_tensor = torch.randn(*test_tensor_size, device="cuda")
+        else:
+            test_tensor = torch.randn(*test_tensor_size)

     tensor_size_mb = test_tensor.element_size() * test_tensor.numel() / (1024 * 1024)
     print(f"Tensor size: {tensor_size_mb:.1f} MB on {test_tensor.device}")
@@ -479,7 +480,10 @@ async def run_large_tensor_sharing_test(
     side = int(num_elements**0.5)

     print(f"Creating {tensor_gb}GB tensor ({side}x{side}) on {device_name}...")
-    large_tensor = torch.randn(side, side, device="cuda") if use_cuda else torch.randn(side, side)
+    with torch.inference_mode():
+        large_tensor = (
+            torch.randn(side, side, device="cuda") if use_cuda else torch.randn(side, side)
+        )
     actual_size_mb = large_tensor.element_size() * large_tensor.numel() / (1024 * 1024)
     print(f"Actual tensor size: {actual_size_mb:.1f} MB on {large_tensor.device}")
benchmarks/simple_benchmark.py (9 additions, 7 deletions)

@@ -122,20 +122,22 @@ class CustomConfig(TypedDict):
     for name, size in tensor_specs:
         try:
             print(f" Creating {name} tensor {size}...")
-            tensor = torch.randn(*size)
+            with torch.inference_mode():
+                tensor = torch.randn(*size)
             test_data.append((name, tensor))
             print(f" {name} created successfully ({tensor.numel() * 4 / (1024**3):.2f}GB)")
         except RuntimeError as e:
             print(f" Skipping {name}: {e}")

     if include_large_tensors:
         print(" Including very large tensors (this will use significant memory)...")
-        test_data.extend(
-            [
-                ("huge_tensor", torch.randn(4096, 4096)),  # ~64MB
-                ("image_4k", torch.randn(3, 4096, 4096)),  # ~200MB (4K RGB image)
-            ]
-        )
+        with torch.inference_mode():
+            test_data.extend(
+                [
+                    ("huge_tensor", torch.randn(4096, 4096)),  # ~64MB
+                    ("image_4k", torch.randn(3, 4096, 4096)),  # ~200MB (4K RGB image)
+                ]
+            )
         # 8K image would be ~800MB, only add if explicitly requested
         print(" (8K image tensor skipped - would use ~800MB)")
     else:

tests/test_benchmarks.py (7 additions, 4 deletions)

@@ -394,7 +394,8 @@ async def test_torch_tensor_benchmarks(self):
         print("=" * 60)

         # Small tensor (CPU)
-        small_tensor_cpu = torch.randn(100, 100)  # ~40KB
+        with torch.inference_mode():
+            small_tensor_cpu = torch.randn(100, 100)  # ~40KB

         await self.runner.run_benchmark(
             "Small Tensor CPU - Local Baseline",
@@ -406,7 +407,8 @@ async def test_torch_tensor_benchmarks(self):
         )

         # Large tensor (CPU)
-        large_tensor_cpu = torch.randn(1024, 1024)  # ~4MB
+        with torch.inference_mode():
+            large_tensor_cpu = torch.randn(1024, 1024)  # ~4MB

         await self.runner.run_benchmark(
             "Large Tensor CPU - RPC Call",
@@ -416,8 +418,9 @@ async def test_torch_tensor_benchmarks(self):

         # GPU tests if available
         if CUDA_AVAILABLE:
-            small_tensor_gpu = small_tensor_cpu.cuda()
-            large_tensor_gpu = large_tensor_cpu.cuda()
+            with torch.inference_mode():
+                small_tensor_gpu = small_tensor_cpu.cuda()
+                large_tensor_gpu = large_tensor_cpu.cuda()

             await self.runner.run_benchmark(
                 "Small Tensor GPU - RPC Call",

tests/test_torch_tensor_integration.py (18 additions, 11 deletions)

@@ -267,7 +267,8 @@ def example_entrypoint() -> ExampleExtension:
     # Test 1: Simple CPU tensor
     import torch

-    cpu_tensor = torch.randn(3, 4)
+    with torch.inference_mode():
+        cpu_tensor = torch.randn(3, 4)

     # Call extension method
     result_tensor = await extension.do_stuff({"operation": "process_tensor", "tensor": cpu_tensor})
@@ -284,7 +285,8 @@ def example_entrypoint() -> ExampleExtension:
     assert tensor_info["is_cuda"] is False

     # Test 2: Multiple tensors
-    tensors = [torch.ones(2, 2), torch.zeros(2, 2), torch.eye(2)]
+    with torch.inference_mode():
+        tensors = [torch.ones(2, 2), torch.zeros(2, 2), torch.eye(2)]
     stacked_result = await extension.do_stuff(
         {"operation": "test_multiple_tensors", "tensors": tensors}
     )
@@ -415,7 +417,8 @@ def example_entrypoint() -> ExampleExtension:
     import torch

     # Test 1: Basic tensor processing
-    input_tensor = torch.randn(4, 5)
+    with torch.inference_mode():
+        input_tensor = torch.randn(4, 5)
     normalized = await extension.do_stuff(
         {"operation": "process_tensor_isolated", "tensor": input_tensor}
     )
@@ -428,11 +431,12 @@ def example_entrypoint() -> ExampleExtension:
     assert abs(norm_info["output_std"] - 1.0) < 1e-6  # Should be close to 1

     # Test 2: Different dtypes
-    tensors_dict = {
-        "float32": torch.randn(2, 3),
-        "int64": torch.randint(0, 10, (2, 3)),
-        "bool": torch.tensor([[True, False], [False, True]]),
-    }
+    with torch.inference_mode():
+        tensors_dict = {
+            "float32": torch.randn(2, 3),
+            "int64": torch.randint(0, 10, (2, 3)),
+            "bool": torch.tensor([[True, False], [False, True]]),
+        }

     dtype_results = await extension.do_stuff(
         {"operation": "test_different_dtypes", "tensors_dict": tensors_dict}
@@ -540,7 +544,8 @@ def example_entrypoint() -> ExampleExtension:
     import torch

     # Test 1: GPU tensor operations
-    gpu_tensor = torch.randn(5, 5).cuda()
+    with torch.inference_mode():
+        gpu_tensor = torch.randn(5, 5).cuda()
     gpu_result = await extension.do_stuff({"operation": "process_gpu_tensor", "tensor": gpu_tensor})

     assert isinstance(gpu_result, torch.Tensor)
@@ -552,7 +557,8 @@ def example_entrypoint() -> ExampleExtension:
     assert "cuda" in gpu_info["device"]

     # Test 2: CPU to GPU transfer
-    cpu_tensor = torch.ones(3, 3)
+    with torch.inference_mode():
+        cpu_tensor = torch.ones(3, 3)
     transferred_result = await extension.do_stuff(
         {"operation": "transfer_between_devices", "tensor": cpu_tensor}
     )
@@ -637,7 +643,8 @@ def example_entrypoint() -> ExampleExtension:
     import torch

     # Test GPU operations
-    gpu_tensor = torch.randn(4, 4).cuda()
+    with torch.inference_mode():
+        gpu_tensor = torch.randn(4, 4).cuda()
     squared_result = await extension.do_stuff(
         {"operation": "process_gpu_operations", "tensor": gpu_tensor}
     )
