
Commit 85fb2e3

hmellor authored and khluu committed
Remove default values from InitVars so that they're not stored (#29859)
Signed-off-by: Harry Mellor <[email protected]>
(cherry picked from commit 951445a)
1 parent d8c6210 commit 85fb2e3
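Why this matters: SchedulerConfig is a pydantic-validated dataclass (the new test below expects a ValidationError), and in a dataclass an InitVar that carries a default leaves that default sitting on the class, so instance.max_model_len still "finds" a value through class-attribute lookup even though nothing was ever stored on the instance. Dropping the defaults removes that fallback and forces every call site to pass max_model_len and is_encoder_decoder explicitly, usually derived from the ModelConfig. A minimal stdlib-dataclasses sketch of the pattern (illustrative only; the real SchedulerConfig has many more fields, and chunked_prefill_enabled here is just a stand-in for derived state):

from dataclasses import InitVar, dataclass, field


@dataclass
class SchedulerConfigSketch:
    # InitVars are passed to __init__/__post_init__ but never stored as
    # instance attributes. With no default, callers must supply them.
    max_model_len: InitVar[int]
    is_encoder_decoder: InitVar[bool]
    max_num_batched_tokens: int = 2048
    # Derived state computed from the InitVars in __post_init__.
    chunked_prefill_enabled: bool = field(init=False, default=False)

    def __post_init__(self, max_model_len: int, is_encoder_decoder: bool) -> None:
        self.chunked_prefill_enabled = (
            not is_encoder_decoder and self.max_num_batched_tokens < max_model_len
        )


cfg = SchedulerConfigSketch(max_model_len=4096, is_encoder_decoder=False)
assert cfg.chunked_prefill_enabled
assert not hasattr(cfg, "max_model_len")  # not stored, and no class default to fall back to

With the defaults removed, every construction site in this commit now passes the two values explicitly, as the per-file changes below show.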

File tree

17 files changed: +139 -77 lines changed


benchmarks/benchmark_ngram_proposer.py

Lines changed: 4 additions & 1 deletion
@@ -108,7 +108,10 @@ def benchmark_batched_propose(args):
         device_config=DeviceConfig(device=current_platform.device_type),
         parallel_config=ParallelConfig(),
         load_config=LoadConfig(),
-        scheduler_config=SchedulerConfig(),
+        scheduler_config=SchedulerConfig(
+            max_model_len=model_config.max_model_len,
+            is_encoder_decoder=model_config.is_encoder_decoder,
+        ),
     )
 
     # monkey patch vllm.v1.worker.gpu_model_runner.get_pp_group

tests/compile/test_fusion_attn.py

Lines changed: 10 additions & 5 deletions
@@ -318,13 +318,18 @@ def test_attention_quant_pattern(
     torch.set_default_dtype(dtype)
     torch.manual_seed(42)
 
+    model_config = ModelConfig(
+        model=model_name,
+        max_model_len=2048,
+        dtype=dtype,
+    )
     vllm_config = VllmConfig(
-        model_config=ModelConfig(
-            model=model_name,
-            max_model_len=2048,
-            dtype=dtype,
+        model_config=model_config,
+        scheduler_config=SchedulerConfig(
+            max_num_seqs=1024,
+            max_model_len=model_config.max_model_len,
+            is_encoder_decoder=model_config.is_encoder_decoder,
         ),
-        scheduler_config=SchedulerConfig(max_num_seqs=1024),
         compilation_config=CompilationConfig(
             mode=CompilationMode.VLLM_COMPILE,
             custom_ops=custom_ops_list,

tests/lora/test_worker.py

Lines changed: 17 additions & 8 deletions
@@ -33,14 +33,16 @@ def set_active_loras(worker: Worker, lora_requests: list[LoRARequest]):
         lora_requests, lora_mapping
     )
 
+    model_config = ModelConfig(
+        MODEL_PATH,
+        seed=0,
+        dtype="float16",
+        max_model_len=127,
+        enforce_eager=True,
+    )
+
     vllm_config = VllmConfig(
-        model_config=ModelConfig(
-            MODEL_PATH,
-            seed=0,
-            dtype="float16",
-            max_model_len=127,
-            enforce_eager=True,
-        ),
+        model_config=model_config,
         load_config=LoadConfig(
             download_dir=None,
             load_format="dummy",
@@ -50,7 +52,14 @@ def set_active_loras(worker: Worker, lora_requests: list[LoRARequest]):
             tensor_parallel_size=1,
             data_parallel_size=1,
         ),
-        scheduler_config=SchedulerConfig("generate", 32, 32, 32),
+        scheduler_config=SchedulerConfig(
+            max_model_len=model_config.max_model_len,
+            is_encoder_decoder=model_config.is_encoder_decoder,
+            runner_type="generate",
+            max_num_batched_tokens=32,
+            max_num_seqs=32,
+            max_num_partial_prefills=32,
+        ),
         device_config=DeviceConfig("cuda"),
         cache_config=CacheConfig(
             block_size=16,

tests/test_config.py

Lines changed: 13 additions & 0 deletions
@@ -6,12 +6,14 @@
 from unittest.mock import patch
 
 import pytest
+from pydantic import ValidationError
 
 from vllm.compilation.backends import VllmBackend
 from vllm.config import (
     CompilationConfig,
     ModelConfig,
     PoolerConfig,
+    SchedulerConfig,
     VllmConfig,
     update_config,
 )
@@ -1095,3 +1097,14 @@ def test_vllm_config_explicit_overrides():
     # Other fields should still use defaults
     assert config.compilation_config.mode == CompilationMode.VLLM_COMPILE
     assert config.compilation_config.cudagraph_mode == CUDAGraphMode.FULL_AND_PIECEWISE
+
+
+def test_scheduler_config_init():
+    with pytest.raises(ValidationError):
+        # Positional InitVars missing
+        # (InitVars cannot have defaults otherwise they will become attributes)
+        SchedulerConfig()
+
+    with pytest.raises(AttributeError):
+        # InitVar does not become an attribute
+        print(SchedulerConfig.default_factory().max_model_len)
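The new test pins down both halves of the behaviour: construction without the InitVars is rejected, and the InitVars never become attributes. At call sites the resulting pattern, repeated throughout the files in this commit, looks like the sketch below (the model name is only a placeholder):

from vllm.config import ModelConfig, SchedulerConfig

model_config = ModelConfig(model="facebook/opt-125m", max_model_len=2048)

# The InitVars no longer have defaults, so they must be passed explicitly,
# typically derived from the ModelConfig.
scheduler_config = SchedulerConfig(
    max_num_batched_tokens=1024,
    max_model_len=model_config.max_model_len,
    is_encoder_decoder=model_config.is_encoder_decoder,
)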

tests/v1/attention/utils.py

Lines changed: 2 additions & 0 deletions
@@ -185,6 +185,8 @@ def create_vllm_config(
         max_num_seqs=max_num_seqs,
         max_num_batched_tokens=max_num_batched_tokens,
         enable_chunked_prefill=enable_chunked_prefill,
+        max_model_len=model_config.max_model_len,
+        is_encoder_decoder=model_config.is_encoder_decoder,
     )
 
     device_config = DeviceConfig()

tests/v1/core/test_kv_cache_utils.py

Lines changed: 9 additions & 2 deletions
@@ -1128,7 +1128,11 @@ def test_estimate_max_model_len(model_id, max_model_len, want_estimated_max_len)
         dtype="float16",
         max_model_len=max_model_len,
     )
-    scheduler_config = SchedulerConfig(max_num_batched_tokens=32768)
+    scheduler_config = SchedulerConfig(
+        max_num_batched_tokens=32768,
+        max_model_len=model_config.max_model_len,
+        is_encoder_decoder=model_config.is_encoder_decoder,
+    )
 
     vllm_config = VllmConfig(
         model_config=model_config,
@@ -1163,7 +1167,10 @@ def test_get_max_concurrency_for_kv_cache_config():
         max_model_len=max_model_len,
     )
     scheduler_config = SchedulerConfig(
-        max_num_batched_tokens=1024, enable_chunked_prefill=True
+        max_num_batched_tokens=1024,
+        enable_chunked_prefill=True,
+        max_model_len=model_config.max_model_len,
+        is_encoder_decoder=model_config.is_encoder_decoder,
     )
 
     vllm_config = VllmConfig(

tests/v1/core/test_scheduler.py

Lines changed: 7 additions & 6 deletions
@@ -1508,6 +1508,12 @@ def create_scheduler_with_priority(
     Returns:
       {class}`Scheduler` instance with priority scheduling
     """
+    model_config = ModelConfig(
+        model=model,
+        trust_remote_code=True,
+        dtype="float16",
+        seed=42,
+    )
     if max_model_len is None:
         max_model_len = max_num_batched_tokens
     scheduler_config = SchedulerConfig(
@@ -1517,14 +1523,9 @@ def create_scheduler_with_priority(
         long_prefill_token_threshold=long_prefill_token_threshold,
         disable_chunked_mm_input=disable_chunked_mm_input,
         enable_chunked_prefill=True,
+        is_encoder_decoder=model_config.is_encoder_decoder,
         policy="priority",  # Enable priority scheduling
     )
-    model_config = ModelConfig(
-        model=model,
-        trust_remote_code=True,
-        dtype="float16",
-        seed=42,
-    )
     # Cache config, optionally force APC
     cache_config = CacheConfig(
         block_size=block_size,

tests/v1/core/utils.py

Lines changed: 8 additions & 7 deletions
@@ -69,6 +69,13 @@ def create_scheduler(
     Returns:
       {class}`Scheduler` instance
     """
+    model_config = ModelConfig(
+        model=model,
+        trust_remote_code=True,
+        dtype="float16",
+        seed=42,
+        skip_tokenizer_init=skip_tokenizer_init,
+    )
     if max_model_len is None:
         max_model_len = max_num_batched_tokens
     scheduler_config = SchedulerConfig(
@@ -79,13 +86,7 @@ def create_scheduler(
         disable_chunked_mm_input=disable_chunked_mm_input,
         enable_chunked_prefill=enable_chunked_prefill,
         async_scheduling=async_scheduling,
-    )
-    model_config = ModelConfig(
-        model=model,
-        trust_remote_code=True,
-        dtype="float16",
-        seed=42,
-        skip_tokenizer_init=skip_tokenizer_init,
+        is_encoder_decoder=model_config.is_encoder_decoder,
     )
     # Cache config, optionally force APC
     cache_config = CacheConfig(

tests/v1/cudagraph/test_cudagraph_dispatch.py

Lines changed: 3 additions & 1 deletion
@@ -40,7 +40,9 @@ def _create_vllm_config(
 ) -> MagicMock:
     mock_config = MagicMock(spec=VllmConfig)
     mock_config.compilation_config = compilation_config
-    mock_config.scheduler_config = SchedulerConfig(max_num_seqs=max_num_seqs)
+    mock_config.scheduler_config = SchedulerConfig.default_factory(
+        max_num_seqs=max_num_seqs,
+    )
     mock_config.parallel_config = ParallelConfig()
     mock_config.speculative_config = None  # No speculative decoding
     if not lora_config:
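Where no ModelConfig is in play at all (the mocked VllmConfig above), the commit switches the call to SchedulerConfig.default_factory(...). Judging only from test_scheduler_config_init earlier in this diff, the instance it returns still has no stored max_model_len, so attribute access on it raises; a small sketch of that expectation:

from vllm.config import SchedulerConfig

cfg = SchedulerConfig.default_factory(max_num_seqs=128)
try:
    _ = cfg.max_model_len
except AttributeError:
    # InitVars are not stored, and there is no class-level default to fall back to.
    pass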

tests/v1/engine/test_engine_core.py

Lines changed: 7 additions & 6 deletions
@@ -484,19 +484,20 @@ def test_encoder_instance_zero_kv_cache(
     vision encoder, so they don't need KV cache for text generation.
     """
     # Form vllm config
-    scheduler_config = SchedulerConfig(
-        max_num_seqs=10,
-        max_num_batched_tokens=512,
-        max_model_len=512,
-        disable_hybrid_kv_cache_manager=True,
-    )
     model_config = ModelConfig(
         model="llava-hf/llava-1.5-7b-hf",  # Multimodal model
         enforce_eager=True,
         trust_remote_code=True,
         dtype="float16",
         seed=42,
     )
+    scheduler_config = SchedulerConfig(
+        max_num_seqs=10,
+        max_num_batched_tokens=512,
+        max_model_len=512,
+        disable_hybrid_kv_cache_manager=True,
+        is_encoder_decoder=model_config.is_encoder_decoder,
+    )
     cache_config = CacheConfig(
         block_size=16,
         gpu_memory_utilization=gpu_memory_utilization,
