Skip to content

Commit e6f114a

Browse files
authored
[Bugfix][EPLB] Prevent user-provided EPLB config from being overwritten with defaults (#29911)
Signed-off-by: Sage Moore <[email protected]>
1 parent 6fc5841 commit e6f114a

File tree

2 files changed

+9
-21
lines changed

2 files changed

+9
-21
lines changed

tests/distributed/test_eplb_spec_decode.py

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,14 @@ def get_model_args(
2222
"num_speculative_tokens": 1,
2323
"max_model_len": model_max_len,
2424
}
25-
25+
eplb_config = {
26+
"num_redundant_experts": tp_size,
27+
"window_size": 128,
28+
"step_interval": 1024,
29+
"log_balancedness": False,
30+
}
31+
if use_async:
32+
eplb_config["use_async"] = True
2633
model_args = {
2734
"pretrained": model_name,
2835
"dtype": "auto",
@@ -31,15 +38,10 @@ def get_model_args(
3138
"gpu_memory_utilization": 0.7,
3239
"speculative_config": speculative_config,
3340
"enable_expert_parallel": True,
34-
"num_redundant_experts": tp_size,
35-
"eplb_window_size": 128,
36-
"eplb_step_interval": 1024,
37-
"eplb_log_balancedness": False,
41+
"eplb_config": eplb_config,
3842
"enable_eplb": True,
3943
"max_model_len": model_max_len,
4044
}
41-
if use_async:
42-
model_args["eplb_config"] = {"use_async": True}
4345
return model_args
4446

4547

vllm/engine/arg_utils.py

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -421,10 +421,6 @@ class EngineArgs:
421421
)
422422
_api_process_count: int = ParallelConfig._api_process_count
423423
_api_process_rank: int = ParallelConfig._api_process_rank
424-
num_redundant_experts: int = EPLBConfig.num_redundant_experts
425-
eplb_window_size: int = EPLBConfig.window_size
426-
eplb_step_interval: int = EPLBConfig.step_interval
427-
eplb_log_balancedness: bool = EPLBConfig.log_balancedness
428424
max_parallel_loading_workers: int | None = (
429425
ParallelConfig.max_parallel_loading_workers
430426
)
@@ -1582,16 +1578,6 @@ def create_engine_config(
15821578
)
15831579
self.disable_nccl_for_dp_synchronization = True
15841580

1585-
# Forward the deprecated CLI args to the EPLB config.
1586-
if self.num_redundant_experts is not None:
1587-
self.eplb_config.num_redundant_experts = self.num_redundant_experts
1588-
if self.eplb_window_size is not None:
1589-
self.eplb_config.window_size = self.eplb_window_size
1590-
if self.eplb_step_interval is not None:
1591-
self.eplb_config.step_interval = self.eplb_step_interval
1592-
if self.eplb_log_balancedness is not None:
1593-
self.eplb_config.log_balancedness = self.eplb_log_balancedness
1594-
15951581
parallel_config = ParallelConfig(
15961582
pipeline_parallel_size=self.pipeline_parallel_size,
15971583
tensor_parallel_size=self.tensor_parallel_size,

0 commit comments

Comments
 (0)