
Commit b6ae5ae

SageMoore authored and khluu committed
[Bugfix][EPLB] Prevent user-provided EPLB config from being overwritten with defaults (#29911)
Signed-off-by: Sage Moore <[email protected]> (cherry picked from commit e6f114a)
1 parent 5c7c09a commit b6ae5ae
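
The bug, in brief: EngineArgs still carried deprecated per-field EPLB attributes (num_redundant_experts, eplb_window_size, eplb_step_interval, eplb_log_balancedness) whose defaults were the EPLBConfig defaults rather than None, and create_engine_config forwarded them into eplb_config unconditionally; the "is not None" guards never failed, so a user-supplied --eplb-config was silently reset to defaults. Below is a minimal sketch of that failure mode, using simplified stand-in classes rather than vLLM's actual definitions (the default value 1000 is illustrative only):

from dataclasses import dataclass, field

@dataclass
class EPLBConfig:
    window_size: int = 1000  # illustrative default, not necessarily vLLM's

@dataclass
class EngineArgs:
    eplb_config: EPLBConfig = field(default_factory=EPLBConfig)
    # The deprecated flag defaults to the config's default, never to None.
    eplb_window_size: int = EPLBConfig.window_size

    def create_engine_config(self) -> EPLBConfig:
        # Pre-fix forwarding logic: this guard is always true, so the
        # deprecated flag's default clobbers the user's eplb_config.
        if self.eplb_window_size is not None:
            self.eplb_config.window_size = self.eplb_window_size
        return self.eplb_config

args = EngineArgs(eplb_config=EPLBConfig(window_size=128))
print(args.create_engine_config().window_size)  # 1000, not the user's 128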

File tree

2 files changed: +9 −21 lines changed


tests/distributed/test_eplb_spec_decode.py

Lines changed: 9 additions & 7 deletions

@@ -22,7 +22,14 @@ def get_model_args(
         "num_speculative_tokens": 1,
         "max_model_len": model_max_len,
     }
-
+    eplb_config = {
+        "num_redundant_experts": tp_size,
+        "window_size": 128,
+        "step_interval": 1024,
+        "log_balancedness": False,
+    }
+    if use_async:
+        eplb_config["use_async"] = True
     model_args = {
         "pretrained": model_name,
         "dtype": "auto",
@@ -31,15 +38,10 @@ def get_model_args(
         "gpu_memory_utilization": 0.7,
         "speculative_config": speculative_config,
         "enable_expert_parallel": True,
-        "num_redundant_experts": tp_size,
-        "eplb_window_size": 128,
-        "eplb_step_interval": 1024,
-        "eplb_log_balancedness": False,
+        "eplb_config": eplb_config,
         "enable_eplb": True,
         "max_model_len": model_max_len,
     }
-    if use_async:
-        model_args["eplb_config"] = {"use_async": True}
     return model_args
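
The test now builds one eplb_config dict and passes it through a single model arg, instead of mixing deprecated top-level keys with a separate eplb_config that only set use_async. For reference, a hedged sketch of the equivalent offline usage, assuming the LLM entrypoint forwards eplb_config the same way it does other config dicts; the model name and parallel sizes are placeholders, and the keys follow the EPLBConfig fields shown in the diff above:

from vllm import LLM

# Hypothetical MoE model and sizes; values mirror the test, and are not
# recommended production settings.
llm = LLM(
    model="Qwen/Qwen3-30B-A3B",
    tensor_parallel_size=2,
    enable_expert_parallel=True,
    enable_eplb=True,
    eplb_config={
        "num_redundant_experts": 2,
        "window_size": 128,
        "step_interval": 1024,
        "log_balancedness": False,
    },
)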

vllm/engine/arg_utils.py

Lines changed: 0 additions & 14 deletions

@@ -420,10 +420,6 @@ class EngineArgs:
     )
     _api_process_count: int = ParallelConfig._api_process_count
     _api_process_rank: int = ParallelConfig._api_process_rank
-    num_redundant_experts: int = EPLBConfig.num_redundant_experts
-    eplb_window_size: int = EPLBConfig.window_size
-    eplb_step_interval: int = EPLBConfig.step_interval
-    eplb_log_balancedness: bool = EPLBConfig.log_balancedness
     max_parallel_loading_workers: int | None = (
         ParallelConfig.max_parallel_loading_workers
     )
@@ -1581,16 +1577,6 @@ def create_engine_config(
         )
         self.disable_nccl_for_dp_synchronization = True

-        # Forward the deprecated CLI args to the EPLB config.
-        if self.num_redundant_experts is not None:
-            self.eplb_config.num_redundant_experts = self.num_redundant_experts
-        if self.eplb_window_size is not None:
-            self.eplb_config.window_size = self.eplb_window_size
-        if self.eplb_step_interval is not None:
-            self.eplb_config.step_interval = self.eplb_step_interval
-        if self.eplb_log_balancedness is not None:
-            self.eplb_config.log_balancedness = self.eplb_log_balancedness
-
         parallel_config = ParallelConfig(
             pipeline_parallel_size=self.pipeline_parallel_size,
             tensor_parallel_size=self.tensor_parallel_size,
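
Note that the removed forwarding block could never be skipped: the deprecated attributes were typed int/bool with concrete defaults taken from EPLBConfig, so each "is not None" guard was always true. Had the deprecated flags been kept alive instead of deleted, the conventional fix would be None defaults so the guard can tell "user set this" apart from "left at default". A hypothetical sketch of that alternative (not vLLM's class):

from dataclasses import dataclass

@dataclass
class EngineArgsSketch:  # hypothetical, not vLLM's EngineArgs
    # None means "the user did not set the deprecated flag".
    eplb_window_size: int | None = None

args = EngineArgsSketch()
assert args.eplb_window_size is None       # forwarding can now be skipped
args = EngineArgsSketch(eplb_window_size=128)
assert args.eplb_window_size is not None   # forward only when user-set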
