Skip to content

Commit 0037b57

Browse files
authored
[Core] Eliminate redundant is_encoder_decoder lookups (20-40us/step) (#29800)
Signed-off-by: Wushi Dong <[email protected]>
1 parent f5b0846 commit 0037b57

File tree

1 file changed

+3
-9
lines changed

1 file changed

+3
-9
lines changed

vllm/v1/worker/gpu_model_runner.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2439,16 +2439,13 @@ def _preprocess(
24392439
]:
24402440
num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens
24412441
is_first_rank = get_pp_group().is_first_rank
2442+
is_encoder_decoder = self.model_config.is_encoder_decoder
24422443

24432444
# _prepare_inputs may reorder the batch, so we must gather multi
24442445
# modal outputs after that to ensure the correct order
24452446
ec_connector_output = None
24462447

2447-
if (
2448-
self.supports_mm_inputs
2449-
and is_first_rank
2450-
and not self.model_config.is_encoder_decoder
2451-
):
2448+
if self.supports_mm_inputs and is_first_rank and not is_encoder_decoder:
24522449
# Run the multimodal encoder if any.
24532450
with self.maybe_get_ec_connector_output(
24542451
scheduler_output,
@@ -2526,10 +2523,7 @@ def _preprocess(
25262523
num_input_tokens, intermediate_tensors, True
25272524
)
25282525

2529-
if (
2530-
self.model_config.is_encoder_decoder
2531-
and scheduler_output.scheduled_encoder_inputs
2532-
):
2526+
if is_encoder_decoder and scheduler_output.scheduled_encoder_inputs:
25332527
# Run the encoder, just like we do with other multimodal inputs.
25342528
# For an encoder-decoder model, our processing here is a bit
25352529
# simpler, because the outputs are just passed to the decoder.

0 commit comments

Comments (0)