@@ -263,6 +263,7 @@ def schedule(self) -> SchedulerOutput:
263263 request .num_computed_tokens ,
264264 num_new_tokens ,
265265 encoder_compute_budget ,
266+ shift_computed_tokens = 1 if self .use_eagle else 0 ,
266267 )
267268
268269 if num_new_tokens == 0 :
@@ -532,6 +533,7 @@ def schedule(self) -> SchedulerOutput:
532533 num_computed_tokens ,
533534 num_new_tokens ,
534535 encoder_compute_budget ,
536+ shift_computed_tokens = 1 if self .use_eagle else 0 ,
535537 )
536538 if num_new_tokens == 0 :
537539 # The request cannot be scheduled.
@@ -829,6 +831,7 @@ def _try_schedule_encoder_inputs(
829831 num_computed_tokens : int ,
830832 num_new_tokens : int ,
831833 encoder_compute_budget : int ,
834+ shift_computed_tokens : int = 0 ,
832835 ) -> tuple [list [int ], int , int , list [int ]]:
833836 """
834837 Determine which encoder inputs need to be scheduled in the current step,
@@ -873,7 +876,10 @@ def _try_schedule_encoder_inputs(
873876 # The encoder output is needed if the two ranges overlap:
874877 # [num_computed_tokens, num_computed_tokens + num_new_tokens + shift_computed_tokens) and
875878 # [start_pos, start_pos + num_encoder_tokens)
876- if start_pos >= num_computed_tokens + num_new_tokens :
879+ if (
880+ start_pos
881+ >= num_computed_tokens + num_new_tokens + shift_computed_tokens
882+ ):
877883 # The encoder input is not needed in this step.
878884 break
879885
@@ -929,10 +935,12 @@ def _try_schedule_encoder_inputs(
929935 # NOTE(woosuk): We assume that the encoder input tokens should
930936 # be processed altogether, as the encoder usually uses
931937 # bidirectional attention.
932- if num_computed_tokens < start_pos :
938+ if num_computed_tokens + shift_computed_tokens < start_pos :
933939 # We only schedule the decoder tokens just before the
934940 # encoder input.
935- num_new_tokens = start_pos - num_computed_tokens
941+ num_new_tokens = start_pos - (
942+ num_computed_tokens + shift_computed_tokens
943+ )
936944 else :
937945 # Because of prefix caching, num_computed_tokens is greater
938946 # than start_pos even though its encoder input is not
0 commit comments