2 files changed: +17 −3

File 1 of 2:

@@ -123,7 +123,13 @@ def __init__(
         # Maps req_index -> tensor of shape (num_prompt_tokens, hidden_size)
         self.req_prompt_embeds: dict[int, torch.Tensor] = {}
         self.num_tokens = np.zeros(max_num_reqs, dtype=np.int32)
-        self.num_tokens_no_spec = np.zeros(max_num_reqs, dtype=np.int32)
+        self.num_tokens_no_spec_cpu_tensor = torch.zeros(
+            (max_num_reqs,),
+            device="cpu",
+            dtype=torch.int32,
+            pin_memory=pin_memory,
+        )
+        self.num_tokens_no_spec = self.num_tokens_no_spec_cpu_tensor.numpy()
         self.num_prompt_tokens = np.zeros(max_num_reqs, dtype=np.int32)
         self.num_computed_tokens_cpu_tensor = torch.zeros(
File 2 of 2:

@@ -1100,7 +1100,12 @@ def _update_ngram_gpu_tensors_incremental(
             self.input_batch.token_ids_cpu_tensor[new_req_idx, :num_tokens],
             non_blocking=True,
         )
-        self.num_tokens_no_spec_gpu[new_req_idx] = num_tokens
+        self.num_tokens_no_spec_gpu[new_req_idx : new_req_idx + 1].copy_(
+            self.input_batch.num_tokens_no_spec_cpu_tensor[
+                new_req_idx : new_req_idx + 1
+            ],
+            non_blocking=True,
+        )
 
     def _ngram_gpu_full_init(self) -> None:
         """Initialize all GPU tensors for ngram proposer from scratch.
@@ -1116,7 +1121,10 @@ def _ngram_gpu_full_init(self) -> None:
             self.input_batch.token_ids_cpu_tensor[idx, :num_tokens],
             non_blocking=True,
         )
-        self.num_tokens_no_spec_gpu[idx] = num_tokens
+        self.num_tokens_no_spec_gpu[idx : idx + 1].copy_(
+            self.input_batch.num_tokens_no_spec_cpu_tensor[idx : idx + 1],
+            non_blocking=True,
+        )
 
     def _update_states_after_model_execute(
         self, output_token_ids: torch.Tensor
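Both hunks in this file replace a per-element Python-scalar write into the GPU tensor with a length-1 slice copy_ from the new pinned CPU buffer, presumably so the GPU value is sourced from the same pinned memory via an asynchronous copy rather than a separate scalar write. A small sketch of the two styles, using hypothetical names (cpu_buf, gpu_buf) rather than the real attributes:

import torch

if torch.cuda.is_available():
    max_num_reqs = 8
    cpu_buf = torch.zeros(max_num_reqs, dtype=torch.int32, pin_memory=True)
    gpu_buf = torch.zeros(max_num_reqs, dtype=torch.int32, device="cuda")

    idx, num_tokens = 2, 42
    cpu_buf[idx] = num_tokens  # host-side state, e.g. written via the NumPy view

    # Old style: write a Python scalar straight into one GPU element.
    gpu_buf[idx] = num_tokens

    # New style: length-1 slice copy from the pinned CPU buffer; with
    # non_blocking=True the transfer is enqueued on the current CUDA stream.
    gpu_buf[idx : idx + 1].copy_(cpu_buf[idx : idx + 1], non_blocking=True)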