2 files changed: +17 -3 lines changed

First changed file:

@@ -123,7 +123,13 @@ def __init__(
         # Maps req_index -> tensor of shape (num_prompt_tokens, hidden_size)
         self.req_prompt_embeds: dict[int, torch.Tensor] = {}
         self.num_tokens = np.zeros(max_num_reqs, dtype=np.int32)
-        self.num_tokens_no_spec = np.zeros(max_num_reqs, dtype=np.int32)
+        self.num_tokens_no_spec_cpu_tensor = torch.zeros(
+            (max_num_reqs,),
+            device="cpu",
+            dtype=torch.int32,
+            pin_memory=pin_memory,
+        )
+        self.num_tokens_no_spec = self.num_tokens_no_spec_cpu_tensor.numpy()
         self.num_prompt_tokens = np.zeros(max_num_reqs, dtype=np.int32)
         self.num_computed_tokens_cpu_tensor = torch.zeros(
             (max_num_reqs,),
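For context (not part of the diff): this hunk replaces the plain numpy array with a pinned, page-locked CPU tensor and re-exposes `num_tokens_no_spec` as its `.numpy()` view, so the existing numpy-style bookkeeping keeps working while the buffer becomes a valid source for asynchronous host-to-device copies. A minimal standalone sketch of that pattern, with `max_num_reqs` and `pin_memory` standing in for the constructor arguments of the input-batch class:

```python
import torch

max_num_reqs = 4
pin_memory = torch.cuda.is_available()  # pinning only pays off with a CUDA device

# Pinned CPU tensor: eligible for async DMA transfers to the GPU.
num_tokens_no_spec_cpu_tensor = torch.zeros(
    (max_num_reqs,), device="cpu", dtype=torch.int32, pin_memory=pin_memory
)
# .numpy() returns a view over the same storage, so numpy-style writes
# (as the existing input-batch code performs) land directly in the pinned buffer.
num_tokens_no_spec = num_tokens_no_spec_cpu_tensor.numpy()

num_tokens_no_spec[2] = 17
assert num_tokens_no_spec_cpu_tensor[2].item() == 17  # same memory
```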
Second changed file:

@@ -1099,7 +1099,12 @@ def _update_ngram_gpu_tensors_incremental(
                self.input_batch.token_ids_cpu_tensor[new_req_idx, :num_tokens],
                non_blocking=True,
            )
-           self.num_tokens_no_spec_gpu[new_req_idx] = num_tokens
+           self.num_tokens_no_spec_gpu[new_req_idx : new_req_idx + 1].copy_(
+               self.input_batch.num_tokens_no_spec_cpu_tensor[
+                   new_req_idx : new_req_idx + 1
+               ],
+               non_blocking=True,
+           )
 
     def _ngram_gpu_full_init(self) -> None:
         """Initialize all GPU tensors for ngram proposer from scratch.
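On the runner side, the scalar write into the GPU tensor is replaced by a non-blocking `copy_` from the matching slice of the pinned CPU tensor introduced above. A hypothetical standalone version of that update, assuming a CUDA device; `num_tokens_no_spec_cpu` and `num_tokens_no_spec_gpu` are simplified stand-ins for `self.input_batch.num_tokens_no_spec_cpu_tensor` and `self.num_tokens_no_spec_gpu`:

```python
import torch

if torch.cuda.is_available():
    max_num_reqs = 4
    num_tokens_no_spec_cpu = torch.zeros(
        (max_num_reqs,), dtype=torch.int32, pin_memory=True
    )
    num_tokens_no_spec_gpu = torch.zeros(
        (max_num_reqs,), dtype=torch.int32, device="cuda"
    )

    new_req_idx, num_tokens = 1, 17
    num_tokens_no_spec_cpu[new_req_idx] = num_tokens  # host-side bookkeeping

    # A 1-element slice keeps source and destination shapes identical; with a
    # pinned source, non_blocking=True lets the H2D copy overlap other GPU work.
    num_tokens_no_spec_gpu[new_req_idx : new_req_idx + 1].copy_(
        num_tokens_no_spec_cpu[new_req_idx : new_req_idx + 1],
        non_blocking=True,
    )

    torch.cuda.synchronize()  # only needed here because we read the value back
    assert num_tokens_no_spec_gpu[new_req_idx].item() == num_tokens
```

The `_ngram_gpu_full_init` hunk below applies the same slice-copy per request index, so the full re-initialization also stays on the pinned, non-blocking copy path.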
@@ -1115,7 +1120,10 @@ def _ngram_gpu_full_init(self) -> None:
                self.input_batch.token_ids_cpu_tensor[idx, :num_tokens],
                non_blocking=True,
            )
-           self.num_tokens_no_spec_gpu[idx] = num_tokens
+           self.num_tokens_no_spec_gpu[idx : idx + 1].copy_(
+               self.input_batch.num_tokens_no_spec_cpu_tensor[idx : idx + 1],
+               non_blocking=True,
+           )
 
     def _update_states_after_model_execute(
         self, output_token_ids: torch.Tensor