Unverified commit 9528e3a0, authored by Woosuk Kwon, committed by GitHub
Browse files

[BugFix][Spec Decode] Fix spec token ids in model runner (#20530)


Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
parent 9fb52e52
...@@ -528,18 +528,19 @@ class GPUModelRunner(LoRAModelRunnerMixin): ...@@ -528,18 +528,19 @@ class GPUModelRunner(LoRAModelRunnerMixin):
start_token_index:end_token_index] = new_token_ids start_token_index:end_token_index] = new_token_ids
self.input_batch.num_tokens_no_spec[ self.input_batch.num_tokens_no_spec[
req_index] = end_token_index req_index] = end_token_index
self.input_batch.num_tokens[req_index] = end_token_index
# Add spec_token_ids to token_ids_cpu. # Add spec_token_ids to token_ids_cpu.
spec_token_ids = ( spec_token_ids = (
scheduler_output.scheduled_spec_decode_tokens.get( scheduler_output.scheduled_spec_decode_tokens.get(req_id, ()))
req_id, ()))
if spec_token_ids: if spec_token_ids:
start_index = end_token_index num_spec_tokens = len(spec_token_ids)
end_token_index += len(spec_token_ids) start_index = self.input_batch.num_tokens_no_spec[req_index]
end_token_index = start_index + num_spec_tokens
self.input_batch.token_ids_cpu[ self.input_batch.token_ids_cpu[
req_index, req_index, start_index:end_token_index] = spec_token_ids
start_index:end_token_index] = spec_token_ids
# NOTE(woosuk): `num_tokens` here may include spec tokens. # NOTE(woosuk): `num_tokens` here may include spec tokens.
self.input_batch.num_tokens[req_index] = end_token_index self.input_batch.num_tokens[req_index] += num_spec_tokens
# Add the new or resumed requests to the persistent batch. # Add the new or resumed requests to the persistent batch.
# The smaller empty indices are filled first. # The smaller empty indices are filled first.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment