"cacheflow/vscode:/vscode.git/clone" did not exist on "0a11a2e5ca764af37254fc962e5e6d35295d499b"
Commit 42b06117 authored by zhuwenwen
Browse files

Merge branch 'v0.9.2-dev' into v0.9.2-dev-ds

parents b2d14ba3 48114bb1
...@@ -79,6 +79,7 @@ class Request: ...@@ -79,6 +79,7 @@ class Request:
self._all_token_ids: list[int] = self.prompt_token_ids.copy() self._all_token_ids: list[int] = self.prompt_token_ids.copy()
self.spec_token_ids: list[int] = [] self.spec_token_ids: list[int] = []
self.num_computed_tokens = 0 self.num_computed_tokens = 0
self.num_generated_token_ids = 0
self.cache_salt: Optional[str] = cache_salt self.cache_salt: Optional[str] = cache_salt
# Multi-modal related # Multi-modal related
......
...@@ -496,8 +496,8 @@ class GPUModelRunner(LoRAModelRunnerMixin): ...@@ -496,8 +496,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
elif num_new_tokens > 0: elif num_new_tokens > 0:
req_state.output_token_ids.extend( req_state.output_token_ids.extend(
new_token_ids[-num_new_tokens:]) new_token_ids[-num_new_tokens:])
if len(spec_token_ids) > 0: if len(spec_token_ids) > 0:
req_state.spec_token_ids = spec_token_ids req_state.spec_token_ids = spec_token_ids
# Update the block IDs. # Update the block IDs.
if not resumed_from_preemption: if not resumed_from_preemption:
...@@ -528,10 +528,10 @@ class GPUModelRunner(LoRAModelRunnerMixin): ...@@ -528,10 +528,10 @@ class GPUModelRunner(LoRAModelRunnerMixin):
if not is_last_rank: if not is_last_rank:
# Add new_token_ids to token_ids_cpu. # Add new_token_ids to token_ids_cpu.
start_token_index = num_computed_tokens start_token_index = num_computed_tokens
end_token_index = num_computed_tokens + len(new_token_ids) end_token_index = num_computed_tokens + 1
self.input_batch.token_ids_cpu[ self.input_batch.token_ids_cpu[
req_index, req_index,
start_token_index:end_token_index] = new_token_ids start_token_index:end_token_index] = new_token_ids[-1]
self.input_batch.num_tokens_no_spec[ self.input_batch.num_tokens_no_spec[
req_index] = end_token_index req_index] = end_token_index
self.input_batch.num_tokens[req_index] = end_token_index self.input_batch.num_tokens[req_index] = end_token_index
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment