Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
42b06117
"cacheflow/vscode:/vscode.git/clone" did not exist on "0a11a2e5ca764af37254fc962e5e6d35295d499b"
Commit
42b06117
authored
Sep 29, 2025
by
zhuwenwen
Browse files
Merge branch 'v0.9.2-dev' into v0.9.2-dev-ds
parents
b2d14ba3
48114bb1
Changes
22
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
5 additions
and
4 deletions
+5
-4
vllm/v1/request.py
vllm/v1/request.py
+1
-0
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+4
-4
No files found.
vllm/v1/request.py
View file @
42b06117
...
@@ -79,6 +79,7 @@ class Request:
...
@@ -79,6 +79,7 @@ class Request:
self
.
_all_token_ids
:
list
[
int
]
=
self
.
prompt_token_ids
.
copy
()
self
.
_all_token_ids
:
list
[
int
]
=
self
.
prompt_token_ids
.
copy
()
self
.
spec_token_ids
:
list
[
int
]
=
[]
self
.
spec_token_ids
:
list
[
int
]
=
[]
self
.
num_computed_tokens
=
0
self
.
num_computed_tokens
=
0
self
.
num_generated_token_ids
=
0
self
.
cache_salt
:
Optional
[
str
]
=
cache_salt
self
.
cache_salt
:
Optional
[
str
]
=
cache_salt
# Multi-modal related
# Multi-modal related
...
...
vllm/v1/worker/gpu_model_runner.py
View file @
42b06117
...
@@ -496,8 +496,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
...
@@ -496,8 +496,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
elif
num_new_tokens
>
0
:
elif
num_new_tokens
>
0
:
req_state
.
output_token_ids
.
extend
(
req_state
.
output_token_ids
.
extend
(
new_token_ids
[
-
num_new_tokens
:])
new_token_ids
[
-
num_new_tokens
:])
if
len
(
spec_token_ids
)
>
0
:
if
len
(
spec_token_ids
)
>
0
:
req_state
.
spec_token_ids
=
spec_token_ids
req_state
.
spec_token_ids
=
spec_token_ids
# Update the block IDs.
# Update the block IDs.
if
not
resumed_from_preemption
:
if
not
resumed_from_preemption
:
...
@@ -528,10 +528,10 @@ class GPUModelRunner(LoRAModelRunnerMixin):
...
@@ -528,10 +528,10 @@ class GPUModelRunner(LoRAModelRunnerMixin):
if
not
is_last_rank
:
if
not
is_last_rank
:
# Add new_token_ids to token_ids_cpu.
# Add new_token_ids to token_ids_cpu.
start_token_index
=
num_computed_tokens
start_token_index
=
num_computed_tokens
end_token_index
=
num_computed_tokens
+
len
(
new_token_ids
)
end_token_index
=
num_computed_tokens
+
1
self
.
input_batch
.
token_ids_cpu
[
self
.
input_batch
.
token_ids_cpu
[
req_index
,
req_index
,
start_token_index
:
end_token_index
]
=
new_token_ids
start_token_index
:
end_token_index
]
=
new_token_ids
[
-
1
]
self
.
input_batch
.
num_tokens_no_spec
[
self
.
input_batch
.
num_tokens_no_spec
[
req_index
]
=
end_token_index
req_index
]
=
end_token_index
self
.
input_batch
.
num_tokens
[
req_index
]
=
end_token_index
self
.
input_batch
.
num_tokens
[
req_index
]
=
end_token_index
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment