Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f6ce3afa
Commit
f6ce3afa
authored
Dec 05, 2024
by
zhuwenwen
Browse files
Merge branch 'v0.6.2-dev' of
ssh://10.6.10.68:10022/dcutoolkit/deeplearing/vllm
into v0.6.2-dev
parents
78800ecf
1a313afb
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
7 additions
and
4 deletions
+7
-4
vllm/model_executor/models/medusa.py
vllm/model_executor/models/medusa.py
+1
-1
vllm/model_executor/models/mlp_speculator.py
vllm/model_executor/models/mlp_speculator.py
+1
-1
vllm/spec_decode/spec_decode_worker.py
vllm/spec_decode/spec_decode_worker.py
+5
-2
No files found.
vllm/model_executor/models/medusa.py
View file @
f6ce3afa
...
...
@@ -244,7 +244,7 @@ class Medusa(nn.Module):
default_weight_loader
)
weight_loader
(
param
,
loaded_weight
)
if
self
.
use_llama_nn
and
"lm_head"
in
name
:
if
self
.
use_llama_nn
and
os
.
environ
[
'LM_NN'
]
==
'1'
and
"lm_head"
in
name
:
_weight
=
torch
.
zeros_like
(
param
.
data
)
ori_shape
=
_weight
.
shape
...
...
vllm/model_executor/models/mlp_speculator.py
View file @
f6ce3afa
...
...
@@ -201,7 +201,7 @@ class MLPSpeculator(nn.Module):
default_weight_loader
)
weight_loader
(
param
,
loaded_weight
)
if
self
.
use_llama_nn
and
"head"
in
name
:
if
self
.
use_llama_nn
and
os
.
environ
[
'LM_NN'
]
==
'1'
and
"head"
in
name
:
_weight
=
torch
.
zeros_like
(
param
.
data
)
ori_shape
=
_weight
.
shape
...
...
vllm/spec_decode/spec_decode_worker.py
View file @
f6ce3afa
...
...
@@ -531,6 +531,9 @@ class SpecDecodeWorker(LoraNotSupportedWorkerBase):
not called, meaning that the kv-cache in proposer for requests is not
updated, so they cannot enable spec decode in the rest decoding.
"""
if
self
.
tree_style_spec_decoding
and
self
.
kvcache_slot_to_be_moved
is
not
None
:
execute_model_req
.
kvcache_slot_to_be_moved
=
self
.
kvcache_slot_to_be_moved
self
.
kvcache_slot_to_be_moved
=
None
sampler_output
=
self
.
scorer_worker
.
execute_model
(
execute_model_req
)
assert
len
(
sampler_output
)
==
1
...
...
@@ -734,7 +737,7 @@ class SpecDecodeWorker(LoraNotSupportedWorkerBase):
# Get probabilities according to proposal method.
proposal_probs
=
proposals
.
proposal_probs
if
proposals
.
proposal_probs
is
not
None
else
None
if
non_spec_indices
:
if
proposal_probs
is
not
None
and
non_spec_indices
:
proposal_probs
=
proposal_probs
[
spec_indices
]
# Get proposed tokens.
...
...
@@ -744,7 +747,7 @@ class SpecDecodeWorker(LoraNotSupportedWorkerBase):
# Get tree buffers.
cart_candidates
=
proposals
.
cart_candidates
if
proposals
.
cart_candidates
is
not
None
else
None
if
non_spec_indices
:
if
cart_candidates
is
not
None
and
non_spec_indices
:
cart_candidates
=
cart_candidates
[
spec_indices
]
# Sampler arguments
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment