Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
5d2965b7
Unverified
Commit
5d2965b7
authored
Feb 16, 2025
by
Cyrus Leung
Committed by
GitHub
Feb 16, 2025
Browse files
[Bugfix] Fix 2 Node and Spec Decode tests (#13341)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
a0231b7c
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
17 additions
and
9 deletions
+17
-9
tests/distributed/test_pipeline_parallel.py
tests/distributed/test_pipeline_parallel.py
+5
-5
vllm/spec_decode/ngram_worker.py
vllm/spec_decode/ngram_worker.py
+12
-4
No files found.
tests/distributed/test_pipeline_parallel.py
View file @
5d2965b7
...
...
@@ -275,11 +275,11 @@ def _compare_tp(
if
load_format
==
"dummy"
:
# Avoid OOM
text_overrides
=
{
"num_layers"
:
1
,
"
num_
hidden_
layers"
:
1
,
"
num_experts"
:
2
,
"num_
experts_per_tok
"
:
2
,
"num_
local_expert
s"
:
2
,
"num_
hidden_
layers"
:
4
,
"hidden_
size"
:
512
,
"
intermediate_size"
:
800
,
"num_
attention_heads
"
:
4
,
"num_
key_value_head
s"
:
1
,
}
if
is_multimodal
:
...
...
vllm/spec_decode/ngram_worker.py
View file @
5d2965b7
...
...
@@ -6,6 +6,7 @@ from typing import List, Optional, Set, Tuple
import
torch
import
torch.nn
as
nn
from
vllm.config
import
VllmConfig
from
vllm.model_executor.layers.sampler
import
SamplerOutput
from
vllm.sequence
import
ExecuteModelRequest
from
vllm.spec_decode.interfaces
import
SpeculativeProposals
...
...
@@ -25,11 +26,18 @@ class NGramWorker(NonLLMProposerWorkerBase):
which don't rely on LLM model to give proposals.
"""
def
__init__
(
self
,
*
args
,
**
kwargs
):
def
__init__
(
self
,
vllm_config
:
VllmConfig
,
local_rank
:
int
,
device_type
:
str
=
"cuda"
,
**
kwargs
,
):
super
().
__init__
(
vllm_config
)
# Get local_rank/vocab_size from kwargs attribute
self
.
local_rank
=
kwargs
[
"local_rank"
]
self
.
vocab_size
=
kwargs
[
"vllm_config"
].
model_config
.
get_vocab_size
()
self
.
device_type
=
kwargs
.
get
(
"device_type"
,
"cuda"
)
self
.
local_rank
=
local_rank
self
.
device_type
=
device_type
# Lazy initialization list.
self
.
_proposer
:
Top1Proposer
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment