Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
263d8ee1
Unverified
Commit
263d8ee1
authored
Oct 19, 2024
by
Cyrus Leung
Committed by
GitHub
Oct 19, 2024
Browse files
[Bugfix] Fix missing task for speculative decoding (#9524)
parent
c5eea3c8
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
9 deletions
+14
-9
vllm/config.py
vllm/config.py
+14
-9
No files found.
vllm/config.py
View file @
263d8ee1
...
@@ -33,8 +33,10 @@ logger = init_logger(__name__)
...
@@ -33,8 +33,10 @@ logger = init_logger(__name__)
_EMBEDDING_MODEL_MAX_NUM_BATCHED_TOKENS
=
32768
_EMBEDDING_MODEL_MAX_NUM_BATCHED_TOKENS
=
32768
_MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS
=
5120
_MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS
=
5120
Task
=
Literal
[
"generate"
,
"embedding"
]
TaskOption
=
Literal
[
"auto"
,
"generate"
,
"embedding"
]
TaskOption
=
Literal
[
"auto"
,
Task
]
# "draft" is only used internally for speculative decoding
_Task
=
Literal
[
"generate"
,
"embedding"
,
"draft"
]
class
ModelConfig
:
class
ModelConfig
:
...
@@ -115,7 +117,7 @@ class ModelConfig:
...
@@ -115,7 +117,7 @@ class ModelConfig:
def
__init__
(
self
,
def
__init__
(
self
,
model
:
str
,
model
:
str
,
task
:
TaskOption
,
task
:
Union
[
TaskOption
,
_Task
],
tokenizer
:
str
,
tokenizer
:
str
,
tokenizer_mode
:
str
,
tokenizer_mode
:
str
,
trust_remote_code
:
bool
,
trust_remote_code
:
bool
,
...
@@ -255,18 +257,21 @@ class ModelConfig:
...
@@ -255,18 +257,21 @@ class ModelConfig:
def
_resolve_task
(
def
_resolve_task
(
self
,
self
,
task_option
:
TaskOption
,
task_option
:
Union
[
TaskOption
,
_Task
],
hf_config
:
PretrainedConfig
,
hf_config
:
PretrainedConfig
,
)
->
Tuple
[
Set
[
Task
],
Task
]:
)
->
Tuple
[
Set
[
_Task
],
_Task
]:
if
task_option
==
"draft"
:
return
{
"draft"
},
"draft"
architectures
=
getattr
(
hf_config
,
"architectures"
,
[])
architectures
=
getattr
(
hf_config
,
"architectures"
,
[])
task_support
:
Dict
[
Task
,
bool
]
=
{
task_support
:
Dict
[
_Task
,
bool
]
=
{
# NOTE: Listed from highest to lowest priority,
# NOTE: Listed from highest to lowest priority,
# in case the model supports multiple of them
# in case the model supports multiple of them
"generate"
:
ModelRegistry
.
is_text_generation_model
(
architectures
),
"generate"
:
ModelRegistry
.
is_text_generation_model
(
architectures
),
"embedding"
:
ModelRegistry
.
is_embedding_model
(
architectures
),
"embedding"
:
ModelRegistry
.
is_embedding_model
(
architectures
),
}
}
supported_tasks_lst
:
List
[
Task
]
=
[
supported_tasks_lst
:
List
[
_Task
]
=
[
task
for
task
,
is_supported
in
task_support
.
items
()
if
is_supported
task
for
task
,
is_supported
in
task_support
.
items
()
if
is_supported
]
]
supported_tasks
=
set
(
supported_tasks_lst
)
supported_tasks
=
set
(
supported_tasks_lst
)
...
@@ -1002,7 +1007,7 @@ class SchedulerConfig:
...
@@ -1002,7 +1007,7 @@ class SchedulerConfig:
"""
"""
def
__init__
(
self
,
def
__init__
(
self
,
task
:
Task
,
task
:
_Task
,
max_num_batched_tokens
:
Optional
[
int
],
max_num_batched_tokens
:
Optional
[
int
],
max_num_seqs
:
int
,
max_num_seqs
:
int
,
max_model_len
:
int
,
max_model_len
:
int
,
...
@@ -1269,7 +1274,7 @@ class SpeculativeConfig:
...
@@ -1269,7 +1274,7 @@ class SpeculativeConfig:
ngram_prompt_lookup_min
=
0
ngram_prompt_lookup_min
=
0
draft_model_config
=
ModelConfig
(
draft_model_config
=
ModelConfig
(
model
=
speculative_model
,
model
=
speculative_model
,
task
=
target_model_config
.
task
,
task
=
"draft"
,
tokenizer
=
target_model_config
.
tokenizer
,
tokenizer
=
target_model_config
.
tokenizer
,
tokenizer_mode
=
target_model_config
.
tokenizer_mode
,
tokenizer_mode
=
target_model_config
.
tokenizer_mode
,
trust_remote_code
=
target_model_config
.
trust_remote_code
,
trust_remote_code
=
target_model_config
.
trust_remote_code
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment