Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
xdb4_94051
vllm
Commits
b4b195b3
Unverified
Commit
b4b195b3
authored
Jul 17, 2023
by
Lily Liu
Committed by
GitHub
Jul 17, 2023
Browse files
fix max seq len (#489)
parent
20b0d88d
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
8 additions
and
8 deletions
+8
-8
vllm/config.py
vllm/config.py
+2
-2
vllm/core/scheduler.py
vllm/core/scheduler.py
+3
-1
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+2
-3
vllm/engine/llm_engine.py
vllm/engine/llm_engine.py
+1
-2
No files found.
vllm/config.py
View file @
b4b195b3
...
...
@@ -204,10 +204,10 @@ class SchedulerConfig:
"""
def
__init__
(
self
,
max_num_batched_tokens
:
int
,
max_num_seqs
:
int
,
max_
seq
_len
:
int
)
->
None
:
max_
model
_len
:
int
)
->
None
:
self
.
max_num_batched_tokens
=
max_num_batched_tokens
self
.
max_num_seqs
=
max_num_seqs
self
.
max_
seq
_len
=
max_
seq
_len
self
.
max_
model
_len
=
max_
model
_len
_STR_DTYPE_TO_TORCH_DTYPE
=
{
...
...
vllm/core/scheduler.py
View file @
b4b195b3
...
...
@@ -190,7 +190,9 @@ class Scheduler:
break
num_prompt_tokens
=
seq_group
.
get_seqs
()[
0
].
get_len
()
if
num_prompt_tokens
>
self
.
scheduler_config
.
max_seq_len
:
if
num_prompt_tokens
>
min
(
self
.
scheduler_config
.
max_model_len
,
self
.
scheduler_config
.
max_num_batched_tokens
):
logger
.
warning
(
f
"Input prompt (
{
num_prompt_tokens
}
tokens) is too long"
" and exceeds limit of "
...
...
vllm/engine/arg_utils.py
View file @
b4b195b3
...
...
@@ -155,11 +155,10 @@ class EngineArgs:
parallel_config
=
ParallelConfig
(
self
.
pipeline_parallel_size
,
self
.
tensor_parallel_size
,
self
.
worker_use_ray
)
model
_max
_len
=
getattr
(
model_config
.
hf_config
,
m
ax_m
odel_len
=
getattr
(
model_config
.
hf_config
,
'max_position_embeddings'
,
float
(
'inf'
))
max_seq_len
=
min
(
self
.
max_num_batched_tokens
,
model_max_len
)
scheduler_config
=
SchedulerConfig
(
self
.
max_num_batched_tokens
,
self
.
max_num_seqs
,
max_
seq
_len
)
self
.
max_num_seqs
,
max_
model
_len
)
return
model_config
,
cache_config
,
parallel_config
,
scheduler_config
...
...
vllm/engine/llm_engine.py
View file @
b4b195b3
...
...
@@ -300,8 +300,7 @@ class LLMEngine:
continue
# Check if the sequence has reached max_seq_len.
if
(
seq
.
get_len
()
>
self
.
scheduler
.
scheduler_config
.
max_seq_len
):
if
seq
.
get_len
()
>
self
.
scheduler_config
.
max_model_len
:
self
.
scheduler
.
free_seq
(
seq
,
SequenceStatus
.
FINISHED_LENGTH_CAPPED
)
continue
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment