Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3dcb3e8b
Unverified
Commit
3dcb3e8b
authored
Apr 04, 2024
by
SangBin Cho
Committed by
GitHub
Apr 03, 2024
Browse files
[3/N] Refactor scheduler for chunked prefill scheduling (#3550)
parent
c64cf386
Changes
5
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
1020 additions
and
255 deletions
+1020
-255
tests/core/test_scheduler.py
tests/core/test_scheduler.py
+532
-6
tests/core/utils.py
tests/core/utils.py
+13
-6
vllm/core/scheduler.py
vllm/core/scheduler.py
+456
-241
vllm/engine/llm_engine.py
vllm/engine/llm_engine.py
+1
-1
vllm/utils.py
vllm/utils.py
+18
-1
No files found.
tests/core/test_scheduler.py
View file @
3dcb3e8b
This diff is collapsed.
Click to expand it.
tests/core/utils.py
View file @
3dcb3e8b
import
time
from
typing
import
Tuple
from
typing
import
Optional
,
Tuple
from
vllm
import
SamplingParams
from
vllm.lora.request
import
LoRARequest
from
vllm.sequence
import
Logprob
,
Sequence
,
SequenceGroup
def
create_dummy_prompt
(
request_id
:
str
,
prompt_length
:
int
,
block_size
:
int
=
None
)
->
Tuple
[
Sequence
,
SequenceGroup
]:
request_id
:
str
,
prompt_length
:
int
,
block_size
:
Optional
[
int
]
=
None
,
lora_request
:
Optional
[
LoRARequest
]
=
None
,
use_beam_search
:
bool
=
False
,
best_of
:
int
=
1
,
)
->
Tuple
[
Sequence
,
SequenceGroup
]:
if
not
block_size
:
block_size
=
prompt_length
...
...
@@ -17,8 +22,10 @@ def create_dummy_prompt(
prompt_tokens
=
list
(
range
(
prompt_length
))
prompt_str
=
" "
.
join
([
str
(
t
)
for
t
in
prompt_tokens
])
prompt
=
Sequence
(
int
(
request_id
),
prompt_str
,
prompt_tokens
,
block_size
)
seq_group
=
SequenceGroup
(
request_id
,
[
prompt
],
SamplingParams
(),
time
.
time
(),
None
)
seq_group
=
SequenceGroup
(
request_id
,
[
prompt
],
SamplingParams
(
use_beam_search
=
use_beam_search
,
best_of
=
best_of
),
time
.
time
(),
lora_request
)
return
prompt
,
seq_group
...
...
vllm/core/scheduler.py
View file @
3dcb3e8b
This diff is collapsed.
Click to expand it.
vllm/engine/llm_engine.py
View file @
3dcb3e8b
...
...
@@ -728,7 +728,7 @@ class LLMEngine:
time_per_output_tokens
=
[]
time_e2e_requests
=
[]
if
scheduler_outputs
is
not
None
:
prompt_run
=
scheduler_outputs
.
prompt_run
prompt_run
=
scheduler_outputs
.
num_prefill_groups
>
0
# Number of Tokens.
if
prompt_run
:
...
...
vllm/utils.py
View file @
3dcb3e8b
...
...
@@ -6,7 +6,7 @@ import socket
import
subprocess
import
uuid
import
warnings
from
collections
import
OrderedDict
from
collections
import
OrderedDict
,
defaultdict
from
functools
import
lru_cache
,
partial
from
platform
import
uname
from
typing
import
(
Any
,
Awaitable
,
Callable
,
Generic
,
Hashable
,
List
,
...
...
@@ -450,3 +450,20 @@ def maybe_expand_dim(tensor: torch.Tensor,
if
tensor
.
ndim
<
target_dims
:
tensor
=
tensor
.
view
(
-
1
,
*
([
size
]
*
(
target_dims
-
tensor
.
ndim
)))
return
tensor
def merge_dicts(dict1: dict[Any, list[Any]],
                dict2: dict[Any, list[Any]]) -> dict[Any, list[Any]]:
    """Merge two dicts that map a key to a list of items.

    A key present in only one input keeps that input's items. When a key
    appears in both inputs, the two lists are concatenated, with the items
    from dict1 placed before the items from dict2 (nothing is dropped).

    Args:
        dict1: First mapping; its items come first on a key conflict.
        dict2: Second mapping; its items are appended after dict1's.

    Returns:
        A new plain dict whose values are freshly built lists. Neither
        input dict nor any of the input lists is modified.
    """
    merged_dict = defaultdict(list)
    # Process dict1 before dict2 so dict1's items lead in every merged list
    # and dict1's keys come first in the result's key order.
    for source in (dict1, dict2):
        for key, value in source.items():
            merged_dict[key].extend(value)
    # Convert back to a plain dict so a later lookup of a missing key raises
    # KeyError instead of silently inserting an empty list.
    return dict(merged_dict)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment