Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a33ce60c
Unverified
Commit
a33ce60c
authored
Mar 06, 2024
by
Cade Daniel
Committed by
GitHub
Mar 06, 2024
Browse files
[Testing] Fix core tests (#3224)
parent
24aecf42
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
36 additions
and
23 deletions
+36
-23
tests/core/test_block_manager.py
tests/core/test_block_manager.py
+31
-18
tests/core/test_scheduler.py
tests/core/test_scheduler.py
+3
-3
tests/core/utils.py
tests/core/utils.py
+1
-1
vllm/sequence.py
vllm/sequence.py
+1
-1
No files found.
tests/core/test_block_manager.py
View file @
a33ce60c
...
@@ -6,7 +6,7 @@ from vllm import SamplingParams
...
@@ -6,7 +6,7 @@ from vllm import SamplingParams
from
vllm.block
import
PhysicalTokenBlock
from
vllm.block
import
PhysicalTokenBlock
from
vllm.core.block_manager
import
BlockAllocator
,
BlockSpaceManager
,
AllocStatus
from
vllm.core.block_manager
import
BlockAllocator
,
BlockSpaceManager
,
AllocStatus
from
vllm.utils
import
Device
from
vllm.utils
import
Device
from
vllm.sequence
import
Sequence
,
SequenceGroup
,
SequenceStatus
from
vllm.sequence
import
Sequence
,
SequenceGroup
,
SequenceStatus
,
Logprob
from
.utils
import
create_dummy_prompt
from
.utils
import
create_dummy_prompt
...
@@ -22,7 +22,8 @@ def test_block_allocator_allocate():
...
@@ -22,7 +22,8 @@ def test_block_allocator_allocate():
for
_
in
range
(
num_cpu_blocks
):
for
_
in
range
(
num_cpu_blocks
):
block
=
cpu_allocator
.
allocate
()
block
=
cpu_allocator
.
allocate
()
num_free
-=
1
num_free
-=
1
assert
block
not
in
cpu_allocator
.
free_blocks
assert
block
.
block_hash
not
in
cpu_allocator
.
evictor
assert
cpu_allocator
.
get_num_free_blocks
()
==
num_free
assert
cpu_allocator
.
get_num_free_blocks
()
==
num_free
with
pytest
.
raises
(
ValueError
):
with
pytest
.
raises
(
ValueError
):
...
@@ -39,7 +40,7 @@ def test_block_allocator_free():
...
@@ -39,7 +40,7 @@ def test_block_allocator_free():
for
_
in
range
(
num_cpu_blocks
):
for
_
in
range
(
num_cpu_blocks
):
block
=
cpu_allocator
.
allocate
()
block
=
cpu_allocator
.
allocate
()
blocks
.
append
(
block
)
blocks
.
append
(
block
)
assert
block
not
in
cpu_allocator
.
free_blocks
assert
block
.
block_hash
not
in
cpu_allocator
.
evictor
# Free all allocated cpu blocks.
# Free all allocated cpu blocks.
num_free
=
0
num_free
=
0
...
@@ -47,7 +48,7 @@ def test_block_allocator_free():
...
@@ -47,7 +48,7 @@ def test_block_allocator_free():
for
block
in
blocks
:
for
block
in
blocks
:
cpu_allocator
.
free
(
block
)
cpu_allocator
.
free
(
block
)
num_free
+=
1
num_free
+=
1
assert
block
in
cpu_allocator
.
free_blocks
assert
block
.
block_hash
in
cpu_allocator
.
evictor
assert
cpu_allocator
.
get_num_free_blocks
()
==
num_free
assert
cpu_allocator
.
get_num_free_blocks
()
==
num_free
with
pytest
.
raises
(
ValueError
):
with
pytest
.
raises
(
ValueError
):
...
@@ -106,7 +107,7 @@ def test_append_slot_single_seq():
...
@@ -106,7 +107,7 @@ def test_append_slot_single_seq():
# Add block_size number of new tokens and append slot.
# Add block_size number of new tokens and append slot.
for
i
in
range
(
block_size
):
for
i
in
range
(
block_size
):
token_id
=
i
+
5
token_id
=
i
+
5
prompt
.
append_token_id
(
token_id
,
{
token_id
:
0.0
})
prompt
.
append_token_id
(
token_id
,
{
token_id
:
Logprob
(
0.0
)
})
assert
block_manager
.
can_append_slot
(
seq_group
)
assert
block_manager
.
can_append_slot
(
seq_group
)
before_blocks
=
block_manager
.
get_num_free_gpu_blocks
()
before_blocks
=
block_manager
.
get_num_free_gpu_blocks
()
...
@@ -119,25 +120,37 @@ def test_append_slot_cow():
...
@@ -119,25 +120,37 @@ def test_append_slot_cow():
block_size
=
4
block_size
=
4
num_cpu_blocks
=
4
num_cpu_blocks
=
4
num_gpu_blocks
=
4
num_gpu_blocks
=
4
block_manager
=
BlockSpaceManager
(
block_size
,
block_manager
=
BlockSpaceManager
(
block_size
=
block_size
,
num_cpu_blocks
,
num_cpu_blocks
=
num_cpu_blocks
,
num_gpu_blocks
,
num_gpu_blocks
=
num_gpu_blocks
,
watermark
=
0
)
watermark
=
0
)
# Allocate prompt to gpu block.
# Allocate prompt to gpu block. There is one slot left in the block.
prompt
=
Sequence
(
1
,
"one two three"
,
[
1
,
2
,
3
],
block_size
)
prompt
=
Sequence
(
seq_id
=
1
,
child
=
prompt
.
fork
(
2
)
prompt
=
"one two three"
,
token_id
=
4
prompt_token_ids
=
[
1
,
2
,
3
],
child
.
append_token_id
(
token_id
,
{
token_id
:
0.0
})
block_size
=
block_size
)
# Fork the sequence, such that a COW will be required when we append a new
# token id.
child
=
prompt
.
fork
(
new_seq_id
=
2
)
# Allocate space for the sequence group.
seq_group
=
SequenceGroup
(
"1"
,
[
prompt
,
child
],
SamplingParams
(),
seq_group
=
SequenceGroup
(
"1"
,
[
prompt
,
child
],
SamplingParams
(),
time
.
time
(),
time
.
perf_counter
)
time
.
time
(),
time
.
perf_counter
)
block_manager
.
allocate
(
seq_group
)
block_manager
.
allocate
(
seq_group
)
# Append slot for child token.
# Fork and append a new token id. We expect a COW to be scheduled.
# Last block being modified is shared. Copy on write occurs.
token_id
=
4
child
.
append_token_id
(
token_id
,
{
token_id
:
Logprob
(
0.0
)})
block_manager
.
fork
(
prompt
,
child
)
assert
block_manager
.
can_append_slot
(
seq_group
)
assert
block_manager
.
can_append_slot
(
seq_group
)
before_blocks
=
block_manager
.
get_num_free_gpu_blocks
()
before_blocks
=
block_manager
.
get_num_free_gpu_blocks
()
src_block
,
dst_block
=
block_manager
.
append_slot
(
child
)
maybe_src_dst_block
=
block_manager
.
append_slot
(
child
)
assert
maybe_src_dst_block
is
not
None
src_block
,
dst_block
=
maybe_src_dst_block
assert
src_block
!=
dst_block
assert
src_block
!=
dst_block
after_blocks
=
block_manager
.
get_num_free_gpu_blocks
()
after_blocks
=
block_manager
.
get_num_free_gpu_blocks
()
...
@@ -165,7 +178,7 @@ def test_fork():
...
@@ -165,7 +178,7 @@ def test_fork():
prompt
)
==
block_manager
.
get_block_table
(
child
)
prompt
)
==
block_manager
.
get_block_table
(
child
)
token_id
=
4
token_id
=
4
# Append token to child. Block is shared so copy on write occurs.
# Append token to child. Block is shared so copy on write occurs.
child
.
append_token_id
(
token_id
,
{
token_id
:
0.0
})
child
.
append_token_id
(
token_id
,
{
token_id
:
Logprob
(
0.0
)
})
block_manager
.
append_slot
(
child
)
block_manager
.
append_slot
(
child
)
assert
block_manager
.
get_block_table
(
assert
block_manager
.
get_block_table
(
prompt
)
!=
block_manager
.
get_block_table
(
child
)
prompt
)
!=
block_manager
.
get_block_table
(
child
)
...
@@ -189,7 +202,7 @@ def test_swap():
...
@@ -189,7 +202,7 @@ def test_swap():
# tokens will be written in the next forward pass.
# tokens will be written in the next forward pass.
token_id
=
0
token_id
=
0
prompt
.
status
=
SequenceStatus
.
RUNNING
prompt
.
status
=
SequenceStatus
.
RUNNING
prompt
.
append_token_id
(
token_id
,
{
token_id
:
0.0
})
prompt
.
append_token_id
(
token_id
,
{
token_id
:
Logprob
(
0.0
)
})
# Swap seq group from GPU -> CPU.
# Swap seq group from GPU -> CPU.
gpu_blocks
=
block_manager
.
get_block_table
(
prompt
)
gpu_blocks
=
block_manager
.
get_block_table
(
prompt
)
...
...
tests/core/test_scheduler.py
View file @
a33ce60c
...
@@ -3,7 +3,7 @@ import pytest # noqa
...
@@ -3,7 +3,7 @@ import pytest # noqa
from
vllm.config
import
CacheConfig
,
SchedulerConfig
from
vllm.config
import
CacheConfig
,
SchedulerConfig
from
vllm.core.scheduler
import
Scheduler
from
vllm.core.scheduler
import
Scheduler
from
vllm.sequence
import
SequenceGroup
from
vllm.sequence
import
SequenceGroup
,
Logprob
from
.utils
import
create_dummy_prompt
from
.utils
import
create_dummy_prompt
...
@@ -108,8 +108,8 @@ def test_scheduler_schedule_preempt_abort():
...
@@ -108,8 +108,8 @@ def test_scheduler_schedule_preempt_abort():
# Append "generated" tokens, allowing the sequence to mark prompt tokens as
# Append "generated" tokens, allowing the sequence to mark prompt tokens as
# processed.
# processed.
token_id
=
0
token_id
=
0
seq_a
.
append_token_id
(
token_id
,
{
token_id
:
0.0
})
seq_a
.
append_token_id
(
token_id
,
{
token_id
:
Logprob
(
0.0
)
})
seq_b
.
append_token_id
(
token_id
,
{
token_id
:
0.0
})
seq_b
.
append_token_id
(
token_id
,
{
token_id
:
Logprob
(
0.0
)
})
# Schedule seq groups generation and preempt seq group b.
# Schedule seq groups generation and preempt seq group b.
seq_group_meta
,
out
=
scheduler
.
schedule
()
seq_group_meta
,
out
=
scheduler
.
schedule
()
...
...
tests/core/utils.py
View file @
a33ce60c
...
@@ -18,7 +18,7 @@ def create_dummy_prompt(
...
@@ -18,7 +18,7 @@ def create_dummy_prompt(
prompt_str
=
" "
.
join
([
str
(
t
)
for
t
in
prompt_tokens
])
prompt_str
=
" "
.
join
([
str
(
t
)
for
t
in
prompt_tokens
])
prompt
=
Sequence
(
int
(
request_id
),
prompt_str
,
prompt_tokens
,
block_size
)
prompt
=
Sequence
(
int
(
request_id
),
prompt_str
,
prompt_tokens
,
block_size
)
seq_group
=
SequenceGroup
(
request_id
,
[
prompt
],
SamplingParams
(),
seq_group
=
SequenceGroup
(
request_id
,
[
prompt
],
SamplingParams
(),
time
.
time
(),
None
,
None
)
time
.
time
(),
None
)
return
prompt
,
seq_group
return
prompt
,
seq_group
...
...
vllm/sequence.py
View file @
a33ce60c
...
@@ -142,7 +142,7 @@ class Sequence:
...
@@ -142,7 +142,7 @@ class Sequence:
prompt
:
str
,
prompt
:
str
,
prompt_token_ids
:
List
[
int
],
prompt_token_ids
:
List
[
int
],
block_size
:
int
,
block_size
:
int
,
eos_token_id
:
int
,
eos_token_id
:
Optional
[
int
]
=
None
,
lora_request
:
Optional
[
LoRARequest
]
=
None
,
lora_request
:
Optional
[
LoRARequest
]
=
None
,
)
->
None
:
)
->
None
:
self
.
seq_id
=
seq_id
self
.
seq_id
=
seq_id
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment