Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
66b809cc
Commit
66b809cc
authored
Feb 08, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.7.2' into v0.7.2-dev
parents
37b63c24
0408efc6
Changes
1000
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
53 additions
and
6 deletions
+53
-6
tests/compile/piecewise/test_toy_llama.py
tests/compile/piecewise/test_toy_llama.py
+1
-0
tests/compile/test_basic_correctness.py
tests/compile/test_basic_correctness.py
+2
-0
tests/compile/test_full_graph.py
tests/compile/test_full_graph.py
+2
-0
tests/compile/test_functionalization.py
tests/compile/test_functionalization.py
+2
-0
tests/compile/test_fusion.py
tests/compile/test_fusion.py
+2
-0
tests/compile/test_pass_manager.py
tests/compile/test_pass_manager.py
+2
-0
tests/compile/test_wrapper.py
tests/compile/test_wrapper.py
+2
-0
tests/compile/utils.py
tests/compile/utils.py
+2
-0
tests/conftest.py
tests/conftest.py
+14
-4
tests/core/block/conftest.py
tests/core/block/conftest.py
+2
-0
tests/core/block/e2e/conftest.py
tests/core/block/e2e/conftest.py
+2
-0
tests/core/block/e2e/test_correctness.py
tests/core/block/e2e/test_correctness.py
+2
-0
tests/core/block/e2e/test_correctness_sliding_window.py
tests/core/block/e2e/test_correctness_sliding_window.py
+2
-0
tests/core/block/test_block_manager.py
tests/core/block/test_block_manager.py
+2
-0
tests/core/block/test_block_table.py
tests/core/block/test_block_table.py
+2
-0
tests/core/block/test_common.py
tests/core/block/test_common.py
+2
-0
tests/core/block/test_cpu_gpu_block_allocator.py
tests/core/block/test_cpu_gpu_block_allocator.py
+2
-0
tests/core/block/test_naive_block.py
tests/core/block/test_naive_block.py
+2
-0
tests/core/block/test_prefix_caching_block.py
tests/core/block/test_prefix_caching_block.py
+4
-2
tests/core/test_chunked_prefill_scheduler.py
tests/core/test_chunked_prefill_scheduler.py
+2
-0
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
tests/compile/piecewise/test_toy_llama.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
"""
Test the piecewise compilation with a simple model, comparing the output
Test the piecewise compilation with a simple model, comparing the output
with and without the piecewise compilation.
with and without the piecewise compilation.
...
...
tests/compile/test_basic_correctness.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
dataclasses
import
dataclasses
from
typing
import
Dict
,
List
,
Optional
from
typing
import
Dict
,
List
,
Optional
...
...
tests/compile/test_full_graph.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
pytest
import
pytest
from
vllm.config
import
CompilationLevel
from
vllm.config
import
CompilationLevel
...
...
tests/compile/test_functionalization.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
os
import
os
import
pytest
import
pytest
import
torch
import
torch
...
...
tests/compile/test_fusion.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
pytest
import
pytest
import
torch
import
torch
from
compressed_tensors.quantization
import
FP8_DTYPE
from
compressed_tensors.quantization
import
FP8_DTYPE
...
...
tests/compile/test_pass_manager.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
pickle
import
pickle
import
pytest
import
pytest
...
...
tests/compile/test_wrapper.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Optional
from
typing
import
Optional
import
torch
import
torch
...
...
tests/compile/utils.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
os
import
os
import
torch
import
torch
...
...
tests/conftest.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
json
import
json
import
os
import
os
import
tempfile
import
tempfile
...
@@ -738,6 +740,7 @@ class VllmRunner:
...
@@ -738,6 +740,7 @@ class VllmRunner:
images
:
Optional
[
PromptImageInput
]
=
None
,
images
:
Optional
[
PromptImageInput
]
=
None
,
videos
:
Optional
[
PromptVideoInput
]
=
None
,
videos
:
Optional
[
PromptVideoInput
]
=
None
,
audios
:
Optional
[
PromptAudioInput
]
=
None
,
audios
:
Optional
[
PromptAudioInput
]
=
None
,
**
kwargs
:
Any
,
)
->
List
[
Tuple
[
List
[
List
[
int
]],
List
[
str
]]]:
)
->
List
[
Tuple
[
List
[
List
[
int
]],
List
[
str
]]]:
inputs
=
self
.
get_inputs
(
prompts
,
inputs
=
self
.
get_inputs
(
prompts
,
images
=
images
,
images
=
images
,
...
@@ -745,7 +748,8 @@ class VllmRunner:
...
@@ -745,7 +748,8 @@ class VllmRunner:
audios
=
audios
)
audios
=
audios
)
req_outputs
=
self
.
model
.
generate
(
inputs
,
req_outputs
=
self
.
model
.
generate
(
inputs
,
sampling_params
=
sampling_params
)
sampling_params
=
sampling_params
,
**
kwargs
)
outputs
:
List
[
Tuple
[
List
[
List
[
int
]],
List
[
str
]]]
=
[]
outputs
:
List
[
Tuple
[
List
[
List
[
int
]],
List
[
str
]]]
=
[]
for
req_output
in
req_outputs
:
for
req_output
in
req_outputs
:
...
@@ -783,6 +787,7 @@ class VllmRunner:
...
@@ -783,6 +787,7 @@ class VllmRunner:
images
:
Optional
[
PromptImageInput
]
=
None
,
images
:
Optional
[
PromptImageInput
]
=
None
,
audios
:
Optional
[
PromptAudioInput
]
=
None
,
audios
:
Optional
[
PromptAudioInput
]
=
None
,
videos
:
Optional
[
PromptVideoInput
]
=
None
,
videos
:
Optional
[
PromptVideoInput
]
=
None
,
**
kwargs
:
Any
,
)
->
Union
[
List
[
TokensTextLogprobs
],
)
->
Union
[
List
[
TokensTextLogprobs
],
List
[
TokensTextLogprobsPromptLogprobs
]]:
List
[
TokensTextLogprobsPromptLogprobs
]]:
inputs
=
self
.
get_inputs
(
prompts
,
inputs
=
self
.
get_inputs
(
prompts
,
...
@@ -791,7 +796,8 @@ class VllmRunner:
...
@@ -791,7 +796,8 @@ class VllmRunner:
audios
=
audios
)
audios
=
audios
)
req_outputs
=
self
.
model
.
generate
(
inputs
,
req_outputs
=
self
.
model
.
generate
(
inputs
,
sampling_params
=
sampling_params
)
sampling_params
=
sampling_params
,
**
kwargs
)
toks_str_logsprobs_prompt_logprobs
=
(
toks_str_logsprobs_prompt_logprobs
=
(
self
.
_final_steps_generate_w_logprobs
(
req_outputs
))
self
.
_final_steps_generate_w_logprobs
(
req_outputs
))
...
@@ -827,13 +833,15 @@ class VllmRunner:
...
@@ -827,13 +833,15 @@ class VllmRunner:
images
:
Optional
[
PromptImageInput
]
=
None
,
images
:
Optional
[
PromptImageInput
]
=
None
,
videos
:
Optional
[
PromptVideoInput
]
=
None
,
videos
:
Optional
[
PromptVideoInput
]
=
None
,
audios
:
Optional
[
PromptAudioInput
]
=
None
,
audios
:
Optional
[
PromptAudioInput
]
=
None
,
**
kwargs
:
Any
,
)
->
List
[
Tuple
[
List
[
int
],
str
]]:
)
->
List
[
Tuple
[
List
[
int
],
str
]]:
greedy_params
=
SamplingParams
(
temperature
=
0.0
,
max_tokens
=
max_tokens
)
greedy_params
=
SamplingParams
(
temperature
=
0.0
,
max_tokens
=
max_tokens
)
outputs
=
self
.
generate
(
prompts
,
outputs
=
self
.
generate
(
prompts
,
greedy_params
,
greedy_params
,
images
=
images
,
images
=
images
,
videos
=
videos
,
videos
=
videos
,
audios
=
audios
)
audios
=
audios
,
**
kwargs
)
return
[(
output_ids
[
0
],
output_str
[
0
])
return
[(
output_ids
[
0
],
output_str
[
0
])
for
output_ids
,
output_str
in
outputs
]
for
output_ids
,
output_str
in
outputs
]
...
@@ -848,6 +856,7 @@ class VllmRunner:
...
@@ -848,6 +856,7 @@ class VllmRunner:
videos
:
Optional
[
PromptVideoInput
]
=
None
,
videos
:
Optional
[
PromptVideoInput
]
=
None
,
stop_token_ids
:
Optional
[
List
[
int
]]
=
None
,
stop_token_ids
:
Optional
[
List
[
int
]]
=
None
,
stop
:
Optional
[
List
[
str
]]
=
None
,
stop
:
Optional
[
List
[
str
]]
=
None
,
**
kwargs
:
Any
,
)
->
Union
[
List
[
TokensTextLogprobs
],
)
->
Union
[
List
[
TokensTextLogprobs
],
List
[
TokensTextLogprobsPromptLogprobs
]]:
List
[
TokensTextLogprobsPromptLogprobs
]]:
greedy_logprobs_params
=
SamplingParams
(
greedy_logprobs_params
=
SamplingParams
(
...
@@ -862,7 +871,8 @@ class VllmRunner:
...
@@ -862,7 +871,8 @@ class VllmRunner:
greedy_logprobs_params
,
greedy_logprobs_params
,
images
=
images
,
images
=
images
,
audios
=
audios
,
audios
=
audios
,
videos
=
videos
)
videos
=
videos
,
**
kwargs
)
def
generate_encoder_decoder_greedy_logprobs
(
def
generate_encoder_decoder_greedy_logprobs
(
self
,
self
,
...
...
tests/core/block/conftest.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
pytest
import
pytest
...
...
tests/core/block/e2e/conftest.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Callable
,
Iterable
,
Optional
from
typing
import
Callable
,
Iterable
,
Optional
import
pytest
import
pytest
...
...
tests/core/block/e2e/test_correctness.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
itertools
import
cycle
from
itertools
import
cycle
import
pytest
import
pytest
...
...
tests/core/block/e2e/test_correctness_sliding_window.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
random
import
random
from
typing
import
List
from
typing
import
List
...
...
tests/core/block/test_block_manager.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
pytest
import
pytest
from
vllm.core.block.utils
import
(
STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE
,
from
vllm.core.block.utils
import
(
STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE
,
...
...
tests/core/block/test_block_table.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
from
typing
import
List
import
pytest
import
pytest
...
...
tests/core/block/test_common.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
random
import
random
import
pytest
import
pytest
...
...
tests/core/block/test_cpu_gpu_block_allocator.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
pytest
import
pytest
from
vllm.core.block.cpu_gpu_block_allocator
import
CpuGpuBlockAllocator
from
vllm.core.block.cpu_gpu_block_allocator
import
CpuGpuBlockAllocator
...
...
tests/core/block/test_naive_block.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Optional
from
typing
import
List
,
Optional
import
pytest
import
pytest
...
...
tests/core/block/test_prefix_caching_block.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
math
import
math
import
random
import
random
from
typing
import
List
,
Optional
from
typing
import
List
,
Optional
...
@@ -63,8 +65,8 @@ class TestPrefixCachingBlock:
...
@@ -63,8 +65,8 @@ class TestPrefixCachingBlock:
previous_block
=
MagicMock
(
spec
=
PrefixCachingBlock
)
previous_block
=
MagicMock
(
spec
=
PrefixCachingBlock
)
prev_block_hash
=
random
.
randint
(
0
,
1000
)
prev_block_hash
=
random
.
randint
(
0
,
1000
)
previous_block
.
content_hash
=
(
prev_block_hash
previous_block
.
content_hash
=
(
prev_block_hash
if
prev_block_has_hash
if
prev_block_has_hash
else
None
)
else
hash
(
'
None
'
)
)
num_to_fill
=
block_size
if
is_curr_block_full
else
random
.
randint
(
num_to_fill
=
block_size
if
is_curr_block_full
else
random
.
randint
(
0
,
block_size
-
1
)
0
,
block_size
-
1
)
...
...
tests/core/test_chunked_prefill_scheduler.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
from
typing
import
List
from
unittest.mock
import
MagicMock
from
unittest.mock
import
MagicMock
...
...
Prev
1
…
4
5
6
7
8
9
10
11
12
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment