Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
66b809cc
Commit
66b809cc
authored
Feb 08, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.7.2' into v0.7.2-dev
parents
37b63c24
0408efc6
Changes
1000
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
109 additions
and
0 deletions
+109
-0
tests/v1/test_stats.py
tests/v1/test_stats.py
+2
-0
tests/v1/test_utils.py
tests/v1/test_utils.py
+2
-0
tests/v1/worker/test_gpu_input_batch.py
tests/v1/worker/test_gpu_input_batch.py
+2
-0
tests/vllm_test_utils/setup.py
tests/vllm_test_utils/setup.py
+2
-0
tests/vllm_test_utils/vllm_test_utils/__init__.py
tests/vllm_test_utils/vllm_test_utils/__init__.py
+1
-0
tests/vllm_test_utils/vllm_test_utils/blame.py
tests/vllm_test_utils/vllm_test_utils/blame.py
+2
-0
tests/vllm_test_utils/vllm_test_utils/monitor.py
tests/vllm_test_utils/vllm_test_utils/monitor.py
+2
-0
tests/weight_loading/test_weight_loading.py
tests/weight_loading/test_weight_loading.py
+2
-0
tests/worker/test_encoder_decoder_model_runner.py
tests/worker/test_encoder_decoder_model_runner.py
+2
-0
tests/worker/test_model_input.py
tests/worker/test_model_input.py
+2
-0
tests/worker/test_model_runner.py
tests/worker/test_model_runner.py
+11
-0
tests/worker/test_profile.py
tests/worker/test_profile.py
+2
-0
tests/worker/test_swap.py
tests/worker/test_swap.py
+2
-0
tools/check_spdx_header.py
tools/check_spdx_header.py
+50
-0
tools/profiler/print_layerwise_table.py
tools/profiler/print_layerwise_table.py
+2
-0
tools/profiler/visualize_layerwise_profile.py
tools/profiler/visualize_layerwise_profile.py
+2
-0
tools/report_build_time_ninja.py
tools/report_build_time_ninja.py
+2
-0
use_existing_torch.py
use_existing_torch.py
+2
-0
vllm/__init__.py
vllm/__init__.py
+1
-0
vllm/_custom_ops.py
vllm/_custom_ops.py
+16
-0
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
tests/v1/test_stats.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
pytest
from
vllm.sampling_params
import
SamplingParams
...
...
tests/v1/test_utils.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
import
torch
...
...
tests/v1/worker/test_gpu_input_batch.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Dict
,
List
,
Set
,
Tuple
import
numpy
as
np
...
...
tests/vllm_test_utils/setup.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
setuptools
import
setup
setup
(
...
...
tests/vllm_test_utils/vllm_test_utils/__init__.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
vllm_utils is a package for vLLM testing utilities.
It does not import any vLLM modules.
...
...
tests/vllm_test_utils/vllm_test_utils/blame.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
contextlib
import
dataclasses
import
sys
...
...
tests/vllm_test_utils/vllm_test_utils/monitor.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
contextlib
import
dataclasses
import
sys
...
...
tests/weight_loading/test_weight_loading.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
os
import
pytest
...
...
tests/worker/test_encoder_decoder_model_runner.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
itertools
from
typing
import
List
...
...
tests/worker/test_model_input.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
dataclasses
from
typing
import
List
,
Tuple
,
Type
...
...
tests/worker/test_model_runner.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
import
pytest
...
...
@@ -25,6 +27,15 @@ def _create_model_runner(model: str, *args, **kwargs) -> ModelRunner:
return
model_runner
def test_deepseek_mla_attn_backend_module():
    """Check which attention backend a DeepSeek model runner selects.

    A model runner is created for
    ``deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct`` with remote code trusted
    and chunked prefill disabled; the class name of its ``attn_backend`` must
    be ``TritonMLABackend``.
    """
    runner = _create_model_runner(
        "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
        trust_remote_code=True,
        enable_chunked_prefill=False,
    )
    backend_name = runner.attn_backend.__name__
    assert backend_name == "TritonMLABackend"
@
pytest
.
mark
.
parametrize
(
"batch_size"
,
list
(
range
(
1
,
257
)))
def
test_prepare_prompt
(
batch_size
):
model_runner
=
_create_model_runner
(
...
...
tests/worker/test_profile.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
os
import
torch
...
...
tests/worker/test_swap.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
torch
import
os
...
...
tools/check_spdx_header.py
0 → 100644
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
sys
# Prefix that marks any SPDX license line, whatever license it names.
SPDX_HEADER_PREFIX = "# SPDX-License-Identifier:"
# Full header line inserted into files that have no SPDX license line.
SPDX_HEADER = SPDX_HEADER_PREFIX + " Apache-2.0"
def check_spdx_header(file_path):
    """Report whether ``file_path`` already carries an SPDX license line.

    The file is read as UTF-8. An empty file counts as compliant; any other
    file is compliant when at least one of its lines, with surrounding
    whitespace stripped, begins with ``SPDX_HEADER_PREFIX``.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        ``True`` if the file is empty or contains such a line, ``False``
        otherwise.
    """
    with open(file_path, encoding='UTF-8') as handle:
        contents = handle.readlines()

    if len(contents) == 0:
        # Empty file like __init__.py
        return True

    return any(
        text.strip().startswith(SPDX_HEADER_PREFIX) for text in contents)
def add_header(file_path):
    """Insert ``SPDX_HEADER`` as a new line near the top of ``file_path``.

    The file is opened for in-place update as UTF-8 and rewritten from the
    start. If the first line is a shebang (``#!``), the header is placed
    directly after it so the shebang stays on line one; otherwise the header
    becomes the first line. All other lines are written back unchanged.

    Args:
        file_path: Path of the file to modify in place.
    """
    header_line = SPDX_HEADER + '\n'
    with open(file_path, 'r+', encoding='UTF-8') as file:
        lines = file.readlines()

        if lines and lines[0].startswith("#!"):
            shebang = lines[0]
            # A shebang that is also the last line of the file may lack a
            # trailing newline; without one the header would be appended to
            # the shebang line itself instead of starting a new line.
            if not shebang.endswith('\n'):
                shebang += '\n'
            updated = [shebang, header_line, *lines[1:]]
        else:
            updated = [header_line, *lines]

        # The new content is always longer than the old, so rewriting from
        # offset 0 overwrites everything that was there before.
        file.seek(0, 0)
        file.writelines(updated)
def main():
    """Check every path given on the command line and fix missing headers.

    Each path in ``sys.argv[1:]`` is tested with ``check_spdx_header``. If
    all pass, the process exits with status 0. Otherwise the offending paths
    are printed, ``add_header`` is applied to each of them, and the process
    exits with status 1.
    """
    missing = [
        candidate for candidate in sys.argv[1:]
        if not check_spdx_header(candidate)
    ]

    if not missing:
        sys.exit(0)

    print("The following files are missing the SPDX header:")
    for candidate in missing:
        print(f"{candidate}")
        add_header(candidate)

    # Non-zero status even though the files were just fixed.
    sys.exit(1)


if __name__ == "__main__":
    main()
tools/profiler/print_layerwise_table.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
argparse
import
json
from
typing
import
Dict
...
...
tools/profiler/visualize_layerwise_profile.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
argparse
import
copy
import
json
...
...
tools/report_build_time_ninja.py
View file @
66b809cc
#!/usr/bin/env python3
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
...
...
use_existing_torch.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
glob
requires_files
=
glob
.
glob
(
'requirements*.txt'
)
...
...
vllm/__init__.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""
import
os
...
...
vllm/_custom_ops.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
contextlib
import
importlib
from
typing
import
TYPE_CHECKING
,
List
,
Optional
,
Tuple
,
Union
,
Type
...
...
@@ -1369,6 +1371,15 @@ def moe_align_block_size(topk_ids: torch.Tensor, num_experts: int,
num_tokens_post_pad
)
def sgl_moe_align_block_size(topk_ids: torch.Tensor, num_experts: int,
                             block_size: int, sorted_token_ids: torch.Tensor,
                             experts_ids: torch.Tensor,
                             num_tokens_post_pad: torch.Tensor) -> None:
    """Call the ``sgl_moe_align_block_size`` op from ``torch.ops._moe_C``.

    All six arguments are forwarded positionally, in signature order, and
    nothing is returned.

    NOTE(review): since the wrapper returns ``None``, the op presumably
    writes its results into ``sorted_token_ids``, ``experts_ids`` and
    ``num_tokens_post_pad`` -- confirm against the kernel.
    """
    align_op = torch.ops._moe_C.sgl_moe_align_block_size
    align_op(topk_ids, num_experts, block_size, sorted_token_ids, experts_ids,
             num_tokens_post_pad)
def
topk_softmax
(
topk_weights
:
torch
.
Tensor
,
topk_ids
:
torch
.
Tensor
,
token_expert_indicies
:
torch
.
Tensor
,
gating_output
:
float
)
->
None
:
...
...
@@ -1445,6 +1456,11 @@ def copy_blocks(key_caches: List[torch.Tensor],
torch
.
ops
.
_C_cache_ops
.
copy_blocks
(
key_caches
,
value_caches
,
block_mapping
)
def copy_blocks_mla(kv_caches: List[torch.Tensor],
                    block_mapping: torch.Tensor) -> None:
    """Call the ``copy_blocks_mla`` op from ``torch.ops._C_cache_ops``.

    ``kv_caches`` and ``block_mapping`` are forwarded unchanged and nothing
    is returned.

    NOTE(review): presumably the caches are modified in place according to
    ``block_mapping`` -- confirm against the kernel.
    """
    cache_ops = torch.ops._C_cache_ops
    cache_ops.copy_blocks_mla(kv_caches, block_mapping)
def
swap_blocks
(
src
:
torch
.
Tensor
,
dst
:
torch
.
Tensor
,
block_mapping
:
torch
.
Tensor
)
->
None
:
torch
.
ops
.
_C_cache_ops
.
swap_blocks
(
src
,
dst
,
block_mapping
)
...
...
Prev
1
…
21
22
23
24
25
26
27
28
29
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment