Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
84dfdb17
Commit
84dfdb17
authored
Jul 31, 2025
by
zhuwenwen
Browse files
remove unused code
parent
f137e58c
Changes
45
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
2 additions
and
1326 deletions
+2
-1326
tests/spec_decode/test_ngram_worker.py
tests/spec_decode/test_ngram_worker.py
+0
-223
tests/spec_decode/test_scorer.py
tests/spec_decode/test_scorer.py
+0
-124
tests/spec_decode/test_spec_decode_worker.py
tests/spec_decode/test_spec_decode_worker.py
+0
-947
tests/test_regression.py
tests/test_regression.py
+2
-2
tests/utils.py
tests/utils.py
+0
-30
No files found.
tests/spec_decode/test_ngram_worker.py
deleted
100644 → 0
View file @
f137e58c
This diff is collapsed.
Click to expand it.
tests/spec_decode/test_scorer.py
deleted
100644 → 0
View file @
f137e58c
This diff is collapsed.
Click to expand it.
tests/spec_decode/test_spec_decode_worker.py
deleted
100644 → 0
View file @
f137e58c
This diff is collapsed.
Click to expand it.
tests/test_regression.py
View file @
84dfdb17
...
...
@@ -39,7 +39,7 @@ def test_max_tokens_none():
sampling_params
=
SamplingParams
(
temperature
=
0.01
,
top_p
=
0.1
,
max_tokens
=
None
)
if
gpuname
.
startswith
(
'BW'
)
and
envs
.
VLLM_FLASH_ATTN_BACKEND
:
if
gpuname
.
startswith
(
'BW'
):
llm
=
LLM
(
model
=
os
.
path
.
join
(
models_path_prefix
,
"distilbert/distilgpt2"
),
max_num_batched_tokens
=
4096
,
tensor_parallel_size
=
1
,
...
...
@@ -75,7 +75,7 @@ def test_model_from_modelscope(monkeypatch: pytest.MonkeyPatch):
# Don't use HF_TOKEN for ModelScope repos, otherwise it will fail
# with 400 Client Error: Bad Request.
m
.
setenv
(
"HF_TOKEN"
,
""
)
if
gpuname
.
startswith
(
'BW'
)
and
envs
.
VLLM_FLASH_ATTN_
BACKEND
:
if
envs
.
VLLM_
USE_
FLASH_ATTN_
PA
:
llm
=
LLM
(
model
=
os
.
path
.
join
(
models_path_prefix
,
"qwen/Qwen1.5-0.5B-Chat"
),
block_size
=
64
)
else
:
llm
=
LLM
(
model
=
os
.
path
.
join
(
models_path_prefix
,
"qwen/Qwen1.5-0.5B-Chat"
))
...
...
tests/utils.py
View file @
84dfdb17
...
...
@@ -769,36 +769,6 @@ def fork_new_process_for_each_test(
return
wrapper
def large_gpu_test(*, min_gb: int):
    """
    Decorate a test to be skipped if no GPU is available or it does not have
    sufficient memory.

    Currently, the CI machine uses L4 GPU which has 24 GB VRAM.

    Args:
        min_gb: Minimum amount of device memory, in GB, that the test needs.

    Returns:
        A decorator that wraps the test with
        ``fork_new_process_for_each_test`` and applies a
        ``pytest.mark.skipif`` mark that fires when the detected memory is
        below ``min_gb``.
    """
    try:
        if current_platform.is_cpu():
            memory_gb = 0
        else:
            memory_gb = current_platform.get_device_total_memory() / GB_bytes
    except Exception as e:
        # Best effort: if the memory query fails, warn and treat the device
        # as having no memory so the test is skipped instead of erroring.
        warnings.warn(
            f"An error occurred when finding the available memory: {e}",
            stacklevel=2,
        )
        memory_gb = 0

    test_skipif = pytest.mark.skipif(
        memory_gb < min_gb,
        # Report the requirement (min_gb), not the detected amount; the
        # detected amount is 0 on CPU or on error, which made the old
        # message read "Need at least 0...".
        reason=f"Need at least {min_gb}GB GPU memory to run the test.",
    )

    def wrapper(f: Callable[_P, None]) -> Callable[_P, None]:
        # Skip mark is applied on top of the forked-process wrapper.
        return test_skipif(fork_new_process_for_each_test(f))

    return wrapper
def
spawn_new_process_for_each_test
(
f
:
Callable
[
_P
,
None
])
->
Callable
[
_P
,
None
]:
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment