Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
308e5937
Commit
308e5937
authored
Nov 21, 2024
by
zhuwenwen
Browse files
remove unused backend
parent
00f18159
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
10 additions
and
6 deletions
+10
-6
tests/metrics/test_metrics.py
tests/metrics/test_metrics.py
+4
-3
tests/multi_step/test_correctness_async_llm.py
tests/multi_step/test_correctness_async_llm.py
+2
-1
tests/prefix_caching/test_prefix_caching.py
tests/prefix_caching/test_prefix_caching.py
+2
-1
tests/samplers/test_rejection_sampler.py
tests/samplers/test_rejection_sampler.py
+2
-1
No files found.
tests/metrics/test_metrics.py
View file @
308e5937
...
@@ -10,6 +10,7 @@ from vllm.engine.arg_utils import AsyncEngineArgs
...
@@ -10,6 +10,7 @@ from vllm.engine.arg_utils import AsyncEngineArgs
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.engine.metrics
import
RayPrometheusStatLogger
from
vllm.engine.metrics
import
RayPrometheusStatLogger
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
import
vllm.envs
as
envs
from
..conftest
import
cleanup
from
..conftest
import
cleanup
...
@@ -19,7 +20,7 @@ MODELS = [
...
@@ -19,7 +20,7 @@ MODELS = [
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"float"
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
(
"float"
if
envs
.
VLLM_USE_TRITON_FLASH_ATTN
else
"half"
)
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
128
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
128
])
def
test_metric_counter_prompt_tokens
(
def
test_metric_counter_prompt_tokens
(
vllm_runner
,
vllm_runner
,
...
@@ -54,7 +55,7 @@ def test_metric_counter_prompt_tokens(
...
@@ -54,7 +55,7 @@ def test_metric_counter_prompt_tokens(
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"float"
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
(
"float"
if
envs
.
VLLM_USE_TRITON_FLASH_ATTN
else
"half"
)
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
128
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
128
])
def
test_metric_counter_generation_tokens
(
def
test_metric_counter_generation_tokens
(
vllm_runner
,
vllm_runner
,
...
@@ -86,7 +87,7 @@ def test_metric_counter_generation_tokens(
...
@@ -86,7 +87,7 @@ def test_metric_counter_generation_tokens(
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"float"
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
(
"float"
if
envs
.
VLLM_USE_TRITON_FLASH_ATTN
else
"half"
)
])
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
"served_model_name"
,
"served_model_name"
,
[
None
,
[],
[
"ModelName0"
],
[
"ModelName0"
,
"ModelName1"
,
"ModelName2"
]])
[
None
,
[],
[
"ModelName0"
],
[
"ModelName0"
,
"ModelName1"
,
"ModelName2"
]])
...
...
tests/multi_step/test_correctness_async_llm.py
View file @
308e5937
...
@@ -36,7 +36,8 @@ DEFAULT_SERVER_ARGS: List[str] = [
...
@@ -36,7 +36,8 @@ DEFAULT_SERVER_ARGS: List[str] = [
@
pytest
.
mark
.
parametrize
(
"num_prompts"
,
NUM_PROMPTS
)
@
pytest
.
mark
.
parametrize
(
"num_prompts"
,
NUM_PROMPTS
)
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
@
pytest
.
mark
.
parametrize
(
"is_async"
,
[
True
])
@
pytest
.
mark
.
parametrize
(
"is_async"
,
[
True
])
@
pytest
.
mark
.
parametrize
(
"attention_backend"
,
[
"FLASHINFER"
,
"FLASH_ATTN"
])
# @pytest.mark.parametrize("attention_backend", ["FLASHINFER", "FLASH_ATTN"])
@
pytest
.
mark
.
parametrize
(
"attention_backend"
,
[
"FLASH_ATTN"
])
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
async
def
test_multi_step
(
async
def
test_multi_step
(
example_prompts
,
example_prompts
,
...
...
tests/prefix_caching/test_prefix_caching.py
View file @
308e5937
...
@@ -93,7 +93,8 @@ def test_eviction(num_blocks: int, ):
...
@@ -93,7 +93,8 @@ def test_eviction(num_blocks: int, ):
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"backend"
,
[
"FLASH_ATTN"
,
"FLASHINFER"
,
"XFORMERS"
])
# @pytest.mark.parametrize("backend", ["FLASH_ATTN", "FLASHINFER", "XFORMERS"])
@
pytest
.
mark
.
parametrize
(
"backend"
,
[
"FLASH_ATTN"
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
5
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
5
])
@
pytest
.
mark
.
parametrize
(
"cached_position"
,
[
0
,
1
])
@
pytest
.
mark
.
parametrize
(
"cached_position"
,
[
0
,
1
])
...
...
tests/samplers/test_rejection_sampler.py
View file @
308e5937
...
@@ -240,7 +240,8 @@ def test_compare_nonflashinfer_backend(k: int, vocab_size: int,
...
@@ -240,7 +240,8 @@ def test_compare_nonflashinfer_backend(k: int, vocab_size: int,
for
i
in
range
(
batch_size
)
for
i
in
range
(
batch_size
)
}
}
for
use_flashinfer
in
[
True
,
False
]:
# for use_flashinfer in [True, False]:
for
use_flashinfer
in
[
False
]:
rejection_sampler
=
RejectionSampler
(
use_flashinfer
=
use_flashinfer
)
rejection_sampler
=
RejectionSampler
(
use_flashinfer
=
use_flashinfer
)
rejection_sampler
.
init_gpu_tensors
(
device
=
device
)
rejection_sampler
.
init_gpu_tensors
(
device
=
device
)
# We use seeded sequences to ensure the same tokens are accepted
# We use seeded sequences to ensure the same tokens are accepted
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment