Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
04629132
Commit
04629132
authored
Jun 12, 2025
by
zhuwenwen
Browse files
[tests] fix tests
parent
07c69390
Changes
52
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
150 additions
and
135 deletions
+150
-135
tests/models/multimodal/processing/test_llava_onevision.py
tests/models/multimodal/processing/test_llava_onevision.py
+5
-3
tests/models/multimodal/processing/test_mllama.py
tests/models/multimodal/processing/test_mllama.py
+4
-2
tests/models/multimodal/processing/test_phi3v.py
tests/models/multimodal/processing/test_phi3v.py
+4
-2
tests/models/multimodal/processing/test_phi4mm.py
tests/models/multimodal/processing/test_phi4mm.py
+4
-2
tests/models/multimodal/processing/test_qwen2_vl.py
tests/models/multimodal/processing/test_qwen2_vl.py
+4
-2
tests/models/multimodal/processing/test_smolvlm.py
tests/models/multimodal/processing/test_smolvlm.py
+4
-2
tests/models/registry.py
tests/models/registry.py
+2
-2
tests/models/test_registry.py
tests/models/test_registry.py
+13
-10
tests/quantization/test_cpu_offload.py
tests/quantization/test_cpu_offload.py
+78
-78
tests/test_embedded_commit.py
tests/test_embedded_commit.py
+8
-8
tests/test_seed_behavior.py
tests/test_seed_behavior.py
+24
-24
tests/weight_loading/run_model_weight_loading_test.sh
tests/weight_loading/run_model_weight_loading_test.sh
+0
-0
No files found.
tests/models/multimodal/processing/test_llava_onevision.py
View file @
04629132
...
...
@@ -3,6 +3,7 @@
import
itertools
from
functools
import
partial
import
os
import
pytest
from
PIL
import
Image
from
pqdm.threads
import
pqdm
...
...
@@ -12,6 +13,7 @@ from vllm.multimodal.parse import ImageSize
from
vllm.multimodal.processing
import
BaseMultiModalProcessor
from
...utils
import
build_model_context
from
....utils
import
models_path_prefix
def
_validate_image_max_tokens_one
(
...
...
@@ -33,7 +35,7 @@ def _validate_image_max_tokens_one(
@
pytest
.
mark
.
skip
(
"This test takes around 5 minutes to run. "
"Comment this out to run it manually."
)
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
])
[
os
.
path
.
join
(
models_path_prefix
,
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
)
])
def
test_processor_max_tokens
(
model_id
):
ctx
=
build_model_context
(
model_id
,
...
...
@@ -127,7 +129,7 @@ def _test_image_prompt_replacements(
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
])
[
os
.
path
.
join
(
models_path_prefix
,
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
)
])
@
pytest
.
mark
.
parametrize
(
"num_imgs"
,
[
1
,
2
])
def
test_processor_prompt_replacements_regression
(
model_id
,
num_imgs
):
ctx
=
build_model_context
(
...
...
@@ -180,4 +182,4 @@ def test_processor_prompt_replacements_all(model_id, num_imgs):
processor
,
num_imgs
=
num_imgs
,
image_sizes
=
image_sizes
,
)
)
\ No newline at end of file
tests/models/multimodal/processing/test_mllama.py
View file @
04629132
# SPDX-License-Identifier: Apache-2.0
"""Tests for mllama's multimodal preprocessing and profiling."""
import
os
import
pytest
from
transformers
import
MllamaConfig
...
...
@@ -7,10 +8,11 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
from
vllm.multimodal.profiling
import
MultiModalProfiler
from
...utils
import
build_model_context
from
....utils
import
models_path_prefix
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
"meta-llama/Llama-3.2-11B-Vision-Instruct"
])
[
os
.
path
.
join
(
models_path_prefix
,
"meta-llama/Llama-3.2-11B-Vision-Instruct"
)
])
@
pytest
.
mark
.
parametrize
(
"max_model_len"
,
[
4096
,
8192
,
25600
,
131072
])
@
pytest
.
mark
.
parametrize
(
"max_num_seqs"
,
[
1
,
2
,
8
])
def
test_profiling
(
...
...
@@ -68,4 +70,4 @@ def test_profiling(
# simulate mllama image-present prefill.
for
actual_len
,
last_group_len
in
zip
(
actual_encoder_seq_lens
,
encoder_seq_lens
):
assert
actual_len
>=
last_group_len
assert
actual_len
>=
last_group_len
\ No newline at end of file
tests/models/multimodal/processing/test_phi3v.py
View file @
04629132
# SPDX-License-Identifier: Apache-2.0
"""Tests for phi3v's multimodal preprocessing kwargs."""
import
os
import
pytest
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
....conftest
import
_ImageAssets
from
...utils
import
build_model_context
from
....utils
import
models_path_prefix
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
"microsoft/Phi-3.5-vision-instruct"
])
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
os
.
path
.
join
(
models_path_prefix
,
"microsoft/Phi-3.5-vision-instruct"
)
])
# yapf: disable
@
pytest
.
mark
.
parametrize
(
(
"mm_processor_kwargs"
,
"expected_toks_per_img"
),
...
...
@@ -50,4 +52,4 @@ def test_processor_override(
# Ensure we have the right number of placeholders per num_crops size
img_tok_count
=
processed_inputs
[
"prompt_token_ids"
].
count
(
_IMAGE_TOKEN_ID
)
assert
img_tok_count
==
expected_toks_per_img
*
num_imgs
assert
img_tok_count
==
expected_toks_per_img
*
num_imgs
\ No newline at end of file
tests/models/multimodal/processing/test_phi4mm.py
View file @
04629132
# SPDX-License-Identifier: Apache-2.0
"""Tests for phi4mm's multimodal preprocessing kwargs."""
import
os
import
pytest
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
....conftest
import
_ImageAssets
from
...utils
import
build_model_context
from
....utils
import
models_path_prefix
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
"microsoft/Phi-4-multimodal-instruct"
])
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
os
.
path
.
join
(
models_path_prefix
,
"microsoft/Phi-4-multimodal-instruct"
)
])
# yapf: disable
@
pytest
.
mark
.
parametrize
(
(
"mm_processor_kwargs"
,
"expected_toks_per_img"
),
...
...
@@ -56,4 +58,4 @@ def test_processor_override(
# Ensure we have the right number of placeholders per num_crops size
img_tok_count
=
processed_inputs
[
"prompt_token_ids"
].
count
(
_IMAGE_PLACEHOLDER_TOKEN_ID
)
assert
img_tok_count
==
expected_toks_per_img
*
num_imgs
assert
img_tok_count
==
expected_toks_per_img
*
num_imgs
\ No newline at end of file
tests/models/multimodal/processing/test_qwen2_vl.py
View file @
04629132
# SPDX-License-Identifier: Apache-2.0
import
os
import
pytest
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
....conftest
import
_ImageAssets
from
...utils
import
build_model_context
from
....utils
import
models_path_prefix
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
"Qwen/Qwen2-VL-2B-Instruct"
])
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2-VL-2B-Instruct"
)
])
# yapf: disable
@
pytest
.
mark
.
parametrize
(
(
"mm_processor_kwargs"
,
"expected_toks_per_img"
,
"expected_pixels_shape"
),
[
...
...
@@ -51,4 +53,4 @@ def test_processor_override(
assert
img_tok_count
==
expected_toks_per_img
*
num_imgs
assert
pixel_shape
[
0
]
==
expected_pixels_shape
[
0
]
*
num_imgs
assert
pixel_shape
[
1
]
==
expected_pixels_shape
[
1
]
assert
pixel_shape
[
1
]
==
expected_pixels_shape
[
1
]
\ No newline at end of file
tests/models/multimodal/processing/test_smolvlm.py
View file @
04629132
# SPDX-License-Identifier: Apache-2.0
"""Tests for smolvlm's multimodal preprocessing kwargs."""
import
os
import
pytest
from
transformers
import
SmolVLMConfig
...
...
@@ -7,9 +8,10 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
from
....conftest
import
_ImageAssets
from
...utils
import
build_model_context
from
....utils
import
models_path_prefix
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
"HuggingFaceTB/SmolVLM2-2.2B-Instruct"
])
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
os
.
path
.
join
(
models_path_prefix
,
"HuggingFaceTB/SmolVLM2-2.2B-Instruct"
)
])
# yapf: disable
@
pytest
.
mark
.
parametrize
(
(
"mm_processor_kwargs"
,
"expected_toks_per_img"
),
...
...
@@ -62,4 +64,4 @@ def test_processor_override(
# Ensure we have the right number of placeholders per num_crops size
image_token_id
=
ctx
.
get_hf_config
().
image_token_id
img_tok_count
=
processed_inputs
[
"prompt_token_ids"
].
count
(
image_token_id
)
assert
img_tok_count
==
expected_toks_per_img
*
num_imgs
assert
img_tok_count
==
expected_toks_per_img
*
num_imgs
\ No newline at end of file
tests/models/registry.py
View file @
04629132
...
...
@@ -8,7 +8,9 @@ import os
import
pytest
from
packaging.version
import
Version
from
transformers
import
__version__
as
TRANSFORMERS_VERSION
# from ..utils import models_path_prefix
models_path_prefix
=
os
.
getenv
(
'VLLM_OPTEST_MODELS_PATH'
)
or
os
.
getenv
(
"OPTEST_MODELS_PATH"
)
@
dataclass
(
frozen
=
True
)
...
...
@@ -109,8 +111,6 @@ class _HfExamplesInfo:
pytest
.
skip
(
msg
)
models_path_prefix
=
os
.
getenv
(
'VLLM_OPTEST_MODELS_PATH'
)
or
os
.
getenv
(
"OPTEST_MODELS_PATH"
)
# yapf: disable
_TEXT_GENERATION_EXAMPLE_MODELS
=
{
# [Decoder-only]
...
...
tests/models/test_registry.py
View file @
04629132
...
...
@@ -2,6 +2,7 @@
import
warnings
import
os
import
pytest
import
torch.cuda
...
...
@@ -20,6 +21,8 @@ from vllm.platforms import current_platform
from
..utils
import
create_new_process_for_each_test
from
.registry
import
HF_EXAMPLE_MODELS
models_path_prefix
=
os
.
getenv
(
'VLLM_OPTEST_MODELS_PATH'
)
or
os
.
getenv
(
"OPTEST_MODELS_PATH"
)
@
pytest
.
mark
.
parametrize
(
"model_arch"
,
ModelRegistry
.
get_supported_archs
())
def
test_registry_imports
(
model_arch
):
...
...
@@ -52,12 +55,12 @@ def test_registry_imports(model_arch):
@
create_new_process_for_each_test
()
@
pytest
.
mark
.
parametrize
(
"model_arch,is_mm,init_cuda,is_ce"
,
[
(
"LlamaForCausalLM"
,
False
,
False
,
False
),
(
"MllamaForConditionalGeneration"
,
True
,
False
,
False
),
(
"LlavaForConditionalGeneration"
,
True
,
True
,
False
),
(
"BertForSequenceClassification"
,
False
,
False
,
True
),
(
"RobertaForSequenceClassification"
,
False
,
False
,
True
),
(
"XLMRobertaForSequenceClassification"
,
False
,
False
,
True
),
(
os
.
path
.
join
(
models_path_prefix
,
"LlamaForCausalLM"
)
,
False
,
False
,
False
),
(
os
.
path
.
join
(
models_path_prefix
,
"MllamaForConditionalGeneration"
)
,
True
,
False
,
False
),
(
os
.
path
.
join
(
models_path_prefix
,
"LlavaForConditionalGeneration"
)
,
True
,
True
,
False
),
(
os
.
path
.
join
(
models_path_prefix
,
"BertForSequenceClassification"
)
,
False
,
False
,
True
),
(
os
.
path
.
join
(
models_path_prefix
,
"RobertaForSequenceClassification"
)
,
False
,
False
,
True
),
(
os
.
path
.
join
(
models_path_prefix
,
"XLMRobertaForSequenceClassification"
)
,
False
,
False
,
True
),
])
def
test_registry_model_property
(
model_arch
,
is_mm
,
init_cuda
,
is_ce
):
assert
ModelRegistry
.
is_multimodal_model
(
model_arch
)
is
is_mm
...
...
@@ -77,9 +80,9 @@ def test_registry_model_property(model_arch, is_mm, init_cuda, is_ce):
@
create_new_process_for_each_test
()
@
pytest
.
mark
.
parametrize
(
"model_arch,is_pp,init_cuda"
,
[
(
"MLPSpeculatorPreTrainedModel"
,
False
,
False
),
(
"DeepseekV2ForCausalLM"
,
True
,
False
),
(
"Qwen2VLForConditionalGeneration"
,
True
,
True
),
(
os
.
path
.
join
(
models_path_prefix
,
"MLPSpeculatorPreTrainedModel"
)
,
False
,
False
),
(
os
.
path
.
join
(
models_path_prefix
,
"DeepseekV2ForCausalLM"
)
,
True
,
False
),
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen2VLForConditionalGeneration"
)
,
True
,
True
),
])
def
test_registry_is_pp
(
model_arch
,
is_pp
,
init_cuda
):
assert
ModelRegistry
.
is_pp_supported_model
(
model_arch
)
is
is_pp
...
...
@@ -104,4 +107,4 @@ def test_hf_registry_coverage():
assert
not
untested_archs
,
(
"Please add the following architectures to "
f
"`tests/models/registry.py`:
{
untested_archs
}
"
)
f
"`tests/models/registry.py`:
{
untested_archs
}
"
)
\ No newline at end of file
tests/quantization/test_cpu_offload.py
View file @
04629132
# SPDX-License-Identifier: Apache-2.0
# Expanded quantized model tests for CPU offloading
# Base tests: tests/basic_correctness/test_cpu_offload.py
import
pytest
import
os
from
tests.quantization.utils
import
is_quant_method_supported
from
..utils
import
compare_two_settings
,
models_path_prefix
from
vllm.platforms
import
current_platform
@
pytest
.
mark
.
skipif
(
not
is_quant_method_supported
(
"fp8"
)
or
current_platform
.
is_rocm
(),
reason
=
"fp8 is not supported on this GPU type."
)
def
test_cpu_offload_fp8
():
# Test quantization of an unquantized checkpoint
compare_two_settings
(
os
.
path
.
join
(
models_path_prefix
,
"meta-llama/Llama-3.2-1B-Instruct"
),
[
"--quantization"
,
"fp8"
],
[
"--quantization"
,
"fp8"
,
"--cpu-offload-gb"
,
"1"
],
max_wait_seconds
=
480
)
# Test loading a quantized checkpoint
# compare_two_settings(os.path.join(models_path_prefix, "neuralmagic/Qwen2-1.5B-Instruct-FP8"), [],
# ["--cpu-offload-gb", "1"],
# max_wait_seconds=480)
@
pytest
.
mark
.
skipif
(
not
is_quant_method_supported
(
"gptq_marlin"
)
or
current_platform
.
is_rocm
(),
reason
=
"gptq_marlin is not supported on this GPU type."
)
def
test_cpu_offload_gptq
(
monkeypatch
):
# This quant method is sensitive to dummy weights, so we force real weights
monkeypatch
.
setenv
(
'VLLM_TEST_FORCE_LOAD_FORMAT'
,
'auto'
)
# Test GPTQ Marlin
compare_two_settings
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2-1.5B-Instruct-GPTQ-Int4"
),
[],
[
"--cpu-offload-gb"
,
"1"
],
max_wait_seconds
=
480
)
# Test GPTQ
compare_two_settings
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2-1.5B-Instruct-GPTQ-Int4"
),
[
"--quantization"
,
"gptq"
],
[
"--quantization"
,
"gptq"
,
"--cpu-offload-gb"
,
"1"
],
max_wait_seconds
=
480
)
@
pytest
.
mark
.
skipif
(
not
is_quant_method_supported
(
"awq_marlin"
)
or
current_platform
.
is_rocm
(),
reason
=
"awq_marlin is not supported on this GPU type."
)
def
test_cpu_offload_awq
(
monkeypatch
):
# This quant method is sensitive to dummy weights, so we force real weights
monkeypatch
.
setenv
(
'VLLM_TEST_FORCE_LOAD_FORMAT'
,
'auto'
)
# Test AWQ Marlin
compare_two_settings
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2-1.5B-Instruct-AWQ"
),
[],
[
"--cpu-offload-gb"
,
"1"
],
max_wait_seconds
=
480
)
# Test AWQ
compare_two_settings
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2-1.5B-Instruct-AWQ"
),
[
"--quantization"
,
"awq"
],
[
"--quantization"
,
"awq"
,
"--cpu-offload-gb"
,
"1"
],
max_wait_seconds
=
480
)
@
pytest
.
mark
.
skipif
(
not
is_quant_method_supported
(
"gptq_marlin"
)
or
current_platform
.
is_rocm
(),
reason
=
"gptq_marlin is not supported on this GPU type."
)
def
test_cpu_offload_compressed_tensors
(
monkeypatch
):
# This quant method is sensitive to dummy weights, so we force real weights
monkeypatch
.
setenv
(
'VLLM_TEST_FORCE_LOAD_FORMAT'
,
'auto'
)
# Test wNa16
compare_two_settings
(
os
.
path
.
join
(
models_path_prefix
,
"nm-testing/tinyllama-oneshot-w4a16-channel-v2"
),
[],
[
"--cpu-offload-gb"
,
"1"
],
max_wait_seconds
=
480
)
# Test w4a16_marlin24
compare_two_settings
(
os
.
path
.
join
(
models_path_prefix
,
"nm-testing/llama7b-one-shot-2_4-w4a16-marlin24-t"
),
[],
[
"--cpu-offload-gb"
,
"1"
],
max_wait_seconds
=
480
)
# Test w8a8
compare_two_settings
(
os
.
path
.
join
(
models_path_prefix
,
"nm-testing/tinyllama-oneshot-w8w8-test-static-shape-change"
),
[],
[
"--cpu-offload-gb"
,
"1"
],
max_wait_seconds
=
480
)
# SPDX-License-Identifier: Apache-2.0
# Expanded quantized model tests for CPU offloading
# Base tests: tests/basic_correctness/test_cpu_offload.py
import
pytest
import
os
from
tests.quantization.utils
import
is_quant_method_supported
from
..utils
import
compare_two_settings
,
models_path_prefix
from
vllm.platforms
import
current_platform
@
pytest
.
mark
.
skipif
(
not
is_quant_method_supported
(
"fp8"
)
or
current_platform
.
is_rocm
(),
reason
=
"fp8 is not supported on this GPU type."
)
def
test_cpu_offload_fp8
():
# Test quantization of an unquantized checkpoint
compare_two_settings
(
os
.
path
.
join
(
models_path_prefix
,
"meta-llama/Llama-3.2-1B-Instruct"
),
[
"--quantization"
,
"fp8"
],
[
"--quantization"
,
"fp8"
,
"--cpu-offload-gb"
,
"1"
],
max_wait_seconds
=
480
)
# Test loading a quantized checkpoint
# compare_two_settings(os.path.join(models_path_prefix, "neuralmagic/Qwen2-1.5B-Instruct-FP8"), [],
# ["--cpu-offload-gb", "1"],
# max_wait_seconds=480)
@
pytest
.
mark
.
skipif
(
not
is_quant_method_supported
(
"gptq_marlin"
)
or
current_platform
.
is_rocm
(),
reason
=
"gptq_marlin is not supported on this GPU type."
)
def
test_cpu_offload_gptq
(
monkeypatch
):
# This quant method is sensitive to dummy weights, so we force real weights
monkeypatch
.
setenv
(
'VLLM_TEST_FORCE_LOAD_FORMAT'
,
'auto'
)
# Test GPTQ Marlin
compare_two_settings
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2-1.5B-Instruct-GPTQ-Int4"
),
[],
[
"--cpu-offload-gb"
,
"1"
],
max_wait_seconds
=
480
)
# Test GPTQ
compare_two_settings
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2-1.5B-Instruct-GPTQ-Int4"
),
[
"--quantization"
,
"gptq"
],
[
"--quantization"
,
"gptq"
,
"--cpu-offload-gb"
,
"1"
],
max_wait_seconds
=
480
)
@
pytest
.
mark
.
skipif
(
not
is_quant_method_supported
(
"awq_marlin"
)
or
current_platform
.
is_rocm
(),
reason
=
"awq_marlin is not supported on this GPU type."
)
def
test_cpu_offload_awq
(
monkeypatch
):
# This quant method is sensitive to dummy weights, so we force real weights
monkeypatch
.
setenv
(
'VLLM_TEST_FORCE_LOAD_FORMAT'
,
'auto'
)
# Test AWQ Marlin
compare_two_settings
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2-1.5B-Instruct-AWQ"
),
[],
[
"--cpu-offload-gb"
,
"1"
],
max_wait_seconds
=
480
)
# Test AWQ
compare_two_settings
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2-1.5B-Instruct-AWQ"
),
[
"--quantization"
,
"awq"
],
[
"--quantization"
,
"awq"
,
"--cpu-offload-gb"
,
"1"
],
max_wait_seconds
=
480
)
@
pytest
.
mark
.
skipif
(
not
is_quant_method_supported
(
"gptq_marlin"
)
or
current_platform
.
is_rocm
(),
reason
=
"gptq_marlin is not supported on this GPU type."
)
def
test_cpu_offload_compressed_tensors
(
monkeypatch
):
# This quant method is sensitive to dummy weights, so we force real weights
monkeypatch
.
setenv
(
'VLLM_TEST_FORCE_LOAD_FORMAT'
,
'auto'
)
# Test wNa16
compare_two_settings
(
os
.
path
.
join
(
models_path_prefix
,
"nm-testing/tinyllama-oneshot-w4a16-channel-v2"
),
[],
[
"--cpu-offload-gb"
,
"1"
],
max_wait_seconds
=
480
)
# Test w4a16_marlin24
compare_two_settings
(
os
.
path
.
join
(
models_path_prefix
,
"nm-testing/llama7b-one-shot-2_4-w4a16-marlin24-t"
),
[],
[
"--cpu-offload-gb"
,
"1"
],
max_wait_seconds
=
480
)
# Test w8a8
compare_two_settings
(
os
.
path
.
join
(
models_path_prefix
,
"nm-testing/tinyllama-oneshot-w8w8-test-static-shape-change"
),
[],
[
"--cpu-offload-gb"
,
"1"
],
max_wait_seconds
=
480
)
tests/test_embedded_commit.py
View file @
04629132
# SPDX-License-Identifier: Apache-2.0
import
vllm
def
test_embedded_commit_defined
():
assert
hasattr
(
vllm
,
"__version__"
)
assert
hasattr
(
vllm
,
"__version_tuple__"
)
assert
vllm
.
__version__
!=
"dev"
assert
vllm
.
__version_tuple__
!=
(
0
,
0
,
"dev"
)
import
vllm
def
test_embedded_commit_defined
():
assert
hasattr
(
vllm
,
"__version__"
)
assert
hasattr
(
vllm
,
"__version_tuple__"
)
assert
vllm
.
__version__
!=
"dev"
assert
vllm
.
__version_tuple__
!=
(
0
,
0
,
"dev"
)
tests/test_seed_behavior.py
View file @
04629132
# SPDX-License-Identifier: Apache-2.0
import
random
import
numpy
as
np
import
torch
from
vllm.platforms.interface
import
Platform
def
test_seed_behavior
():
# Test with a specific seed
Platform
.
seed_everything
(
42
)
random_value_1
=
random
.
randint
(
0
,
100
)
np_random_value_1
=
np
.
random
.
randint
(
0
,
100
)
torch_random_value_1
=
torch
.
randint
(
0
,
100
,
(
1
,
)).
item
()
Platform
.
seed_everything
(
42
)
random_value_2
=
random
.
randint
(
0
,
100
)
np_random_value_2
=
np
.
random
.
randint
(
0
,
100
)
torch_random_value_2
=
torch
.
randint
(
0
,
100
,
(
1
,
)).
item
()
assert
random_value_1
==
random_value_2
assert
np_random_value_1
==
np_random_value_2
assert
torch_random_value_1
==
torch_random_value_2
# SPDX-License-Identifier: Apache-2.0
import
random
import
numpy
as
np
import
torch
from
vllm.platforms.interface
import
Platform
def
test_seed_behavior
():
# Test with a specific seed
Platform
.
seed_everything
(
42
)
random_value_1
=
random
.
randint
(
0
,
100
)
np_random_value_1
=
np
.
random
.
randint
(
0
,
100
)
torch_random_value_1
=
torch
.
randint
(
0
,
100
,
(
1
,
)).
item
()
Platform
.
seed_everything
(
42
)
random_value_2
=
random
.
randint
(
0
,
100
)
np_random_value_2
=
np
.
random
.
randint
(
0
,
100
)
torch_random_value_2
=
torch
.
randint
(
0
,
100
,
(
1
,
)).
item
()
assert
random_value_1
==
random_value_2
assert
np_random_value_1
==
np_random_value_2
assert
torch_random_value_1
==
torch_random_value_2
tests/weight_loading/run_model_weight_loading_test.sh
100755 → 100644
View file @
04629132
File mode changed from 100755 to 100644
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment