Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
67cee40d
Unverified
Commit
67cee40d
authored Aug 28, 2025 by Li, Jiang
Committed by GitHub on Aug 28, 2025
Browse files
[CI/Build][Bugfix] Fix Qwen VL tests on CPU (#23818)
Signed-off-by: jiang1.li <jiang1.li@intel.com>
parent
d99c3a4f
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing 2 changed files with 14 additions and 14 deletions
+14
-14
.buildkite/scripts/hardware_ci/run-cpu-test.sh
.buildkite/scripts/hardware_ci/run-cpu-test.sh
+10
-10
vllm/model_executor/models/utils.py
vllm/model_executor/models/utils.py
+4
-4
No files found.
.buildkite/scripts/hardware_ci/run-cpu-test.sh
View file @
67cee40d
...
...
@@ -49,23 +49,23 @@ function cpu_tests() {
# Run kernel tests
docker
exec
cpu-test-
"
$NUMA_NODE
"
bash
-c
"
set -e
pytest -v -s tests/kernels/test_onednn.py"
pytest
-x
-v -s tests/kernels/test_onednn.py"
# Run basic model test
docker
exec
cpu-test-
"
$NUMA_NODE
"
bash
-c
"
set -e
# Note: disable until supports V1
# pytest -v -s tests/kernels/attention/test_cache.py -m cpu_model
# pytest -v -s tests/kernels/attention/test_mla_decode_cpu.py -m cpu_model
# pytest
-x
-v -s tests/kernels/attention/test_cache.py -m cpu_model
# pytest
-x
-v -s tests/kernels/attention/test_mla_decode_cpu.py -m cpu_model
# Note: disable Bart until supports V1
pytest -v -s tests/models/language/generation -m cpu_model
\
pytest
-x
-v -s tests/models/language/generation -m cpu_model
\
--ignore=tests/models/language/generation/test_bart.py
VLLM_CPU_SGL_KERNEL=1 pytest -v -s tests/models/language/generation -m cpu_model
\
VLLM_CPU_SGL_KERNEL=1 pytest
-x
-v -s tests/models/language/generation -m cpu_model
\
--ignore=tests/models/language/generation/test_bart.py
pytest -v -s tests/models/language/pooling -m cpu_model
pytest -v -s tests/models/multimodal/generation
\
pytest
-x
-v -s tests/models/language/pooling -m cpu_model
pytest
-x
-v -s tests/models/multimodal/generation
\
--ignore=tests/models/multimodal/generation/test_mllama.py
\
--ignore=tests/models/multimodal/generation/test_pixtral.py
\
-m cpu_model"
...
...
@@ -73,20 +73,20 @@ function cpu_tests() {
# Run compressed-tensor test
docker
exec
cpu-test-
"
$NUMA_NODE
"
bash
-c
"
set -e
pytest -s -v
\
pytest
-x
-s -v
\
tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_logprobs[False-10-32-neuralmagic/Llama-3.2-1B-quantized.w8a8]"
# Note: disable it until supports V1
# Run AWQ test
# docker exec cpu-test-"$NUMA_NODE" bash -c "
# set -e
# VLLM_USE_V1=0 pytest -s -v \
# VLLM_USE_V1=0 pytest
-x
-s -v \
# tests/quantization/test_ipex_quant.py"
# Run multi-lora tests
docker
exec
cpu-test-
"
$NUMA_NODE
"
bash
-c
"
set -e
pytest -s -v
\
pytest
-x
-s -v
\
tests/lora/test_qwen2vl.py"
# online serving
...
...
vllm/model_executor/models/utils.py
View file @
67cee40d
...
...
@@ -507,9 +507,9 @@ def merge_multimodal_embeddings(
This updates ``inputs_embeds`` in place.
"""
if
isinstance
(
placeholder_token_id
,
list
):
placeholder_token_id
=
torch
.
tensor
(
placeholder_token_id
,
pin_memory
=
True
).
to
(
device
=
input_ids
.
device
,
placeholder_token_id
=
torch
.
tensor
(
placeholder_token_id
,
pin_memory
=
is_pin_memory_available
()).
to
(
device
=
input_ids
.
device
,
non_blocking
=
True
)
return
_merge_multimodal_embeddings
(
inputs_embeds
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment