Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
324a3d2b
Unverified
Commit
324a3d2b
authored
Apr 16, 2026
by
Li, Jiang
Committed by
GitHub
Apr 16, 2026
Browse files
[CI/Build] Improve stability of CPU tests (#39966)
Signed-off-by:
jiang1.li
<
jiang1.li@intel.com
>
parent
4269b794
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
8 additions
and
6 deletions
+8
-6
.buildkite/hardware_tests/cpu.yaml
.buildkite/hardware_tests/cpu.yaml
+2
-2
tests/models/language/generation/test_common.py
tests/models/language/generation/test_common.py
+4
-4
tests/models/language/generation/test_granite.py
tests/models/language/generation/test_granite.py
+1
-0
vllm/platforms/cpu.py
vllm/platforms/cpu.py
+1
-0
No files found.
.buildkite/hardware_tests/cpu.yaml
View file @
324a3d2b
...
@@ -46,7 +46,7 @@ steps:
...
@@ -46,7 +46,7 @@ steps:
-
tests/models/language/pooling/
-
tests/models/language/pooling/
commands
:
commands
:
-
|
-
|
bash .buildkite/scripts/hardware_ci/run-cpu-test.sh
3
0m "
bash .buildkite/scripts/hardware_ci/run-cpu-test.sh
4
0m "
pytest -x -v -s tests/models/language/generation -m cpu_model
pytest -x -v -s tests/models/language/generation -m cpu_model
pytest -x -v -s tests/models/language/pooling -m cpu_model"
pytest -x -v -s tests/models/language/pooling -m cpu_model"
...
@@ -99,7 +99,7 @@ steps:
...
@@ -99,7 +99,7 @@ steps:
-
|
-
|
bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 45m "
bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 45m "
pytest -x -v -s tests/models/multimodal/generation --ignore=tests/models/multimodal/generation/test_pixtral.py -m cpu_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB"
pytest -x -v -s tests/models/multimodal/generation --ignore=tests/models/multimodal/generation/test_pixtral.py -m cpu_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB"
parallelism
:
2
parallelism
:
3
-
label
:
"
Arm
CPU
Test"
-
label
:
"
Arm
CPU
Test"
depends_on
:
[]
depends_on
:
[]
...
...
tests/models/language/generation/test_common.py
View file @
324a3d2b
...
@@ -100,7 +100,7 @@ AITER_MODEL_LIST = [
...
@@ -100,7 +100,7 @@ AITER_MODEL_LIST = [
pytest
.
param
(
"bigcode/starcoder2-3b"
),
# starcoder2
pytest
.
param
(
"bigcode/starcoder2-3b"
),
# starcoder2
pytest
.
param
(
pytest
.
param
(
"TitanML/tiny-mixtral"
,
# mixtral
"TitanML/tiny-mixtral"
,
# mixtral
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
],
marks
=
[
pytest
.
mark
.
core_model
],
),
),
pytest
.
param
(
"swiss-ai/Apertus-8B-Instruct-2509"
),
# apertus
pytest
.
param
(
"swiss-ai/Apertus-8B-Instruct-2509"
),
# apertus
pytest
.
param
(
pytest
.
param
(
...
@@ -143,9 +143,9 @@ def test_models(
...
@@ -143,9 +143,9 @@ def test_models(
# in parts of the operators
# in parts of the operators
pytest
.
skip
(
f
"Skipping '
{
model
}
' model test with AITER kernel."
)
pytest
.
skip
(
f
"Skipping '
{
model
}
' model test with AITER kernel."
)
if
current_platform
.
is_cpu
()
and
model
==
"TitanML/tiny-mixtral"
:
if
current_platform
.
is_cpu
()
and
model
in
(
"openai-community/gpt2"
,)
:
# Th
is untrained
model
is
sensitive to the rounding error
# Th
ese
model
s are
sensitive to the rounding error
# Fuse ops to reduce
bfloat16
rounding
# Fuse ops to reduce rounding
monkeypatch
.
setenv
(
"VLLM_CPU_CI_ENV"
,
"0"
)
monkeypatch
.
setenv
(
"VLLM_CPU_CI_ENV"
,
"0"
)
with
hf_runner
(
model
)
as
hf_model
:
with
hf_runner
(
model
)
as
hf_model
:
...
...
tests/models/language/generation/test_granite.py
View file @
324a3d2b
...
@@ -15,6 +15,7 @@ MODELS = [
...
@@ -15,6 +15,7 @@ MODELS = [
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"bfloat16"
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"bfloat16"
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
64
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
64
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
@
pytest
.
mark
.
cpu_model
def
test_models
(
def
test_models
(
hf_runner
,
hf_runner
,
vllm_runner
,
vllm_runner
,
...
...
vllm/platforms/cpu.py
View file @
324a3d2b
...
@@ -242,6 +242,7 @@ class CpuPlatform(Platform):
...
@@ -242,6 +242,7 @@ class CpuPlatform(Platform):
"cpp.dynamic_threads"
:
True
,
"cpp.dynamic_threads"
:
True
,
}
}
)
)
compilation_config
.
ir_enable_torch_wrap
=
False
if
vllm_config
.
lora_config
is
not
None
:
if
vllm_config
.
lora_config
is
not
None
:
compilation_config
.
mode
=
CompilationMode
.
NONE
compilation_config
.
mode
=
CompilationMode
.
NONE
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment