"deploy/vscode:/vscode.git/clone" did not exist on "a4bbe49228f5048044f68c9eebdfa19982304df2"
Unverified commit 324a3d2b, authored by Li, Jiang and committed by GitHub
Browse files

[CI/Build] Improve stability of CPU tests (#39966)


Signed-off-by: jiang1.li <jiang1.li@intel.com>
parent 4269b794
......@@ -46,7 +46,7 @@ steps:
- tests/models/language/pooling/
commands:
- |
bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 30m "
bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 40m "
pytest -x -v -s tests/models/language/generation -m cpu_model
pytest -x -v -s tests/models/language/pooling -m cpu_model"
......@@ -99,7 +99,7 @@ steps:
- |
bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 45m "
pytest -x -v -s tests/models/multimodal/generation --ignore=tests/models/multimodal/generation/test_pixtral.py -m cpu_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB"
parallelism: 2
parallelism: 3
- label: "Arm CPU Test"
depends_on: []
......
......@@ -100,7 +100,7 @@ AITER_MODEL_LIST = [
pytest.param("bigcode/starcoder2-3b"), # starcoder2
pytest.param(
"TitanML/tiny-mixtral", # mixtral
marks=[pytest.mark.core_model, pytest.mark.cpu_model],
marks=[pytest.mark.core_model],
),
pytest.param("swiss-ai/Apertus-8B-Instruct-2509"), # apertus
pytest.param(
......@@ -143,9 +143,9 @@ def test_models(
# in parts of the operators
pytest.skip(f"Skipping '{model}' model test with AITER kernel.")
if current_platform.is_cpu() and model == "TitanML/tiny-mixtral":
# This untrained model is sensitive to the rounding error
# Fuse ops to reduce bfloat16 rounding
if current_platform.is_cpu() and model in ("openai-community/gpt2",):
# These models are sensitive to the rounding error
# Fuse ops to reduce rounding
monkeypatch.setenv("VLLM_CPU_CI_ENV", "0")
with hf_runner(model) as hf_model:
......
......@@ -15,6 +15,7 @@ MODELS = [
@pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("max_tokens", [64])
@pytest.mark.parametrize("num_logprobs", [5])
@pytest.mark.cpu_model
def test_models(
hf_runner,
vllm_runner,
......
......@@ -242,6 +242,7 @@ class CpuPlatform(Platform):
"cpp.dynamic_threads": True,
}
)
compilation_config.ir_enable_torch_wrap = False
if vllm_config.lora_config is not None:
compilation_config.mode = CompilationMode.NONE
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment