"ssh:/git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "d55244df31969e7df435603b5d7014939e60881b"
Unverified commit 324a3d2b, authored by Li, Jiang and committed by GitHub
Browse files

[CI/Build] Improve stability of CPU tests (#39966)


Signed-off-by: jiang1.li <jiang1.li@intel.com>
parent 4269b794
...@@ -46,7 +46,7 @@ steps: ...@@ -46,7 +46,7 @@ steps:
- tests/models/language/pooling/ - tests/models/language/pooling/
commands: commands:
- | - |
bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 30m " bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 40m "
pytest -x -v -s tests/models/language/generation -m cpu_model pytest -x -v -s tests/models/language/generation -m cpu_model
pytest -x -v -s tests/models/language/pooling -m cpu_model" pytest -x -v -s tests/models/language/pooling -m cpu_model"
...@@ -99,7 +99,7 @@ steps: ...@@ -99,7 +99,7 @@ steps:
- | - |
bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 45m " bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 45m "
pytest -x -v -s tests/models/multimodal/generation --ignore=tests/models/multimodal/generation/test_pixtral.py -m cpu_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB" pytest -x -v -s tests/models/multimodal/generation --ignore=tests/models/multimodal/generation/test_pixtral.py -m cpu_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB"
parallelism: 2 parallelism: 3
- label: "Arm CPU Test" - label: "Arm CPU Test"
depends_on: [] depends_on: []
......
...@@ -100,7 +100,7 @@ AITER_MODEL_LIST = [ ...@@ -100,7 +100,7 @@ AITER_MODEL_LIST = [
pytest.param("bigcode/starcoder2-3b"), # starcoder2 pytest.param("bigcode/starcoder2-3b"), # starcoder2
pytest.param( pytest.param(
"TitanML/tiny-mixtral", # mixtral "TitanML/tiny-mixtral", # mixtral
marks=[pytest.mark.core_model, pytest.mark.cpu_model], marks=[pytest.mark.core_model],
), ),
pytest.param("swiss-ai/Apertus-8B-Instruct-2509"), # apertus pytest.param("swiss-ai/Apertus-8B-Instruct-2509"), # apertus
pytest.param( pytest.param(
...@@ -143,9 +143,9 @@ def test_models( ...@@ -143,9 +143,9 @@ def test_models(
# in parts of the operators # in parts of the operators
pytest.skip(f"Skipping '{model}' model test with AITER kernel.") pytest.skip(f"Skipping '{model}' model test with AITER kernel.")
if current_platform.is_cpu() and model == "TitanML/tiny-mixtral": if current_platform.is_cpu() and model in ("openai-community/gpt2",):
# This untrained model is sensitive to the rounding error # These models are sensitive to the rounding error
# Fuse ops to reduce bfloat16 rounding # Fuse ops to reduce rounding
monkeypatch.setenv("VLLM_CPU_CI_ENV", "0") monkeypatch.setenv("VLLM_CPU_CI_ENV", "0")
with hf_runner(model) as hf_model: with hf_runner(model) as hf_model:
......
...@@ -15,6 +15,7 @@ MODELS = [ ...@@ -15,6 +15,7 @@ MODELS = [
@pytest.mark.parametrize("dtype", ["bfloat16"]) @pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("max_tokens", [64]) @pytest.mark.parametrize("max_tokens", [64])
@pytest.mark.parametrize("num_logprobs", [5]) @pytest.mark.parametrize("num_logprobs", [5])
@pytest.mark.cpu_model
def test_models( def test_models(
hf_runner, hf_runner,
vllm_runner, vllm_runner,
......
...@@ -242,6 +242,7 @@ class CpuPlatform(Platform): ...@@ -242,6 +242,7 @@ class CpuPlatform(Platform):
"cpp.dynamic_threads": True, "cpp.dynamic_threads": True,
} }
) )
compilation_config.ir_enable_torch_wrap = False
if vllm_config.lora_config is not None: if vllm_config.lora_config is not None:
compilation_config.mode = CompilationMode.NONE compilation_config.mode = CompilationMode.NONE
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment