Mirroring the test definitions (2025-10-22) (#27362)

Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com>

Mirroring the test definitions (2025-10-22) (#27362)
Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com>
295c7f02 · Alexei-V-Ivanov-AMD · GitHub · 3fa2c121 · 295c7f02
Unverified Commit 295c7f02 authored Oct 23, 2025 by Alexei-V-Ivanov-AMD Committed by GitHub Oct 24, 2025
Show whitespace changes
Inline Side-by-side

Showing with 9 additions and 2 deletions

.buildkite/test-amd.yaml .buildkite/test-amd.yaml +9 -2

No files found.
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -395,7 +395,9 @@ steps:
    - python3 offline_inference/basic/embed.py
    - python3 offline_inference/basic/score.py
    - python3 offline_inference/spec_decode.py --test --method eagle --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
-    - python3 offline_inference/spec_decode.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
+    # https://github.com/vllm-project/vllm/pull/26682 uses slightly more memory with PyTorch 2.9+, causing this test to OOM on a 1xL4 GPU, so --max-model-len is reduced from 2048 to 1536
+    - python3 offline_inference/spec_decode.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 1536
+    #- python3 offline_inference/spec_decode.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048

 - label: Platform Tests (CUDA) # 4min
  timeout_in_minutes: 15
@@ -436,7 +438,11 @@ steps:
      --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT \
      --ignore=lora/test_chatglm3_tp.py \
      --ignore=lora/test_llama_tp.py \
-      --ignore=lora/test_llm_with_multi_loras.py
+      --ignore=lora/test_llm_with_multi_loras.py \
+      --ignore=lora/test_olmoe_tp.py \
+      --ignore=lora/test_deepseekv2_tp.py \
+      --ignore=lora/test_gptoss.py \
+      --ignore=lora/test_qwen3moe_tp.py
  parallelism: 4

 - label: PyTorch Compilation Unit Tests # 15min
@@ -1208,6 +1214,7 @@ steps:
    - pytest -v -s -x lora/test_chatglm3_tp.py
    - pytest -v -s -x lora/test_llama_tp.py
    - pytest -v -s -x lora/test_llm_with_multi_loras.py
+    - pytest -v -s -x lora/test_olmoe_tp.py

 - label: Weight Loading Multiple GPU Test  # 33min
  timeout_in_minutes: 45