Add nightly b200 test for spec decode eagle correctness (#38577)

Signed-off-by: Rishi Puri <riship@nvidia.com>

Add nightly b200 test for spec decode eagle correctness (#38577)
Signed-off-by: Rishi Puri <riship@nvidia.com>
adaabb8a · Rishi Puri · GitHub · f7cad674 · adaabb8a
Unverified commit adaabb8a, authored Apr 09, 2026 by Rishi Puri; committed by GitHub on Apr 09, 2026.
Show whitespace changes
Inline Side-by-side

Showing with 34 additions and 0 deletions

.buildkite/test_areas/spec_decode.yaml +34 -0

No files found.
--- a/.buildkite/test_areas/spec_decode.yaml
+++ b/.buildkite/test_areas/spec_decode.yaml
@@ -12,6 +12,17 @@ steps:
  commands:
    - pytest -v -s v1/e2e/spec_decode -k "eagle_correctness"

+- label: Spec Decode Eagle Nightly B200  # same pytest selection as the step above, targeting b200
+  timeout_in_minutes: 30
+  device: b200
+  optional: true  # NOTE(review): presumably not run by default on every PR — confirm
+  source_file_dependencies:  # NOTE(review): presumably paths whose changes select this step — confirm
+    - vllm/v1/spec_decode/
+    - vllm/v1/worker/gpu/spec_decode/
+    - tests/v1/e2e/spec_decode/
+  commands:  # -k limits the spec_decode e2e suite to tests matching eagle_correctness
+    - pytest -v -s v1/e2e/spec_decode -k "eagle_correctness"
+
 - label: Spec Decode Speculators + MTP
  timeout_in_minutes: 30
  device: h200_18gb
@@ -23,6 +34,18 @@ steps:
  commands:
    - pytest -v -s v1/e2e/spec_decode -k "speculators or mtp_correctness"

+- label: Spec Decode Speculators + MTP Nightly B200  # same pytest selection as the step above, targeting b200
+  timeout_in_minutes: 30
+  device: b200
+  optional: true  # NOTE(review): presumably not run by default on every PR — confirm
+  source_file_dependencies:  # NOTE(review): presumably paths whose changes select this step — confirm
+    - vllm/v1/spec_decode/
+    - vllm/v1/worker/gpu/spec_decode/
+    - vllm/transformers_utils/configs/speculators/
+    - tests/v1/e2e/spec_decode/
+  commands:  # -k limits the spec_decode e2e suite to tests matching speculators or mtp_correctness
+    - pytest -v -s v1/e2e/spec_decode -k "speculators or mtp_correctness"
+
 - label: Spec Decode Ngram + Suffix
  timeout_in_minutes: 30
  device: h200_18gb
@@ -42,3 +65,14 @@ steps:
    - tests/v1/e2e/spec_decode/
  commands:
    - pytest -v -s v1/e2e/spec_decode -k "draft_model or no_sync or batch_inference"
+
+- label: Spec Decode Draft Model Nightly B200  # same pytest selection as the step above, targeting b200
+  timeout_in_minutes: 30
+  device: b200
+  optional: true  # NOTE(review): presumably not run by default on every PR — confirm
+  source_file_dependencies:  # NOTE(review): presumably paths whose changes select this step — confirm
+    - vllm/v1/spec_decode/
+    - vllm/v1/worker/gpu/spec_decode/
+    - tests/v1/e2e/spec_decode/
+  commands:  # -k limits the spec_decode e2e suite to draft_model, no_sync or batch_inference tests
+    - pytest -v -s v1/e2e/spec_decode -k "draft_model or no_sync or batch_inference"