Unverified commit adaabb8a, authored by Rishi Puri, committed by GitHub
Browse files

Add nightly b200 test for spec decode eagle correctness (#38577)


Signed-off-by: Rishi Puri <riship@nvidia.com>
parent f7cad674
......@@ -12,6 +12,17 @@ steps:
commands:
- pytest -v -s v1/e2e/spec_decode -k "eagle_correctness"
- label: Spec Decode Eagle Nightly B200
  # Nightly variant of the EAGLE spec-decode correctness run, targeted at
  # the b200 device class.
  timeout_in_minutes: 30
  device: b200
  # NOTE(review): presumably `optional: true` keeps this step out of the
  # default per-commit pipeline so it only runs on the nightly schedule or
  # when manually unblocked -- confirm against the pipeline generator.
  optional: true
  # NOTE(review): presumably the paths whose changes make this step
  # relevant -- confirm how the generator consumes this list.
  source_file_dependencies:
  - vllm/v1/spec_decode/
  - vllm/v1/worker/gpu/spec_decode/
  - tests/v1/e2e/spec_decode/
  commands:
  # -k restricts the run to tests whose name matches "eagle_correctness".
  - pytest -v -s v1/e2e/spec_decode -k "eagle_correctness"
- label: Spec Decode Speculators + MTP
timeout_in_minutes: 30
device: h200_18gb
......@@ -23,6 +34,18 @@ steps:
commands:
- pytest -v -s v1/e2e/spec_decode -k "speculators or mtp_correctness"
- label: Spec Decode Speculators + MTP Nightly B200
  # Nightly variant of the speculators / MTP spec-decode run, targeted at
  # the b200 device class.
  timeout_in_minutes: 30
  device: b200
  # NOTE(review): presumably `optional: true` keeps this step out of the
  # default per-commit pipeline so it only runs on the nightly schedule or
  # when manually unblocked -- confirm against the pipeline generator.
  optional: true
  # NOTE(review): presumably the paths whose changes make this step
  # relevant -- confirm how the generator consumes this list. Unlike the
  # other B200 nightly steps, this one also lists the speculators config
  # directory.
  source_file_dependencies:
  - vllm/v1/spec_decode/
  - vllm/v1/worker/gpu/spec_decode/
  - vllm/transformers_utils/configs/speculators/
  - tests/v1/e2e/spec_decode/
  commands:
  # -k restricts the run to tests whose name matches either "speculators"
  # or "mtp_correctness".
  - pytest -v -s v1/e2e/spec_decode -k "speculators or mtp_correctness"
- label: Spec Decode Ngram + Suffix
timeout_in_minutes: 30
device: h200_18gb
......@@ -42,3 +65,14 @@ steps:
- tests/v1/e2e/spec_decode/
commands:
- pytest -v -s v1/e2e/spec_decode -k "draft_model or no_sync or batch_inference"
- label: Spec Decode Draft Model Nightly B200
  # Nightly variant of the draft-model spec-decode run, targeted at the
  # b200 device class.
  timeout_in_minutes: 30
  device: b200
  # NOTE(review): presumably `optional: true` keeps this step out of the
  # default per-commit pipeline so it only runs on the nightly schedule or
  # when manually unblocked -- confirm against the pipeline generator.
  optional: true
  # NOTE(review): presumably the paths whose changes make this step
  # relevant -- confirm how the generator consumes this list.
  source_file_dependencies:
  - vllm/v1/spec_decode/
  - vllm/v1/worker/gpu/spec_decode/
  - tests/v1/e2e/spec_decode/
  commands:
  # -k restricts the run to tests whose name matches "draft_model",
  # "no_sync", or "batch_inference".
  - pytest -v -s v1/e2e/spec_decode -k "draft_model or no_sync or batch_inference"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment