[CI] Temporarily Disable Nightly Failures (#35770)

Signed-off-by: Robert Shaw <robshaw@redhat.com>
Signed-off-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>

[CI] Temporarily Disable Nightly Failures (#35770)
Signed-off-by: Robert Shaw <robshaw@redhat.com>
Signed-off-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
6521ccf2 · Robert Shaw · GitHub · 8ebd872f · 6521ccf2 · 6521ccf2
Unverified Commit 6521ccf2 authored Mar 02, 2026 by Robert Shaw Committed by GitHub Mar 03, 2026
4 changed files
--- a/.buildkite/test_areas/distributed.yaml
+++ b/.buildkite/test_areas/distributed.yaml
@@ -146,7 +146,7 @@ steps:
  num_devices: 2
  commands:
    - pytest -v -s tests/distributed/test_context_parallel.py
-    - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 examples/offline_inference/new_weight_syncing/rlhf_async_new_apis.py
+    # - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 examples/offline_inference/new_weight_syncing/rlhf_async_new_apis.py  # TODO: temporarily disabled because it is failing in nightly CI; re-enable once fixed
    - VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model=Qwen/Qwen1.5-MoE-A2.7B -tp=1 -dp=2 --max-model-len=2048 --all2all-backend=deepep_high_throughput
    - pytest -v -s tests/v1/distributed/test_dbo.py

--- a/.buildkite/test_areas/lm_eval.yaml
+++ b/.buildkite/test_areas/lm_eval.yaml
@@ -11,17 +11,17 @@ steps:
  commands:
  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-small.txt
- label: LM Eval Large Models (4 GPUs)(A100)
+# - label: LM Eval Large Models (4 GPUs)(A100)
-  device: a100
+#   device: a100
-  optional: true
+#   optional: true
-  num_devices: 4
+#   num_devices: 4
-  working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
+#   working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
-  source_file_dependencies:
+#   source_file_dependencies:
-  - csrc/
+#   - csrc/
-  - vllm/model_executor/layers/quantization
+#   - vllm/model_executor/layers/quantization
-  commands:
+#   commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+#   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
+#   - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
 - label: LM Eval Large Models (4 GPUs)(H100)
  device: h100

--- a/.buildkite/test_areas/weight_loading.yaml
+++ b/.buildkite/test_areas/weight_loading.yaml
@@ -13,13 +13,13 @@ steps:
  commands:
    - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models.txt
- label: Weight Loading Multiple GPU - Large Models # optional
+# - label: Weight Loading Multiple GPU - Large Models # optional
-  working_dir: "/vllm-workspace/tests"
+#   working_dir: "/vllm-workspace/tests"
-  num_devices: 2
+#   num_devices: 2
-  device: a100
+#   device: a100
-  optional: true
+#   optional: true
-  source_file_dependencies:
+#   source_file_dependencies:
-  - vllm/
+#   - vllm/
-  - tests/weight_loading
+#   - tests/weight_loading
-  commands:
+#   commands:
-    - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large.txt
+#     - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large.txt
--- a/tests/evals/gsm8k/configs/moe-refactor/config-h100.txt
+++ b/tests/evals/gsm8k/configs/moe-refactor/config-h100.txt
@@ -12,4 +12,4 @@ Llama-4-Scout-Fp8-ModelOpt-fi-cutlass.yaml
 Llama-4-Scout-Fp8-ModelOpt-marlin.yaml
 Llama-4-Scout-Fp8-ModelOpt-triton.yaml
 Qwen3-30B-A3B-BF16-fi-cutlass.yaml
 Qwen3-30B-A3B-BF16-triton.yaml
\ No newline at end of file