Commit 3b50924c authored by raojy's avatar raojy
Browse files

raw_vllm

parent fbeb8a6f
Pipeline #3455 canceled with stages
group: Quantization
depends_on:
- image-build
steps:
- label: Quantization
timeout_in_minutes: 90
source_file_dependencies:
- csrc/
- vllm/model_executor/layers/quantization
- tests/quantization
commands:
# temporary install here since we need nightly, will move to requirements/test.in
# after torchao 0.12 release, and pin a working version of torchao nightly here
# since torchao nightly is only compatible with torch nightly currently
# https://github.com/pytorch/ao/issues/2919, we'll have to skip new torchao tests for now
# we can only upgrade after this is resolved
# TODO(jerryzh168): resolve the above comment
- uv pip install --system torchao==0.14.1 --index-url https://download.pytorch.org/whl/cu129
- uv pip install --system conch-triton-kernels
- VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ --ignore quantization/test_blackwell_moe.py
- label: Quantized MoE Test (B200)
timeout_in_minutes: 60
working_dir: "/vllm-workspace/"
device: b200
source_file_dependencies:
- tests/quantization/test_blackwell_moe.py
- vllm/model_executor/models/deepseek_v2.py
- vllm/model_executor/models/gpt_oss.py
- vllm/model_executor/models/llama4.py
- vllm/model_executor/layers/fused_moe
- vllm/model_executor/layers/quantization/compressed_tensors
- vllm/model_executor/layers/quantization/modelopt.py
- vllm/model_executor/layers/quantization/mxfp4.py
- vllm/v1/attention/backends/flashinfer.py
commands:
- pytest -s -v tests/quantization/test_blackwell_moe.py
- label: Quantized Models Test
timeout_in_minutes: 60
source_file_dependencies:
- vllm/model_executor/layers/quantization
- tests/models/quantization
commands:
- pytest -v -s models/quantization
group: Ray Compatibility
depends_on:
- image-build
steps:
- label: Ray Dependency Compatibility Check
# Informational only — does not block the pipeline.
# If this fails, it means the PR introduces a dependency that
# conflicts with Ray's dependency constraints.
# See https://github.com/vllm-project/vllm/issues/33599
soft_fail: true
timeout_in_minutes: 10
source_file_dependencies:
- requirements/
- setup.py
commands:
- bash /vllm-workspace/.buildkite/scripts/check-ray-compatibility.sh
group: Samplers
depends_on:
- image-build
steps:
- label: Samplers Test
timeout_in_minutes: 75
source_file_dependencies:
- vllm/model_executor/layers
- vllm/sampling_metadata.py
- tests/samplers
- tests/conftest.py
commands:
- pytest -v -s samplers
- VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers
mirror:
amd:
device: mi325_1
depends_on:
- image-build-amd
commands:
- pytest -v -s samplers
group: Weight Loading
depends_on:
- image-build
steps:
- label: Weight Loading Multiple GPU # 33min
timeout_in_minutes: 45
working_dir: "/vllm-workspace/tests"
num_devices: 2
optional: true
source_file_dependencies:
- vllm/
- tests/weight_loading
commands:
- bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models.txt
# - label: Weight Loading Multiple GPU - Large Models # optional
# working_dir: "/vllm-workspace/tests"
# num_devices: 2
# device: a100
# optional: true
# source_file_dependencies:
# - vllm/
# - tests/weight_loading
# commands:
# - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large.txt
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment