Commit 3fb4b5fa authored by zhuwenwen
Browse files

Merge tag 'v0.18.0' into v0.18.0-ori

parents bcf25339 89138b21
...@@ -21,3 +21,18 @@ steps: ...@@ -21,3 +21,18 @@ steps:
commands: commands:
- pytest -v -s distributed/test_eplb_execute.py - pytest -v -s distributed/test_eplb_execute.py
- pytest -v -s distributed/test_eplb_spec_decode.py - pytest -v -s distributed/test_eplb_spec_decode.py
- label: Elastic EP Scaling Test
timeout_in_minutes: 20
device: b200
optional: true
working_dir: "/vllm-workspace/tests"
num_devices: 4
source_file_dependencies:
- vllm/distributed/
- vllm/engine/
- vllm/executor/
- vllm/compilation/
- tests/distributed/
commands:
- pytest -v -s distributed/test_elastic_ep.py
...@@ -8,8 +8,9 @@ steps: ...@@ -8,8 +8,9 @@ steps:
- csrc/ - csrc/
- tests/kernels/core - tests/kernels/core
- tests/kernels/test_top_k_per_row.py - tests/kernels/test_top_k_per_row.py
- tests/kernels/test_concat_mla_q.py
commands: commands:
- pytest -v -s kernels/core kernels/test_top_k_per_row.py - pytest -v -s kernels/core kernels/test_top_k_per_row.py kernels/test_concat_mla_q.py
- label: Kernels Attention Test %N - label: Kernels Attention Test %N
timeout_in_minutes: 35 timeout_in_minutes: 35
...@@ -44,7 +45,8 @@ steps: ...@@ -44,7 +45,8 @@ steps:
- vllm/envs.py - vllm/envs.py
- vllm/config - vllm/config
commands: commands:
- pytest -v -s kernels/moe --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT - pytest -v -s kernels/moe --ignore=kernels/moe/test_modular_oai_triton_moe.py --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
- pytest -v -s kernels/moe/test_modular_oai_triton_moe.py --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
parallelism: 2 parallelism: 2
- label: Kernels Mamba Test - label: Kernels Mamba Test
...@@ -70,7 +72,7 @@ steps: ...@@ -70,7 +72,7 @@ steps:
- tests/kernels/moe/test_batched_deepgemm.py - tests/kernels/moe/test_batched_deepgemm.py
- tests/kernels/attention/test_deepgemm_attention.py - tests/kernels/attention/test_deepgemm_attention.py
commands: commands:
- pytest -v -s kernels/quantization/test_block_fp8.py -k deep_gemm - pytest -v -s kernels/quantization/test_block_fp8.py
- pytest -v -s kernels/moe/test_deepgemm.py - pytest -v -s kernels/moe/test_deepgemm.py
- pytest -v -s kernels/moe/test_batched_deepgemm.py - pytest -v -s kernels/moe/test_batched_deepgemm.py
- pytest -v -s kernels/attention/test_deepgemm_attention.py - pytest -v -s kernels/attention/test_deepgemm_attention.py
...@@ -95,7 +97,7 @@ steps: ...@@ -95,7 +97,7 @@ steps:
- vllm/platforms/cuda.py - vllm/platforms/cuda.py
commands: commands:
- nvidia-smi - nvidia-smi
- python3 examples/offline_inference/basic/chat.py - python3 examples/basic/offline_inference/chat.py
# Attention # Attention
# num_heads2 broken by https://github.com/flashinfer-ai/flashinfer/issues/1353 # num_heads2 broken by https://github.com/flashinfer-ai/flashinfer/issues/1353
- pytest -v -s tests/kernels/attention/test_attention_selector.py - pytest -v -s tests/kernels/attention/test_attention_selector.py
...@@ -115,6 +117,7 @@ steps: ...@@ -115,6 +117,7 @@ steps:
- pytest -v -s tests/kernels/moe/test_nvfp4_moe.py - pytest -v -s tests/kernels/moe/test_nvfp4_moe.py
- pytest -v -s tests/kernels/moe/test_ocp_mx_moe.py - pytest -v -s tests/kernels/moe/test_ocp_mx_moe.py
- pytest -v -s tests/kernels/moe/test_flashinfer.py - pytest -v -s tests/kernels/moe/test_flashinfer.py
- pytest -v -s tests/kernels/moe/test_flashinfer_moe.py
- pytest -v -s tests/kernels/moe/test_cutedsl_moe.py - pytest -v -s tests/kernels/moe/test_cutedsl_moe.py
# e2e # e2e
- pytest -v -s tests/models/quantization/test_nvfp4.py - pytest -v -s tests/models/quantization/test_nvfp4.py
...@@ -154,8 +157,6 @@ steps: ...@@ -154,8 +157,6 @@ steps:
commands: commands:
- pytest -v -s kernels/moe/test_deepep_deepgemm_moe.py - pytest -v -s kernels/moe/test_deepep_deepgemm_moe.py
- pytest -v -s kernels/moe/test_deepep_moe.py - pytest -v -s kernels/moe/test_deepep_moe.py
- pytest -v -s kernels/moe/test_pplx_cutlass_moe.py
# - pytest -v -s kernels/moe/test_pplx_moe.py - failing on main
- label: Kernels Fp4 MoE Test (B200) - label: Kernels Fp4 MoE Test (B200)
timeout_in_minutes: 60 timeout_in_minutes: 60
......
...@@ -11,17 +11,17 @@ steps: ...@@ -11,17 +11,17 @@ steps:
commands: commands:
- pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-small.txt - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-small.txt
- label: LM Eval Large Models (4 GPUs)(A100) # - label: LM Eval Large Models (4 GPUs)(A100)
device: a100 # device: a100
optional: true # optional: true
num_devices: 4 # num_devices: 4
working_dir: "/vllm-workspace/.buildkite/lm-eval-harness" # working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
source_file_dependencies: # source_file_dependencies:
- csrc/ # - csrc/
- vllm/model_executor/layers/quantization # - vllm/model_executor/layers/quantization
commands: # commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn # - export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4 # - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
- label: LM Eval Large Models (4 GPUs)(H100) - label: LM Eval Large Models (4 GPUs)(H100)
device: h100 device: h100
...@@ -73,3 +73,29 @@ steps: ...@@ -73,3 +73,29 @@ steps:
num_devices: 2 num_devices: 2
commands: commands:
- pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=evals/gsm8k/configs/moe-refactor-dp-ep/config-b200.txt - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=evals/gsm8k/configs/moe-refactor-dp-ep/config-b200.txt
- label: GPQA Eval (GPT-OSS) (H100)
timeout_in_minutes: 120
device: h100
optional: true
num_devices: 2
source_file_dependencies:
- csrc/
- vllm/model_executor/layers/quantization
- tests/evals/gpt_oss/
commands:
- uv pip install --system 'gpt-oss[eval]==0.0.5'
- pytest -s -v evals/gpt_oss/test_gpqa_correctness.py --config-list-file=configs/models-h100.txt
- label: GPQA Eval (GPT-OSS) (B200)
timeout_in_minutes: 120
device: b200
optional: true
num_devices: 2
source_file_dependencies:
- csrc/
- vllm/model_executor/layers/quantization
- tests/evals/gpt_oss/
commands:
- uv pip install --system 'gpt-oss[eval]==0.0.5'
- pytest -s -v evals/gpt_oss/test_gpqa_correctness.py --config-list-file=configs/models-b200.txt
This diff is collapsed.
This diff is collapsed.
...@@ -4,7 +4,6 @@ depends_on: ...@@ -4,7 +4,6 @@ depends_on:
steps: steps:
- label: Basic Models Tests (Initialization) - label: Basic Models Tests (Initialization)
timeout_in_minutes: 45 timeout_in_minutes: 45
mirror_hardwares: [amdexperimental]
torch_nightly: true torch_nightly: true
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
...@@ -16,7 +15,6 @@ steps: ...@@ -16,7 +15,6 @@ steps:
- label: Basic Models Tests (Extra Initialization) %N - label: Basic Models Tests (Extra Initialization) %N
timeout_in_minutes: 45 timeout_in_minutes: 45
mirror_hardwares: [amdexperimental]
torch_nightly: true torch_nightly: true
source_file_dependencies: source_file_dependencies:
- vllm/model_executor/models/ - vllm/model_executor/models/
...@@ -38,6 +36,12 @@ steps: ...@@ -38,6 +36,12 @@ steps:
- tests/models/test_registry.py - tests/models/test_registry.py
commands: commands:
- pytest -v -s models/test_terratorch.py models/test_transformers.py models/test_registry.py - pytest -v -s models/test_terratorch.py models/test_transformers.py models/test_registry.py
mirror:
amd:
device: mi325_1
depends_on:
- image-build-amd
- label: Basic Models Test (Other CPU) # 5min - label: Basic Models Test (Other CPU) # 5min
depends_on: depends_on:
...@@ -61,7 +65,7 @@ steps: ...@@ -61,7 +65,7 @@ steps:
- pytest -v -s tests/models/test_transformers.py - pytest -v -s tests/models/test_transformers.py
- pytest -v -s tests/models/multimodal/processing/ - pytest -v -s tests/models/multimodal/processing/
- pytest -v -s tests/models/multimodal/test_mapping.py - pytest -v -s tests/models/multimodal/test_mapping.py
- python3 examples/offline_inference/basic/chat.py - python3 examples/basic/offline_inference/chat.py
- python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
# Whisper needs spawn method to avoid deadlock # Whisper needs spawn method to avoid deadlock
- VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
This diff is collapsed.
...@@ -15,10 +15,17 @@ steps: ...@@ -15,10 +15,17 @@ steps:
- pytest -v -s plugins_tests/test_platform_plugins.py - pytest -v -s plugins_tests/test_platform_plugins.py
- pip uninstall vllm_add_dummy_platform -y - pip uninstall vllm_add_dummy_platform -y
# end platform plugin tests # end platform plugin tests
# begin io_processor plugins test, all the code in between uses the prithvi_io_processor plugin # begin io_processor plugins test
# test generic io_processor plugins functions
- pytest -v -s ./plugins_tests/test_io_processor_plugins.py
# test Terratorch io_processor plugins
- pip install -e ./plugins/prithvi_io_processor_plugin - pip install -e ./plugins/prithvi_io_processor_plugin
- pytest -v -s plugins_tests/test_io_processor_plugins.py - pytest -v -s plugins_tests/test_terratorch_io_processor_plugins.py
- pip uninstall prithvi_io_processor_plugin -y - pip uninstall prithvi_io_processor_plugin -y
# test bge_m3_sparse io_processor plugin
- pip install -e ./plugins/bge_m3_sparse_plugin
- pytest -v -s plugins_tests/test_bge_m3_sparse_io_processor_plugins.py
- pip uninstall bge_m3_sparse_plugin -y
# end io_processor plugins test # end io_processor plugins test
# begin stat_logger plugins test # begin stat_logger plugins test
- pip install -e ./plugins/vllm_add_dummy_stat_logger - pip install -e ./plugins/vllm_add_dummy_stat_logger
......
This diff is collapsed.
...@@ -12,3 +12,10 @@ steps: ...@@ -12,3 +12,10 @@ steps:
commands: commands:
- pytest -v -s samplers - pytest -v -s samplers
- VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers - VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers
mirror:
amd:
device: mi325_1
depends_on:
- image-build-amd
commands:
- pytest -v -s samplers
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment