Add basic correctness 2 GPU tests to 4 GPU pipeline (#5518)

f31c1f90 · Antoni Baum · GitHub · 3ce2c050 · f31c1f90
Unverified Commit f31c1f90 authored Jun 16, 2024 by Antoni Baum Committed by GitHub Jun 16, 2024
Show whitespace changes
Inline Side-by-side

Showing with 6 additions and 2 deletions

.buildkite/test-pipeline.yaml .buildkite/test-pipeline.yaml +6 -2

No files found.
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -32,7 +32,7 @@ steps:
  working_dir: "/vllm-workspace/tests"
  num_gpus: 2

-- label: Distributed Tests
+- label: Distributed Tests (2 GPUs)
  mirror_hardwares: [amd]
  working_dir: "/vllm-workspace/tests"
  num_gpus: 2
@@ -50,12 +50,16 @@ steps:
  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s distributed/test_utils.py

-- label: Distributed Tests (Multiple Groups)
+- label: Distributed Tests (4 GPUs)
  #mirror_hardwares: [amd]
  working_dir: "/vllm-workspace/tests"
  num_gpus: 4
  commands:
  - pytest -v -s distributed/test_pynccl.py
+  # We want to test that models which use 2 GPUs work with 4 GPUs, which is why we duplicate them here.
+  # See https://github.com/vllm-project/vllm/pull/5473#issuecomment-2166601837 for context.
+  - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
+  - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py

 - label: Engine Test
  mirror_hardwares: [amd]