[CI/Build] AMD CI pipeline with extended set of tests. (#4267)

Co-authored-by: simon-mo <simon.mo@hey.com>

[CI/Build] AMD CI pipeline with extended set of tests. (#4267)
Co-authored-by: simon-mo <simon.mo@hey.com>
9b5c9f94 · Alexei-V-Ivanov-AMD · GitHub · 32881f3f · 9b5c9f94 · 9b5c9f94
Unverified Commit 9b5c9f94 authored May 02, 2024 by Alexei-V-Ivanov-AMD Committed by GitHub May 02, 2024
5 changed files
--- a/.buildkite/run-amd-test.sh
+++ b/.buildkite/run-amd-test.sh
-# This script build the ROCm docker image and run the API server inside the container.
+# This script builds the ROCm docker image and runs tests inside it.
-# It serves a sanity check for compilation and basic model usage.
 set -ex
 # Print ROCm version
+echo "--- ROCm info"
 rocminfo
+echo "--- Resetting GPUs"
 echo "reset" > /opt/amdgpu/etc/gpu_state
@@ -16,37 +17,28 @@ while true; do
        fi
 done
+echo "--- Building container"
+sha=$(git rev-parse --short HEAD)
+container_name=rocm_${sha}
+docker build \
+        -t ${container_name} \
+        -f Dockerfile.rocm \
+        --progress plain \
+        .
+remove_docker_container() {
+   docker rm -f ${container_name} || docker image rm -f ${container_name} || true
+}
+trap remove_docker_container EXIT
+echo "--- Running container"
+# Run the test command given as $1 inside the freshly built image.
+# $1 arrives wrapped in literal single quotes; sed strips that one leading
+# and one trailing quote. The command substitution MUST be double-quoted:
+# unquoted, its output is word-split, so `bash -c` would execute only the
+# first word (e.g. `cd`) and treat the rest as positional parameters,
+# silently skipping the tests.
-# Try building the docker image
+docker run \
-docker build -t rocm -f Dockerfile.rocm .
+        --device /dev/kfd --device /dev/dri \
+        --network host \
-# Setup cleanup
+        --rm \
-remove_docker_container() { docker rm -f rocm || true; }
+        -e HF_TOKEN \
-trap remove_docker_container EXIT
+        --name ${container_name} \
-remove_docker_container
+        ${container_name} \
+        /bin/bash -c "$(printf '%s\n' "$1" | sed -e "s/^'//" -e "s/'$//")"
-# Run the image
-export HIP_VISIBLE_DEVICES=1
-docker run --device /dev/kfd --device /dev/dri --network host -e HIP_VISIBLE_DEVICES --name rocm rocm python3 -m vllm.entrypoints.api_server &
-# Wait for the server to start
-wait_for_server_to_start() {
-    timeout=300
-    counter=0
-    while [ "$(curl -s -o /dev/null -w ''%{http_code}'' localhost:8000/health)" != "200" ]; do
-        sleep 1
-        counter=$((counter + 1))
-        if [ $counter -ge $timeout ]; then
-            echo "Timeout after $timeout seconds"
-            break
-        fi
-    done
-}
-wait_for_server_to_start
-# Test a simple prompt
-curl -X POST -H "Content-Type: application/json" \
-    localhost:8000/generate \
-    -d '{"prompt": "San Francisco is a"}'
--- a/.buildkite/run-benchmarks.sh
+++ b/.buildkite/run-benchmarks.sh
@@ -53,6 +53,11 @@ echo '```' >> benchmark_results.md
 tail -n 20 benchmark_serving.txt >> benchmark_results.md # last 20 lines
 echo '```' >> benchmark_results.md
+# if the agent binary is not found, skip uploading the results, exit 0
+if [ ! -f /workspace/buildkite-agent ]; then
+    exit 0
+fi
 # upload the results to buildkite
 /workspace/buildkite-agent annotate --style "info" --context "benchmark-results" < benchmark_results.md

--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -20,6 +20,7 @@ steps:
  - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
 - label: Core Test
+  mirror_hardwares: [amd]
  command: pytest -v -s core
 - label: Distributed Comm Ops Test
@@ -29,7 +30,10 @@ steps:
 - label: Distributed Tests
  working_dir: "/vllm-workspace/tests/distributed"
-  num_gpus: 2
+  num_gpus: 2 # only support 1 or 2 for now.
+  mirror_hardwares: [amd]
  commands:
  - pytest -v -s test_pynccl_library.py
  - TEST_DIST_MODEL=facebook/opt-125m pytest -v -s test_basic_distributed_correctness.py
@@ -44,6 +48,7 @@ steps:
  - pytest -v -s test_pynccl.py
 - label: Engine Test
+  mirror_hardwares: [amd]
  command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py
 - label: Entrypoints Test
@@ -54,6 +59,7 @@ steps:
 - label: Examples Test
  working_dir: "/vllm-workspace/examples"
+  mirror_hardwares: [amd]
  commands:
    # install aws cli for llava_example.py
    - pip install awscli
@@ -67,16 +73,19 @@ steps:
  parallelism: 4
 - label: Models Test
+  mirror_hardwares: [amd]
  commands:
    - bash ../.buildkite/download-images.sh
    - pytest -v -s models --ignore=models/test_llava.py --ignore=models/test_mistral.py
 - label: Llava Test
+  mirror_hardwares: [amd]
  commands:
    - bash ../.buildkite/download-images.sh
    - pytest -v -s models/test_llava.py
 - label: Prefix Caching Test
+  mirror_hardwares: [amd]
  commands:
    - pytest -v -s prefix_caching
@@ -84,12 +93,15 @@ steps:
  command: pytest -v -s samplers
 - label: LogitsProcessor Test
+  mirror_hardwares: [amd]
  command: pytest -v -s test_logits_processor.py
 - label: Worker Test
+  mirror_hardwares: [amd]
  command: pytest -v -s worker
 - label: Speculative decoding tests
+  mirror_hardwares: [amd]
  command: pytest -v -s spec_decode
 - label: LoRA Test %N
@@ -107,6 +119,7 @@ steps:
 - label: Benchmarks
  working_dir: "/vllm-workspace/.buildkite"
+  mirror_hardwares: [amd]
  commands:
  - pip install aiohttp
  - bash run-benchmarks.sh

--- a/.buildkite/test-template.j2
+++ b/.buildkite/test-template.j2
@@ -16,18 +16,29 @@ steps:
          limit: 5
  - wait
-  - label: "AMD Test"
+  - group: "AMD Tests"
-    agents:
+    depends_on: ~
-      queue: amd
+    steps:
-    command: bash .buildkite/run-amd-test.sh
+    {% for step in steps %}
+    {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
+      - label: "AMD: {{ step.label }}"
+        agents:
+          queue: amd
+        command: bash .buildkite/run-amd-test.sh "'cd {{ (step.working_dir or default_working_dir) | safe  }} && {{ step.command  or (step.commands | join(' && ')) | safe }}'"
+        env:
+          DOCKER_BUILDKIT: "1"
+    {% endif %}
+    {% endfor %}
  - label: "Neuron Test"
+    depends_on: ~
    agents:
      queue: neuron
    command: bash .buildkite/run-neuron-test.sh
    soft_fail: true
-  - label: "CPU Test"
+  - label: "Intel Test"
+    depends_on: ~
    command: bash .buildkite/run-cpu-test.sh
  {% for step in steps %}

--- a/Dockerfile.rocm
+++ b/Dockerfile.rocm
@@ -46,7 +46,7 @@ RUN apt-get update && apt-get install -y \
 ### Mount Point ###
 # When launching the container, mount the code directory to /app
-ARG APP_MOUNT=/app
+ARG APP_MOUNT=/vllm-workspace
 VOLUME [ ${APP_MOUNT} ]
 WORKDIR ${APP_MOUNT}
@@ -89,15 +89,16 @@ RUN if [ "$BUILD_TRITON" = "1" ]; then \
    && cd ../..; \
    fi
-COPY ./ /app/vllm
+WORKDIR /vllm-workspace
+COPY . .
 RUN python3 -m pip install --upgrade pip numba
-RUN cd /app \
+RUN --mount=type=cache,target=/root/.cache/pip \
-    && cd vllm \
+    pip install -U -r requirements-rocm.txt \
-    && pip install -U -r requirements-rocm.txt \
+    && patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h ./rocm_patch/rocm_bf16.patch \
-    && patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h /app/vllm/rocm_patch/rocm_bf16.patch \
    && python3 setup.py install \
+    && cp build/lib.linux-x86_64-cpython-39/vllm/_C.cpython-39-x86_64-linux-gnu.so vllm/ \
    && cd ..
 RUN python3 -m pip install --upgrade pip