Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
9b5c9f94
Unverified
Commit
9b5c9f94
authored
May 02, 2024
by
Alexei-V-Ivanov-AMD
Committed by
GitHub
May 02, 2024
Browse files
[CI/Build] AMD CI pipeline with extended set of tests. (#4267)
Co-authored-by:
simon-mo
<
simon.mo@hey.com
>
parent
32881f3f
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
67 additions
and
45 deletions
+67
-45
.buildkite/run-amd-test.sh
.buildkite/run-amd-test.sh
+25
-33
.buildkite/run-benchmarks.sh
.buildkite/run-benchmarks.sh
+5
-0
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+14
-1
.buildkite/test-template.j2
.buildkite/test-template.j2
+16
-5
Dockerfile.rocm
Dockerfile.rocm
+7
-6
No files found.
.buildkite/run-amd-test.sh
View file @
9b5c9f94
# This script build the ROCm docker image and run the API server inside the container.
# This script build the ROCm docker image and runs test inside it.
# It serves a sanity check for compilation and basic model usage.
set
-ex
set
-ex
# Print ROCm version
# Print ROCm version
echo
"--- ROCm info"
rocminfo
rocminfo
echo
"--- Resetting GPUs"
echo
"reset"
>
/opt/amdgpu/etc/gpu_state
echo
"reset"
>
/opt/amdgpu/etc/gpu_state
...
@@ -16,37 +17,28 @@ while true; do
...
@@ -16,37 +17,28 @@ while true; do
fi
fi
done
done
echo
"--- Building container"
sha
=
$(
git rev-parse
--short
HEAD
)
container_name
=
rocm_
${
sha
}
docker build
\
-t
${
container_name
}
\
-f
Dockerfile.rocm
\
--progress
plain
\
.
remove_docker_container
()
{
docker
rm
-f
${
container_name
}
||
docker image
rm
-f
${
container_name
}
||
true
}
trap
remove_docker_container EXIT
echo
"--- Running container"
# Try building the docker image
docker run
\
docker build
-t
rocm
-f
Dockerfile.rocm
.
--device
/dev/kfd
--device
/dev/dri
\
--network
host
\
# Setup cleanup
--rm
\
remove_docker_container
()
{
docker
rm
-f
rocm
||
true
;
}
-e
HF_TOKEN
\
trap
remove_docker_container EXIT
--name
${
container_name
}
\
remove_docker_container
${
container_name
}
\
/bin/bash
-c
$(
echo
$1
|
sed
"s/^'//"
|
sed
"s/'
$/
/"
)
# Run the image
export
HIP_VISIBLE_DEVICES
=
1
docker run
--device
/dev/kfd
--device
/dev/dri
--network
host
-e
HIP_VISIBLE_DEVICES
--name
rocm rocm python3
-m
vllm.entrypoints.api_server &
# Wait for the server to start
wait_for_server_to_start
()
{
timeout
=
300
counter
=
0
while
[
"
$(
curl
-s
-o
/dev/null
-w
''
%
{
http_code
}
''
localhost:8000/health
)
"
!=
"200"
]
;
do
sleep
1
counter
=
$((
counter
+
1
))
if
[
$counter
-ge
$timeout
]
;
then
echo
"Timeout after
$timeout
seconds"
break
fi
done
}
wait_for_server_to_start
# Test a simple prompt
curl
-X
POST
-H
"Content-Type: application/json"
\
localhost:8000/generate
\
-d
'{"prompt": "San Francisco is a"}'
.buildkite/run-benchmarks.sh
View file @
9b5c9f94
...
@@ -53,6 +53,11 @@ echo '```' >> benchmark_results.md
...
@@ -53,6 +53,11 @@ echo '```' >> benchmark_results.md
tail
-n
20 benchmark_serving.txt
>>
benchmark_results.md
# last 20 lines
tail
-n
20 benchmark_serving.txt
>>
benchmark_results.md
# last 20 lines
echo
'```'
>>
benchmark_results.md
echo
'```'
>>
benchmark_results.md
# if the agent binary is not found, skip uploading the results, exit 0
if
[
!
-f
/workspace/buildkite-agent
]
;
then
exit
0
fi
# upload the results to buildkite
# upload the results to buildkite
/workspace/buildkite-agent annotate
--style
"info"
--context
"benchmark-results"
< benchmark_results.md
/workspace/buildkite-agent annotate
--style
"info"
--context
"benchmark-results"
< benchmark_results.md
...
...
.buildkite/test-pipeline.yaml
View file @
9b5c9f94
...
@@ -20,6 +20,7 @@ steps:
...
@@ -20,6 +20,7 @@ steps:
-
VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
-
VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
-
label
:
Core Test
-
label
:
Core Test
mirror_hardwares
:
[
amd
]
command
:
pytest -v -s core
command
:
pytest -v -s core
-
label
:
Distributed Comm Ops Test
-
label
:
Distributed Comm Ops Test
...
@@ -29,7 +30,10 @@ steps:
...
@@ -29,7 +30,10 @@ steps:
-
label
:
Distributed Tests
-
label
:
Distributed Tests
working_dir
:
"
/vllm-workspace/tests/distributed"
working_dir
:
"
/vllm-workspace/tests/distributed"
num_gpus
:
2
num_gpus
:
2
# only support 1 or 2 for now.
mirror_hardwares
:
[
amd
]
commands
:
commands
:
-
pytest -v -s test_pynccl_library.py
-
pytest -v -s test_pynccl_library.py
-
TEST_DIST_MODEL=facebook/opt-125m pytest -v -s test_basic_distributed_correctness.py
-
TEST_DIST_MODEL=facebook/opt-125m pytest -v -s test_basic_distributed_correctness.py
...
@@ -44,6 +48,7 @@ steps:
...
@@ -44,6 +48,7 @@ steps:
-
pytest -v -s test_pynccl.py
-
pytest -v -s test_pynccl.py
-
label
:
Engine Test
-
label
:
Engine Test
mirror_hardwares
:
[
amd
]
command
:
pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py
command
:
pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py
-
label
:
Entrypoints Test
-
label
:
Entrypoints Test
...
@@ -54,6 +59,7 @@ steps:
...
@@ -54,6 +59,7 @@ steps:
-
label
:
Examples Test
-
label
:
Examples Test
working_dir
:
"
/vllm-workspace/examples"
working_dir
:
"
/vllm-workspace/examples"
mirror_hardwares
:
[
amd
]
commands
:
commands
:
# install aws cli for llava_example.py
# install aws cli for llava_example.py
-
pip install awscli
-
pip install awscli
...
@@ -67,16 +73,19 @@ steps:
...
@@ -67,16 +73,19 @@ steps:
parallelism
:
4
parallelism
:
4
-
label
:
Models Test
-
label
:
Models Test
mirror_hardwares
:
[
amd
]
commands
:
commands
:
-
bash ../.buildkite/download-images.sh
-
bash ../.buildkite/download-images.sh
-
pytest -v -s models --ignore=models/test_llava.py --ignore=models/test_mistral.py
-
pytest -v -s models --ignore=models/test_llava.py --ignore=models/test_mistral.py
-
label
:
Llava Test
-
label
:
Llava Test
mirror_hardwares
:
[
amd
]
commands
:
commands
:
-
bash ../.buildkite/download-images.sh
-
bash ../.buildkite/download-images.sh
-
pytest -v -s models/test_llava.py
-
pytest -v -s models/test_llava.py
-
label
:
Prefix Caching Test
-
label
:
Prefix Caching Test
mirror_hardwares
:
[
amd
]
commands
:
commands
:
-
pytest -v -s prefix_caching
-
pytest -v -s prefix_caching
...
@@ -84,12 +93,15 @@ steps:
...
@@ -84,12 +93,15 @@ steps:
command
:
pytest -v -s samplers
command
:
pytest -v -s samplers
-
label
:
LogitsProcessor Test
-
label
:
LogitsProcessor Test
mirror_hardwares
:
[
amd
]
command
:
pytest -v -s test_logits_processor.py
command
:
pytest -v -s test_logits_processor.py
-
label
:
Worker Test
-
label
:
Worker Test
mirror_hardwares
:
[
amd
]
command
:
pytest -v -s worker
command
:
pytest -v -s worker
-
label
:
Speculative decoding tests
-
label
:
Speculative decoding tests
mirror_hardwares
:
[
amd
]
command
:
pytest -v -s spec_decode
command
:
pytest -v -s spec_decode
-
label
:
LoRA Test %N
-
label
:
LoRA Test %N
...
@@ -107,6 +119,7 @@ steps:
...
@@ -107,6 +119,7 @@ steps:
-
label
:
Benchmarks
-
label
:
Benchmarks
working_dir
:
"
/vllm-workspace/.buildkite"
working_dir
:
"
/vllm-workspace/.buildkite"
mirror_hardwares
:
[
amd
]
commands
:
commands
:
-
pip install aiohttp
-
pip install aiohttp
-
bash run-benchmarks.sh
-
bash run-benchmarks.sh
...
...
.buildkite/test-template.j2
View file @
9b5c9f94
...
@@ -16,18 +16,29 @@ steps:
...
@@ -16,18 +16,29 @@ steps:
limit: 5
limit: 5
- wait
- wait
- label: "AMD Test"
- group: "AMD Tests"
agents:
depends_on: ~
queue: amd
steps:
command: bash .buildkite/run-amd-test.sh
{% for step in steps %}
{% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
- label: "AMD: {{ step.label }}"
agents:
queue: amd
command: bash .buildkite/run-amd-test.sh "'cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}'"
env:
DOCKER_BUILDKIT: "1"
{% endif %}
{% endfor %}
- label: "Neuron Test"
- label: "Neuron Test"
depends_on: ~
agents:
agents:
queue: neuron
queue: neuron
command: bash .buildkite/run-neuron-test.sh
command: bash .buildkite/run-neuron-test.sh
soft_fail: true
soft_fail: true
- label: "CPU Test"
- label: "Intel Test"
depends_on: ~
command: bash .buildkite/run-cpu-test.sh
command: bash .buildkite/run-cpu-test.sh
{% for step in steps %}
{% for step in steps %}
...
...
Dockerfile.rocm
View file @
9b5c9f94
...
@@ -46,7 +46,7 @@ RUN apt-get update && apt-get install -y \
...
@@ -46,7 +46,7 @@ RUN apt-get update && apt-get install -y \
### Mount Point ###
### Mount Point ###
# When launching the container, mount the code directory to /app
# When launching the container, mount the code directory to /app
ARG APP_MOUNT=/
app
ARG APP_MOUNT=/
vllm-workspace
VOLUME [ ${APP_MOUNT} ]
VOLUME [ ${APP_MOUNT} ]
WORKDIR ${APP_MOUNT}
WORKDIR ${APP_MOUNT}
...
@@ -89,15 +89,16 @@ RUN if [ "$BUILD_TRITON" = "1" ]; then \
...
@@ -89,15 +89,16 @@ RUN if [ "$BUILD_TRITON" = "1" ]; then \
&& cd ../..; \
&& cd ../..; \
fi
fi
COPY ./ /app/vllm
WORKDIR /vllm-workspace
COPY . .
RUN python3 -m pip install --upgrade pip numba
RUN python3 -m pip install --upgrade pip numba
RUN cd /app \
RUN --mount=type=cache,target=/root/.cache/pip \
&& cd vllm \
pip install -U -r requirements-rocm.txt \
&& pip install -U -r requirements-rocm.txt \
&& patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h ./rocm_patch/rocm_bf16.patch \
&& patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h /app/vllm/rocm_patch/rocm_bf16.patch \
&& python3 setup.py install \
&& python3 setup.py install \
&& cp build/lib.linux-x86_64-cpython-39/vllm/_C.cpython-39-x86_64-linux-gnu.so vllm/ \
&& cd ..
&& cd ..
RUN python3 -m pip install --upgrade pip
RUN python3 -m pip install --upgrade pip
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment