[ci/build] fix gh200 test (#12681)

Signed-off-by: youkaichao <youkaichao@gmail.com>

[ci/build] fix gh200 test (#12681)
Signed-off-by: youkaichao <youkaichao@gmail.com>
1298a400 · youkaichao · GitHub · ad4a9dc8 · 1298a400 · 1298a400
Unverified commit 1298a400, authored Feb 03, 2025 by youkaichao; committed by GitHub on Feb 03, 2025.
Hide whitespace changes
Inline Side-by-side

Showing 3 changed files with 5 additions and 5 deletions

.buildkite/check-wheel-size.py +2 -2

.buildkite/run-gh200-test.sh +2 -2

Dockerfile +1 -1

No files found.
--- a/.buildkite/check-wheel-size.py
+++ b/.buildkite/check-wheel-size.py
@@ -4,11 +4,11 @@ import os
 import sys
 import zipfile
-# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 300 MiB
+# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 400 MiB
 # Note that we have 400 MiB quota, please use it wisely.
 # See https://github.com/pypi/support/issues/3792 .
 # Please also sync the value with the one in Dockerfile.
-VLLM_MAX_SIZE_MB = int(os.environ.get('VLLM_MAX_SIZE_MB', 300))
+VLLM_MAX_SIZE_MB = int(os.environ.get('VLLM_MAX_SIZE_MB', 400))
 def print_top_10_largest_files(zip_file):

--- a/.buildkite/run-gh200-test.sh
+++ b/.buildkite/run-gh200-test.sh
@@ -23,6 +23,6 @@ trap remove_docker_container EXIT
 remove_docker_container
 # Run the image and test offline inference
-docker run --name gh200-test --gpus=all --entrypoint="" gh200-test bash -c '
+docker run -e HF_TOKEN -v /root/.cache/huggingface:/root/.cache/huggingface --name gh200-test --gpus=all --entrypoint="" gh200-test bash -c '
-    python3 examples/offline_inference/basic.py
+    python3 examples/offline_inference/cli.py --model meta-llama/Llama-3.2-1B
 '
--- a/Dockerfile
+++ b/Dockerfile
@@ -127,7 +127,7 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
 # Check the size of the wheel if RUN_WHEEL_CHECK is true
 COPY .buildkite/check-wheel-size.py check-wheel-size.py
 # sync the default value with .buildkite/check-wheel-size.py
-ARG VLLM_MAX_SIZE_MB=300
+ARG VLLM_MAX_SIZE_MB=400
 ENV VLLM_MAX_SIZE_MB=$VLLM_MAX_SIZE_MB
 ARG RUN_WHEEL_CHECK=true
 RUN if [ "$RUN_WHEEL_CHECK" = "true" ]; then \