[ci/build] update torch nightly version for GH200 (#15135)

Signed-off-by: youkaichao <youkaichao@gmail.com>

[ci/build] update torch nightly version for GH200 (#15135)
Signed-off-by: youkaichao <youkaichao@gmail.com>
09b6a955 · youkaichao · GitHub · 50c9636d · 09b6a955 · 09b6a955
Unverified commit 09b6a955, authored Mar 23, 2025 by youkaichao; committed by GitHub on Mar 23, 2025
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 6 additions and 3 deletions

.buildkite/run-gh200-test.sh +2 -1

Dockerfile +4 -2

No files found.
--- a/.buildkite/run-gh200-test.sh
+++ b/.buildkite/run-gh200-test.sh
@@ -14,6 +14,7 @@ DOCKER_BUILDKIT=1 docker build . \
  -t gh200-test \
  --build-arg max_jobs=66 \
  --build-arg nvcc_threads=2 \
+  --build-arg RUN_WHEEL_CHECK=false \
  --build-arg torch_cuda_arch_list="9.0+PTX" \
  --build-arg vllm_fa_cmake_gpu_arches="90-real"

@@ -23,6 +24,6 @@ trap remove_docker_container EXIT
 remove_docker_container

 # Run the image and test offline inference
-docker run -e HF_TOKEN -v /root/.cache/huggingface:/root/.cache/huggingface --name gh200-test --gpus=all --entrypoint="" gh200-test bash -c '
+docker run -e HF_TOKEN -e VLLM_WORKER_MULTIPROC_METHOD=spawn -v /root/.cache/huggingface:/root/.cache/huggingface --name gh200-test --gpus=all --entrypoint="" gh200-test bash -c '
    python3 examples/offline_inference/basic/generate.py --model meta-llama/Llama-3.2-1B
 '
--- a/Dockerfile
+++ b/Dockerfile
@@ -52,7 +52,8 @@ WORKDIR /workspace
 # after this step
 RUN --mount=type=cache,target=/root/.cache/uv \
    if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
-        uv pip install --index-url https://download.pytorch.org/whl/nightly/cu126 "torch==2.7.0.dev20250121+cu126" "torchvision==0.22.0.dev20250121";  \
+        uv pip install --index-url https://download.pytorch.org/whl/nightly/cu128 "torch==2.8.0.dev20250318+cu128" "torchvision==0.22.0.dev20250319";  \
+        uv pip install --index-url https://download.pytorch.org/whl/nightly/cu128 --pre pytorch_triton==3.3.0+gitab727c40; \
    fi

 COPY requirements/common.txt requirements/common.txt
@@ -200,7 +201,8 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
 # after this step
 RUN --mount=type=cache,target=/root/.cache/uv \
    if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
-        uv pip install --index-url https://download.pytorch.org/whl/nightly/cu124 "torch==2.6.0.dev20241210+cu124" "torchvision==0.22.0.dev20241215";  \
+        uv pip install --index-url https://download.pytorch.org/whl/nightly/cu128 "torch==2.8.0.dev20250318+cu128" "torchvision==0.22.0.dev20250319";  \
+        uv pip install --index-url https://download.pytorch.org/whl/nightly/cu128 --pre pytorch_triton==3.3.0+gitab727c40; \
    fi

 # Install vllm wheel first, so that torch etc will be installed.