Unverified Commit 148022fc authored by ishandhanani's avatar ishandhanani Committed by GitHub
Browse files

gb200: update dockerfile to latest kernel (#9522)

parent 7a40e4f4
......@@ -4,6 +4,7 @@ FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
# Build-time arguments; override any of them with `docker build --build-arg NAME=value`.
# BUILD_TYPE selects the pip extras group used for the editable install: python[${BUILD_TYPE}].
ARG BUILD_TYPE=blackwell
# Git commit pin for DeepEP. NOTE(review): its use is not visible in this excerpt;
# presumably checked out in the "Install DeepEP" step - confirm.
ARG DEEPEP_COMMIT=1b14ad661c7640137fcfe93cccb2694ede1220b0
# Passed as -j${CMAKE_BUILD_PARALLEL_LEVEL} to `cmake --build` for the NVSHMEM build.
ARG CMAKE_BUILD_PARALLEL_LEVEL=2
# sgl_kernel release to install; interpolated into the wheel download URL
# (tag v${SGL_KERNEL_VERSION}, cu129 wheel) in the CUDA 12.9.1 branch.
ARG SGL_KERNEL_VERSION=0.3.8
ENV DEBIAN_FRONTEND=noninteractive \
CUDA_HOME=/usr/local/cuda \
GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ \
......@@ -61,11 +62,12 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5li
12.9.1) CUINDEX=129 ;; \
*) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
esac \
&& python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
&& if [ "$CUDA_VERSION" = "12.9.1" ]; then \
python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.6 --force-reinstall --no-deps ; \
python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.3.8/sgl_kernel-0.3.8+cu129-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps ; \
fi
python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu129-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps ; \
fi \
&& python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
&& python3 -m flashinfer --download-cubin
# Download source files
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
......@@ -85,7 +87,7 @@ RUN cd /sgl-workspace/nvshmem && \
NVSHMEM_PMIX_SUPPORT=0 \
NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
NVSHMEM_USE_GDRCOPY=1 \
cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES="100;120" && \
cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES="90;100;120" && \
cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL}
# Install DeepEP
......@@ -105,11 +107,6 @@ RUN python3 -m pip install --no-cache-dir \
wheel \
scikit-build-core
# These will be automatically installed by future versions of flashinfer after 0.2.9rc2
RUN python3 -m pip install --no-cache-dir \
nvidia-cudnn-cu12 \
nvidia-cudnn-frontend
# Install nixl kv transfer backend
RUN python3 -m pip install --no-cache-dir \
nixl
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment