Commit fda0cb2a (unverified), authored by kyleliang-nv, committed by GitHub
Browse files

Fix Dockerfile not installing correct version of DeepEP for arm build (#11773)

parent ebda73dc
......@@ -15,11 +15,13 @@ jobs:
- runner: x64-docker-build-node
platform: linux/amd64
build_type: all
grace_blackwell: 0
tag: dev-x86
version: 12.9.1
- runner: arm-docker-build-node
platform: linux/arm64
build_type: all
grace_blackwell: 1
tag: dev-arm64
version: 12.9.1
steps:
......@@ -51,7 +53,7 @@ jobs:
- name: Build and Push Dev Image
run: |
docker buildx build --platform ${{ matrix.platform }} --push -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.version }} --build-arg BUILD_TYPE=${{ matrix.build_type }} --build-arg CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) -t lmsysorg/sglang:${{ matrix.tag }} --no-cache .
docker buildx build --platform ${{ matrix.platform }} --push -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.version }} --build-arg BUILD_TYPE=${{ matrix.build_type }} --build-arg GRACE_BLACKWELL=${{ matrix.grace_blackwell }} --build-arg CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) -t lmsysorg/sglang:${{ matrix.tag }} --no-cache .
create-manifests:
runs-on: ubuntu-22.04
......
......@@ -16,6 +16,7 @@ jobs:
variant:
- cuda_version: "12.9.1"
build_type: "all"
grace_blackwell: 0
runs-on: x64-docker-build-node
steps:
- name: Delete huge unnecessary tools folder
......@@ -55,6 +56,7 @@ jobs:
-f docker/Dockerfile \
--build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \
--build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
--build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \
-t lmsysorg/sglang:${tag} \
--no-cache \
.
......@@ -67,6 +69,7 @@ jobs:
variant:
- cuda_version: "12.9.1"
build_type: "all"
grace_blackwell: 1
runs-on: arm-docker-build-node
steps:
- name: Delete huge unnecessary tools folder
......@@ -95,6 +98,7 @@ jobs:
-f docker/Dockerfile \
--build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \
--build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
--build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \
-t lmsysorg/sglang:${tag} \
--no-cache \
.
......
......@@ -2,6 +2,7 @@ ARG CUDA_VERSION=12.9.1
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04 AS base
ARG TARGETARCH
ARG GRACE_BLACKWELL=0
ARG BUILD_TYPE=all
ARG BRANCH_TYPE=remote
ARG DEEPEP_COMMIT=9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee
......@@ -99,7 +100,7 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5li
# Download NVSHMEM source files
# We use Tom's DeepEP fork for GB200 for now
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
if [ "$BUILD_TYPE" = "blackwell_aarch64" ]; then \
if [ "$GRACE_BLACKWELL" = "1" ]; then \
git clone https://github.com/fzyzcjy/DeepEP.git \
&& cd DeepEP && git checkout 1b14ad661c7640137fcfe93cccb2694ede1220b0 && sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh && cd .. ; \
else \
......@@ -112,7 +113,7 @@ RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/sour
# Build and install NVSHMEM
RUN cd /sgl-workspace/nvshmem && \
if [ "$BUILD_TYPE" = "blackwell" ] || [ "$BUILD_TYPE" = "blackwell_aarch" ]; then CUDA_ARCH="90;100;120"; else CUDA_ARCH="90"; fi && \
if [ "$GRACE_BLACKWELL" = "1" ]; then CUDA_ARCH="90;100;120"; else CUDA_ARCH="90"; fi && \
NVSHMEM_SHMEM_SUPPORT=0 \
NVSHMEM_UCX_SUPPORT=0 \
NVSHMEM_USE_NCCL=0 \
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment