"examples/vscode:/vscode.git/clone" did not exist on "1995ef9aeec006313fad42cf12c3860081864cb6"
Unverified Commit 242a4d5b authored by Dillon Cullinan, committed by GitHub
Browse files

ci: Fixing sccache behavior (#4866)


Signed-off-by: Dillon Cullinan <dcullinan@nvidia.com>
Signed-off-by: Anant Sharma <anants@nvidia.com>
Co-authored-by: Anant Sharma <anants@nvidia.com>
parent 9b7152d7
......@@ -143,7 +143,9 @@ runs:
--vllm-max-jobs 10 \
--framework ${{ inputs.framework }} \
--platform ${{ inputs.platform }} \
$EXTRA_ARGS 2>&1 | tee "${BUILD_LOG_FILE}"
--use-sccache \
--sccache-bucket "$SCCACHE_S3_BUCKET" \
--sccache-region "$AWS_DEFAULT_REGION" $EXTRA_ARGS 2>&1 | tee "${BUILD_LOG_FILE}"
BUILD_EXIT_CODE=${PIPESTATUS[0]}
......
......@@ -249,8 +249,11 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
export CC=$(which gcc) && \
export CXX=$(which g++) && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
fi && \
source ${VIRTUAL_ENV}/bin/activate && \
git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.git" && \
cd nixl && \
......@@ -276,8 +279,11 @@ RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
export CC=$(which gcc) && \
export CXX=$(which g++) && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
fi && \
cd /workspace/nixl && \
uv build . --out-dir /opt/dynamo/dist/nixl --python $PYTHON_VERSION
......@@ -292,8 +298,11 @@ ARG ENABLE_KVBM
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
export CC=$(which gcc) && \
export CXX=$(which g++) && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export RUSTC_WRAPPER="sccache"; \
fi && \
source ${VIRTUAL_ENV}/bin/activate && \
cd /opt/dynamo && \
uv build --wheel --out-dir /opt/dynamo/dist && \
......
......@@ -253,8 +253,11 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
export CC=$(which gcc) && \
export CXX=$(which g++) && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
fi && \
source ${VIRTUAL_ENV}/bin/activate && \
git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.git" && \
cd nixl && \
......@@ -280,8 +283,11 @@ RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
export CC=$(which gcc) && \
export CXX=$(which g++) && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
fi && \
cd /workspace/nixl && \
uv build . --out-dir /opt/dynamo/dist/nixl --python $PYTHON_VERSION
......@@ -301,8 +307,6 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export RUSTC_WRAPPER="sccache"; \
fi && \
export CC=$(which gcc) && \
export CXX=$(which g++) && \
source ${VIRTUAL_ENV}/bin/activate && \
cd /opt/dynamo && \
uv build --wheel --out-dir /opt/dynamo/dist && \
......@@ -358,9 +362,6 @@ ARG GRACE_BLACKWELL=false
ARG ARCH
ARG ARCH_ALT
ARG PYTHON_VERSION
ARG USE_SCCACHE
ARG SCCACHE_BUCKET
ARG SCCACHE_REGION
ARG CARGO_BUILD_JOBS
ARG CUDA_VERSION
......@@ -463,17 +464,6 @@ RUN apt-get update \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean
# Install sccache if requested
COPY container/use-sccache.sh /tmp/use-sccache.sh
RUN if [ "$USE_SCCACHE" = "true" ]; then \
/tmp/use-sccache.sh install; \
fi
# Set environment variables - they'll be empty strings only if USE_SCCACHE is unset or empty (${VAR:+word} still expands when USE_SCCACHE=false)
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
SCCACHE_S3_KEY_PREFIX=${USE_SCCACHE:+${ARCH}}
WORKDIR /sgl-workspace
# GDRCopy installation
......@@ -538,13 +528,7 @@ RUN --mount=type=cache,target=/var/cache/curl,uid=1000,gid=0 \
&& sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh
# Build and install NVSHMEM library only (without python library)
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
export CC=$(which gcc) && \
export CXX=$(which g++) && \
export CUDA_CXX=$(which nvcc) && \
cd /sgl-workspace/nvshmem && \
RUN cd /sgl-workspace/nvshmem && \
if [ "$GRACE_BLACKWELL" = true ]; then CUDA_ARCH="90;100;120"; else CUDA_ARCH="90"; fi && \
NVSHMEM_SHMEM_SUPPORT=0 \
NVSHMEM_UCX_SUPPORT=0 \
......@@ -555,18 +539,11 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
NVSHMEM_USE_GDRCOPY=1 \
cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH} -DNVSHMEM_BUILD_PYTHON_LIB=OFF && \
cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL} && \
/tmp/use-sccache.sh show-stats "NVSHMEM"
cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL}
# Build nvshmem4py wheels separately (Python 3.10, CUDA 12) to avoid building the python library twice for multiple python versions
# Need to reconfigure with PYTHON_LIB=ON to add the nvshmem4py subdirectory
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
export CC=$(which gcc) && \
export CXX=$(which g++) && \
export CUDA_CXX=$(which nvcc) && \
cd /sgl-workspace/nvshmem && \
RUN cd /sgl-workspace/nvshmem && \
if [ "$GRACE_BLACKWELL" = true ]; then CUDA_ARCH="90;100;120"; else CUDA_ARCH="90"; fi && \
NVSHMEM_SHMEM_SUPPORT=0 \
NVSHMEM_UCX_SUPPORT=0 \
......@@ -577,14 +554,10 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
NVSHMEM_USE_GDRCOPY=1 \
cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH} -DNVSHMEM_BUILD_PYTHON_LIB=ON && \
cmake --build build --target build_nvshmem4py_wheel_cu12_${PYTHON_VERSION} -j${CMAKE_BUILD_PARALLEL_LEVEL} && \
/tmp/use-sccache.sh show-stats "NVSHMEM4PY"
cmake --build build --target build_nvshmem4py_wheel_cu12_${PYTHON_VERSION} -j${CMAKE_BUILD_PARALLEL_LEVEL}
# Install DeepEP
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
cd /sgl-workspace/DeepEP && \
RUN cd /sgl-workspace/DeepEP && \
NVSHMEM_DIR=${NVSHMEM_DIR} TORCH_CUDA_ARCH_LIST="9.0;10.0" pip install --no-build-isolation .
# Copy rust installation from dynamo_base to avoid duplication efforts
......
......@@ -232,8 +232,7 @@ RUN if [ "$USE_SCCACHE" = "true" ]; then \
# Set SCCACHE environment variables
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
RUSTC_WRAPPER=${USE_SCCACHE:+sccache}
SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}}
# Build and install UCX
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
......@@ -274,8 +273,11 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
export CC=$(which gcc) && \
export CXX=$(which g++) && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
fi && \
source ${VIRTUAL_ENV}/bin/activate && \
git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.git" && \
cd nixl && \
......@@ -300,9 +302,12 @@ RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
export CC=$(which gcc) && \
export CXX=$(which g++) && \
export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
fi && \
cd /workspace/nixl && \
uv build . --out-dir /opt/dynamo/dist/nixl --python $PYTHON_VERSION
......@@ -323,8 +328,6 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export RUSTC_WRAPPER="sccache"; \
fi && \
export CC=$(which gcc) && \
export CXX=$(which g++) && \
source ${VIRTUAL_ENV}/bin/activate && \
cd /opt/dynamo && \
uv build --wheel --out-dir /opt/dynamo/dist && \
......
......@@ -276,8 +276,11 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
export CC=$(which gcc) && \
export CXX=$(which g++) && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
fi && \
source ${VIRTUAL_ENV}/bin/activate && \
git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.git" && \
cd nixl && \
......@@ -302,9 +305,12 @@ RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
export CC=$(which gcc) && \
export CXX=$(which g++) && \
export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
fi && \
cd /workspace/nixl && \
uv build . --out-dir /opt/dynamo/dist/nixl --python $PYTHON_VERSION
......@@ -324,8 +330,6 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export RUSTC_WRAPPER="sccache"; \
fi && \
export CC=$(which gcc) && \
export CXX=$(which g++) && \
source ${VIRTUAL_ENV}/bin/activate && \
cd /opt/dynamo && \
uv build --wheel --out-dir /opt/dynamo/dist && \
......@@ -416,38 +420,12 @@ ARG MAX_JOBS=16
ENV MAX_JOBS=$MAX_JOBS
ENV CUDA_HOME=/usr/local/cuda
# Install sccache if requested
COPY container/use-sccache.sh /tmp/use-sccache.sh
# Install sccache if requested
ARG USE_SCCACHE
ARG ARCH_ALT
ARG SCCACHE_BUCKET
ARG SCCACHE_REGION
ENV ARCH_ALT=${ARCH_ALT}
RUN if [ "$USE_SCCACHE" = "true" ]; then \
/tmp/use-sccache.sh install; \
fi
# Set environment variables - they'll be empty strings only if USE_SCCACHE is unset or empty (${VAR:+word} still expands when USE_SCCACHE=false)
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}}
# Install VLLM and related dependencies
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
--mount=type=cache,target=/root/.cache/uv \
--mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
fi && \
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
chmod +x /tmp/install_vllm.sh && \
/tmp/install_vllm.sh --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} ${LMCACHE_REF:+--lmcache-ref "$LMCACHE_REF"} --cuda-version $CUDA_VERSION && \
/tmp/use-sccache.sh show-stats "vLLM";
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
chmod +x /tmp/install_vllm.sh && \
/tmp/install_vllm.sh --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} ${LMCACHE_REF:+--lmcache-ref "$LMCACHE_REF"} --cuda-version $CUDA_VERSION
ENV LD_LIBRARY_PATH=\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment