Unverified Commit a582f4df authored by Ran Rubin's avatar Ran Rubin Committed by GitHub
Browse files

chore: improve Docker layer caching in sglang runtime Dockerfile (#6275)

parent f6c34e07
......@@ -55,8 +55,7 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca
cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/; \
true # in case ffmpeg not enabled
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
COPY --chmod=775 --chown=dynamo:0 benchmarks/ /workspace/benchmarks/
# Copy wheels first (separate from benchmarks to avoid unnecessary cache invalidation)
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
......@@ -64,8 +63,7 @@ COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /workspace/nixl/build/src
ENV SGLANG_VERSION="${RUNTIME_IMAGE_TAG%%-*}"
# Install packages as root to ensure they go to system location (/usr/local/lib/python3.12/dist-packages)
ARG ENABLE_GPU_MEMORY_SERVICE
RUN --mount=type=bind,source=.,target=/mnt/local_src \
--mount=type=cache,target=/root/.cache/pip,sharing=locked \
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
export PIP_CACHE_DIR=/root/.cache/pip && \
pip install --break-system-packages \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
......@@ -81,6 +79,10 @@ RUN --mount=type=bind,source=.,target=/mnt/local_src \
pip install --no-cache-dir --break-system-packages "$GMS_WHEEL"; \
fi
# Copy benchmarks after wheel install so benchmarks changes don't invalidate the layer above
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
COPY --chmod=775 --chown=dynamo:0 benchmarks/ /workspace/benchmarks/
# Install common and test dependencies as root
RUN --mount=type=bind,source=.,target=/mnt/local_src \
--mount=type=cache,target=/root/.cache/pip,sharing=locked \
......@@ -94,8 +96,11 @@ RUN --mount=type=bind,source=.,target=/mnt/local_src \
#TODO: Temporary change until upstream sglang runtime image is updated
pip install --break-system-packages "urllib3>=2.6.3" && \
# pip/uv bypasses umask when creating .egg-info files, but chmod -R is fast here (small directory)
chmod -R g+w /workspace/benchmarks && \
# Install NVIDIA packages based on CUDA version
chmod -R g+w /workspace/benchmarks
# Force-reinstall NVIDIA packages in a separate layer so requirements.txt changes don't trigger re-download
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
export PIP_CACHE_DIR=/root/.cache/pip && \
CUDA_MAJOR=$(nvcc --version | egrep -o 'cuda_[0-9]+' | cut -d_ -f2) && \
if [ "$CUDA_MAJOR" = "12" ]; then \
# Install NVIDIA packages that are needed for DeepEP to work properly
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment