Unverified commit eb317cdf, authored by Keiven C and committed by GitHub
Browse files

fix: dev target needs special permission, now VLLM version works for run.sh...


fix: dev target needs special permission, now VLLM version works for run.sh and Dev Container (#2822)
Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>
parent 27fad26f
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
"name": "NVIDIA Dynamo Dev Container Development", "name": "NVIDIA Dynamo Dev Container Development",
"remoteUser": "ubuntu", // Matches our container user "remoteUser": "ubuntu", // Matches our container user
"updateRemoteUserUID": true, // Updates the UID of the remote user to match the host user, avoids permission errors "updateRemoteUserUID": true, // Updates the UID of the remote user to match the host user, avoids permission errors
"image": "dynamo:latest-vllm", // Use the latest VLLM local dev image "image": "dynamo:latest-vllm-dev", // Use the latest VLLM dev image
"runArgs": [ "runArgs": [
"--gpus=all", "--gpus=all",
"--network=host", "--network=host",
......
...@@ -296,7 +296,13 @@ CMD [] ...@@ -296,7 +296,13 @@ CMD []
FROM runtime AS dev FROM runtime AS dev
# Install utilities # Don't want ubuntu to be editable, just change uid and gid.
ENV USERNAME=ubuntu
ARG USER_UID
ARG USER_GID
ARG WORKSPACE_DIR=/workspace
# Install utilities as root
RUN apt-get update -y && \ RUN apt-get update -y && \
apt-get install -y --no-install-recommends \ apt-get install -y --no-install-recommends \
# Install utilities # Install utilities
...@@ -325,46 +331,59 @@ RUN apt-get update -y && \ ...@@ -325,46 +331,59 @@ RUN apt-get update -y && \
protobuf-compiler && \ protobuf-compiler && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
# Rust environment setup COPY --from=runtime /usr/local/bin /usr/local/bin
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
CARGO_TARGET_DIR=/opt/dynamo/target \
PATH=/usr/local/cargo/bin:$PATH
COPY --from=dynamo_base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=dynamo_base $CARGO_HOME $CARGO_HOME
# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user # https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
# Will use the default ubuntu user, but give sudo access # Will use the default ubuntu user, but give sudo access
# Needed so files permissions aren't set to root ownership when writing from inside container # Needed so files permissions aren't set to root ownership when writing from inside container
# Don't want ubuntu to be editable, just change uid and gid. User ubuntu is hardcoded in .devcontainer
ENV USERNAME=ubuntu
ARG USER_UID=1000
ARG USER_GID=1000
RUN apt-get update && apt-get install -y sudo gnupg2 gnupg1 \ RUN apt-get update && apt-get install -y sudo gnupg2 gnupg1 \
&& echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \ && echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
&& chmod 0440 /etc/sudoers.d/$USERNAME \ && chmod 0440 /etc/sudoers.d/$USERNAME \
&& mkdir -p /home/$USERNAME \ && mkdir -p /home/$USERNAME \
&& groupmod -g $USER_GID $USERNAME \
&& usermod -u $USER_UID -g $USER_GID $USERNAME \
&& chown -R $USERNAME:$USERNAME /home/$USERNAME \ && chown -R $USERNAME:$USERNAME /home/$USERNAME \
&& rm -rf /var/lib/apt/lists/* \ && rm -rf /var/lib/apt/lists/* \
&& chsh -s /bin/bash $USERNAME && chsh -s /bin/bash $USERNAME
# At this point, we are executing as the ubuntu user
USER $USERNAME
ENV HOME=/home/$USERNAME
WORKDIR $HOME
# Set workspace directory variable
ENV WORKSPACE_DIR=${WORKSPACE_DIR}
# Development environment variables for the dev target
# Path configuration notes:
# - DYNAMO_HOME: Main project directory (workspace mount point)
# - CARGO_TARGET_DIR: Build artifacts in workspace/target for persistence
# - CARGO_HOME: Must be in $HOME/.cargo (not workspace) because:
# * Workspace gets mounted to different paths where cargo binaries may not exist
# * Contains critical cargo binaries and registry that need consistent paths
# - RUSTUP_HOME: Must be in $HOME/.rustup (not workspace) because:
# * Contains rust toolchain binaries that must be at expected system paths
# * Workspace mount point would break rustup's toolchain resolution
# - PATH: Includes cargo binaries for rust tool access
ENV DYNAMO_HOME=${WORKSPACE_DIR}
ENV CARGO_TARGET_DIR=${WORKSPACE_DIR}/target
ENV CARGO_HOME=${HOME}/.cargo
ENV RUSTUP_HOME=${HOME}/.rustup
ENV PATH=${CARGO_HOME}/bin:$PATH
COPY --from=dynamo_base --chown=$USER_UID:$USER_GID /usr/local/rustup $RUSTUP_HOME
COPY --from=dynamo_base --chown=$USER_UID:$USER_GID /usr/local/cargo $CARGO_HOME
# This is a slow operation (~40s on my cpu) # This is a slow operation (~40s on my cpu)
# Much better than chown -R $USERNAME:$USERNAME /opt/dynamo/venv (~10min on my cpu) # Much better than chown -R $USERNAME:$USERNAME /opt/dynamo/venv (~10min on my cpu)
COPY --from=runtime --chown=$USER_UID:$USER_GID ${VIRTUAL_ENV} ${VIRTUAL_ENV} COPY --from=runtime --chown=$USER_UID:$USER_GID ${VIRTUAL_ENV} ${VIRTUAL_ENV}
RUN chown $USERNAME:$USERNAME ${VIRTUAL_ENV}
COPY --from=runtime --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin
# so we can use maturin develop # so we can use maturin develop
RUN uv pip install maturin[patchelf] RUN uv pip install maturin[patchelf]
USER $USERNAME # Make sure to sync this with the one specified on README.md.
ENV HOME=/home/$USERNAME # This is a generic PYTHONPATH which works for all the frameworks, so some paths may not be relevant for this particular framework.
ENV PYTHONPATH=$PYTHONPATH:$HOME/dynamo/components/planner/src ENV PYTHONPATH=${WORKSPACE_DIR}/components/metrics/src:${WORKSPACE_DIR}/components/frontend/src:${WORKSPACE_DIR}/components/planner/src:${WORKSPACE_DIR}/components/backends/mocker/src:${WORKSPACE_DIR}/components/backends/trtllm/src:${WORKSPACE_DIR}/components/backends/vllm/src:${WORKSPACE_DIR}/components/backends/sglang/src:${WORKSPACE_DIR}/components/backends/llama_cpp/src
ENV CARGO_TARGET_DIR=$HOME/dynamo/.build/target
WORKDIR $HOME
# https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history # https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history
RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.commandhistory/.bash_history" \ RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.commandhistory/.bash_history" \
......
...@@ -24,6 +24,8 @@ set -e ...@@ -24,6 +24,8 @@ set -e
TAG= TAG=
RUN_PREFIX= RUN_PREFIX=
PLATFORM=linux/amd64 PLATFORM=linux/amd64
USER_UID=
USER_GID=
# Get short commit hash # Get short commit hash
commit_id=$(git rev-parse --short HEAD) commit_id=$(git rev-parse --short HEAD)
...@@ -243,6 +245,22 @@ get_options() { ...@@ -243,6 +245,22 @@ get_options() {
missing_requirement "$1" missing_requirement "$1"
fi fi
;; ;;
--uid)
if [ "$2" ]; then
USER_UID="$2"
shift
else
missing_requirement "$1"
fi
;;
--gid)
if [ "$2" ]; then
USER_GID="$2"
shift
else
missing_requirement "$1"
fi
;;
--dry-run) --dry-run)
RUN_PREFIX="echo" RUN_PREFIX="echo"
echo "" echo ""
...@@ -419,6 +437,8 @@ show_help() { ...@@ -419,6 +437,8 @@ show_help() {
echo " [--cache-from cache location to start from]" echo " [--cache-from cache location to start from]"
echo " [--cache-to location where to cache the build output]" echo " [--cache-to location where to cache the build output]"
echo " [--tag tag for image]" echo " [--tag tag for image]"
echo " [--uid user ID for dev target (default: current user)]"
echo " [--gid group ID for dev target (default: current group)]"
echo " [--no-cache disable docker build cache]" echo " [--no-cache disable docker build cache]"
echo " [--dry-run print docker commands without running]" echo " [--dry-run print docker commands without running]"
echo " [--build-context name=path to add build context]" echo " [--build-context name=path to add build context]"
...@@ -443,6 +463,13 @@ error() { ...@@ -443,6 +463,13 @@ error() {
get_options "$@" get_options "$@"
# Validate UID/GID flags are only used with dev target
# Advisory check only: when --uid or --gid was supplied for a target other than
# "dev", print a two-line warning and carry on. Nothing here aborts the script
# or clears USER_UID/USER_GID.
if [ -n "$USER_UID" ] || [ -n "$USER_GID" ]; then
if [[ "$TARGET" != "dev" ]]; then
echo "⚠️ Warning: --uid and --gid flags are only effective with --target dev"
# ${TARGET:-} expands to an empty string when TARGET is unset or empty.
echo " Current target: ${TARGET:-}"
fi
fi
# Automatically set ARCH and ARCH_ALT if PLATFORM is linux/arm64 # Automatically set ARCH and ARCH_ALT if PLATFORM is linux/arm64
ARCH="amd64" ARCH="amd64"
...@@ -466,7 +493,14 @@ fi ...@@ -466,7 +493,14 @@ fi
BUILD_ARGS+=" --build-arg NIXL_REF=${NIXL_REF} " BUILD_ARGS+=" --build-arg NIXL_REF=${NIXL_REF} "
if [[ $TARGET == "dev" ]]; then if [[ $TARGET == "dev" ]]; then
BUILD_ARGS+=" --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) " # Use provided UID/GID or default to current user
if [ -z "$USER_UID" ]; then
USER_UID=$(id -u)
fi
if [ -z "$USER_GID" ]; then
USER_GID=$(id -g)
fi
BUILD_ARGS+=" --build-arg USER_UID=$USER_UID --build-arg USER_GID=$USER_GID "
fi fi
# BUILD DEV IMAGE # BUILD DEV IMAGE
...@@ -599,8 +633,6 @@ if [ "$USE_SCCACHE" = true ]; then ...@@ -599,8 +633,6 @@ if [ "$USE_SCCACHE" = true ]; then
BUILD_ARGS+=" --build-arg USE_SCCACHE=true" BUILD_ARGS+=" --build-arg USE_SCCACHE=true"
BUILD_ARGS+=" --build-arg SCCACHE_BUCKET=${SCCACHE_BUCKET}" BUILD_ARGS+=" --build-arg SCCACHE_BUCKET=${SCCACHE_BUCKET}"
BUILD_ARGS+=" --build-arg SCCACHE_REGION=${SCCACHE_REGION}" BUILD_ARGS+=" --build-arg SCCACHE_REGION=${SCCACHE_REGION}"
fi fi
LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}" LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}"
......
...@@ -31,12 +31,14 @@ DEFAULT_FRAMEWORK=VLLM ...@@ -31,12 +31,14 @@ DEFAULT_FRAMEWORK=VLLM
SOURCE_DIR=$(dirname "$(readlink -f "$0")") SOURCE_DIR=$(dirname "$(readlink -f "$0")")
IMAGE= IMAGE=
TARGET="dev"
HF_CACHE= HF_CACHE=
DEFAULT_HF_CACHE=${SOURCE_DIR}/.cache/huggingface DEFAULT_HF_CACHE=${SOURCE_DIR}/.cache/huggingface
GPUS="all" GPUS="all"
PRIVILEGED= PRIVILEGED=
VOLUME_MOUNTS= VOLUME_MOUNTS=
MOUNT_WORKSPACE= MOUNT_WORKSPACE=
DEV_MODE=
ENVIRONMENT_VARIABLES= ENVIRONMENT_VARIABLES=
REMAINING_ARGS= REMAINING_ARGS=
INTERACTIVE= INTERACTIVE=
...@@ -162,6 +164,7 @@ get_options() { ...@@ -162,6 +164,7 @@ get_options() {
--mount-workspace) --mount-workspace)
MOUNT_WORKSPACE=TRUE MOUNT_WORKSPACE=TRUE
;; ;;
--use-nixl-gds) --use-nixl-gds)
USE_NIXL_GDS=TRUE USE_NIXL_GDS=TRUE
;; ;;
...@@ -227,32 +230,11 @@ get_options() { ...@@ -227,32 +230,11 @@ get_options() {
ENTRYPOINT_STRING="--entrypoint ${ENTRYPOINT}" ENTRYPOINT_STRING="--entrypoint ${ENTRYPOINT}"
fi fi
if [ -n "$MOUNT_WORKSPACE" ]; then
VOLUME_MOUNTS+=" -v ${SOURCE_DIR}/..:/workspace "
VOLUME_MOUNTS+=" -v /tmp:/tmp "
VOLUME_MOUNTS+=" -v /mnt/:/mnt "
if [ -z "$HF_CACHE" ]; then
HF_CACHE=$DEFAULT_HF_CACHE
fi
if [ -z "${PRIVILEGED}" ]; then
PRIVILEGED="TRUE"
fi
ENVIRONMENT_VARIABLES+=" -e HF_TOKEN"
INTERACTIVE=" -it "
fi
if [[ ${HF_CACHE^^} == "NONE" ]]; then if [[ ${HF_CACHE^^} == "NONE" ]]; then
HF_CACHE= HF_CACHE=
fi fi
if [ -n "$HF_CACHE" ]; then # HF_CACHE mounting will be handled in workspace section
mkdir -p "$HF_CACHE"
VOLUME_MOUNTS+=" -v $HF_CACHE:/root/.cache/huggingface"
fi
if [ -z "${PRIVILEGED}" ]; then if [ -z "${PRIVILEGED}" ]; then
PRIVILEGED="FALSE" PRIVILEGED="FALSE"
...@@ -262,9 +244,9 @@ get_options() { ...@@ -262,9 +244,9 @@ get_options() {
RM="TRUE" RM="TRUE"
fi fi
if [[ ${PRIVILEGED^^} == "FALSE" ]]; then # Initialize PRIVILEGED_STRING
PRIVILEGED_STRING="" PRIVILEGED_STRING=""
else if [[ ${PRIVILEGED^^} != "FALSE" ]]; then
PRIVILEGED_STRING="--privileged" PRIVILEGED_STRING="--privileged"
fi fi
...@@ -277,7 +259,6 @@ get_options() { ...@@ -277,7 +259,6 @@ get_options() {
if [ -n "$USE_NIXL_GDS" ]; then if [ -n "$USE_NIXL_GDS" ]; then
VOLUME_MOUNTS+=" -v /run/udev:/run/udev:ro " VOLUME_MOUNTS+=" -v /run/udev:/run/udev:ro "
NIXL_GDS_CAPS="--cap-add=IPC_LOCK" NIXL_GDS_CAPS="--cap-add=IPC_LOCK"
# NOTE(jthomson04): In the KVBM disk pools, we currently allocate our files in /tmp. # NOTE(jthomson04): In the KVBM disk pools, we currently allocate our files in /tmp.
# For some arcane reason, GDS requires that /tmp be mounted. # For some arcane reason, GDS requires that /tmp be mounted.
# This is already handled for us if we set --mount-workspace # This is already handled for us if we set --mount-workspace
...@@ -291,6 +272,16 @@ get_options() { ...@@ -291,6 +272,16 @@ get_options() {
if [[ "$GPUS" == "none" || "$GPUS" == "NONE" ]]; then if [[ "$GPUS" == "none" || "$GPUS" == "NONE" ]]; then
RUNTIME="" RUNTIME=""
fi fi
# Auto-enable DEV_MODE for vllm dev images
# TODO(keivenc): Currently only Dockerfile.vllm has proper permissions to run as ubuntu user.
# Other Dockerfiles (trtllm, sglang, etc.) still require root access.
if [[ "$IMAGE" == *"-vllm-dev" ]]; then
DEV_MODE=TRUE
MOUNT_WORKSPACE=TRUE
# Interactive mode is implied when MOUNT_WORKSPACE is TRUE
fi
REMAINING_ARGS=("$@") REMAINING_ARGS=("$@")
} }
...@@ -298,7 +289,8 @@ show_help() { ...@@ -298,7 +289,8 @@ show_help() {
echo "usage: run.sh" echo "usage: run.sh"
echo " [--image image]" echo " [--image image]"
echo " [--framework framework one of ${!FRAMEWORKS[*]}]" echo " [--framework framework one of ${!FRAMEWORKS[*]}]"
echo " [--name name for launched container, default NONE] " echo " [--target target stage to use, default is 'dev']"
echo " [--name name for launched container, default NONE]"
echo " [--privileged whether to launch in privileged mode, default FALSE unless mounting workspace]" echo " [--privileged whether to launch in privileged mode, default FALSE unless mounting workspace]"
echo " [--dry-run print docker commands without running]" echo " [--dry-run print docker commands without running]"
echo " [--hf-cache directory to volume mount as the hf cache, default is NONE unless mounting workspace]" echo " [--hf-cache directory to volume mount as the hf cache, default is NONE unless mounting workspace]"
...@@ -310,6 +302,8 @@ show_help() { ...@@ -310,6 +302,8 @@ show_help() {
echo " [-- stop processing and pass remaining args as command to docker run]" echo " [-- stop processing and pass remaining args as command to docker run]"
echo " [--workdir set the working directory inside the container]" echo " [--workdir set the working directory inside the container]"
echo " [--runtime add runtime variables]" echo " [--runtime add runtime variables]"
echo " [--entrypoint override container entrypoint]"
echo " [-h, --help show this help]"
exit 0 exit 0
} }
...@@ -324,8 +318,49 @@ error() { ...@@ -324,8 +318,49 @@ error() {
get_options "$@" get_options "$@"
# RUN the image # Process workspace mounting after auto-detection
# Workspace-mount setup. Runs after get_options, so MOUNT_WORKSPACE and
# DEV_MODE already reflect both the command-line flags and the image-name
# auto-detection. When MOUNT_WORKSPACE is set, this block:
#   - bind-mounts the parent of SOURCE_DIR at /workspace, plus /tmp and /mnt
#   - sets WORKDIR to /workspace and forces an interactive TTY (-it)
#   - forwards HF_TOKEN and GITHUB_TOKEN from the host environment and sets HOME
#   - mounts the Hugging Face cache under $HOME_PATH/.cache/huggingface
#   - either runs as the ubuntu user (DEV_MODE) or tries to enable --privileged
if [ -n "$MOUNT_WORKSPACE" ]; then
# NOTE(review): HOME_PATH is hard-coded for both branches below, including the
# non-dev branch whose TODO says those images still run as root. Confirm that
# HOME=/home/ubuntu and a cache mount under /home/ubuntu are intended there.
HOME_PATH="/home/ubuntu"
# Common workspace setup
VOLUME_MOUNTS+=" -v $(dirname "${SOURCE_DIR}"):/workspace "
VOLUME_MOUNTS+=" -v /tmp:/tmp "
VOLUME_MOUNTS+=" -v /mnt/:/mnt "
WORKDIR=/workspace
INTERACTIVE=" -it "
# Set default HF_CACHE if not specified
# NOTE(review): get_options resets HF_CACHE to empty when the user passes
# "NONE", so by the time we get here an explicit "none" looks identical to
# "not specified" and is replaced by DEFAULT_HF_CACHE. Confirm whether
# --hf-cache none is still meant to disable the mount in workspace mode.
if [ -z "$HF_CACHE" ]; then
HF_CACHE=$DEFAULT_HF_CACHE
fi
# Environment variables for all workspace modes
# "-e NAME" without a value passes the host's value of NAME through to the container.
ENVIRONMENT_VARIABLES+=" -e HF_TOKEN"
ENVIRONMENT_VARIABLES+=" -e GITHUB_TOKEN"
ENVIRONMENT_VARIABLES+=" -e HOME=$HOME_PATH"
# Mount HF_CACHE to user's home cache directory
# The host directory is created first so the bind mount has an existing source path.
if [ -n "$HF_CACHE" ]; then
mkdir -p "$HF_CACHE"
VOLUME_MOUNTS+=" -v $HF_CACHE:$HOME_PATH/.cache/huggingface"
fi
if [ -n "$DEV_MODE" ]; then
# Dev Container-specific setup - the Dockerfile handles UID/GID mapping via build args
# This currently only works with Dockerfile.vllm which has proper ubuntu user setup.
echo "Dev Container mode enabled - using ubuntu user with host UID/GID"
# Use ubuntu user (with correct UID/GID baked into image)
# The --user flag is appended to PRIVILEGED_STRING (not a dedicated variable),
# so it is emitted wherever PRIVILEGED_STRING is expanded.
PRIVILEGED_STRING+=" --user ubuntu"
else
# Standard workspace mode - enable privileged mode
# TODO(keivenc): Security risk, remove soon. Dockerfiles (trtllm, sglang) still need to run as root.
# NOTE(review): get_options appears to default an empty PRIVILEGED to "FALSE"
# before this point, in which case this -z test can never succeed and
# --privileged is never added here. Confirm against the full get_options body.
if [ -z "${PRIVILEGED}" ]; then
PRIVILEGED_STRING="--privileged"
fi
fi
fi
# RUN the image
if [ -z "$RUN_PREFIX" ]; then if [ -z "$RUN_PREFIX" ]; then
set -x set -x
fi fi
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment