Unverified commit 76f4bc83, authored by KrishnanPrash, committed by GitHub
Browse files

build: Enable frontend decoding (media-nixl) in regular dynamo builds (#4618)


Signed-off-by: Krishnan Prashanth <kprashanth@nvidia.com>
parent b8c4a5f5
...@@ -48,15 +48,15 @@ jobs: ...@@ -48,15 +48,15 @@ jobs:
working-directory: ./deploy working-directory: ./deploy
run: | run: |
docker compose up -d nats-server etcd-server docker compose up -d nats-server etcd-server
- name: Run Rust checks (block-manager + integration tests) - name: Run Rust checks (block-manager + media-nixl + integration tests)
run: | run: |
docker run --rm -w /workspace/lib/llm \ docker run --rm -w /workspace/lib/llm \
--name ${{ env.CONTAINER_ID }}_rust_checks \ --name ${{ env.CONTAINER_ID }}_rust_checks \
${{ steps.define_image_tag.outputs.image_tag }} \ ${{ steps.define_image_tag.outputs.image_tag }} \
bash -ec 'rustup component add rustfmt clippy && \ bash -ec 'rustup component add rustfmt clippy && \
cargo fmt -- --check && \ cargo fmt -- --check && \
cargo clippy --features block-manager --no-deps --all-targets -- -D warnings && \ cargo clippy --features block-manager,media-nixl --no-deps --all-targets -- -D warnings && \
cargo test --locked --all-targets --features=block-manager && \ cargo test --locked --all-targets --features=block-manager,media-nixl && \
cargo test --locked --features integration -- --nocapture' cargo test --locked --features integration -- --nocapture'
- name: Cleanup services - name: Cleanup services
if: always() if: always()
......
...@@ -14,6 +14,7 @@ ARG BASE_IMAGE_TAG ...@@ -14,6 +14,7 @@ ARG BASE_IMAGE_TAG
ARG PYTHON_VERSION ARG PYTHON_VERSION
ARG ENABLE_KVBM ARG ENABLE_KVBM
ARG ENABLE_MEDIA_NIXL
ARG CARGO_BUILD_JOBS ARG CARGO_BUILD_JOBS
# Define general architecture ARGs for supporting both x86 and aarch64 builds. # Define general architecture ARGs for supporting both x86 and aarch64 builds.
...@@ -278,7 +279,11 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \ ...@@ -278,7 +279,11 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
cd /opt/dynamo && \ cd /opt/dynamo && \
uv build --wheel --out-dir /opt/dynamo/dist && \ uv build --wheel --out-dir /opt/dynamo/dist && \
cd /opt/dynamo/lib/bindings/python && \ cd /opt/dynamo/lib/bindings/python && \
maturin build --release --out /opt/dynamo/dist && \ if [ "$ENABLE_MEDIA_NIXL" = "true" ]; then \
maturin build --release --features dynamo-llm/media-nixl --out /opt/dynamo/dist; \
else \
maturin build --release --out /opt/dynamo/dist; \
fi && \
if [ "$ENABLE_KVBM" = "true" ]; then \ if [ "$ENABLE_KVBM" = "true" ]; then \
cd /opt/dynamo/lib/bindings/kvbm && \ cd /opt/dynamo/lib/bindings/kvbm && \
maturin build --release --out target/wheels && \ maturin build --release --out target/wheels && \
...@@ -354,7 +359,7 @@ USER dynamo ...@@ -354,7 +359,7 @@ USER dynamo
ENV HOME=/home/dynamo \ ENV HOME=/home/dynamo \
DYNAMO_HOME=/opt/dynamo \ DYNAMO_HOME=/opt/dynamo \
CARGO_TARGET_DIR=/opt/dynamo/target CARGO_TARGET_DIR=/opt/dynamo/target
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH} ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:/usr/local/cuda/compat/lib.real:${LD_LIBRARY_PATH}
# Create and activate virtual environment # Create and activate virtual environment
ARG PYTHON_VERSION ARG PYTHON_VERSION
......
...@@ -230,6 +230,7 @@ Follow the instructions in [`deploy/inference-gateway/README.md`](../deploy/infe ...@@ -230,6 +230,7 @@ Follow the instructions in [`deploy/inference-gateway/README.md`](../deploy/infe
The base image contains the core Dynamo runtime components, NATS server, etcd, and Python dependencies: The base image contains the core Dynamo runtime components, NATS server, etcd, and Python dependencies:
```bash ```bash
# Build the base dev image (framework=none for frontend-only deployment) # Build the base dev image (framework=none for frontend-only deployment)
# Note: with --framework none, ENABLE_MEDIA_NIXL defaults to false (pass --enable-media-nixl to turn it on)
./build.sh --framework none --target dev ./build.sh --framework none --target dev
``` ```
......
...@@ -297,6 +297,9 @@ get_options() { ...@@ -297,6 +297,9 @@ get_options() {
--enable-kvbm) --enable-kvbm)
ENABLE_KVBM=true ENABLE_KVBM=true
;; ;;
--enable-media-nixl)
ENABLE_MEDIA_NIXL=true
;;
--make-efa) --make-efa)
NIXL_UCX_REF=$NIXL_UCX_EFA_REF NIXL_UCX_REF=$NIXL_UCX_EFA_REF
;; ;;
...@@ -469,6 +472,7 @@ show_help() { ...@@ -469,6 +472,7 @@ show_help() {
echo " [--release-build perform a release build]" echo " [--release-build perform a release build]"
echo " [--make-efa Enables EFA support for NIXL]" echo " [--make-efa Enables EFA support for NIXL]"
echo " [--enable-kvbm Enables KVBM support in Python 3.12]" echo " [--enable-kvbm Enables KVBM support in Python 3.12]"
echo " [--enable-media-nixl Enable media processing with NIXL support (default: true for frameworks, false for none)]"
echo " [--use-sccache enable sccache for Rust/C/C++ compilation caching]" echo " [--use-sccache enable sccache for Rust/C/C++ compilation caching]"
echo " [--sccache-bucket S3 bucket name for sccache (required with --use-sccache)]" echo " [--sccache-bucket S3 bucket name for sccache (required with --use-sccache)]"
echo " [--sccache-region S3 region for sccache (required with --use-sccache)]" echo " [--sccache-region S3 region for sccache (required with --use-sccache)]"
...@@ -810,6 +814,18 @@ if [ ! -z ${ENABLE_KVBM} ]; then ...@@ -810,6 +814,18 @@ if [ ! -z ${ENABLE_KVBM} ]; then
BUILD_ARGS+=" --build-arg ENABLE_KVBM=${ENABLE_KVBM} " BUILD_ARGS+=" --build-arg ENABLE_KVBM=${ENABLE_KVBM} "
fi fi
# ENABLE_MEDIA_NIXL: Enable media processing with NIXL support
# Used in base Dockerfile for maturin build feature flag.
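# When unset, defaults to true for the VLLM, TRTLLM and SGLANG frameworks and to false otherwise.
# Passing --enable-media-nixl sets it to true regardless of framework.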
if [ -z "${ENABLE_MEDIA_NIXL}" ]; then
if [[ $FRAMEWORK == "VLLM" ]] || [[ $FRAMEWORK == "TRTLLM" ]] || [[ $FRAMEWORK == "SGLANG" ]]; then
ENABLE_MEDIA_NIXL=true
else
ENABLE_MEDIA_NIXL=false
fi
fi
BUILD_ARGS+=" --build-arg ENABLE_MEDIA_NIXL=${ENABLE_MEDIA_NIXL} "
# NIXL_UCX_REF: Used in base Dockerfile only. # NIXL_UCX_REF: Used in base Dockerfile only.
# Passed to framework Dockerfile.{vllm,sglang,...} where it's NOT used. # Passed to framework Dockerfile.{vllm,sglang,...} where it's NOT used.
if [ -n "${NIXL_UCX_REF}" ]; then if [ -n "${NIXL_UCX_REF}" ]; then
......
...@@ -28,7 +28,7 @@ use tracing; ...@@ -28,7 +28,7 @@ use tracing;
use crate::model_card::{ModelDeploymentCard, ModelInfo}; use crate::model_card::{ModelDeploymentCard, ModelInfo};
#[cfg(feature = "media-nixl")] #[cfg(feature = "media-nixl")]
use crate::preprocessor::media::{MediaDecoder, MediaFetcher, MediaLoader}; use crate::preprocessor::media::MediaLoader;
use crate::preprocessor::prompt::OAIChatLikeRequest; use crate::preprocessor::prompt::OAIChatLikeRequest;
use crate::protocols::common::preprocessor::{ use crate::protocols::common::preprocessor::{
MultimodalData, MultimodalDataMap, PreprocessedRequestBuilder, MultimodalData, MultimodalDataMap, PreprocessedRequestBuilder,
......
...@@ -38,6 +38,14 @@ register_llm( ...@@ -38,6 +38,14 @@ register_llm(
``` ```
## Known Limitations
> [!WARNING]
> **Incompatible with `Dockerfile.frontend`**: Frontend media decoding (enabled with `--features media-nixl`) is not supported when using `Dockerfile.frontend`. The frontend image built from `Dockerfile.frontend` neither enables the feature nor includes the required NIXL/UCX dependencies.
> [!WARNING]
> **Requires GPU node**: The frontend must run on a node with GPU access. During media processing, decoded tensors are written to GPU memory via NIXL, which requires `libcuda.so.1` to be available. Running the frontend on a CPU-only node fails with an error similar to: `Failed to initialize required backends: [UCX: No UCX plugin found]`.
## TODOs ## TODOs
### Modalities ### Modalities
......
...@@ -166,7 +166,7 @@ mod tests { ...@@ -166,7 +166,7 @@ mod tests {
..Default::default() ..Default::default()
}; };
let loader: MediaLoader = MediaLoader::new(media_decoder, fetcher).unwrap(); let loader: MediaLoader = MediaLoader::new(media_decoder, Some(fetcher)).unwrap();
let image_url = ImageUrl::from(format!("{}/llm-optimize-deploy-graphic.png", server.url())); let image_url = ImageUrl::from(format!("{}/llm-optimize-deploy-graphic.png", server.url()));
let content_part = ChatCompletionRequestUserMessageContentPart::ImageUrl( let content_part = ChatCompletionRequestUserMessageContentPart::ImageUrl(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment