Unverified commit 76f4bc83, authored by KrishnanPrash, committed by GitHub
Browse files

build: Enable frontend decoding (media-nixl) in regular dynamo builds (#4618)


Signed-off-by: Krishnan Prashanth <kprashanth@nvidia.com>
parent b8c4a5f5
......@@ -48,15 +48,15 @@ jobs:
working-directory: ./deploy
run: |
docker compose up -d nats-server etcd-server
- name: Run Rust checks (block-manager + integration tests)
- name: Run Rust checks (block-manager + media-nixl + integration tests)
run: |
docker run --rm -w /workspace/lib/llm \
--name ${{ env.CONTAINER_ID }}_rust_checks \
${{ steps.define_image_tag.outputs.image_tag }} \
bash -ec 'rustup component add rustfmt clippy && \
cargo fmt -- --check && \
cargo clippy --features block-manager --no-deps --all-targets -- -D warnings && \
cargo test --locked --all-targets --features=block-manager && \
cargo clippy --features block-manager,media-nixl --no-deps --all-targets -- -D warnings && \
cargo test --locked --all-targets --features=block-manager,media-nixl && \
cargo test --locked --features integration -- --nocapture'
- name: Cleanup services
if: always()
......
......@@ -14,6 +14,7 @@ ARG BASE_IMAGE_TAG
ARG PYTHON_VERSION
ARG ENABLE_KVBM
ARG ENABLE_MEDIA_NIXL
ARG CARGO_BUILD_JOBS
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
......@@ -278,7 +279,11 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
cd /opt/dynamo && \
uv build --wheel --out-dir /opt/dynamo/dist && \
cd /opt/dynamo/lib/bindings/python && \
maturin build --release --out /opt/dynamo/dist && \
if [ "$ENABLE_MEDIA_NIXL" = "true" ]; then \
maturin build --release --features dynamo-llm/media-nixl --out /opt/dynamo/dist; \
else \
maturin build --release --out /opt/dynamo/dist; \
fi && \
if [ "$ENABLE_KVBM" = "true" ]; then \
cd /opt/dynamo/lib/bindings/kvbm && \
maturin build --release --out target/wheels && \
......@@ -354,7 +359,7 @@ USER dynamo
ENV HOME=/home/dynamo \
DYNAMO_HOME=/opt/dynamo \
CARGO_TARGET_DIR=/opt/dynamo/target
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:/usr/local/cuda/compat/lib.real:${LD_LIBRARY_PATH}
# Create and activate virtual environment
ARG PYTHON_VERSION
......
......@@ -230,6 +230,7 @@ Follow the instructions in [`deploy/inference-gateway/README.md`](../deploy/infe
The base image contains the core Dynamo runtime components, NATS server, etcd, and Python dependencies:
```bash
# Build the base dev image (framework=none for frontend-only deployment)
# Note: with --framework none, ENABLE_MEDIA_NIXL defaults to false (pass --enable-media-nixl to turn it on)
./build.sh --framework none --target dev
```
......
......@@ -297,6 +297,9 @@ get_options() {
--enable-kvbm)
ENABLE_KVBM=true
;;
--enable-media-nixl)
ENABLE_MEDIA_NIXL=true
;;
--make-efa)
NIXL_UCX_REF=$NIXL_UCX_EFA_REF
;;
......@@ -469,6 +472,7 @@ show_help() {
echo " [--release-build perform a release build]"
echo " [--make-efa Enables EFA support for NIXL]"
echo " [--enable-kvbm Enables KVBM support in Python 3.12]"
echo " [--enable-media-nixl Enable media processing with NIXL support (default: true for frameworks, false for none)]"
echo " [--use-sccache enable sccache for Rust/C/C++ compilation caching]"
echo " [--sccache-bucket S3 bucket name for sccache (required with --use-sccache)]"
echo " [--sccache-region S3 region for sccache (required with --use-sccache)]"
......@@ -810,6 +814,18 @@ if [ ! -z ${ENABLE_KVBM} ]; then
BUILD_ARGS+=" --build-arg ENABLE_KVBM=${ENABLE_KVBM} "
fi
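# ENABLE_MEDIA_NIXL: Enable media processing with NIXL support.
# Used in the base Dockerfile to select the maturin build feature flag.
# Defaults to true for the VLLM, TRTLLM, and SGLANG frameworks and to false otherwise;
# pass --enable-media-nixl to force it on regardless of framework.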
if [ -z "${ENABLE_MEDIA_NIXL}" ]; then
if [[ $FRAMEWORK == "VLLM" ]] || [[ $FRAMEWORK == "TRTLLM" ]] || [[ $FRAMEWORK == "SGLANG" ]]; then
ENABLE_MEDIA_NIXL=true
else
ENABLE_MEDIA_NIXL=false
fi
fi
BUILD_ARGS+=" --build-arg ENABLE_MEDIA_NIXL=${ENABLE_MEDIA_NIXL} "
# NIXL_UCX_REF: Used in base Dockerfile only.
# Passed to framework Dockerfile.{vllm,sglang,...} where it's NOT used.
if [ -n "${NIXL_UCX_REF}" ]; then
......
......@@ -28,7 +28,7 @@ use tracing;
use crate::model_card::{ModelDeploymentCard, ModelInfo};
#[cfg(feature = "media-nixl")]
use crate::preprocessor::media::{MediaDecoder, MediaFetcher, MediaLoader};
use crate::preprocessor::media::MediaLoader;
use crate::preprocessor::prompt::OAIChatLikeRequest;
use crate::protocols::common::preprocessor::{
MultimodalData, MultimodalDataMap, PreprocessedRequestBuilder,
......
......@@ -38,6 +38,14 @@ register_llm(
```
## Known Limitations
> [!WARNING]
> **Incompatible with `Dockerfile.frontend`**: Frontend media decoding (enabled with `--features media-nixl`) is not supported when using `Dockerfile.frontend`. The frontend image built from `Dockerfile.frontend` neither enables the feature nor includes the required NIXL/UCX dependencies.
> [!WARNING]
> **Requires GPU node**: The frontend must run on a node with GPU access. During media processing, decoded tensors are written to GPU memory via NIXL, which requires `libcuda.so.1` to be available. Running the frontend on a CPU-only node fails with an error such as: `Failed to initialize required backends: [UCX: No UCX plugin found]`.
## TODOs
### Modalities
......
......@@ -166,7 +166,7 @@ mod tests {
..Default::default()
};
let loader: MediaLoader = MediaLoader::new(media_decoder, fetcher).unwrap();
let loader: MediaLoader = MediaLoader::new(media_decoder, Some(fetcher)).unwrap();
let image_url = ImageUrl::from(format!("{}/llm-optimize-deploy-graphic.png", server.url()));
let content_part = ChatCompletionRequestUserMessageContentPart::ImageUrl(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment