Unverified Commit 74fcd4a9 authored by milesial's avatar milesial Committed by GitHub
Browse files

feat: video decoder in the frontend (#4719)


Signed-off-by: Alexandre Milesi <milesial@users.noreply.github.com>
parent 32eaecb4
......@@ -44,20 +44,20 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.CI_TOKEN }}
run: |
./container/build.sh --tag ${{ steps.define_image_tag.outputs.image_tag }} --target dev --framework none --enable-kvbm
./container/build.sh --tag ${{ steps.define_image_tag.outputs.image_tag }} --target dev --framework none --enable-kvbm --enable-media-ffmpeg
- name: Start services with docker-compose
working-directory: ./deploy
run: |
docker compose up -d nats-server etcd-server
- name: Run Rust checks (block-manager + media-nixl + integration tests)
- name: Run Rust checks (block-manager + media-nixl + media-ffmpeg + integration tests)
run: |
docker run --rm -w /workspace/lib/llm \
--name ${{ env.CONTAINER_ID }}_rust_checks \
${{ steps.define_image_tag.outputs.image_tag }} \
bash -ec 'rustup component add rustfmt clippy && \
cargo fmt -- --check && \
cargo clippy --features block-manager,media-nixl --no-deps --all-targets -- -D warnings && \
cargo test --locked --all-targets --features=block-manager,media-nixl && \
cargo clippy --features block-manager,media-nixl,media-ffmpeg --no-deps --all-targets -- -D warnings && \
cargo test --locked --all-targets --features=block-manager,media-nixl,media-ffmpeg && \
cargo test --locked --features integration -- --nocapture'
- name: Cleanup services
if: always()
......
......@@ -830,6 +830,24 @@ dependencies = [
"virtue",
]
[[package]]
name = "bindgen"
version = "0.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
dependencies = [
"bitflags 2.10.0",
"cexpr",
"clang-sys",
"itertools 0.13.0",
"proc-macro2",
"quote",
"regex",
"rustc-hash 1.1.0",
"shlex",
"syn 2.0.111",
]
[[package]]
name = "bindgen"
version = "0.71.1"
......@@ -2677,6 +2695,7 @@ dependencies = [
"either",
"erased-serde",
"etcd-client",
"ffmpeg-next",
"futures",
"futures-util",
"galil-seiferas",
......@@ -2690,6 +2709,7 @@ dependencies = [
"itertools 0.14.0",
"json-five",
"lazy_static",
"memfile",
"minijinja",
"minijinja-contrib",
"mockito",
......@@ -2741,6 +2761,7 @@ dependencies = [
"utoipa-swagger-ui",
"uuid 1.18.1",
"validator",
"video-rs",
"xxhash-rust",
"zeromq",
]
......@@ -3231,6 +3252,31 @@ dependencies = [
"simd-adler32",
]
[[package]]
name = "ffmpeg-next"
version = "7.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da02698288e0275e442a47fc12ca26d50daf0d48b15398ba5906f20ac2e2a9f9"
dependencies = [
"bitflags 2.10.0",
"ffmpeg-sys-next",
"libc",
]
[[package]]
name = "ffmpeg-sys-next"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9e9c75ebd4463de9d8998fb134ba26347fe5faee62fabf0a4b4d41bd500b4ad"
dependencies = [
"bindgen 0.70.1",
"cc",
"libc",
"num_cpus",
"pkg-config",
"vcpkg",
]
[[package]]
name = "fiat-crypto"
version = "0.2.9"
......@@ -5928,6 +5974,15 @@ version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]]
name = "memfile"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f64636fdb65a5f0740f920c4281f3dbb76a71e25e25914b6d27000739897d40e"
dependencies = [
"libc",
]
[[package]]
name = "memmap2"
version = "0.9.9"
......@@ -6717,7 +6772,7 @@ version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d80bd4b5b8363cfd933000a8757a453e58ee10ee6e400c38ae31db512444a31"
dependencies = [
"bindgen",
"bindgen 0.71.1",
"cc",
"libc",
"os_info",
......@@ -11882,6 +11937,17 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "video-rs"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "859aad7261bac267f90f9635ec9addba3b4bcb4bbb2edb03fec3e6b765657bee"
dependencies = [
"ffmpeg-next",
"tracing",
"url",
]
[[package]]
name = "virtue"
version = "0.0.18"
......
......@@ -27,6 +27,7 @@ ARG BASE_IMAGE_TAG
ARG PYTHON_VERSION
ARG ENABLE_KVBM
ARG ENABLE_MEDIA_NIXL
ARG ENABLE_MEDIA_FFMPEG
ARG CARGO_BUILD_JOBS
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
......@@ -112,6 +113,7 @@ FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder
ARG ARCH
ARG ARCH_ALT
ARG CARGO_BUILD_JOBS
ARG ENABLE_MEDIA_FFMPEG
WORKDIR /workspace
......@@ -155,6 +157,7 @@ RUN yum groupinstall -y 'Development Tools' && \
librdmacm-devel \
numactl-devel
# Ensure a modern protoc is available (required for --experimental_allow_proto3_optional)
RUN set -eux; \
PROTOC_VERSION=25.3; \
......@@ -210,6 +213,43 @@ ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
RUSTC_WRAPPER=${USE_SCCACHE:+sccache}
# Build FFmpeg from source
# Runs only when ENABLE_MEDIA_FFMPEG is "true"; otherwise the whole step is a no-op.
# Downloads the ffmpeg-${FFMPEG_VERSION} release tarball, configures a shared-library-only
# build installed under /usr/local (programs, docs, static libs, x86 asm, postproc, network,
# encoders, muxers, bitstream filters, devices and libdrm are all disabled), then runs
# ldconfig and deletes the tarball and source tree from /tmp.
# When USE_SCCACHE is "true", the sccache launcher/wrapper variables are exported first.
# NOTE(review): CMAKE_*_COMPILER_LAUNCHER and RUSTC_WRAPPER are presumably not read by
# FFmpeg's ./configure, so sccache may not actually cache this build — confirm.
ARG FFMPEG_VERSION=7.1
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
if [ "$ENABLE_MEDIA_FFMPEG" = "true" ]; then \
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export RUSTC_WRAPPER="sccache"; \
fi && \
dnf install -y pkg-config && \
cd /tmp && \
curl -LO https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \
tar xf ffmpeg-${FFMPEG_VERSION}.tar.xz && \
cd ffmpeg-${FFMPEG_VERSION} && \
./configure \
--prefix=/usr/local \
--disable-programs \
--disable-doc \
--disable-static \
--disable-x86asm \
--disable-postproc \
--disable-network \
--disable-encoders \
--disable-muxers \
--disable-bsfs \
--disable-devices \
--disable-libdrm \
--enable-shared && \
make -j$(nproc) && \
make install && \
/tmp/use-sccache.sh show-stats "FFMPEG" && \
ldconfig && \
rm -rf /tmp/ffmpeg-${FFMPEG_VERSION}*; \
fi
# Build and install UCX
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
......@@ -307,8 +347,15 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
cd /opt/dynamo && \
uv build --wheel --out-dir /opt/dynamo/dist && \
cd /opt/dynamo/lib/bindings/python && \
FEATURES=""; \
if [ "$ENABLE_MEDIA_NIXL" = "true" ]; then \
maturin build --release --features dynamo-llm/media-nixl --out /opt/dynamo/dist; \
FEATURES="$FEATURES dynamo-llm/media-nixl"; \
fi; \
if [ "$ENABLE_MEDIA_FFMPEG" = "true" ]; then \
FEATURES="$FEATURES media-ffmpeg"; \
fi; \
if [ -n "$FEATURES" ]; then \
maturin build --release --features "$FEATURES" --out /opt/dynamo/dist; \
else \
maturin build --release --out /opt/dynamo/dist; \
fi && \
......@@ -319,6 +366,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--exclude libnixl.so \
--exclude libnixl_build.so \
--exclude libnixl_common.so \
--exclude 'lib*.so*' \
--plat manylinux_2_28_${ARCH_ALT} \
--wheel-dir /opt/dynamo/dist \
target/wheels/*.whl; \
......@@ -354,6 +402,13 @@ COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_L
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
# Copy ffmpeg
# Bind-mounts /usr/local from the wheel_builder stage at /tmp/usr/local and copies the
# libav*/libsw* headers into /usr/local/include, the matching *.so files into
# /usr/local/lib, and the pkg-config files into /usr/lib/pkgconfig.
# cp -n never overwrites a file that already exists; -L copies symlink targets rather
# than the links themselves.
# The commands are joined with ';' and the step ends with `true`, so this RUN succeeds
# even when the globs match nothing (FFmpeg not built).
# NOTE(review): the same construct also hides real copy failures when FFmpeg is enabled,
# and only the unversioned *.so names are copied — confirm the versioned sonames still
# resolve at runtime.
RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
cp -rnL /tmp/usr/local/include/libav* /tmp/usr/local/include/libsw* /usr/local/include/; \
cp -nL /tmp/usr/local/lib/libav*.so /tmp/usr/local/lib/libsw*.so /usr/local/lib/; \
cp -nL /tmp/usr/local/lib/pkgconfig/libav*.pc /tmp/usr/local/lib/pkgconfig/libsw*.pc /usr/lib/pkgconfig/; \
true # in case ffmpeg not enabled
# Copy built artifacts
COPY --chown=dynamo: --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
......@@ -374,6 +429,7 @@ RUN apt-get update -y \
clang \
libclang-dev \
protobuf-compiler \
pkg-config \
# sudo for dev stage
sudo \
&& apt-get clean \
......
......@@ -40,6 +40,7 @@ ARG FRAMEWORK_IMAGE_TAG
ARG PYTHON_VERSION
ARG ENABLE_KVBM
ARG ENABLE_MEDIA_NIXL
ARG ENABLE_MEDIA_FFMPEG
ARG CARGO_BUILD_JOBS
ARG CUDA_VERSION
......@@ -159,6 +160,7 @@ RUN yum groupinstall -y 'Development Tools' && \
librdmacm-devel \
numactl-devel
# Ensure a modern protoc is available (required for --experimental_allow_proto3_optional)
RUN set -eux; \
PROTOC_VERSION=25.3; \
......@@ -214,6 +216,43 @@ ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
RUSTC_WRAPPER=${USE_SCCACHE:+sccache}
# Build FFmpeg from source
# Runs only when ENABLE_MEDIA_FFMPEG is "true"; otherwise the whole step is a no-op.
# Downloads the ffmpeg-${FFMPEG_VERSION} release tarball, configures a shared-library-only
# build installed under /usr/local (programs, docs, static libs, x86 asm, postproc, network,
# encoders, muxers, bitstream filters, devices and libdrm are all disabled), then runs
# ldconfig and deletes the tarball and source tree from /tmp.
# When USE_SCCACHE is "true", the sccache launcher/wrapper variables are exported first.
# NOTE(review): CMAKE_*_COMPILER_LAUNCHER and RUSTC_WRAPPER are presumably not read by
# FFmpeg's ./configure, so sccache may not actually cache this build — confirm.
ARG FFMPEG_VERSION=7.1
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
if [ "$ENABLE_MEDIA_FFMPEG" = "true" ]; then \
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export RUSTC_WRAPPER="sccache"; \
fi && \
dnf install -y pkg-config && \
cd /tmp && \
curl -LO https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \
tar xf ffmpeg-${FFMPEG_VERSION}.tar.xz && \
cd ffmpeg-${FFMPEG_VERSION} && \
./configure \
--prefix=/usr/local \
--disable-programs \
--disable-doc \
--disable-static \
--disable-x86asm \
--disable-postproc \
--disable-network \
--disable-encoders \
--disable-muxers \
--disable-bsfs \
--disable-devices \
--disable-libdrm \
--enable-shared && \
make -j$(nproc) && \
make install && \
/tmp/use-sccache.sh show-stats "FFMPEG" && \
ldconfig && \
rm -rf /tmp/ffmpeg-${FFMPEG_VERSION}*; \
fi
# Build and install UCX
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
......@@ -311,8 +350,15 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
cd /opt/dynamo && \
uv build --wheel --out-dir /opt/dynamo/dist && \
cd /opt/dynamo/lib/bindings/python && \
FEATURES=""; \
if [ "$ENABLE_MEDIA_NIXL" = "true" ]; then \
maturin build --release --features dynamo-llm/media-nixl --out /opt/dynamo/dist; \
FEATURES="$FEATURES dynamo-llm/media-nixl"; \
fi; \
if [ "$ENABLE_MEDIA_FFMPEG" = "true" ]; then \
FEATURES="$FEATURES media-ffmpeg"; \
fi; \
if [ -n "$FEATURES" ]; then \
maturin build --release --features "$FEATURES" --out /opt/dynamo/dist; \
else \
maturin build --release --out /opt/dynamo/dist; \
fi && \
......@@ -616,6 +662,7 @@ ${NIXL_PLUGIN_DIR}:\
/usr/local/ucx/lib/ucx:\
/usr/local/nvidia/lib64:\
${LD_LIBRARY_PATH}
ENV NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
# Copy NATS and ETCD from dynamo_base, and UCX/NIXL from wheel_builder
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
......@@ -627,6 +674,13 @@ COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/whe
COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:${HOME}/.local/bin:$PATH
# Copy ffmpeg
# Bind-mounts /usr/local from the wheel_builder stage at /tmp/usr/local and copies the
# libav*/libsw* headers into /usr/local/include, the matching *.so files into
# /usr/local/lib, and the pkg-config files into /usr/lib/pkgconfig.
# cp -n never overwrites a file that already exists; -L copies symlink targets rather
# than the links themselves.
# The commands are joined with ';' and the step ends with `true`, so this RUN succeeds
# even when the globs match nothing (FFmpeg not built).
# NOTE(review): the same construct also hides real copy failures when FFmpeg is enabled,
# and only the unversioned *.so names are copied — confirm the versioned sonames still
# resolve at runtime.
RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
cp -rnL /tmp/usr/local/include/libav* /tmp/usr/local/include/libsw* /usr/local/include/; \
cp -nL /tmp/usr/local/lib/libav*.so /tmp/usr/local/lib/libsw*.so /usr/local/lib/; \
cp -nL /tmp/usr/local/lib/pkgconfig/libav*.pc /tmp/usr/local/lib/pkgconfig/libsw*.pc /usr/lib/pkgconfig/; \
true # in case ffmpeg not enabled
# Install Dynamo wheels from dynamo_base wheelhouse
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
COPY --chmod=775 --chown=dynamo:0 benchmarks/ /opt/dynamo/benchmarks/
......@@ -744,6 +798,7 @@ RUN apt-get update -y && \
bear \
ccache \
less \
pkg-config \
# Language and development support
clang \
libclang-dev \
......
......@@ -36,6 +36,7 @@ ARG BASE_IMAGE_TAG
ARG PYTHON_VERSION
ARG ENABLE_KVBM
ARG ENABLE_MEDIA_NIXL
ARG ENABLE_MEDIA_FFMPEG
ARG CARGO_BUILD_JOBS
ARG PYTORCH_BASE_IMAGE="nvcr.io/nvidia/pytorch"
......@@ -180,6 +181,7 @@ RUN yum groupinstall -y 'Development Tools' && \
librdmacm-devel \
numactl-devel
# Ensure a modern protoc is available (required for --experimental_allow_proto3_optional)
RUN set -eux; \
PROTOC_VERSION=25.3; \
......@@ -234,6 +236,43 @@ RUN if [ "$USE_SCCACHE" = "true" ]; then \
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}}
# Build FFmpeg from source
# Runs only when ENABLE_MEDIA_FFMPEG is "true"; otherwise the whole step is a no-op.
# Downloads the ffmpeg-${FFMPEG_VERSION} release tarball, configures a shared-library-only
# build installed under /usr/local (programs, docs, static libs, x86 asm, postproc, network,
# encoders, muxers, bitstream filters, devices and libdrm are all disabled), then runs
# ldconfig and deletes the tarball and source tree from /tmp.
# When USE_SCCACHE is "true", the sccache launcher/wrapper variables are exported first.
# NOTE(review): CMAKE_*_COMPILER_LAUNCHER and RUSTC_WRAPPER are presumably not read by
# FFmpeg's ./configure, so sccache may not actually cache this build — confirm.
ARG FFMPEG_VERSION=7.1
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
if [ "$ENABLE_MEDIA_FFMPEG" = "true" ]; then \
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export RUSTC_WRAPPER="sccache"; \
fi && \
dnf install -y pkg-config && \
cd /tmp && \
curl -LO https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \
tar xf ffmpeg-${FFMPEG_VERSION}.tar.xz && \
cd ffmpeg-${FFMPEG_VERSION} && \
./configure \
--prefix=/usr/local \
--disable-programs \
--disable-doc \
--disable-static \
--disable-x86asm \
--disable-postproc \
--disable-network \
--disable-encoders \
--disable-muxers \
--disable-bsfs \
--disable-devices \
--disable-libdrm \
--enable-shared && \
make -j$(nproc) && \
make install && \
/tmp/use-sccache.sh show-stats "FFMPEG" && \
ldconfig && \
rm -rf /tmp/ffmpeg-${FFMPEG_VERSION}*; \
fi
# Build and install UCX
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
......@@ -332,8 +371,15 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
cd /opt/dynamo && \
uv build --wheel --out-dir /opt/dynamo/dist && \
cd /opt/dynamo/lib/bindings/python && \
FEATURES=""; \
if [ "$ENABLE_MEDIA_NIXL" = "true" ]; then \
maturin build --release --features dynamo-llm/media-nixl --out /opt/dynamo/dist; \
FEATURES="$FEATURES dynamo-llm/media-nixl"; \
fi; \
if [ "$ENABLE_MEDIA_FFMPEG" = "true" ]; then \
FEATURES="$FEATURES media-ffmpeg"; \
fi; \
if [ -n "$FEATURES" ]; then \
maturin build --release --features "$FEATURES" --out /opt/dynamo/dist; \
else \
maturin build --release --out /opt/dynamo/dist; \
fi && \
......@@ -629,6 +675,13 @@ COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_L
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
# Copy ffmpeg
# Bind-mounts /usr/local from the wheel_builder stage at /tmp/usr/local and copies the
# libav*/libsw* headers into /usr/local/include, the matching *.so files into
# /usr/local/lib, and the pkg-config files into /usr/lib/pkgconfig.
# cp -n never overwrites a file that already exists; -L copies symlink targets rather
# than the links themselves.
# The commands are joined with ';' and the step ends with `true`, so this RUN succeeds
# even when the globs match nothing (FFmpeg not built).
# NOTE(review): the same construct also hides real copy failures when FFmpeg is enabled,
# and only the unversioned *.so names are copied — confirm the versioned sonames still
# resolve at runtime.
RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
cp -rnL /tmp/usr/local/include/libav* /tmp/usr/local/include/libsw* /usr/local/include/; \
cp -nL /tmp/usr/local/lib/libav*.so /tmp/usr/local/lib/libsw*.so /usr/local/lib/; \
cp -nL /tmp/usr/local/lib/pkgconfig/libav*.pc /tmp/usr/local/lib/pkgconfig/libsw*.pc /usr/lib/pkgconfig/; \
true # in case ffmpeg not enabled
ENV TENSORRT_LIB_DIR=/usr/local/tensorrt/targets/${ARCH_ALT}-linux-gnu/lib
ENV PATH="/usr/local/ucx/bin:${VIRTUAL_ENV}/bin:/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/bin:/usr/local/cuda/nvvm/bin:$PATH"
ENV LD_LIBRARY_PATH=\
......@@ -642,6 +695,7 @@ $TENSORRT_LIB_DIR:\
/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages/torch/lib:\
/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages/torch_tensorrt/lib:\
$LD_LIBRARY_PATH
ENV NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
ENV OPAL_PREFIX=/opt/hpcx/ompi
COPY --chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/
......@@ -754,7 +808,8 @@ RUN apt-get update -y && \
# Rust build dependencies
clang \
libclang-dev \
protobuf-compiler && \
protobuf-compiler \
pkg-config && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
......
......@@ -42,6 +42,7 @@ ARG BASE_IMAGE_TAG
ARG PYTHON_VERSION
ARG ENABLE_KVBM
ARG ENABLE_MEDIA_NIXL
ARG ENABLE_MEDIA_FFMPEG
ARG CARGO_BUILD_JOBS
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
......@@ -237,6 +238,43 @@ RUN if [ "$USE_SCCACHE" = "true" ]; then \
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}}
# Build FFmpeg from source
# Runs only when ENABLE_MEDIA_FFMPEG is "true"; otherwise the whole step is a no-op.
# Downloads the ffmpeg-${FFMPEG_VERSION} release tarball, configures a shared-library-only
# build installed under /usr/local (programs, docs, static libs, x86 asm, postproc, network,
# encoders, muxers, bitstream filters, devices and libdrm are all disabled), then runs
# ldconfig and deletes the tarball and source tree from /tmp.
# When USE_SCCACHE is "true", the sccache launcher/wrapper variables are exported first.
# NOTE(review): CMAKE_*_COMPILER_LAUNCHER and RUSTC_WRAPPER are presumably not read by
# FFmpeg's ./configure, so sccache may not actually cache this build — confirm.
ARG FFMPEG_VERSION=7.1
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
if [ "$ENABLE_MEDIA_FFMPEG" = "true" ]; then \
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export RUSTC_WRAPPER="sccache"; \
fi && \
dnf install -y pkg-config && \
cd /tmp && \
curl -LO https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \
tar xf ffmpeg-${FFMPEG_VERSION}.tar.xz && \
cd ffmpeg-${FFMPEG_VERSION} && \
./configure \
--prefix=/usr/local \
--disable-programs \
--disable-doc \
--disable-static \
--disable-x86asm \
--disable-postproc \
--disable-network \
--disable-encoders \
--disable-muxers \
--disable-bsfs \
--disable-devices \
--disable-libdrm \
--enable-shared && \
make -j$(nproc) && \
make install && \
/tmp/use-sccache.sh show-stats "FFMPEG" && \
ldconfig && \
rm -rf /tmp/ffmpeg-${FFMPEG_VERSION}*; \
fi
# Build and install UCX
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
......@@ -334,8 +372,15 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
cd /opt/dynamo && \
uv build --wheel --out-dir /opt/dynamo/dist && \
cd /opt/dynamo/lib/bindings/python && \
if [ "$ENABLE_MEDIA_NIXL" == "true" ]; then \
maturin build --release --features dynamo-llm/media-nixl --out /opt/dynamo/dist; \
FEATURES=""; \
if [ "$ENABLE_MEDIA_NIXL" = "true" ]; then \
FEATURES="$FEATURES dynamo-llm/media-nixl"; \
fi; \
if [ "$ENABLE_MEDIA_FFMPEG" = "true" ]; then \
FEATURES="$FEATURES media-ffmpeg"; \
fi; \
if [ -n "$FEATURES" ]; then \
maturin build --release --features "$FEATURES" --out /opt/dynamo/dist; \
else \
maturin build --release --out /opt/dynamo/dist; \
fi && \
......@@ -548,6 +593,13 @@ COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/whe
COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
ENV PATH=/usr/local/ucx/bin:$PATH
# Copy ffmpeg
# Bind-mounts /usr/local from the wheel_builder stage at /tmp/usr/local and copies the
# libav*/libsw* headers into /usr/local/include, the matching *.so files into
# /usr/local/lib, and the pkg-config files into /usr/lib/pkgconfig.
# cp -n never overwrites a file that already exists; -L copies symlink targets rather
# than the links themselves.
# The commands are joined with ';' and the step ends with `true`, so this RUN succeeds
# even when the globs match nothing (FFmpeg not built).
# NOTE(review): the same construct also hides real copy failures when FFmpeg is enabled,
# and only the unversioned *.so names are copied — confirm the versioned sonames still
# resolve at runtime.
RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
cp -rnL /tmp/usr/local/include/libav* /tmp/usr/local/include/libsw* /usr/local/include/; \
cp -nL /tmp/usr/local/lib/libav*.so /tmp/usr/local/lib/libsw*.so /usr/local/lib/; \
cp -nL /tmp/usr/local/lib/pkgconfig/libav*.pc /tmp/usr/local/lib/pkgconfig/libsw*.pc /usr/lib/pkgconfig/; \
true # in case ffmpeg not enabled
ENV LD_LIBRARY_PATH=\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
$NIXL_LIB_DIR:\
......@@ -555,6 +607,7 @@ $NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH
ENV NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
# Copy attribution files
COPY --chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/
......@@ -663,7 +716,8 @@ RUN apt-get update -y && \
# Rust build dependencies
clang \
libclang-dev \
protobuf-compiler && \
protobuf-compiler \
pkg-config && \
rm -rf /var/lib/apt/lists/*
# Set umask for group-writable files in dev stage (runs as root)
......
......@@ -296,6 +296,9 @@ get_options() {
--enable-media-nixl)
ENABLE_MEDIA_NIXL=true
;;
--enable-media-ffmpeg)
ENABLE_MEDIA_FFMPEG=true
;;
--make-efa)
NIXL_UCX_REF=$NIXL_UCX_EFA_REF
;;
......@@ -458,6 +461,7 @@ show_help() {
echo " [--make-efa Enables EFA support for NIXL]"
echo " [--enable-kvbm Enables KVBM support in Python 3.12]"
echo " [--enable-media-nixl Enable media processing with NIXL support (default: true for frameworks, false for none)]"
echo " [--enable-media-ffmpeg Enable media processing with FFMPEG support (default: true for frameworks, false for none)]"
echo " [--use-sccache enable sccache for Rust/C/C++ compilation caching]"
echo " [--sccache-bucket S3 bucket name for sccache (required with --use-sccache)]"
echo " [--sccache-region S3 region for sccache (required with --use-sccache)]"
......@@ -808,7 +812,19 @@ if [ -z "${ENABLE_MEDIA_NIXL}" ]; then
fi
BUILD_ARGS+=" --build-arg ENABLE_MEDIA_NIXL=${ENABLE_MEDIA_NIXL} "
# NIXL_UCX_REF: Used in dynamo base stages.
# ENABLE_MEDIA_FFMPEG: toggles FFmpeg-backed media processing for the image build.
# An explicit --enable-media-ffmpeg wins; when nothing was set, the default is
# true for the VLLM, TRTLLM and SGLANG frameworks and false for everything else.
# The resolved value is always forwarded to docker as a build arg.
if [ -z "${ENABLE_MEDIA_FFMPEG}" ]; then
    case "$FRAMEWORK" in
        VLLM|TRTLLM|SGLANG) ENABLE_MEDIA_FFMPEG=true ;;
        *) ENABLE_MEDIA_FFMPEG=false ;;
    esac
fi
BUILD_ARGS+=" --build-arg ENABLE_MEDIA_FFMPEG=${ENABLE_MEDIA_FFMPEG} "
# NIXL_UCX_REF: Used in base Dockerfile only.
if [ -n "${NIXL_UCX_REF}" ]; then
BUILD_ARGS+=" --build-arg NIXL_UCX_REF=${NIXL_UCX_REF} "
fi
......
......@@ -371,7 +371,7 @@ version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1826f2e4cfc2cd19ee53c42fbf68e2f81ec21108e0b7ecf6a71cf062137360fc"
dependencies = [
"bindgen",
"bindgen 0.72.1",
"cc",
"cmake",
"dunce",
......@@ -552,6 +552,44 @@ dependencies = [
"virtue",
]
[[package]]
name = "bindgen"
version = "0.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
dependencies = [
"bitflags 2.10.0",
"cexpr",
"clang-sys",
"itertools 0.11.0",
"proc-macro2",
"quote",
"regex",
"rustc-hash 1.1.0",
"shlex",
"syn 2.0.110",
]
[[package]]
name = "bindgen"
version = "0.71.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
dependencies = [
"bitflags 2.10.0",
"cexpr",
"clang-sys",
"itertools 0.11.0",
"log",
"prettyplease",
"proc-macro2",
"quote",
"regex",
"rustc-hash 2.1.1",
"shlex",
"syn 2.0.110",
]
[[package]]
name = "bindgen"
version = "0.72.1"
......@@ -653,6 +691,15 @@ dependencies = [
"generic-array",
]
[[package]]
name = "block2"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdeb9d870516001442e364c5220d3574d2da8dc765554b4a617230d33fa58ef5"
dependencies = [
"objc2",
]
[[package]]
name = "bs62"
version = "0.1.4"
......@@ -1439,6 +1486,16 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "dispatch2"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89a09f22a6c6069a18470eb92d2298acf25463f14256d24778e1230d789a2aec"
dependencies = [
"bitflags 2.10.0",
"objc2",
]
[[package]]
name = "displaydoc"
version = "0.2.5"
......@@ -1533,6 +1590,13 @@ dependencies = [
"uuid",
]
[[package]]
name = "dynamo-config"
version = "0.7.0"
dependencies = [
"anyhow",
]
[[package]]
name = "dynamo-llm"
version = "0.7.0"
......@@ -1561,11 +1625,13 @@ dependencies = [
"derive_builder",
"dialoguer",
"dynamo-async-openai",
"dynamo-memory",
"dynamo-parsers",
"dynamo-runtime",
"either",
"erased-serde",
"etcd-client",
"ffmpeg-next",
"futures",
"futures-util",
"galil-seiferas",
......@@ -1576,6 +1642,7 @@ dependencies = [
"image",
"itertools 0.14.0",
"json-five",
"memfile",
"minijinja",
"minijinja-contrib",
"modelexpress-client",
......@@ -1583,6 +1650,7 @@ dependencies = [
"ndarray",
"ndarray-interp",
"ndarray-npy",
"nixl-sys",
"object_store",
"offset-allocator",
"oneshot",
......@@ -1619,10 +1687,27 @@ dependencies = [
"utoipa-swagger-ui",
"uuid",
"validator",
"video-rs",
"xxhash-rust",
"zeromq",
]
[[package]]
name = "dynamo-memory"
version = "0.7.0"
dependencies = [
"anyhow",
"cudarc",
"dynamo-config",
"libc",
"nix 0.30.1",
"nixl-sys",
"offset-allocator",
"serde",
"thiserror 2.0.17",
"tracing",
]
[[package]]
name = "dynamo-parsers"
version = "0.7.0"
......@@ -1709,7 +1794,7 @@ dependencies = [
"local-ip-address",
"log",
"nid",
"nix",
"nix 0.29.0",
"notify",
"nuid",
"once_cell",
......@@ -2011,6 +2096,31 @@ dependencies = [
"simd-adler32",
]
[[package]]
name = "ffmpeg-next"
version = "7.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da02698288e0275e442a47fc12ca26d50daf0d48b15398ba5906f20ac2e2a9f9"
dependencies = [
"bitflags 2.10.0",
"ffmpeg-sys-next",
"libc",
]
[[package]]
name = "ffmpeg-sys-next"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9e9c75ebd4463de9d8998fb134ba26347fe5faee62fabf0a4b4d41bd500b4ad"
dependencies = [
"bindgen 0.70.1",
"cc",
"libc",
"num_cpus",
"pkg-config",
"vcpkg",
]
[[package]]
name = "fiat-crypto"
version = "0.2.9"
......@@ -3720,6 +3830,15 @@ version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]]
name = "memfile"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f64636fdb65a5f0740f920c4281f3dbb76a71e25e25914b6d27000739897d40e"
dependencies = [
"libc",
]
[[package]]
name = "memmap2"
version = "0.9.9"
......@@ -4041,6 +4160,34 @@ dependencies = [
"libc",
]
[[package]]
name = "nix"
version = "0.30.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6"
dependencies = [
"bitflags 2.10.0",
"cfg-if 1.0.4",
"cfg_aliases",
"libc",
]
[[package]]
name = "nixl-sys"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d80bd4b5b8363cfd933000a8757a453e58ee10ee6e400c38ae31db512444a31"
dependencies = [
"bindgen 0.71.1",
"cc",
"libc",
"os_info",
"pkg-config",
"serde",
"thiserror 2.0.17",
"tracing",
]
[[package]]
name = "nkeys"
version = "0.4.5"
......@@ -4262,6 +4409,165 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
[[package]]
name = "objc2"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7c2599ce0ec54857b29ce62166b0ed9b4f6f1a70ccc9a71165b6154caca8c05"
dependencies = [
"objc2-encode",
]
[[package]]
name = "objc2-cloud-kit"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73ad74d880bb43877038da939b7427bba67e9dd42004a18b809ba7d87cee241c"
dependencies = [
"bitflags 2.10.0",
"objc2",
"objc2-foundation",
]
[[package]]
name = "objc2-core-data"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b402a653efbb5e82ce4df10683b6b28027616a2715e90009947d50b8dd298fa"
dependencies = [
"objc2",
"objc2-foundation",
]
[[package]]
name = "objc2-core-foundation"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536"
dependencies = [
"bitflags 2.10.0",
"dispatch2",
"objc2",
]
[[package]]
name = "objc2-core-graphics"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e022c9d066895efa1345f8e33e584b9f958da2fd4cd116792e15e07e4720a807"
dependencies = [
"bitflags 2.10.0",
"dispatch2",
"objc2",
"objc2-core-foundation",
"objc2-io-surface",
]
[[package]]
name = "objc2-core-image"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5d563b38d2b97209f8e861173de434bd0214cf020e3423a52624cd1d989f006"
dependencies = [
"objc2",
"objc2-foundation",
]
[[package]]
name = "objc2-core-location"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca347214e24bc973fc025fd0d36ebb179ff30536ed1f80252706db19ee452009"
dependencies = [
"objc2",
"objc2-foundation",
]
[[package]]
name = "objc2-core-text"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cde0dfb48d25d2b4862161a4d5fcc0e3c24367869ad306b0c9ec0073bfed92d"
dependencies = [
"bitflags 2.10.0",
"objc2",
"objc2-core-foundation",
"objc2-core-graphics",
]
[[package]]
name = "objc2-encode"
version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33"
[[package]]
name = "objc2-foundation"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272"
dependencies = [
"bitflags 2.10.0",
"block2",
"libc",
"objc2",
"objc2-core-foundation",
]
[[package]]
name = "objc2-io-surface"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "180788110936d59bab6bd83b6060ffdfffb3b922ba1396b312ae795e1de9d81d"
dependencies = [
"bitflags 2.10.0",
"objc2",
"objc2-core-foundation",
]
[[package]]
name = "objc2-quartz-core"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96c1358452b371bf9f104e21ec536d37a650eb10f7ee379fff67d2e08d537f1f"
dependencies = [
"bitflags 2.10.0",
"objc2",
"objc2-core-foundation",
"objc2-foundation",
]
[[package]]
name = "objc2-ui-kit"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d87d638e33c06f577498cbcc50491496a3ed4246998a7fbba7ccb98b1e7eab22"
dependencies = [
"bitflags 2.10.0",
"block2",
"objc2",
"objc2-cloud-kit",
"objc2-core-data",
"objc2-core-foundation",
"objc2-core-graphics",
"objc2-core-image",
"objc2-core-location",
"objc2-core-text",
"objc2-foundation",
"objc2-quartz-core",
"objc2-user-notifications",
]
[[package]]
name = "objc2-user-notifications"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9df9128cbbfef73cda168416ccf7f837b62737d748333bfe9ab71c245d76613e"
dependencies = [
"objc2",
"objc2-foundation",
]
[[package]]
name = "object"
version = "0.37.3"
......@@ -4490,6 +4796,22 @@ dependencies = [
"hashbrown 0.14.5",
]
[[package]]
name = "os_info"
version = "3.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c39b5918402d564846d5aba164c09a66cc88d232179dfd3e3c619a25a268392"
dependencies = [
"android_system_properties",
"log",
"nix 0.30.1",
"objc2",
"objc2-foundation",
"objc2-ui-kit",
"serde",
"windows-sys 0.61.2",
]
[[package]]
name = "parking"
version = "2.2.1"
......@@ -7548,6 +7870,12 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "version-compare"
version = "0.2.1"
......@@ -7560,6 +7888,17 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "video-rs"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "859aad7261bac267f90f9635ec9addba3b4bcb4bbb2edb03fec3e6b765657bee"
dependencies = [
"ffmpeg-next",
"tracing",
"url",
]
[[package]]
name = "virtue"
version = "0.0.18"
......
......@@ -23,6 +23,7 @@ crate-type = ["cdylib", "rlib"]
[features]
default = []
media-ffmpeg = ["dynamo-llm/media-ffmpeg"]
[dependencies]
dynamo-llm = { path = "../../llm" }
......
......@@ -99,6 +99,15 @@ impl MediaDecoder {
self.inner.image_decoder = image_decoder;
Ok(())
}
// Replaces the video decoder settings with the ones described by a Python dict.
// Only compiled when the `media-ffmpeg` feature is enabled. The dict is converted
// with `pythonize::depythonize`; a conversion failure is surfaced to Python as an
// exception carrying the underlying error text, and the current settings are left
// untouched in that case.
#[cfg(feature = "media-ffmpeg")]
fn video_decoder(&mut self, video_decoder: &Bound<'_, PyDict>) -> PyResult<()> {
    match pythonize::depythonize(video_decoder) {
        Ok(parsed) => {
            self.inner.video_decoder = parsed;
            Ok(())
        }
        Err(err) => Err(PyErr::new::<PyException, _>(format!(
            "Failed to parse video_decoder: {}",
            err
        ))),
    }
}
}
#[pyclass]
......
......@@ -26,6 +26,7 @@ block-manager-bench = ["block-manager", "testing-full", "dep:clap", "dep:indicat
cuda = ["dep:cudarc"]
integration = ["dynamo-runtime/integration"]
media-nixl = ["dep:nixl-sys", "dep:dynamo-memory"]
media-ffmpeg = ["dep:video-rs", "dep:ffmpeg-next", "dep:memfile", "media-nixl"]
[[bench]]
name = "tokenizer"
......@@ -157,6 +158,9 @@ json-five = { version = "0.3" }
reqwest = { workspace = true }
base64 = { version = "0.22" }
image = { version = "0.25", features = ["serde"] }
video-rs = { version = "0.10.5", optional = true }
ffmpeg-next = { version = "7.1.0", optional = true }
memfile = { version = "0.3.2", optional = true }
tokio-rayon = {version = "2" }
ndarray = { version = "0.16" }
ndarray-npy = { version = "0.9" }
......
......@@ -25,6 +25,7 @@ Set media decoding options:
from dynamo.llm import MediaDecoder
decoder = MediaDecoder()
decoder.image_decoder({"max_image_width": 4096, "max_image_height": 4096, "max_alloc": 16*1024*1024})
decoder.video_decoder({"strict": True, "fps": 2.0, "max_frames": 128, "max_alloc": 1024*1024*128*3})
```
And register the LLM as usual, adding the media configuration:
......@@ -46,12 +47,32 @@ register_llm(
> [!WARNING]
> **Requires GPU node**: The frontend must run on a node with GPU access. During media processing, decoded tensors are written to GPU memory via NIXL, which requires `libcuda.so.1` to be available. Running the frontend on a CPU-only node will fail with something like: `Failed to initialize required backends: [UCX: No UCX plugin found]`.
## Image decoding options
- **max_image_width** (uint32, > 0): If the image width exceeds this value, abort the decoding.
- **max_image_height** (uint32, > 0): If the image height exceeds this value, abort the decoding.
- **max_alloc** (uint64, > 0): Maximum allowed total allocation (RAM) of the decoder in bytes
## Video decoding options
### Sampling
There are two ways to configure video sampling: either with a fixed number of frames, or with FPS-based sampling. Sampled frames are distributed uniformly in both cases.
- **num_frames** (uint32, > 0): Attempt to decode exactly this number of frames from the input video.
- **fps** (float32, > 0) and optionally **max_frames** (uint32, > 0): Attempt to decode at a given framerate, with a potential cap on the number of decoded frames.
### Others
- **strict** (bool): if strict mode is enabled, any failure to decode a requested frame will abort the whole video decoding and error out. When strict mode is disabled, it is possible that the decoding of some requested frame fails, and the resulting set of decoded frames might contain fewer frames than expected.
- **max_alloc** (usize, > 0): If the total number of bytes in the decoded frames would exceed this value, abort the decoding.
## TODOs
### Modalities
- [x] Image decoding
- [x] Video decoding
- [ ] Audio decoding
### Performance
......
......@@ -7,8 +7,12 @@ use serde::{Deserialize, Serialize};
use super::common::EncodedMediaData;
use super::rdma::DecodedMediaData;
pub mod image;
#[cfg(feature = "media-ffmpeg")]
pub mod video;
pub use image::{ImageDecoder, ImageMetadata};
#[cfg(feature = "media-ffmpeg")]
pub use video::{VideoDecoder, VideoMetadata};
#[async_trait::async_trait]
pub trait Decoder: Clone + Send + 'static {
......@@ -27,10 +31,15 @@ pub trait Decoder: Clone + Send + 'static {
/// Aggregated per-modality decoder configuration.
///
/// Every field is marked `#[serde(default)]`, so a modality that is absent
/// from the serialized configuration falls back to that decoder's `Default`.
pub struct MediaDecoder {
/// Options applied when decoding images.
#[serde(default)]
pub image_decoder: ImageDecoder,
/// Options applied when decoding videos; the field only exists when the
/// `media-ffmpeg` feature is enabled.
#[cfg(feature = "media-ffmpeg")]
#[serde(default)]
pub video_decoder: VideoDecoder,
// TODO: audio decoder
}
#[derive(Serialize, Deserialize, Clone, Copy, Debug)]
#[derive(Serialize, Deserialize, Clone, Debug)]
pub enum DecodedMediaMetadata {
Image(ImageMetadata),
#[cfg(feature = "media-ffmpeg")]
Video(VideoMetadata),
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use std::io::Write;
use std::os::fd::AsRawFd;
use anyhow::Result;
use ffmpeg_next::Rational;
use ffmpeg_next::ffi::{AVPixelFormat, av_image_copy_to_buffer};
use memfile::{CreateOptions, MemFile, Seal};
use ndarray::Array4;
use serde::{Deserialize, Serialize};
use video_rs::frame::RawFrame;
use video_rs::{Location, Time};
use super::Decoder;
use crate::preprocessor::media::{
DecodedMediaData, EncodedMediaData, decoders::DecodedMediaMetadata,
};
/// Small time buffer (seconds) to avoid edge cases when seeking near frame boundaries
const FRAME_TIME_BUFFER_SECS: f64 = 0.001;
/// Video decoding and frame-sampling options.
///
/// Frames are sampled either at a target rate (`fps`, optionally capped by
/// `max_frames`) or as a fixed count (`num_frames`). `decode` rejects
/// configurations that set `num_frames` together with `fps` or `max_frames`.
/// When neither `fps` nor `num_frames` is set, every frame of the video is
/// requested. Unknown keys are rejected during deserialization
/// (`deny_unknown_fields`).
#[derive(Clone, Default, Debug, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct VideoDecoder {
/// Sample N frames per second of video; the requested frame count is the
/// video duration multiplied by this value.
#[serde(default)]
pub(crate) fps: Option<f64>,
/// Sample at most N frames (used with `fps`).
#[serde(default)]
pub(crate) max_frames: Option<u64>,
/// Sample N frames in total, spread uniformly over the video (linspace).
#[serde(default)]
pub(crate) num_frames: Option<u64>,
/// When true, a failure to decode any requested frame aborts the whole
/// decode; when false, the failing frame is skipped.
#[serde(default)]
pub(crate) strict: bool,
/// Maximum allowed total size of the decoded frames in bytes
/// (frames x width x height x 3); `None` means no limit.
#[serde(default)]
pub(crate) max_alloc: Option<u64>,
}
/// Information about the source video and the frames that were sampled from it.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct VideoMetadata {
/// Frame rate reported by the decoder for the source video.
pub(crate) source_fps: f64,
/// Duration of the source video in seconds, as reported by the decoder.
pub(crate) source_duration: f64,
/// Timestamp, in seconds, of each successfully decoded frame, in decode order.
pub(crate) sampled_timestamps: Vec<f64>,
}
/// Work out how many frames should be sampled from the video.
///
/// With `config.fps` set, the count is `duration * fps`; otherwise it is
/// `config.num_frames`, falling back to every frame of the video. The result
/// is capped by `config.max_frames` and is never less than one.
///
/// # Errors
///
/// Fails when the frame count or (for fps sampling) the duration cannot be
/// determined, or when more frames are requested than the video contains.
fn get_num_requested_frames(
    config: &VideoDecoder,
    decoder: &video_rs::decode::Decoder,
) -> Result<u64> {
    // Duration and frame count come from file metadata and might be inaccurate.
    let duration_secs = decoder.duration()?.as_secs() as f64;
    let frame_rate = decoder.frame_rate() as f64;

    // When the frame count is missing, estimate it from duration and frame rate.
    let total_frames = match decoder.frames().unwrap_or(0) {
        0 if duration_secs > 0.0 && frame_rate > 0.0 => (duration_secs * frame_rate) as u64,
        reported => reported,
    };
    anyhow::ensure!(total_frames > 0, "Cannot determine the video frame count");

    let uncapped = match config.fps {
        // fps based sampling
        Some(target_fps) => {
            anyhow::ensure!(duration_secs > 0.0, "Cannot determine the video duration");
            (duration_secs * target_fps) as u64
        }
        // frame count based sampling; the last fallback is to decode all frames
        None => config.num_frames.unwrap_or(total_frames),
    };

    let capped = match config.max_frames {
        Some(cap) => uncapped.min(cap),
        None => uncapped,
    };
    let requested_frames = capped.max(1);

    anyhow::ensure!(
        requested_frames > 0 && requested_frames <= total_frames,
        "Cannot decode {requested_frames} frames from {total_frames} total frames",
    );
    Ok(requested_frames)
}
/// Compute the timestamps at which frames should be sampled.
///
/// The timestamps are spread uniformly between zero and the start of the last
/// frame (minus a small safety buffer). A single requested frame is taken
/// from the middle of that range.
///
/// # Errors
///
/// Fails when `requested_frames` is zero or when `duration_secs` or
/// `frame_rate` is not strictly positive.
fn get_target_times(
    requested_frames: u64,
    duration_secs: f64,
    frame_rate: f64,
) -> Result<Vec<Time>> {
    anyhow::ensure!(
        requested_frames > 0,
        "Invalid requested frames {requested_frames}"
    );
    anyhow::ensure!(duration_secs > 0.0, "Invalid duration {duration_secs}");
    anyhow::ensure!(frame_rate > 0.0, "Invalid frame rate {frame_rate}");

    // Stay one frame (plus a small buffer) away from the end to avoid edge cases.
    // Variable frame rate might not work well here.
    let frame_duration = 1.0 / frame_rate;
    let last_frame_time = (duration_secs - frame_duration - FRAME_TIME_BUFFER_SECS).max(0.0);

    let times = match requested_frames {
        1 => vec![Time::from_secs(last_frame_time as f32 / 2.0)],
        count => {
            let last_index = count as f64 - 1.0;
            let mut times = Vec::with_capacity(count as usize);
            for index in 0..count {
                let time_secs = (index as f64 * last_frame_time) / last_index;
                times.push(Time::from_secs(time_secs.max(0.0) as f32));
            }
            times
        }
    };
    Ok(times)
}
fn get_frame_timestamp(frame: &RawFrame, time_base: Rational) -> Result<f64> {
anyhow::ensure!(!frame.is_corrupt(), "Frame is corrupt");
// get timestamp from frame metadata: best_effort_timestamp or pts from ffmpeg
let best_effort_pts = frame.timestamp();
let pts = frame.pts();
match best_effort_pts.or(pts) {
Some(ts) => Ok(Time::new(Some(ts), time_base).as_secs() as f64),
None => anyhow::bail!("No timestamp found (both best_effort_pts and pts are None)"),
}
}
/// Decode forward until a frame at or after `target_time` is found and copy its
/// pixel data into `output_buffer`.
///
/// The caller is responsible for seeking the decoder to an appropriate
/// position beforehand. Frames whose timestamp cannot be determined (corrupt
/// or missing timestamp) are skipped.
///
/// Returns the timestamp, in seconds, of the frame that was copied.
///
/// # Errors
///
/// Fails when `output_buffer` is larger than FFmpeg can address, when a frame
/// fails to decode, when the copied byte count does not match the buffer
/// length, or when the stream ends before the target timestamp is reached.
fn decode_frame_at_timestamp(
    decoder: &mut video_rs::decode::Decoder,
    target_time: &Time,
    output_buffer: &mut [u8],
) -> Result<f64> {
    let target_timestamp = target_time.as_secs() as f64;
    let time_base = decoder.time_base();

    // FFmpeg takes the destination size as a C int; refuse buffers that would
    // otherwise be silently truncated by an `as i32` cast.
    let buffer_len = i32::try_from(output_buffer.len()).map_err(|_| {
        anyhow::anyhow!(
            "Frame buffer of {} bytes is too large to be passed to FFmpeg",
            output_buffer.len()
        )
    })?;

    // Decode until we reach or pass the target timestamp.
    // We use decode_raw_iter to handle timestamps better than video-rs.
    for frame_result in decoder.decode_raw_iter() {
        let mut raw_frame =
            frame_result.map_err(|e| anyhow::anyhow!("Frame decode error: {}", e))?;
        let timestamp = match get_frame_timestamp(&raw_frame, time_base) {
            Ok(ts) => ts,
            Err(_) => continue,
        };

        // If we reached the target time or passed it
        if timestamp >= target_timestamp {
            // Copy frame data to the provided buffer.
            // Adapted from video-rs convert_frame_to_ndarray_rgb24 (private function).
            //
            // SAFETY: `frame_ptr` is obtained from `raw_frame`, which stays alive for
            // the whole block, and `output_buffer` is a live, exclusively borrowed
            // slice of exactly `buffer_len` bytes, which is the destination size
            // handed to FFmpeg. The transmute assumes the frame's `format` field
            // holds a valid `AVPixelFormat` discriminant, as the video-rs helper
            // this is adapted from does.
            let bytes_copied = unsafe {
                let frame_ptr = raw_frame.as_mut_ptr();
                let frame_format = std::mem::transmute::<i32, AVPixelFormat>((*frame_ptr).format);
                av_image_copy_to_buffer(
                    output_buffer.as_mut_ptr(),
                    buffer_len,
                    (*frame_ptr).data.as_ptr() as *const *const u8,
                    (*frame_ptr).linesize.as_ptr(),
                    frame_format,
                    raw_frame.width() as i32,
                    raw_frame.height() as i32,
                    1,
                )
            };
            anyhow::ensure!(
                bytes_copied == buffer_len,
                "Failed to copy frame data: expected {} bytes, copied {}",
                output_buffer.len(),
                bytes_copied
            );
            return Ok(timestamp);
        }
    }
    anyhow::bail!("No frame found for timestamp {target_timestamp:.3}s");
}
impl Decoder for VideoDecoder {
    /// Decode the sampled frames of a video into a `(frames, height, width, 3)`
    /// byte tensor.
    ///
    /// The encoded bytes are exposed to video-rs through a sealed in-memory
    /// file. Frames are sampled at the timestamps computed from the configured
    /// `fps` / `max_frames` / `num_frames`. For each timestamp the decoder first
    /// tries to seek; if a seek fails, the decoder is repositioned at the last
    /// successfully decoded timestamp and all remaining frames are reached by
    /// decoding sequentially, without further seeks.
    ///
    /// In non-strict mode, frames that fail to decode are skipped and the
    /// returned tensor holds only the frames that succeeded.
    ///
    /// # Errors
    ///
    /// Fails on conflicting sampling options, on I/O or demuxing errors, when
    /// the decoded size would exceed `max_alloc`, when a frame fails to decode
    /// in strict mode, or when no frame at all could be decoded.
    fn decode(&self, data: EncodedMediaData) -> Result<DecodedMediaData> {
        anyhow::ensure!(
            self.fps.is_none() || self.num_frames.is_none(),
            "fps and num_frames cannot be specified at the same time"
        );
        anyhow::ensure!(
            self.max_frames.is_none() || self.num_frames.is_none(),
            "max_frames and num_frames cannot be specified at the same time"
        );

        // video-rs wants a file path, we use memfile for in-memory file.
        // `mem_file` must outlive `decoder`, which reads it through /proc/self/fd.
        let mut mem_file = MemFile::create("video", CreateOptions::new().allow_sealing(true))?;
        mem_file.write_all(&data.into_bytes()?)?; // one-liner so result of into_bytes will be dropped asap
        mem_file.add_seals(Seal::Write | Seal::Shrink | Seal::Grow)?;
        let fd_path = format!("/proc/self/fd/{}", mem_file.as_raw_fd());
        let location = Location::File(fd_path.into());
        let mut decoder = video_rs::decode::Decoder::new(location)?;

        let requested_frames = get_num_requested_frames(self, &decoder)?;
        let source_duration = decoder.duration()?.as_secs() as f64;
        let source_fps = decoder.frame_rate() as f64;
        let target_times = get_target_times(requested_frames, source_duration, source_fps)?;

        let (width, height) = decoder.size();
        anyhow::ensure!(
            width > 0 && height > 0,
            "Invalid video dimensions {requested_frames}x{width}x{height}"
        );

        // Compute the allocation size with checked arithmetic so that an
        // overflowing product is rejected instead of wrapping past the limit.
        let max_alloc = self.max_alloc.unwrap_or(u64::MAX);
        let frame_bytes = (width as u64)
            .checked_mul(height as u64)
            .and_then(|pixels| pixels.checked_mul(3));
        let total_bytes = frame_bytes
            .and_then(|bytes| bytes.checked_mul(requested_frames))
            .filter(|&bytes| bytes <= max_alloc);
        let (Some(frame_bytes), Some(total_bytes)) = (frame_bytes, total_bytes) else {
            anyhow::bail!(
                "Video dimensions {requested_frames}x{width}x{height}x3 exceed max alloc {max_alloc}"
            );
        };

        // Preallocate the buffer for all frames
        let frame_size = usize::try_from(frame_bytes)?;
        let total_size = usize::try_from(total_bytes)?;
        let mut all_frames = vec![0u8; total_size];
        // `requested_frames` <= `total_size`, which fits in usize, so the cast is lossless.
        let mut sampled_timestamps: Vec<f64> = Vec::with_capacity(requested_frames as usize);

        let mut sequential_mode = false;
        let mut last_successful_time = Time::from_secs(0.0);
        for time in target_times.iter() {
            // Try to seek if not in sequential mode; a failed seek switches to
            // sequential decoding for the rest of the video.
            if !sequential_mode && decoder.seek((time.as_secs() * 1000.0) as i64).is_err() {
                sequential_mode = true;
                // Re-establish decoder position at last known good position
                decoder.seek((last_successful_time.as_secs() * 1000.0) as i64)?;
            }

            // Decoded frames are packed back to back: the next slot is determined
            // by how many frames succeeded so far, not by the loop index.
            let offset = sampled_timestamps.len() * frame_size;
            let frame_buffer = &mut all_frames[offset..offset + frame_size];
            match decode_frame_at_timestamp(&mut decoder, time, frame_buffer) {
                Ok(timestamp) => {
                    sampled_timestamps.push(timestamp);
                    last_successful_time = *time;
                }
                Err(error) => {
                    if self.strict {
                        anyhow::bail!(
                            "Frame decode error at timestamp {:.3}s: {}",
                            time.as_secs(),
                            error
                        );
                    }
                    continue;
                }
            }
        }

        let num_frames_decoded = sampled_timestamps.len();
        anyhow::ensure!(
            num_frames_decoded > 0,
            "Failed to decode any frames, check for video corruption"
        );

        // Truncate buffer to actual frames decoded (in case some failed in non-strict mode)
        all_frames.truncate(num_frames_decoded * frame_size);
        let shape = (num_frames_decoded, height as usize, width as usize, 3);
        let array = Array4::from_shape_vec(shape, all_frames)?;
        let mut decoded: DecodedMediaData = array.try_into()?;
        decoded.tensor_info.metadata = Some(DecodedMediaMetadata::Video(VideoMetadata {
            source_fps,
            source_duration,
            sampled_timestamps,
        }));
        Ok(decoded)
    }
}
#[cfg(test)]
mod tests {
    use super::super::super::rdma::DataType;
    use super::*;
    use rstest::rstest;

    /// Read a fixture from `tests/data/media` and derive its expected geometry
    /// from the file name.
    ///
    /// Fixture names follow `{resolution}_{frames}.mp4`, e.g. `240p_10.mp4` is a
    /// 320x240 clip with 10 frames. Returns `(encoded data, width, height, frames)`.
    fn load_test_video(filename: &str) -> (EncodedMediaData, u32, u32, u32) {
        let manifest_dir = env!("CARGO_MANIFEST_DIR");
        let path = format!("{}/tests/data/media/{}", manifest_dir, filename);
        let bytes = match std::fs::read(&path) {
            Ok(contents) => contents,
            Err(_) => panic!("Failed to read test video: {}", path),
        };

        let stem = filename.strip_suffix(".mp4").unwrap();
        let mut fields = stem.split('_');
        let resolution = fields.next().unwrap();
        let frames: u32 = fields.next().unwrap().parse().unwrap();
        let (width, height) = match resolution {
            "2p" => (2, 2),
            "240p" => (320, 240),
            "2160p" => (3840, 2160),
            _ => panic!("Unknown resolution: {}", resolution),
        };

        let encoded = EncodedMediaData {
            bytes,
            b64_encoded: false,
        };
        (encoded, width, height, frames)
    }

    /// Requesting a fixed number of frames yields exactly that many frames.
    #[test]
    fn test_decode_video_num_frames() {
        let (encoded_data, width, height, _total_frames) = load_test_video("240p_10.mp4");
        let requested_frames = 5u64;
        let decoder = VideoDecoder {
            num_frames: Some(requested_frames),
            ..Default::default()
        };

        let decoded = decoder.decode(encoded_data).unwrap();
        let info = &decoded.tensor_info;

        assert_eq!(info.shape[0], requested_frames as usize);
        assert_eq!(info.shape[1], height as usize);
        assert_eq!(info.shape[2], width as usize);
        assert_eq!(info.shape[3], 3);
        assert_eq!(info.dtype, DataType::UINT8);
    }

    /// FPS-based sampling derives the frame count from the video duration.
    #[test]
    fn test_decode_video_fps_sampling() {
        let (encoded_data, width, height, _total_frames) = load_test_video("240p_100.mp4");
        let target_fps = 0.5f64;
        let decoder = VideoDecoder {
            fps: Some(target_fps),
            ..Default::default()
        };

        let decoded = decoder.decode(encoded_data).unwrap();
        let info = &decoded.tensor_info;

        // fps * duration calculation - video decoder uses duration from file
        // Source file is at 1fps, should get exactly 50 frames
        assert_eq!(info.shape[0], 50);
        assert_eq!(info.shape[1], height as usize);
        assert_eq!(info.shape[2], width as usize);
        assert_eq!(info.shape[3], 3);
        assert_eq!(info.dtype, DataType::UINT8);
    }

    /// The `max_alloc` limit is enforced on the total size of the decoded frames.
    #[rstest]
    #[case(Some(320 * 240 * 5 * 3), "240p_10.mp4", 5, true, "within limit")]
    #[case(Some(320 * 240 * 2 * 3), "240p_10.mp4", 5, false, "exceeds limit")]
    #[case(Some(2 * 2 * 10 * 3), "2p_10.mp4", 10, true, "exactly at limit")]
    #[case(None, "2160p_10.mp4", 10, true, "no limit")]
    fn test_max_alloc(
        #[case] max_alloc: Option<u64>,
        #[case] video_file: &str,
        #[case] num_frames: u64,
        #[case] should_succeed: bool,
        #[case] test_case: &str,
    ) {
        let (encoded_data, width, height, _) = load_test_video(video_file);
        let decoder = VideoDecoder {
            num_frames: Some(num_frames),
            max_alloc,
            ..Default::default()
        };

        let result = decoder.decode(encoded_data);

        if !should_succeed {
            assert!(result.is_err(), "Should fail for case: {}", test_case);
            return;
        }

        assert!(
            result.is_ok(),
            "Should decode successfully for case: {test_case}",
        );
        let decoded = result.unwrap();
        assert_eq!(decoded.tensor_info.shape[1], height as usize);
        assert_eq!(decoded.tensor_info.shape[2], width as usize);
        assert_eq!(decoded.tensor_info.dtype, DataType::UINT8);
    }

    /// `fps` and `num_frames` are mutually exclusive.
    #[test]
    fn test_conflicting_fps_and_num_frames() {
        let (encoded_data, ..) = load_test_video("240p_10.mp4");
        let decoder = VideoDecoder {
            fps: Some(2.0f64),
            num_frames: Some(5u64),
            ..Default::default()
        };

        let result = decoder.decode(encoded_data);

        assert!(
            result.is_err(),
            "Should fail when both fps and num_frames are specified"
        );
        let error_msg = result.unwrap_err().to_string();
        assert!(error_msg.contains("cannot be specified at the same time"));
    }

    // Unit tests for get_target_times
    #[test]
    fn test_get_target_times() {
        // 10 frames at 1fps over 10s duration
        let times = get_target_times(10u64, 10.0f64, 1.0f64).unwrap();
        assert_eq!(times.len(), 10);
        assert_eq!(times[0].as_secs(), 0.0);

        // Last frame should be less than 9s (10 - 1/1fps - 0.001)
        let last_time = times.last().unwrap().as_secs();
        assert!(
            last_time < 9.0,
            "Last time should be < 9s, got {}",
            last_time
        );
        assert!(
            last_time > 8.0,
            "Last time should be > 8s, got {}",
            last_time
        );
    }
}
......@@ -124,8 +124,15 @@ impl MediaLoader {
ChatCompletionRequestUserMessageContentPart::VideoUrl(video_part) => {
let url = &video_part.video_url.url;
self.check_if_url_allowed(url)?;
EncodedMediaData::from_url(url, &self.http_client).await?;
anyhow::bail!("Video decoding is not supported yet");
let data = EncodedMediaData::from_url(url, &self.http_client).await?;
#[cfg(not(feature = "media-ffmpeg"))]
anyhow::bail!(
"Video decoding requires the 'media-ffmpeg' feature to be enabled"
);
#[cfg(feature = "media-ffmpeg")]
self.media_decoder.video_decoder.decode_async(data).await?
}
ChatCompletionRequestUserMessageContentPart::AudioUrl(_) => {
anyhow::bail!("Audio decoding is not supported yet");
......
llm-optimize-deploy-graphic.png filter=lfs diff=lfs merge=lfs -text
240p_1.mp4 filter=lfs diff=lfs merge=lfs -text
2p_10.mp4 filter=lfs diff=lfs merge=lfs -text
2160p_10.mp4 filter=lfs diff=lfs merge=lfs -text
240p_100.mp4 filter=lfs diff=lfs merge=lfs -text
240p_10.mp4 filter=lfs diff=lfs merge=lfs -text
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment