Unverified Commit 723676b4 authored by Richard Huo's avatar Richard Huo Committed by GitHub
Browse files

fix: remove the aws-ofi-nccl plugin from linker cache in regular trtllm runtime image (#6944)

parent e6ddf0ea
......@@ -143,6 +143,13 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
# Create libnccl.so symlink pointing to libnccl.so.2. TensorRT-LLM requires explicit libnccl.so
ln -sf /usr/lib/${ARCH_ALT}-linux-gnu/libnccl.so.2 /usr/lib/${ARCH_ALT}-linux-gnu/libnccl.so
# The nvcr.io/nvidia/cuda-dl-base image ships the AWS OFI NCCL plugin, which can crash TRTLLM.
# Disable the plugin by renaming its ld.so.conf.d entry (only when the file exists),
# then regenerate the dynamic linker cache so the change takes effect.
RUN ofi_conf=/etc/ld.so.conf.d/aws-ofi-nccl.conf; \
    if test -f "${ofi_conf}"; then \
        mv "${ofi_conf}" "${ofi_conf}.disabled"; \
    fi && \
    ldconfig
{% if context.trtllm.enable_media_ffmpeg == "true" %}
# Copy ffmpeg libraries from wheel_builder (requires root, runs before USER dynamo)
RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment