Unverified Commit e84b1e77 authored by Tanmay Verma's avatar Tanmay Verma Committed by GitHub
Browse files

fix: Disable NIXL backend for TRTLLM on ARM (#1639)

parent bed8b335
......@@ -90,9 +90,17 @@ cp $MAIN_DIR/deps/tensorrt_llm/install_nixl.sh docker/common/install_nixl.sh
sed -i "s/NIXL_COMMIT=\"[^\"]*\"/NIXL_COMMIT=\"${NIXL_COMMIT}\"/" docker/common/install_nixl.sh
# Need to build in the Triton Devel Image for NIXL support.
make -C docker tritondevel_build
make -C docker wheel_build DEVEL_IMAGE=tritondevel BUILD_WHEEL_OPTS='--extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl'
# Select the wheel build flavour from the target architecture in $ARCH.
case "$ARCH" in
  amd64)
    # NIXL support requires the wheel to be built inside the Triton devel
    # image, so build that image first and point CMake at the NIXL install.
    make -C docker tritondevel_build
    make -C docker wheel_build DEVEL_IMAGE=tritondevel BUILD_WHEEL_OPTS='--extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl'
    ;;
  *)
    # Any other value of $ARCH takes the plain wheel build without NIXL:
    # the NIXL backend is not supported on arm64 for TensorRT-LLM.
    # See here: https://github.com/NVIDIA/TensorRT-LLM/blob/main/docker/common/install_nixl.sh
    make -C docker wheel_build
    ;;
esac
# Copy the wheel to the host
mkdir -p $OUTPUT_DIR
......
......@@ -312,6 +312,8 @@ TensorRT-LLM also provides experimental support for using **NIXL** (NVIDIA Infer
#### Using NIXL for KV Cache Transfer
**Note:** The NIXL backend for TensorRT-LLM is currently supported only on the AMD64 (x86_64) architecture. If you are running on ARM64, use the default UCX method for KV cache transfer instead.
To enable NIXL for KV cache transfer in disaggregated serving:
1. **Build the container with NIXL support:**
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment