Unverified Commit 59a0b855 authored by youkaichao's avatar youkaichao Committed by GitHub
Browse files

[bugfix] fix blackwell deepep installation (#22255)

parent 469b3ffa
......@@ -13,16 +13,16 @@ All scripts accept a positional argument as workspace path for staging the build
## Usage
### Single-node
```bash
bash install_python_libraries.sh
# for hopper
TORCH_CUDA_ARCH_LIST="9.0" bash install_python_libraries.sh
# for blackwell
TORCH_CUDA_ARCH_LIST="10.0" bash install_python_libraries.sh
```
### Multi-node
Additional steps for multi-node deployment:
```bash
bash install_python_libraries.sh
sudo bash configure_system_drivers.sh
sudo reboot # Reboot is required to load the new driver
```
......@@ -29,6 +29,12 @@ if [ -z "$CUDA_HOME" ]; then
exit 1
fi
# Require TORCH_CUDA_ARCH_LIST to be set to a non-empty value; otherwise print
# a hint and abort with exit status 1. Only presence is checked here -- the
# value itself is not validated and is assumed to match the target GPU
# (the usage notes give "9.0" for Hopper and "10.0" for Blackwell).
if [ -z "$TORCH_CUDA_ARCH_LIST" ]; then
echo "TORCH_CUDA_ARCH_LIST is not set, please set it to your desired architecture."
exit 1
fi
# disable all features except IBGDA
export NVSHMEM_IBGDA_SUPPORT=1
......@@ -95,7 +101,7 @@ clone_repo "https://github.com/ppl-ai/pplx-kernels" "pplx-kernels" "setup.py"
cd pplx-kernels
# see https://github.com/pypa/pip/issues/9955#issuecomment-838065925
# PIP_NO_BUILD_ISOLATION=0 disables build isolation
PIP_NO_BUILD_ISOLATION=0 TORCH_CUDA_ARCH_LIST=9.0a+PTX pip install -vvv -e .
PIP_NO_BUILD_ISOLATION=0 pip install -vvv -e .
popd
# build and install deepep, require pytorch installed
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment