Unverified commit ac963be2 authored by Yineng Zhang, committed by GitHub

update flashinfer-python (#3557)

parent e0b9a423
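
This change swaps the `--find-links` index path from `.../flashinfer/` to `.../flashinfer-python` everywhere the docs and Dockerfile reference it, tracking the renamed flashinfer distribution. A quick way to confirm the renamed index resolves (a hypothetical check, not part of this commit; it assumes the distribution published on that index is named `flashinfer-python`):

```bash
# Hypothetical sanity check: ask pip to fetch a wheel from the renamed index
# without installing it. Package name and index URL are taken from this diff.
pip download flashinfer-python --no-deps -d /tmp/flashinfer-check \
    --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer-python
```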
@@ -33,7 +33,7 @@ Add [performance optimization options](#performance-optimization-options) as needed
 ### Using pip
 ```bash
 # Installation
-pip install "sglang[all]>=0.4.3" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer
+pip install "sglang[all]>=0.4.3" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer-python
 # Launch
 python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3 --tp 8 --trust-remote-code
...
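
As an optional smoke test after the quickstart above (hypothetical, not part of this commit), you can confirm the wheel imports before launching the server:

```bash
# Hypothetical check: the import should succeed, and importlib.metadata should
# report the installed version (assumes the distribution name flashinfer-python).
python3 -c "import flashinfer" \
    && python3 -c "import importlib.metadata as m; print(m.version('flashinfer-python'))"
```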
@@ -43,26 +43,26 @@ RUN python3 -m pip install --upgrade pip setuptools wheel html5lib six \
     && cd sglang \
     && if [ "$BUILD_TYPE" = "srt" ]; then \
          if [ "$CUDA_VERSION" = "12.1.1" ]; then \
-           python3 -m pip --no-cache-dir install -e "python[srt]" --find-links https://flashinfer.ai/whl/cu121/torch2.5/flashinfer/; \
+           python3 -m pip --no-cache-dir install -e "python[srt]" --find-links https://flashinfer.ai/whl/cu121/torch2.5/flashinfer-python; \
          elif [ "$CUDA_VERSION" = "12.4.1" ]; then \
-           python3 -m pip --no-cache-dir install -e "python[srt]" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer/; \
+           python3 -m pip --no-cache-dir install -e "python[srt]" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer-python; \
          elif [ "$CUDA_VERSION" = "12.5.1" ]; then \
-           python3 -m pip --no-cache-dir install -e "python[srt]" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer/; \
+           python3 -m pip --no-cache-dir install -e "python[srt]" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer-python; \
          elif [ "$CUDA_VERSION" = "11.8.0" ]; then \
-           python3 -m pip --no-cache-dir install -e "python[srt]" --find-links https://flashinfer.ai/whl/cu118/torch2.5/flashinfer/; \
+           python3 -m pip --no-cache-dir install -e "python[srt]" --find-links https://flashinfer.ai/whl/cu118/torch2.5/flashinfer-python; \
            python3 -m pip install sgl-kernel -i https://docs.sglang.ai/whl/cu118; \
          else \
            echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1; \
          fi; \
        else \
          if [ "$CUDA_VERSION" = "12.1.1" ]; then \
-           python3 -m pip --no-cache-dir install -e "python[all]" --find-links https://flashinfer.ai/whl/cu121/torch2.5/flashinfer/; \
+           python3 -m pip --no-cache-dir install -e "python[all]" --find-links https://flashinfer.ai/whl/cu121/torch2.5/flashinfer-python; \
          elif [ "$CUDA_VERSION" = "12.4.1" ]; then \
-           python3 -m pip --no-cache-dir install -e "python[all]" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer/; \
+           python3 -m pip --no-cache-dir install -e "python[all]" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer-python; \
          elif [ "$CUDA_VERSION" = "12.5.1" ]; then \
-           python3 -m pip --no-cache-dir install -e "python[all]" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer/; \
+           python3 -m pip --no-cache-dir install -e "python[all]" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer-python; \
          elif [ "$CUDA_VERSION" = "11.8.0" ]; then \
-           python3 -m pip --no-cache-dir install -e "python[all]" --find-links https://flashinfer.ai/whl/cu118/torch2.5/flashinfer/; \
+           python3 -m pip --no-cache-dir install -e "python[all]" --find-links https://flashinfer.ai/whl/cu118/torch2.5/flashinfer-python; \
            python3 -m pip install sgl-kernel -i https://docs.sglang.ai/whl/cu118; \
          else \
            echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1; \
...
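
For context, the Dockerfile above picks the wheel index from its `CUDA_VERSION` and `BUILD_TYPE` build args, so a cu124 runtime image might be built along these lines (a hypothetical invocation; the Dockerfile path and image tag are assumptions, not part of this commit):

```bash
# Hypothetical build: the docker/ context path and the tag are assumed.
docker build docker/ \
    --build-arg CUDA_VERSION=12.4.1 \
    --build-arg BUILD_TYPE=srt \
    -t sglang:cu124-srt
```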
@@ -6,7 +6,7 @@ You can install SGLang using any of the methods below.
 ```
 pip install --upgrade pip
 pip install sgl-kernel --force-reinstall --no-deps
-pip install "sglang[all]>=0.4.3" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer/
+pip install "sglang[all]>=0.4.3" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer-python
 ```
 Note: SGLang currently uses torch 2.5, so you need to install the flashinfer version for torch 2.5. If you want to install flashinfer separately, please refer to [FlashInfer installation doc](https://docs.flashinfer.ai/installation.html).
@@ -24,7 +24,7 @@ cd sglang
 pip install --upgrade pip
 pip install sgl-kernel --force-reinstall --no-deps
-pip install -e "python[all]" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer/
+pip install -e "python[all]" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer-python
 ```
 Note: SGLang currently uses torch 2.5, so you need to install the flashinfer version for torch 2.5. If you want to install flashinfer separately, please refer to [FlashInfer installation doc](https://docs.flashinfer.ai/installation.html).
...
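
Both notes above pin torch 2.5, so after either install method a quick environment check might look like this (a hypothetical sketch, not part of this commit):

```bash
# Hypothetical verification: torch should report a 2.5.x version, and sglang
# should import cleanly once the editable install finishes.
python3 -c "import torch; print(torch.__version__)"
python3 -c "import sglang; print(sglang.__version__)"
```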