"git@developer.sourcefind.cn:change/sglang.git" did not exist on "1a820e38a2fcc6d0e0324605bb39baec23d81f8d"
Unverified commit 0c3543d7, authored by Yineng Zhang, committed by GitHub
Browse files

chore: upgrade flashinfer 0.5.0 (#12523)


Co-authored-by: Baizhou Zhang <sobereddiezhang@gmail.com>
parent 6a3b9fd0
...@@ -26,7 +26,9 @@ dependencies = [ ...@@ -26,7 +26,9 @@ dependencies = [
"datasets", "datasets",
"einops", "einops",
"fastapi", "fastapi",
"flashinfer_python==0.4.1", "flashinfer_python==0.5.0",
"flashinfer_cubin==0.5.0",
"flashinfer_jit_cache==0.5.0",
"gguf", "gguf",
"hf_transfer", "hf_transfer",
"huggingface_hub", "huggingface_hub",
......
...@@ -22,6 +22,8 @@ PACKAGE_LIST = [ ...@@ -22,6 +22,8 @@ PACKAGE_LIST = [
"sglang", "sglang",
"sgl_kernel", "sgl_kernel",
"flashinfer_python", "flashinfer_python",
"flashinfer_cubin",
"flashinfer_jit_cache",
"triton", "triton",
"transformers", "transformers",
"torchao", "torchao",
......
...@@ -712,7 +712,7 @@ def _set_envs_and_config(server_args: ServerArgs): ...@@ -712,7 +712,7 @@ def _set_envs_and_config(server_args: ServerArgs):
if server_args.attention_backend == "flashinfer": if server_args.attention_backend == "flashinfer":
assert_pkg_version( assert_pkg_version(
"flashinfer_python", "flashinfer_python",
"0.4.1", "0.5.0",
"Please uninstall the old version and " "Please uninstall the old version and "
"reinstall the latest version by following the instructions " "reinstall the latest version by following the instructions "
"at https://docs.flashinfer.ai/installation.html.", "at https://docs.flashinfer.ai/installation.html.",
......
...@@ -2386,7 +2386,9 @@ def set_cuda_arch(): ...@@ -2386,7 +2386,9 @@ def set_cuda_arch():
if is_flashinfer_available(): if is_flashinfer_available():
capability = torch.cuda.get_device_capability() capability = torch.cuda.get_device_capability()
arch = f"{capability[0]}.{capability[1]}" arch = f"{capability[0]}.{capability[1]}"
os.environ["TORCH_CUDA_ARCH_LIST"] = f"{arch}{'+PTX' if arch == '9.0' else ''}" os.environ["FLASHINFER_CUDA_ARCH_LIST"] = (
f"{arch}{'a' if capability[0] >= 9 else ''}"
)
def next_power_of_2(n: int): def next_power_of_2(n: int):
......
...@@ -23,6 +23,7 @@ echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}" ...@@ -23,6 +23,7 @@ echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"
# Clear torch compilation cache # Clear torch compilation cache
python3 -c 'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)' python3 -c 'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)'
rm -rf /root/.cache/flashinfer rm -rf /root/.cache/flashinfer
pip3 uninstall flashinfer-python flashinfer-cubin flashinfer-jit-cache || true
# Install apt packages # Install apt packages
apt install -y git libnuma-dev libssl-dev pkg-config apt install -y git libnuma-dev libssl-dev pkg-config
...@@ -93,7 +94,7 @@ else ...@@ -93,7 +94,7 @@ else
fi fi
# Install the main package # Install the main package
$PIP_CMD install -e "python[dev]" --extra-index-url https://download.pytorch.org/whl/${CU_VERSION} $PIP_INSTALL_SUFFIX $PIP_CMD install -e "python[dev]" --extra-index-url https://download.pytorch.org/whl/${CU_VERSION} --extra-index-url https://flashinfer.ai/whl/${CU_VERSION} $PIP_INSTALL_SUFFIX
# Install router for pd-disagg test # Install router for pd-disagg test
$PIP_CMD install sglang-router $PIP_INSTALL_SUFFIX $PIP_CMD install sglang-router $PIP_INSTALL_SUFFIX
......
...@@ -147,7 +147,7 @@ docker run --rm \ ...@@ -147,7 +147,7 @@ docker run --rm \
ln -sv /usr/lib64/libibverbs.so.1 /usr/lib64/libibverbs.so && \ ln -sv /usr/lib64/libibverbs.so.1 /usr/lib64/libibverbs.so && \
${PYTHON_ROOT_PATH}/bin/${TORCH_INSTALL} && \ ${PYTHON_ROOT_PATH}/bin/${TORCH_INSTALL} && \
${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir ninja setuptools==75.0.0 wheel==0.41.0 numpy uv scikit-build-core && \ ${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir ninja setuptools==75.0.0 wheel==0.41.0 numpy uv scikit-build-core && \
export TORCH_CUDA_ARCH_LIST='8.0 8.9 9.0+PTX' && \ export FLASHINFER_CUDA_ARCH_LIST='8.0 8.9 9.0a 10.0a 12.0a' && \
export CUDA_VERSION=${CUDA_VERSION} && \ export CUDA_VERSION=${CUDA_VERSION} && \
mkdir -p /usr/lib/${ARCH}-linux-gnu/ && \ mkdir -p /usr/lib/${ARCH}-linux-gnu/ && \
ln -s /usr/local/cuda-${CUDA_VERSION}/targets/${LIBCUDA_ARCH}-linux/lib/stubs/libcuda.so /usr/lib/${ARCH}-linux-gnu/libcuda.so && \ ln -s /usr/local/cuda-${CUDA_VERSION}/targets/${LIBCUDA_ARCH}-linux/lib/stubs/libcuda.so /usr/lib/${ARCH}-linux-gnu/libcuda.so && \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment