Unverified commit 0c3543d7, authored by Yineng Zhang, committed by GitHub
Browse files

chore: upgrade flashinfer 0.5.0 (#12523)


Co-authored-by: Baizhou Zhang <sobereddiezhang@gmail.com>
parent 6a3b9fd0
......@@ -26,7 +26,9 @@ dependencies = [
"datasets",
"einops",
"fastapi",
"flashinfer_python==0.4.1",
"flashinfer_python==0.5.0",
"flashinfer_cubin==0.5.0",
"flashinfer_jit_cache==0.5.0",
"gguf",
"hf_transfer",
"huggingface_hub",
......
......@@ -22,6 +22,8 @@ PACKAGE_LIST = [
"sglang",
"sgl_kernel",
"flashinfer_python",
"flashinfer_cubin",
"flashinfer_jit_cache",
"triton",
"transformers",
"torchao",
......
......@@ -712,7 +712,7 @@ def _set_envs_and_config(server_args: ServerArgs):
if server_args.attention_backend == "flashinfer":
assert_pkg_version(
"flashinfer_python",
"0.4.1",
"0.5.0",
"Please uninstall the old version and "
"reinstall the latest version by following the instructions "
"at https://docs.flashinfer.ai/installation.html.",
......
......@@ -2386,7 +2386,9 @@ def set_cuda_arch():
if is_flashinfer_available():
capability = torch.cuda.get_device_capability()
arch = f"{capability[0]}.{capability[1]}"
os.environ["TORCH_CUDA_ARCH_LIST"] = f"{arch}{'+PTX' if arch == '9.0' else ''}"
os.environ["FLASHINFER_CUDA_ARCH_LIST"] = (
f"{arch}{'a' if capability[0] >= 9 else ''}"
)
def next_power_of_2(n: int):
......
......@@ -23,6 +23,7 @@ echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"
# Clear torch compilation cache
python3 -c 'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)'
rm -rf /root/.cache/flashinfer
pip3 uninstall flashinfer-python flashinfer-cubin flashinfer-jit-cache || true
# Install apt packages
apt install -y git libnuma-dev libssl-dev pkg-config
......@@ -93,7 +94,7 @@ else
fi
# Install the main package
$PIP_CMD install -e "python[dev]" --extra-index-url https://download.pytorch.org/whl/${CU_VERSION} $PIP_INSTALL_SUFFIX
$PIP_CMD install -e "python[dev]" --extra-index-url https://download.pytorch.org/whl/${CU_VERSION} --extra-index-url https://flashinfer.ai/whl/${CU_VERSION} $PIP_INSTALL_SUFFIX
# Install router for pd-disagg test
$PIP_CMD install sglang-router $PIP_INSTALL_SUFFIX
......
......@@ -147,7 +147,7 @@ docker run --rm \
ln -sv /usr/lib64/libibverbs.so.1 /usr/lib64/libibverbs.so && \
${PYTHON_ROOT_PATH}/bin/${TORCH_INSTALL} && \
${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir ninja setuptools==75.0.0 wheel==0.41.0 numpy uv scikit-build-core && \
export TORCH_CUDA_ARCH_LIST='8.0 8.9 9.0+PTX' && \
export FLASHINFER_CUDA_ARCH_LIST='8.0 8.9 9.0a 10.0a 12.0a' && \
export CUDA_VERSION=${CUDA_VERSION} && \
mkdir -p /usr/lib/${ARCH}-linux-gnu/ && \
ln -s /usr/local/cuda-${CUDA_VERSION}/targets/${LIBCUDA_ARCH}-linux/lib/stubs/libcuda.so /usr/lib/${ARCH}-linux-gnu/libcuda.so && \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment