Unverified commit d0cbac58, authored by Michael Goin and committed by GitHub
Browse files

[Dev UX] Add auto-detection for VLLM_PRECOMPILED_WHEEL_VARIANT during install (#32948)


Signed-off-by: mgoin <mgoin64@gmail.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Shengqi Chen <i@harrychen.xyz>
parent c0d82045
...@@ -118,7 +118,7 @@ There are more environment variables to control the behavior of Python-only buil ...@@ -118,7 +118,7 @@ There are more environment variables to control the behavior of Python-only buil
* `VLLM_PRECOMPILED_WHEEL_LOCATION`: specify the exact wheel URL or local file path of a pre-compiled wheel to use. All other logic to find the wheel will be skipped. * `VLLM_PRECOMPILED_WHEEL_LOCATION`: specify the exact wheel URL or local file path of a pre-compiled wheel to use. All other logic to find the wheel will be skipped.
* `VLLM_PRECOMPILED_WHEEL_COMMIT`: override the commit hash to download the pre-compiled wheel. It can be `nightly` to use the last **already built** commit on the main branch. * `VLLM_PRECOMPILED_WHEEL_COMMIT`: override the commit hash to download the pre-compiled wheel. It can be `nightly` to use the last **already built** commit on the main branch.
* `VLLM_PRECOMPILED_WHEEL_VARIANT`: specify the variant subdirectory to use on the nightly index, e.g., `cu129`, `cpu`. If not specified, the CUDA variant with `VLLM_MAIN_CUDA_VERSION` will be tried, then fallback to the default variant on the remote index. * `VLLM_PRECOMPILED_WHEEL_VARIANT`: specify the variant subdirectory to use on the nightly index, e.g., `cu129`, `cu130`, `cpu`. If not specified, the variant is auto-detected based on your system's CUDA version (from PyTorch or nvidia-smi). You can also set `VLLM_MAIN_CUDA_VERSION` to override auto-detection.
You can find more information about vLLM's wheels in [Install the latest code](#install-the-latest-code). You can find more information about vLLM's wheels in [Install the latest code](#install-the-latest-code).
......
...@@ -438,6 +438,49 @@ class precompiled_wheel_utils: ...@@ -438,6 +438,49 @@ class precompiled_wheel_utils:
except ImportError: except ImportError:
return False return False
@staticmethod
def detect_system_cuda_variant() -> str:
    """Choose the precompiled-wheel CUDA variant for this machine.

    Resolution order:

    1. If ``VLLM_MAIN_CUDA_VERSION`` is explicitly set, the variant is
       built directly from it: ``"cu"`` followed by the first three
       characters of the version with dots removed. The lookup table
       below is not consulted on this path.
    2. ``torch.version.cuda``, when torch can be imported.
    3. The ``CUDA Version: X.Y`` field parsed from ``nvidia-smi`` output.
    4. ``envs.VLLM_MAIN_CUDA_VERSION`` as the last-resort default.

    For sources 2-4 only the major version matters: it is looked up in a
    fixed table, and a major that is not in the table gets the entry of
    the highest known major.

    Returns:
        The variant name, e.g. ``"cu129"``.
    """
    # CUDA major version -> wheel variant hosted on wheels.vllm.ai
    variant_by_major = {12: "cu129", 13: "cu130"}

    # An explicit VLLM_MAIN_CUDA_VERSION wins over any probing.
    if envs.is_set("VLLM_MAIN_CUDA_VERSION"):
        forced = envs.VLLM_MAIN_CUDA_VERSION
        print(f"Using VLLM_MAIN_CUDA_VERSION={forced}")
        digits = forced.replace(".", "")
        return "cu" + digits[:3]

    detected = None

    # Probe 1: the CUDA version reported by torch. Best-effort: any
    # failure while importing or reading it just moves on to the next probe.
    try:
        import torch

        detected = torch.version.cuda
    except Exception:
        pass

    # Probe 2: the version printed by nvidia-smi. Also best-effort.
    if not detected:
        try:
            smi = subprocess.run(
                ["nvidia-smi"], capture_output=True, text=True, timeout=10
            )
            found = re.search(r"CUDA Version:\s*(\d+\.\d+)", smi.stdout)
            if found:
                detected = found.group(1)
        except Exception:
            pass

    # Nothing detected: use the configured default version.
    if not detected:
        detected = envs.VLLM_MAIN_CUDA_VERSION

    # Only the major component selects the variant; unknown majors use
    # the entry for the highest major in the table.
    major_text, _, _ = detected.partition(".")
    major = int(major_text)
    if major in variant_by_major:
        chosen = variant_by_major[major]
    else:
        chosen = variant_by_major[max(variant_by_major)]

    print(f"Detected CUDA {detected}, using variant {chosen}")
    return chosen
@staticmethod @staticmethod
def find_local_rocm_wheel() -> str | None: def find_local_rocm_wheel() -> str | None:
"""Search for a local vllm wheel in common locations.""" """Search for a local vllm wheel in common locations."""
...@@ -513,8 +556,8 @@ class precompiled_wheel_utils: ...@@ -513,8 +556,8 @@ class precompiled_wheel_utils:
1. user-specified wheel location (can be either local or remote, via 1. user-specified wheel location (can be either local or remote, via
VLLM_PRECOMPILED_WHEEL_LOCATION) VLLM_PRECOMPILED_WHEEL_LOCATION)
2. user-specified variant (VLLM_PRECOMPILED_WHEEL_VARIANT) from nightly repo 2. user-specified variant (VLLM_PRECOMPILED_WHEEL_VARIANT) from nightly repo
3. the variant corresponding to VLLM_MAIN_CUDA_VERSION from nightly repo or auto-detected CUDA variant based on system (torch, nvidia-smi)
4. the default variant from nightly repo 3. the default variant from nightly repo
If downloading from the nightly repo, the commit can be specified via If downloading from the nightly repo, the commit can be specified via
VLLM_PRECOMPILED_WHEEL_COMMIT; otherwise, the head commit in the main branch VLLM_PRECOMPILED_WHEEL_COMMIT; otherwise, the head commit in the main branch
...@@ -533,9 +576,11 @@ class precompiled_wheel_utils: ...@@ -533,9 +576,11 @@ class precompiled_wheel_utils:
import platform import platform
arch = platform.machine() arch = platform.machine()
# try to fetch the wheel metadata from the nightly wheel repo # try to fetch the wheel metadata from the nightly wheel repo,
main_variant = "cu" + envs.VLLM_MAIN_CUDA_VERSION.replace(".", "") # detecting CUDA variant from system if not specified
variant = os.getenv("VLLM_PRECOMPILED_WHEEL_VARIANT", main_variant) variant = os.getenv("VLLM_PRECOMPILED_WHEEL_VARIANT", None)
if variant is None:
variant = precompiled_wheel_utils.detect_system_cuda_variant()
commit = os.getenv("VLLM_PRECOMPILED_WHEEL_COMMIT", "").lower() commit = os.getenv("VLLM_PRECOMPILED_WHEEL_COMMIT", "").lower()
if not commit or len(commit) != 40: if not commit or len(commit) != 40:
print( print(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment