Unverified commit 30a44ae1 authored by Woosuk Kwon, committed by GitHub

Support non-default CUDA version (#14)

parent d562aa63
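The change appends a PEP 440 local version suffix to the package version when the build's CUDA toolkit differs from the default one. A minimal sketch of that scheme is below, assuming `bare_metal_version` is the CUDA toolkit version detected elsewhere in `setup.py`; the helper name `version_with_cuda_suffix` and the example package version `2.5.9` are illustrative, not part of the commit.

```python
# Minimal sketch of the versioning scheme; not the commit's code verbatim.
MAIN_CUDA_VERSION = "12.1"  # CUDA version of the default wheel


def version_with_cuda_suffix(package_version: str, bare_metal_version: str) -> str:
    """Append a local suffix such as "+cu118" for non-default CUDA builds."""
    if bare_metal_version != MAIN_CUDA_VERSION:
        # "11.8" -> "118"; keep at most three characters, matching the diff's slice.
        suffix = bare_metal_version.replace(".", "")[:3]
        return f"{package_version}+cu{suffix}"
    return package_version


# Wheels built against the default CUDA keep the plain version;
# other toolkits get a local version suffix.
assert version_with_cuda_suffix("2.5.9", "12.1") == "2.5.9"
assert version_with_cuda_suffix("2.5.9", "11.8") == "2.5.9+cu118"
```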
@@ -281,11 +281,21 @@ class NinjaBuildExtension(BuildExtension):
 PYTORCH_VERSION = "2.4.0"
 CUDA_VERSION = "12.1"
+MAIN_CUDA_VERSION = "12.1"
+
+def get_version() -> str:
+    version = get_package_version()
+    cuda_version = str(bare_metal_version)
+    if cuda_version != MAIN_CUDA_VERSION:
+        cuda_version_str = cuda_version.replace(".", "")[:3]
+        version += f"+cu{cuda_version_str}"
+    return version
+
 setup(
     name="vllm-flash-attn",
-    version=get_package_version(),
+    version=get_version(),
     packages=find_packages(
         exclude=(
             "build",
@@ -300,7 +310,7 @@ setup(
     ),
     author="vLLM Team",
     description="Forward-only flash-attn",
-    long_description=f"Forward-only flash-attn package built for PyTorch {PYTORCH_VERSION} and CUDA {CUDA_VERSION}",
+    long_description=f"Forward-only flash-attn package built for PyTorch {PYTORCH_VERSION} and CUDA {MAIN_CUDA_VERSION}",
     url="https://github.com/vllm-project/flash-attention.git",
     classifiers=[
         "Programming Language :: Python :: 3",