Commit 8907d182 authored by Casper

Bump version

parent e1ed4bd6

setup.py:
import os
import torch
from pathlib import Path
from setuptools import setup, find_packages
from distutils.sysconfig import get_python_lib
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

os.environ["CC"] = "g++"
os.environ["CXX"] = "g++"

-AUTOAWQ_KERNELS_VERSION = "0.0.3"
+AUTOAWQ_KERNELS_VERSION = "0.0.4"
PYPI_BUILD = os.getenv("PYPI_BUILD", "0") == "1"
CUDA_VERSION = os.getenv("CUDA_VERSION", None) or torch.version.cuda
ROCM_VERSION = os.environ.get("ROCM_VERSION", None) or torch.version.hip
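
# Example of the suffix logic below: CUDA_VERSION="11.8" becomes "118" and
# yields "0.0.4+cu118"; ROCM_VERSION="5.6" becomes "56" and yields
# "0.0.4+rocm56". (Illustrative values, not read from any particular system.)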
if not PYPI_BUILD:
    # PyPI rejects local version identifiers (the PEP 440 "+..." segment),
    # so the CUDA/ROCm suffix is only appended for non-PyPI builds.
    if CUDA_VERSION:
        CUDA_VERSION = "".join(CUDA_VERSION.split("."))[:3]
        AUTOAWQ_KERNELS_VERSION += f"+cu{CUDA_VERSION}"
    elif ROCM_VERSION:
        ROCM_VERSION = "".join(ROCM_VERSION.split("."))[:3]
        AUTOAWQ_KERNELS_VERSION += f"+rocm{ROCM_VERSION}"
    else:
        raise RuntimeError(
            "Your system must have either an NVIDIA or an AMD GPU to build this package."
        )
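
# Illustrative build invocations (assumed workflow, not from this repo's docs):
#   CUDA_VERSION=11.8 pip install -e .        # local build -> 0.0.4+cu118
#   PYPI_BUILD=1 python setup.py bdist_wheel  # PyPI build  -> 0.0.4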
print(f"Building AutoAWQ Kernels version {AUTOAWQ_KERNELS_VERSION}")
common_setup_kwargs = {
"version": AUTOAWQ_KERNELS_VERSION,
"name": "autoawq_kernels",
"author": "Casper Hansen",
"license": "MIT",
"python_requires": ">=3.8.0",
"description": "AutoAWQ Kernels implements the AWQ kernels.",
"long_description": (Path(__file__).parent / "README.md").read_text(
encoding="UTF-8"
),
"long_description_content_type": "text/markdown",
"url": "https://github.com/casper-hansen/AutoAWQ_kernels",
"keywords": ["awq", "autoawq", "quantization", "transformers"],
"platforms": ["linux", "windows"],
"classifiers": [
"Environment :: GPU :: NVIDIA CUDA :: 11.8",
"Environment :: GPU :: NVIDIA CUDA :: 12",
"License :: OSI Approved :: MIT License",
"Natural Language :: English",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: C++",
],
}
requirements = [
    "torch>=2.0.1",
]
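
# get_include_dirs() adds the CUDA runtime headers shipped inside the Python
# environment (the nvidia/cuda_runtime/include directory under site-packages,
# when it exists) plus the repository root.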
def get_include_dirs():
    include_dirs = []

    if CUDA_VERSION:
        conda_cuda_include_dir = os.path.join(
            get_python_lib(), "nvidia/cuda_runtime/include"
        )
        if os.path.isdir(conda_cuda_include_dir):
            include_dirs.append(conda_cuda_include_dir)

    this_dir = os.path.dirname(os.path.abspath(__file__))
    include_dirs.append(this_dir)

    return include_dirs
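
# Older torch releases keep CUDAGeneratorImpl.h directly under include/ATen;
# the -DOLD_GENERATOR_PATH define lets the kernel sources include it from the
# location that matches the installed torch.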
def get_generator_flag():
    generator_flag = []
    torch_dir = torch.__path__[0]
    if os.path.exists(
        os.path.join(torch_dir, "include", "ATen", "CUDAGeneratorImpl.h")
    ):
        generator_flag = ["-DOLD_GENERATOR_PATH"]

    return generator_flag
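
# Every locally visible GPU must be compute capability 7.5 (Turing) or newer;
# the -gencode flags then target a fixed set of architectures (7.5, 8.0, 8.6,
# 8.9, 9.0) rather than only the GPUs present at build time.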
def get_compute_capabilities():
    capability_flags = []

    if CUDA_VERSION:
        # Validate the compute capability of every locally visible CUDA GPU.
        for i in range(torch.cuda.device_count()):
            major, minor = torch.cuda.get_device_capability(i)
            cc = major * 10 + minor
            if cc < 75:
                raise RuntimeError(
                    "GPUs with compute capability less than 7.5 are not supported."
                )

        # Generate code for the full set of supported architectures.
        compute_capabilities = {75, 80, 86, 89, 90}
        for cap in compute_capabilities:
            capability_flags += ["-gencode", f"arch=compute_{cap},code=sm_{cap}"]

    return capability_flags
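
# Compiler flags: Windows gets a relaxed flag set (only the arch flags, unless
# INCLUDE_ARCH=0), while other platforms compile with C++17, OpenMP, fast math,
# and bfloat16 support enabled.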
def get_extra_compile_args(arch_flags, generator_flags):
    extra_compile_args = {}

    if os.name == "nt" and CUDA_VERSION:
        include_arch = os.getenv("INCLUDE_ARCH", "1") == "1"

        # Relaxed args on Windows.
        if include_arch:
            extra_compile_args = {"nvcc": arch_flags}
    elif CUDA_VERSION:
        extra_compile_args = {
            "cxx": ["-g", "-O3", "-fopenmp", "-lgomp", "-std=c++17", "-DENABLE_BF16"],
            "nvcc": [
                "-O3",
                "-std=c++17",
                "-DENABLE_BF16",
                "-U__CUDA_NO_HALF_OPERATORS__",
                "-U__CUDA_NO_HALF_CONVERSIONS__",
                "-U__CUDA_NO_BFLOAT16_OPERATORS__",
                "-U__CUDA_NO_BFLOAT16_CONVERSIONS__",
                "-U__CUDA_NO_BFLOAT162_OPERATORS__",
                "-U__CUDA_NO_BFLOAT162_CONVERSIONS__",
                "--expt-relaxed-constexpr",
                "--expt-extended-lambda",
                "--use_fast_math",
            ]
            + arch_flags
            + generator_flags,
        }

    return extra_compile_args
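
# On Windows, cuBLAS is linked explicitly from the CUDA_PATH installation.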
def get_extra_link_args():
    extra_link_args = []
    if os.name == "nt" and CUDA_VERSION:
        cuda_path = os.environ.get("CUDA_PATH", None)
        extra_link_args = ["-L", f"{cuda_path}/lib/x64/cublas.lib"]

    return extra_link_args

include_dirs = get_include_dirs()
extra_link_args = get_extra_link_args()
generator_flags = get_generator_flag()
arch_flags = get_compute_capabilities()
extra_compile_args = get_extra_compile_args(arch_flags, generator_flags)
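
# Four extension modules are defined: the core AWQ kernels (awq_ext, CUDA-only
# because of the inline PTX noted below), the ExLlama/ExLlamaV2 kernels
# (exl_ext, exlv2_ext), and, on non-Windows CUDA systems, the
# FasterTransformer attention kernels (awq_ft_ext).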
extensions = []

if CUDA_VERSION:
    # The awq_ext sources contain inline PTX that cannot be hipified,
    # so this module is built only for CUDA.
    extensions.append(
        CUDAExtension(
            "awq_ext",
            [
                "awq_ext/pybind_awq.cpp",
                "awq_ext/quantization/gemm_cuda_gen.cu",
                "awq_ext/layernorm/layernorm.cu",
                "awq_ext/position_embedding/pos_encoding_kernels.cu",
                "awq_ext/quantization/gemv_cuda.cu",
                "awq_ext/vllm/moe_alig_block.cu",
                "awq_ext/vllm/activation.cu",
                "awq_ext/vllm/topk_softmax_kernels.cu",
            ],
            extra_compile_args=extra_compile_args,
        )
    )

extensions.append(
    CUDAExtension(
        "exl_ext",
        [
            "awq_ext/exllama/exllama_ext.cpp",
            "awq_ext/exllama/cuda_buffers.cu",
            "awq_ext/exllama/cuda_func/column_remap.cu",
            "awq_ext/exllama/cuda_func/q4_matmul.cu",
            "awq_ext/exllama/cuda_func/q4_matrix.cu",
        ],
        extra_compile_args=extra_compile_args,
        extra_link_args=extra_link_args,
    )
)

extensions.append(
    CUDAExtension(
        "exlv2_ext",
        [
            "awq_ext/exllamav2/ext.cpp",
            "awq_ext/exllamav2/cuda/q_matrix.cu",
            "awq_ext/exllamav2/cuda/q_gemm.cu",
        ],
        extra_compile_args=extra_compile_args,
        extra_link_args=extra_link_args,
    )
)

if os.name != "nt" and CUDA_VERSION:
# FasterTransformer kernels
extensions.append(
CUDAExtension(
"awq_ft_ext",
[
"awq_ext/pybind_awq_ft.cpp",
"awq_ext/attention/ft_attention.cpp",
"awq_ext/attention/decoder_masked_multihead_attention.cu",
],
extra_compile_args=extra_compile_args,
)
)
additional_setup_kwargs = {
    "ext_modules": extensions,
    "cmdclass": {"build_ext": BuildExtension},
}
common_setup_kwargs.update(additional_setup_kwargs)

setup(
    packages=find_packages(),
    install_requires=requirements,
    include_dirs=include_dirs,
    **common_setup_kwargs,
)
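
# After a successful install, each built module is importable at the top level,
# e.g. (an illustrative smoke test, not part of this repo):
#   python -c "import awq_ext, exl_ext, exlv2_ext"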