import os import torch import subprocess from pathlib import Path from setuptools import setup, find_packages from distutils.sysconfig import get_python_lib from torch.utils.cpp_extension import BuildExtension, CUDAExtension from typing import Optional, Union pwd = os.path.dirname(__file__) add_git_version = False if int(os.environ.get('ADD_GIT_VERSION', '0')) == 1: add_git_version = True os.environ["CC"] = "g++" os.environ["CXX"] = "g++" AUTOAWQ_KERNELS_VERSION = "0.0.6" PYPI_BUILD = os.getenv("PYPI_BUILD", "0") == "1" CUDA_VERSION = os.getenv("CUDA_VERSION", None) or torch.version.cuda ROCM_VERSION = os.environ.get("ROCM_VERSION", None) or torch.version.hip def get_sha(pytorch_root: Union[str, Path]) -> str: try: return subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=pytorch_root).decode('ascii').strip() except Exception: return 'Unknown' def get_abi(): try: command = "echo '#include ' | gcc -x c++ -E -dM - | fgrep _GLIBCXX_USE_CXX11_ABI" result = subprocess.run(command, shell=True, capture_output=True, text=True) output = result.stdout.strip() abi = "abi" + output.split(" ")[-1] return abi except Exception: return 'abiUnknown' def get_version_add(sha: Optional[str] = None) -> str: command = "git config --global --add safe.directory "+pwd result = subprocess.run(command, shell=True, capture_output=False, text=True) version='' autoawq_root = os.path.dirname(os.path.abspath(__file__)) add_version_path = os.path.join(os.path.join(autoawq_root, "awq_ext"), "__init__.py") if add_git_version: if sha != 'Unknown': if sha is None: sha = get_sha(autoawq_root) version = 'das.opt1.' + sha[:7] else: version = 'das.opt1' # abi #version += "." + get_abi() # dtk version if os.getenv("ROCM_PATH"): rocm_path = os.getenv('ROCM_PATH', "") rocm_version_path = os.path.join(rocm_path, '.info', "rocm_version") with open(rocm_version_path, 'r',encoding='utf-8') as file: lines = file.readlines() rocm_version=lines[0][:-2].replace(".", "") version += ".dtk" + rocm_version # torch version version += ".torch" + torch.__version__[:5] lines=[] with open(add_version_path, 'r',encoding='utf-8') as file: lines = file.readlines() if len(lines) > 1: lines[1] = "__dcu_version__ = '0.0.6+{}'\n".format(version) else: # 如果文件行数不足，可以选择添加空行或者抛出异常 lines.append("__dcu_version__ = '0.0.6+{}'\n".format(version)) print("Added missing line to the file content.") with open(add_version_path, encoding="utf-8",mode="w") as file: file.writelines(lines) file.close() def get_version(): get_version_add() version_file = 'awq_ext/__init__.py' with open(version_file, encoding='utf-8') as f: exec(compile(f.read(), version_file, 'exec')) return locals()['__dcu_version__'] if not PYPI_BUILD: # only adding CUDA/ROCM version if we are not building for PyPI to comply with PEP 440 if CUDA_VERSION: CUDA_VERSION = "".join(CUDA_VERSION.split("."))[:3] AUTOAWQ_KERNELS_VERSION += f"+cu{CUDA_VERSION}" elif ROCM_VERSION: ROCM_VERSION = "".join(ROCM_VERSION.split("."))[:3] #AUTOAWQ_KERNELS_VERSION += f"+rocm{ROCM_VERSION}" AUTOAWQ_KERNELS_VERSION = get_version() else: raise RuntimeError( "Your system must have either Nvidia or AMD GPU to build this package." ) print(f"Building AutoAWQ Kernels version {AUTOAWQ_KERNELS_VERSION}") common_setup_kwargs = { "version": AUTOAWQ_KERNELS_VERSION, "name": "autoawq_kernels", "author": "Casper Hansen", "license": "MIT", "python_requires": ">=3.8.0", "description": "AutoAWQ Kernels implements the AWQ kernels.", "long_description": (Path(__file__).parent / "README.md").read_text( encoding="UTF-8" ), "long_description_content_type": "text/markdown", "url": "https://github.com/casper-hansen/AutoAWQ_kernels", "keywords": ["awq", "autoawq", "quantization", "transformers"], "platforms": ["linux", "windows"], "classifiers": [ "Environment :: GPU :: NVIDIA CUDA :: 11.8", "Environment :: GPU :: NVIDIA CUDA :: 12", "License :: OSI Approved :: MIT License", "Natural Language :: English", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: C++", ], } requirements = [ "torch>=2.0.1", ] def get_include_dirs(): include_dirs = [] if CUDA_VERSION: conda_cuda_include_dir = os.path.join( get_python_lib(), "nvidia/cuda_runtime/include" ) if os.path.isdir(conda_cuda_include_dir): include_dirs.append(conda_cuda_include_dir) this_dir = os.path.dirname(os.path.abspath(__file__)) include_dirs.append(this_dir) return include_dirs def get_generator_flag(): generator_flag = [] # if CUDA_VERSION: torch_dir = torch.__path__[0] if os.path.exists( os.path.join(torch_dir, "include", "ATen", "CUDAGeneratorImpl.h") ): generator_flag = ["-DOLD_GENERATOR_PATH"] return generator_flag def get_compute_capabilities( compute_capabilities={75, 80, 86, 89, 90} ): capability_flags = [] if CUDA_VERSION: # Collect the compute capabilities of all available CUDA GPUs for i in range(torch.cuda.device_count()): major, minor = torch.cuda.get_device_capability(i) cc = major * 10 + minor if cc < 75: raise RuntimeError( "GPUs with compute capability less than 7.5 are not supported." ) # Figure out compute capability for cap in compute_capabilities: capability_flags += ["-gencode", f"arch=compute_{cap},code=sm_{cap}"] return capability_flags def get_extra_compile_args(arch_flags, generator_flags): extra_compile_args = {} if os.name == "nt" and CUDA_VERSION: include_arch = os.getenv("INCLUDE_ARCH", "1") == "1" # Relaxed args on Windows if include_arch: extra_compile_args = {"nvcc": arch_flags} elif CUDA_VERSION: extra_compile_args = { "cxx": ["-g", "-O3", "-fopenmp", "-lgomp", "-std=c++17", "-DENABLE_BF16"], "nvcc": [ "-O3", "-std=c++17", "-DENABLE_BF16", "-U__CUDA_NO_HALF_OPERATORS__", "-U__CUDA_NO_HALF_CONVERSIONS__", "-U__CUDA_NO_BFLOAT16_OPERATORS__", "-U__CUDA_NO_BFLOAT16_CONVERSIONS__", "-U__CUDA_NO_BFLOAT162_OPERATORS__", "-U__CUDA_NO_BFLOAT162_CONVERSIONS__", "--expt-relaxed-constexpr", "--expt-extended-lambda", "--use_fast_math", ] + arch_flags + generator_flags, } return extra_compile_args def get_extra_link_args(): extra_link_args = [] if os.name == "nt" and CUDA_VERSION: cuda_path = os.environ.get("CUDA_PATH", None) extra_link_args = ["-L", f"{cuda_path}/lib/x64/cublas.lib"] return extra_link_args include_dirs = get_include_dirs() extra_link_args = get_extra_link_args() generator_flags = get_generator_flag() arch_flags = get_compute_capabilities() extra_compile_args = get_extra_compile_args(arch_flags, generator_flags) extensions = [] if CUDA_VERSION: # contain un-hipifiable inline PTX extensions.append( CUDAExtension( "awq_ext", [ "awq_ext/pybind_awq.cpp", "awq_ext/quantization/gemm_cuda_gen.cu", "awq_ext/layernorm/layernorm.cu", "awq_ext/position_embedding/pos_encoding_kernels.cu", "awq_ext/quantization/gemv_cuda.cu", "awq_ext/vllm/moe_alig_block.cu", "awq_ext/vllm/activation.cu", "awq_ext/vllm/topk_softmax_kernels.cu", ], extra_compile_args=extra_compile_args, ) ) # only compatible with ampere arch_flags = get_compute_capabilities({80, 86, 89, 90}) extra_compile_args_v2 = get_extra_compile_args(arch_flags, generator_flags) extensions.append( CUDAExtension( "awq_v2_ext", [ "awq_ext/pybind_awq_v2.cpp", "awq_ext/quantization_new/gemv/gemv_cuda.cu", "awq_ext/quantization_new/gemm/gemm_cuda.cu", ], extra_compile_args=extra_compile_args_v2, ) ) extensions.append( CUDAExtension( "exl_ext", [ "awq_ext/exllama/exllama_ext.cpp", "awq_ext/exllama/cuda_buffers.cu", "awq_ext/exllama/cuda_func/column_remap.cu", "awq_ext/exllama/cuda_func/q4_matmul.cu", "awq_ext/exllama/cuda_func/q4_matrix.cu", ], extra_compile_args=extra_compile_args, extra_link_args=extra_link_args, ) ) extensions.append( CUDAExtension( "exlv2_ext", [ "awq_ext/exllamav2/ext.cpp", "awq_ext/exllamav2/cuda/q_matrix.cu", "awq_ext/exllamav2/cuda/q_gemm.cu", ], extra_compile_args=extra_compile_args, extra_link_args=extra_link_args, ) ) if os.name != "nt" and CUDA_VERSION: # FasterTransformer kernels extensions.append( CUDAExtension( "awq_ft_ext", [ "awq_ext/pybind_awq_ft.cpp", "awq_ext/attention/ft_attention.cpp", "awq_ext/attention/decoder_masked_multihead_attention.cu", ], extra_compile_args=extra_compile_args, ) ) additional_setup_kwargs = { "ext_modules": extensions, "cmdclass": {"build_ext": BuildExtension}, } common_setup_kwargs.update(additional_setup_kwargs) setup( packages=find_packages(), install_requires=requirements, include_dirs=include_dirs, **common_setup_kwargs, )