"""setup.py for AutoAWQ.

Builds a version string (optionally embedding git sha / DTK / torch versions),
selects prebuilt kernel wheels for the detected CUDA or ROCm stack, and calls
``setuptools.setup``.
"""
import os
import platform
import subprocess
from pathlib import Path
from typing import Optional, Union

import requests
import torch
from setuptools import setup, find_packages
from torch.utils.cpp_extension import CUDAExtension

pwd = os.path.dirname(__file__)

# Only embed git/DTK/torch metadata into the version when explicitly requested.
add_git_version = int(os.environ.get('ADD_GIT_VERSION', '0')) == 1


def get_latest_kernels_version(repo):
    """Return the latest release version of *repo* on GitHub (tag without 'v').

    :param repo: "owner/name" GitHub repository slug.
    :raises requests.RequestException: on network failure or timeout.
    """
    # timeout added so a dead network cannot hang the build forever.
    response = requests.get(
        f"https://api.github.com/repos/{repo}/releases/latest", timeout=30
    )
    data = response.json()
    tag_name = data["tag_name"]
    return tag_name.replace("v", "")


def get_kernels_whl_url(
    gpu_system_version,
    release_version,
    python_version,
    platform,
    architecture,
):
    """Build the download URL of a prebuilt AutoAWQ kernels wheel on GitHub.

    :param gpu_system_version: e.g. "cu118" or "rocm561".
    :param release_version: kernels release, e.g. "0.0.6".
    :param python_version: CPython tag digits, e.g. "310".
    :param platform: lowercase OS name (shadows the stdlib module on purpose —
        kept for interface compatibility).
    :param architecture: machine architecture, e.g. "x86_64".
    """
    return f"https://github.com/casper-hansen/AutoAWQ_kernels/releases/download/v{release_version}/autoawq_kernels-{release_version}+{gpu_system_version}-cp{python_version}-cp{python_version}-{platform}_{architecture}.whl"


def get_sha(pytorch_root: Union[str, Path]) -> str:
    """Return the git HEAD sha of *pytorch_root*, or 'Unknown' on any failure."""
    try:
        return (
            subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=pytorch_root)
            .decode('ascii')
            .strip()
        )
    except Exception:
        return 'Unknown'


def get_abi() -> str:
    """Probe gcc for the libstdc++ _GLIBCXX_USE_CXX11_ABI flag.

    Returns "abi0"/"abi1", or "abiUnknown" when the probe fails.
    """
    try:
        # NOTE(review): the original command piped a bare "#include " (header
        # name lost); "<string>" restored so the macro dump actually contains
        # _GLIBCXX_USE_CXX11_ABI. This helper is currently unused (call site
        # commented out below).
        command = (
            "echo '#include <string>' | gcc -x c++ -E -dM - "
            "| fgrep _GLIBCXX_USE_CXX11_ABI"
        )
        result = subprocess.run(command, shell=True, capture_output=True, text=True)
        output = result.stdout.strip()
        return "abi" + output.split(" ")[-1]
    except Exception:
        return 'abiUnknown'


def get_version_add(sha: Optional[str] = None) -> None:
    """Compose the DCU version suffix and write it into awq/__init__.py.

    Rewrites line 2 of awq/__init__.py to ``__dcu_version__ = '0.2.5+<suffix>'``
    where the suffix may include git sha (when ADD_GIT_VERSION=1), the DTK
    (ROCm) version and the torch version.

    :param sha: git sha to embed; looked up from this repo when None.
    """
    # Mark the source tree safe so git commands work when UID differs (e.g.
    # docker builds). Argv list (not shell string) so paths with spaces work.
    subprocess.run(
        ['git', 'config', '--global', '--add', 'safe.directory', pwd],
        capture_output=False,
        text=True,
    )

    version = ''
    autoawq_root = os.path.dirname(os.path.abspath(__file__))
    add_version_path = os.path.join(os.path.join(autoawq_root, "awq"), "__init__.py")

    if add_git_version:
        if sha != 'Unknown':
            if sha is None:
                sha = get_sha(autoawq_root)
            version = 'das.opt1.' + sha[:7]
        else:
            version = 'das.opt1'

    # abi
    #version += "." + get_abi()

    # dtk version: read e.g. "5.7.1" from $ROCM_PATH/.info/rocm_version.
    if os.getenv("ROCM_PATH"):
        rocm_path = os.getenv('ROCM_PATH', "")
        rocm_version_path = os.path.join(rocm_path, '.info', "rocm_version")
        with open(rocm_version_path, 'r', encoding='utf-8') as file:
            lines = file.readlines()
        # strip() drops the trailing newline that previously leaked into the
        # version string.
        rocm_version = lines[0].strip().replace(".", "")
        version += ".dtk" + rocm_version

    # torch version (major.minor.patch prefix only).
    version += ".torch" + torch.__version__[:5]

    with open(add_version_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()
    # Line 2 of awq/__init__.py is the __dcu_version__ assignment by contract.
    lines[1] = "__dcu_version__ = '0.2.5+{}'\n".format(version)
    with open(add_version_path, encoding="utf-8", mode="w") as file:
        file.writelines(lines)


def get_version():
    """Regenerate and return the ``__dcu_version__`` string from awq/__init__.py."""
    get_version_add()
    version_file = 'awq/__init__.py'
    # Execute into an explicit namespace: exec() + locals() inside a function
    # is a fragile CPython-only trick.
    namespace = {}
    with open(version_file, encoding='utf-8') as f:
        exec(compile(f.read(), version_file, 'exec'), namespace)
    return namespace['__dcu_version__']


AUTOAWQ_VERSION = ""
PYPI_BUILD = os.getenv("PYPI_BUILD", "0") == "1"

# Normalize "12.1" -> "121" (at most 3 digits).
CUDA_VERSION = os.getenv("CUDA_VERSION", None) or torch.version.cuda
if CUDA_VERSION:
    CUDA_VERSION = "".join(CUDA_VERSION.split("."))[:3]

ROCM_VERSION = os.getenv("ROCM_VERSION", None) or torch.version.hip
if ROCM_VERSION:
    # Prebuilt kernels only exist for the final patch releases of 5.6/5.7.
    if ROCM_VERSION.startswith("5.6"):
        ROCM_VERSION = "5.6.1"
    elif ROCM_VERSION.startswith("5.7"):
        ROCM_VERSION = "5.7.1"
    ROCM_VERSION = "".join(ROCM_VERSION.split("."))[:3]

if not PYPI_BUILD:
    if CUDA_VERSION:
        AUTOAWQ_VERSION += f"+cu{CUDA_VERSION}"
    elif ROCM_VERSION:
        #version_info = get_version()
        AUTOAWQ_VERSION += get_version()  # f"+rocm{ROCM_VERSION}"
    else:
        raise RuntimeError(
            "Your system must have either Nvidia or AMD GPU to build this package."
        )

common_setup_kwargs = {
    "version": AUTOAWQ_VERSION,
    "name": "autoawq",
    "author": "Casper Hansen",
    "license": "MIT",
    "python_requires": ">=3.8.0",
    "description": "AutoAWQ implements the AWQ algorithm for 4-bit quantization with a 2x speedup during inference.",
    "long_description": (Path(__file__).parent / "README.md").read_text(
        encoding="UTF-8"
    ),
    "long_description_content_type": "text/markdown",
    "url": "https://github.com/casper-hansen/AutoAWQ",
    "keywords": ["awq", "autoawq", "quantization", "transformers"],
    "platforms": ["linux", "windows"],
    "classifiers": [
        "Environment :: GPU :: NVIDIA CUDA :: 11.8",
        "Environment :: GPU :: NVIDIA CUDA :: 12",
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: C++",
    ],
}

requirements = [
    "torch>=2.0.1",
    "transformers>=4.35.0",
    "tokenizers>=0.12.1",
    "typing_extensions>=4.8.0",
    "accelerate",
    "datasets",
    "zstandard",
]

# Detect whether the compiled kernels are already importable.
try:
    if ROCM_VERSION:
        import exlv2_ext
    else:
        import awq_ext
    KERNELS_INSTALLED = True
except ImportError:
    KERNELS_INSTALLED = False

# kernels can be downloaded from pypi for cuda+121 only
# for everything else, we need to download the wheels from github
if not KERNELS_INSTALLED and (CUDA_VERSION or ROCM_VERSION):
    if CUDA_VERSION and CUDA_VERSION.startswith("12"):
        requirements.append("autoawq-kernels")
    elif (CUDA_VERSION and CUDA_VERSION.startswith("11")) or ROCM_VERSION in [
        "561",
        "571",
    ]:
        gpu_system_version = (
            f"cu{CUDA_VERSION}" if CUDA_VERSION else f"rocm{ROCM_VERSION}"
        )
        kernels_version = get_latest_kernels_version("casper-hansen/AutoAWQ_kernels")
        python_version = "".join(platform.python_version_tuple()[:2])
        platform_name = platform.system().lower()
        architecture = platform.machine().lower()
        latest_rocm_kernels_wheels = get_kernels_whl_url(
            gpu_system_version,
            kernels_version,
            python_version,
            platform_name,
            architecture,
        )
        requirements.append(f"autoawq-kernels@{latest_rocm_kernels_wheels}")
    else:
        raise RuntimeError(
            "Your system has a GPU with an unsupported CUDA or ROCm version. "
            "Please install the kernels manually from https://github.com/casper-hansen/AutoAWQ_kernels"
        )

force_extension = os.getenv("PYPI_FORCE_TAGS", "0")
if force_extension == "1":
    # NOTE: We create an empty CUDAExtension because torch helps us with
    # creating the right boilerplate to enable correct targeting of
    # the autoawq-kernels package
    common_setup_kwargs["ext_modules"] = [
        CUDAExtension(
            name="test_kernel",
            sources=[],
        )
    ]

setup(
    packages=find_packages(),
    install_requires=requirements,
    extras_require={
        "eval": ["lm_eval==0.4.1", "tabulate", "protobuf", "evaluate", "scipy"],
        "dev": ["black", "mkdocstrings-python", "mkdocs-material", "griffe-typingdoc"],
    },
    **common_setup_kwargs,
)