import os
import torch
import platform
import requests
import subprocess
from pathlib import Path
from setuptools import setup, find_packages
from torch.utils.cpp_extension import CUDAExtension
from typing import Optional, Union

def get_latest_kernels_version(repo):
    """
    Get the latest released version of the kernels from the GitHub repo.
    """
    response = requests.get(
        f"https://api.github.com/repos/{repo}/releases/latest", timeout=30
    )
    response.raise_for_status()
    data = response.json()
    tag_name = data["tag_name"]
    version = tag_name.replace("v", "")
    return version


def get_kernels_whl_url(
    gpu_system_version,
    release_version,
    python_version,
    platform_name,
    architecture,
):
    """
    Get the URL of the kernels wheel file for the given build configuration.
    """
    return f"https://github.com/casper-hansen/AutoAWQ_kernels/releases/download/v{release_version}/autoawq_kernels-{release_version}+{gpu_system_version}-cp{python_version}-cp{python_version}-{platform_name}_{architecture}.whl"

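# For example (illustrative values only), get_kernels_whl_url("cu118", "0.2.0",
# "310", "linux", "x86_64") returns:
# https://github.com/casper-hansen/AutoAWQ_kernels/releases/download/v0.2.0/autoawq_kernels-0.2.0+cu118-cp310-cp310-linux_x86_64.whl
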
def get_sha(pytorch_root: Union[str, Path]) -> str:
    try:
        return subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=pytorch_root).decode('ascii').strip()
    except Exception:
        return 'Unknown'


def get_abi():
    try:
        # Query gcc for the _GLIBCXX_USE_CXX11_ABI macro to record the C++ ABI flag.
        command = "echo '#include <string>' | gcc -x c++ -E -dM - | fgrep _GLIBCXX_USE_CXX11_ABI"
        result = subprocess.run(command, shell=True, capture_output=True, text=True)
        output = result.stdout.strip()
        if not output:
            return 'abiUnknown'
        return "abi" + output.split(" ")[-1]
    except Exception:
        return 'abiUnknown'


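# get_version_add() assembles a DCU-specific local version string of the form
# "git<sha7>.abi<flag>[.dtk<version>].torch<x.y.z>" and writes it into
# __dcu_version__ inside awq/__init__.py.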
def get_version_add(sha: Optional[str] = None) -> None:
    version = ''
    autoawq_root = os.path.dirname(os.path.abspath(__file__))
    add_version_path = os.path.join(autoawq_root, "awq", "__init__.py")
    if sha != 'Unknown':
        if sha is None:
            sha = get_sha(autoawq_root)
        version = 'git' + sha[:7]

    # C++ ABI flag
    version += "." + get_abi()

    # dtk (DCU toolkit) version, read from $ROCM_PATH/.info/rocm_version
    if os.getenv("ROCM_PATH"):
        rocm_path = os.getenv('ROCM_PATH', "")
        rocm_version_path = os.path.join(rocm_path, '.info', "rocm_version")
        with open(rocm_version_path, 'r', encoding='utf-8') as file:
            lines = file.readlines()
        rocm_version = lines[0][:-2].replace(".", "")
        version += ".dtk" + rocm_version

    # torch version
    version += ".torch" + torch.__version__[:5]

    # Rewrite __dcu_version__ in awq/__init__.py with the assembled local version.
    with open(add_version_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    lines[1] = "__dcu_version__ = '0.2.5+das1.1.{}'\n".format(version)
    with open(add_version_path, encoding="utf-8", mode="w") as file:
        file.writelines(lines)

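# get_version() refreshes __dcu_version__ via get_version_add() and then reads
# the value back out of awq/__init__.py.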
def get_version():
    get_version_add()
    version_file = 'awq/__init__.py'
    # Execute awq/__init__.py in an isolated namespace and read back the
    # freshly written __dcu_version__.
    namespace = {}
    with open(version_file, encoding='utf-8') as f:
        exec(compile(f.read(), version_file, 'exec'), namespace)
    return namespace['__dcu_version__']


AUTOAWQ_VERSION = ""
PYPI_BUILD = os.getenv("PYPI_BUILD", "0") == "1"

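# Detect the GPU toolkit: prefer the CUDA_VERSION / ROCM_VERSION environment
# variables, fall back to the toolkit versions PyTorch was built against, and
# normalize them to short tags (e.g. "12.1" -> "121", "5.7.1" -> "571") used
# in version suffixes and wheel names below.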
CUDA_VERSION = os.getenv("CUDA_VERSION", None) or torch.version.cuda
if CUDA_VERSION:
    CUDA_VERSION = "".join(CUDA_VERSION.split("."))[:3]

ROCM_VERSION = os.getenv("ROCM_VERSION", None) or torch.version.hip
if ROCM_VERSION:
    if ROCM_VERSION.startswith("5.6"):
        ROCM_VERSION = "5.6.1"
    elif ROCM_VERSION.startswith("5.7"):
        ROCM_VERSION = "5.7.1"

    ROCM_VERSION = "".join(ROCM_VERSION.split("."))[:3]

if not PYPI_BUILD:
    if CUDA_VERSION:
        AUTOAWQ_VERSION += f"+cu{CUDA_VERSION}"
    elif ROCM_VERSION:
        # DCU/ROCm builds embed the full local version from get_version()
        # (git sha, ABI, dtk and torch versions) instead of a plain
        # f"+rocm{ROCM_VERSION}" suffix.
        AUTOAWQ_VERSION += get_version()
    else:
        raise RuntimeError(
            "Your system must have either Nvidia or AMD GPU to build this package."
        )

common_setup_kwargs = {
    "version": AUTOAWQ_VERSION,
    "name": "autoawq",
    "author": "Casper Hansen",
    "license": "MIT",
    "python_requires": ">=3.8.0",
    "description": "AutoAWQ implements the AWQ algorithm for 4-bit quantization with a 2x speedup during inference.",
    "long_description": (Path(__file__).parent / "README.md").read_text(
        encoding="UTF-8"
    ),
    "long_description_content_type": "text/markdown",
    "url": "https://github.com/casper-hansen/AutoAWQ",
    "keywords": ["awq", "autoawq", "quantization", "transformers"],
    "platforms": ["linux", "windows"],
    "classifiers": [
        "Environment :: GPU :: NVIDIA CUDA :: 11.8",
        "Environment :: GPU :: NVIDIA CUDA :: 12",
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: C++",
    ],
}

requirements = [
    "torch>=2.0.1",
    "transformers>=4.35.0",
    "tokenizers>=0.12.1",
    "typing_extensions>=4.8.0",
    "accelerate",
    "datasets",
    "zstandard",
]

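# Probe for already-installed AWQ kernel extensions (exlv2_ext on ROCm,
# awq_ext on CUDA); when they are missing, a matching autoawq-kernels
# requirement is appended below.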
try:
    if ROCM_VERSION:
        import exlv2_ext
    else:
        import awq_ext

    KERNELS_INSTALLED = True
except ImportError:
    KERNELS_INSTALLED = False

# Pre-built kernels are available on PyPI for CUDA 12 (cu121) builds only;
# for everything else, we need to download the wheels from the GitHub releases.
if not KERNELS_INSTALLED and (CUDA_VERSION or ROCM_VERSION):
    if CUDA_VERSION and CUDA_VERSION.startswith("12"):
        requirements.append("autoawq-kernels")
    elif (CUDA_VERSION and CUDA_VERSION.startswith("11")) or ROCM_VERSION in ["561", "571"]:
        gpu_system_version = (
            f"cu{CUDA_VERSION}" if CUDA_VERSION else f"rocm{ROCM_VERSION}"
        )
        kernels_version = get_latest_kernels_version("casper-hansen/AutoAWQ_kernels")
        python_version = "".join(platform.python_version_tuple()[:2])
        platform_name = platform.system().lower()
        architecture = platform.machine().lower()
        kernels_wheel_url = get_kernels_whl_url(
            gpu_system_version,
            kernels_version,
            python_version,
            platform_name,
            architecture,
        )
        requirements.append(f"autoawq-kernels@{kernels_wheel_url}")
    else:
        raise RuntimeError(
            "Your system have a GPU with an unsupported CUDA or ROCm version. "
            "Please install the kernels manually from https://github.com/casper-hansen/AutoAWQ_kernels"
        )

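# Declaring an (empty) CUDAExtension makes the built wheel platform-specific
# rather than pure-Python, so it carries interpreter/platform tags
# (e.g. cp310-cp310-linux_x86_64) matching the autoawq-kernels wheels.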
force_extension = os.getenv("PYPI_FORCE_TAGS", "0")
if force_extension == "1":
    # NOTE: We create an empty CUDAExtension because torch helps us with
    # creating the right boilerplate to enable correct targeting of
    # the autoawq-kernels package
    common_setup_kwargs["ext_modules"] = [
        CUDAExtension(
            name="test_kernel",
            sources=[],
        )
    ]

setup(
    packages=find_packages(),
    install_requires=requirements,
    extras_require={
        "eval": ["lm_eval==0.4.1", "tabulate", "protobuf", "evaluate", "scipy"],
        "dev": ["black", "mkdocstrings-python", "mkdocs-material", "griffe-typingdoc"]
    },
    **common_setup_kwargs,
)