setup.py 2.38 KB
Newer Older
Casper Hansen's avatar
Casper Hansen committed
1
import os
2
3
from pathlib import Path
from torch.utils import cpp_extension
Casper Hansen's avatar
Casper Hansen committed
4
from setuptools import setup, find_packages
Casper's avatar
Casper committed
5
from distutils.sysconfig import get_python_lib
Casper's avatar
Casper committed
6

7
8
# Force both compiler environment variables to g++; this runs at import time,
# before any extension below is declared.
os.environ.update(CC="g++", CXX="g++")
Casper's avatar
Casper committed
9

10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def _load_long_description(fallback):
    """Return the text of the README.md located next to this file.

    Args:
        fallback: String returned when README.md does not exist, so that
            running this script from a tree without the README does not
            abort with ``FileNotFoundError``.

    Returns:
        The README contents decoded as UTF-8, or ``fallback`` if the file
        is missing.
    """
    readme_path = Path(__file__).parent / "README.md"
    try:
        return readme_path.read_text(encoding="UTF-8")
    except FileNotFoundError:
        return fallback


# One-line summary; also used as the long description when README.md is missing.
_SUMMARY = "AutoAWQ implements the AWQ algorithm for 4-bit quantization with a 2x speedup during inference."

# Package metadata, later expanded into keyword arguments for setup().
common_setup_kwargs = {
    "version": "0.0.1",
    "name": "autoawq",
    "author": "Casper Hansen",
    "license": "MIT",
    "python_requires": ">=3.8.0",
    "description": _SUMMARY,
    "long_description": _load_long_description(_SUMMARY),
    "long_description_content_type": "text/markdown",
    "url": "https://github.com/casper-hansen/AutoAWQ",
    "keywords": ["awq", "autoawq", "quantization", "transformers"],
    "platforms": ["linux"],
    "classifiers": [
        "Environment :: GPU :: NVIDIA CUDA :: 11.8",
        "Environment :: GPU :: NVIDIA CUDA :: 12",
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: C++",
    ],
}
Casper Hansen's avatar
Casper Hansen committed
34

35
36
37
38
39
40
41
42
43
44
# Runtime dependencies, handed to setup() as install_requires.
# Stray non-Python lines that had ended up between the entries were removed;
# the entries themselves and their order are unchanged.
requirements = [
    "torch>=2.0.0",
    "transformers>=4.32.0",
    "tokenizers>=0.12.1",
    "accelerate",
    "sentencepiece",
    "lm_eval",
    "texttable",
    "toml",
    "attributedict",
    "protobuf",
    "torchvision",
]

49
# Extra header search paths, later passed to setup() as include_dirs.
include_dirs = []

# Imported here so this block is self-contained. The stdlib sysconfig module
# provides the site-packages location without going through the deprecated
# distutils.sysconfig helper.
import sysconfig

# If a CUDA runtime header directory exists under site-packages
# (nvidia/cuda_runtime/include), add it to the header search path; otherwise
# include_dirs stays empty.
conda_cuda_include_dir = os.path.join(
    sysconfig.get_paths()["purelib"], "nvidia/cuda_runtime/include"
)
if os.path.isdir(conda_cuda_include_dir):
    include_dirs.append(conda_cuda_include_dir)
Casper's avatar
Casper committed
54

55
56
57
58
59
60
61
62
63
64
65
66
# Native extension modules to build. A single module, "awq_inference_engine",
# is compiled from the pybind entry point plus three .cu source files.
# Stray non-Python lines that had split the CppExtension(...) call were
# removed; every argument is unchanged.
#
# NOTE(review): the source list contains .cu files but the extension is
# declared with CppExtension rather than CUDAExtension -- confirm this is
# intentional.
# NOTE(review): "-lgomp" is a linker option yet is listed among the compile
# flags -- confirm whether it should live in extra_link_args instead.
extensions = [
    cpp_extension.CppExtension(
        "awq_inference_engine",
        [
            "awq_cuda/pybind.cpp",
            "awq_cuda/quantization/gemm_cuda_gen.cu",
            "awq_cuda/layernorm/layernorm.cu",
            "awq_cuda/position_embedding/pos_encoding_kernels.cu",
        ],
        # Flags are keyed by compiler: "cxx" for the host compiler, "nvcc"
        # for the CUDA compiler.
        extra_compile_args={
            "cxx": ["-g", "-O3", "-fopenmp", "-lgomp", "-std=c++17"],
            "nvcc": ["-O3", "-std=c++17"],
        },
    ),
]
Casper Hansen's avatar
Casper Hansen committed
69

70
71
72
73
# Build-related setup() arguments: the extension modules to compile and the
# torch-provided build_ext command class used to compile them.
additional_setup_kwargs = dict(
    ext_modules=extensions,
    cmdclass={"build_ext": cpp_extension.BuildExtension},
)
Casper's avatar
Casper committed
74

75
# Merge the extension-build arguments into the shared metadata so that a
# single dict carries every keyword argument for setup().
common_setup_kwargs.update(additional_setup_kwargs)

# Register the package. Stray non-Python lines that had been interleaved with
# the arguments of this call were removed; the arguments are unchanged.
setup(
    packages=find_packages(),
    install_requires=requirements,
    include_dirs=include_dirs,
    **common_setup_kwargs
)