Commit 417bbad2 authored by Casper Hansen

Simplify AWQ setup.

parent dc139757
@@ -34,32 +34,30 @@ It also offers a turn-key solution for **on-device inference** of LLMs on **reso
 ## Install
-1. Clone this repository and navigate to AWQ folder
+Clone this repository and install with pip.
 ```
 git clone https://github.com/mit-han-lab/llm-awq
 cd llm-awq
-```
-2. Install Package
-```
-conda create -n awq python=3.10 -y
-conda activate awq
-pip install --upgrade pip # enable PEP 660 support
 pip install -e .
 ```
-* For **edge devices** like Orin, before running the commands above, please:
-1. Modify [pyproject.toml](pyproject.toml) by commenting out [this line](https://github.com/mit-han-lab/llm-awq/blob/3fce69061682fdd528824e5da3d03a8a8b545f2a/pyproject.toml#L17).
-2. Manually install precompiled PyTorch binaries (>=2.0.0) from [NVIDIA](https://forums.developer.nvidia.com/t/pytorch-for-jetson/72048).
-3. Set the appropriate Python version for conda environment (e.g., `conda create -n awq python=3.8 -y` for JetPack 5).
-3. Install efficient W4A16 (4-bit weight, 16-bit activation) CUDA kernel and optimized FP16 kernels (e.g. layernorm, positional encodings).
+### CPU only
+If you want to avoid installing CUDA kernels, pass the BUILD_CUDA_EXT environment variable:
 ```
-cd awq/kernels
-python setup.py install
+BUILD_CUDA_EXT=0 pip install -e .
 ```
+### Edge device
+For **edge devices** like Orin, before running the commands above, please:
+1. Manually install precompiled PyTorch binaries (>=2.0.0) from [NVIDIA](https://forums.developer.nvidia.com/t/pytorch-for-jetson/72048).
+2. Set the appropriate Python version for conda environment (e.g., `conda create -n awq python=3.8 -y` for JetPack 5).
+3. Install AWQ: `TORCH_IS_PREBUILT=1 pip install -e .`
 ## AWQ Model Zoo
 We provide pre-computed AWQ search results for multiple model families, including LLaMA, OPT, Vicuna, and LLaVA. To get the pre-computed AWQ search results, run:
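Putting the new edge-device steps together, the flow on a Jetson Orin with JetPack 5 might look roughly like the sketch below; the PyTorch wheel filename is a placeholder for whichever prebuilt binary you download from the NVIDIA forum linked above.

```
# Sketch of the edge-device flow above (assumes JetPack 5, hence Python 3.8).
conda create -n awq python=3.8 -y
conda activate awq
# Install NVIDIA's prebuilt PyTorch (>=2.0.0) for Jetson; the wheel name below is a placeholder.
pip install torch-2.0.0-cp38-cp38-linux_aarch64.whl
# Skip pulling torch/torchvision from PyPI and install AWQ itself.
cd llm-awq
TORCH_IS_PREBUILT=1 pip install -e .
```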
# awq/kernels/setup.py
from setuptools import find_packages, setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CppExtension

extra_compile_args = {
    "cxx": ["-g", "-O3", "-fopenmp", "-lgomp", "-std=c++17"],
    "nvcc": ["-O3", "-std=c++17"],
}

setup(
    name="awq_inference_engine",
    packages=find_packages(),
    ext_modules=[
        CUDAExtension(
            name="awq_inference_engine",
            sources=[
                "csrc/pybind.cpp",
                "csrc/quantization/gemm_cuda_gen.cu",
                "csrc/layernorm/layernorm.cu",
                "csrc/position_embedding/pos_encoding_kernels.cu"
            ],
            extra_compile_args=extra_compile_args,
        ),
    ],
    cmdclass={"build_ext": BuildExtension},
    install_requires=["torch"],
)
# pyproject.toml
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "awq"
version = "0.1.0"
description = "An efficient and accurate low-bit weight quantization(INT3/4) method for LLMs."
readme = "README.md"
requires-python = ">=3.8"
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: Apache Software License",
]
dependencies = [
    "accelerate", "sentencepiece", "tokenizers>=0.12.1",
    "torch>=2.0.0", "torchvision",
    "transformers>=4.31.0",
    "lm_eval", "texttable",
    "toml", "attributedict",
    "protobuf"
]

[tool.setuptools.packages.find]
exclude = ["results*", "scripts*", "examples*"]

[tool.wheel]
exclude = ["results*", "scripts*", "examples*"]
# setup.py
import os

from setuptools import setup, find_packages
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

# Get environment variables
build_cuda_extension = os.environ.get('BUILD_CUDA_EXT', '1') == '1'
torch_is_prebuilt = os.environ.get('TORCH_IS_PREBUILT', '0') == '1'

# Define dependencies
dependencies = [
    "accelerate", "sentencepiece", "tokenizers>=0.12.1",
    "transformers>=4.31.0",
    "lm_eval", "texttable",
    "toml", "attributedict",
    "protobuf"
]

# When PyTorch is preinstalled (e.g. NVIDIA's Jetson wheels), do not pull it from PyPI.
if not torch_is_prebuilt:
    dependencies.extend(["torch>=2.0.0", "torchvision"])

# Setup CUDA extension
ext_modules = []

if build_cuda_extension:
    ext_modules.append(
        CUDAExtension(
            name="awq_inference_engine",
            sources=[
                "awq/kernels/csrc/pybind.cpp",
                "awq/kernels/csrc/quantization/gemm_cuda_gen.cu",
                "awq/kernels/csrc/layernorm/layernorm.cu",
                "awq/kernels/csrc/position_embedding/pos_encoding_kernels.cu"
            ],
            extra_compile_args={
                "cxx": ["-g", "-O3", "-fopenmp", "-lgomp", "-std=c++17"],
                "nvcc": ["-O3", "-std=c++17"]
            },
        )
    )

setup(
    name="awq",
    version="0.1.0",
    description="An efficient and accurate low-bit weight quantization(INT3/4) method for LLMs.",
    long_description=open("README.md", "r").read(),
    long_description_content_type="text/markdown",
    python_requires=">=3.8",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: Apache Software License",
    ],
    install_requires=dependencies,
    packages=find_packages(exclude=["results*", "scripts*", "examples*"]),
    ext_modules=ext_modules,
    cmdclass={"build_ext": BuildExtension}
)
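One way to check which variant you ended up with (a suggestion, not something from the repository): the CUDAExtension above is built as a top-level awq_inference_engine module, so importing it should succeed after a default install and fail after a BUILD_CUDA_EXT=0 install.

```
# Assumes `pip install -e .` has already been run from the repository root.
python -c "import awq_inference_engine; print('AWQ CUDA kernels available')"
```

Folding the kernel build and the torch/torchvision requirement into this single setup.py, gated by BUILD_CUDA_EXT and TORCH_IS_PREBUILT, is what replaces the separate awq/kernels build step and the pyproject.toml edit that the old instructions required.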