Commit b574767f authored by Casper

Improved setup.py structure and build instructions

parent 7e361d16
@@ -22,91 +22,81 @@ jobs:
        run: |
          echo "release_tag=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV

      - name: Create Release
        id: create_release
        uses: "actions/github-script@v6"
        env:
          RELEASE_TAG: ${{ env.release_tag }}
        with:
          github-token: "${{ secrets.GITHUB_TOKEN }}"
          script: |
            const script = require('.github/workflows/scripts/github_create_release.js')
            await script(github, context, core)

  # build AWQ
  build_wheels:
    name: Build AWQ
    runs-on: ${{ matrix.os }}
    needs: release

    strategy:
      matrix:
        os: [ubuntu-20.04]
        pyver: ["3.8", "3.9", "3.10", "3.11"]
        cuda: ["11.8"]
    defaults:
      run:
        shell: pwsh
    env:
      CUDA_VERSION: ${{ matrix.cuda }}

    steps:
      - uses: actions/checkout@v3

      - uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.pyver }}

      - name: Setup Miniconda
        uses: conda-incubator/setup-miniconda@v2.2.0
        with:
          activate-environment: "build"
          python-version: ${{ matrix.pyver }}
          mamba-version: "*"
          use-mamba: false
          channels: conda-forge,defaults
          channel-priority: true
          add-pip-as-python-dependency: true
          auto-activate-base: false

      - name: Install Dependencies
        run: |
          conda install cuda-toolkit -c "nvidia/label/cuda-${env:CUDA_VERSION}.0"
          conda install pytorch "pytorch-cuda=${env:CUDA_VERSION}" -c pytorch -c nvidia
          python -m pip install --upgrade build setuptools wheel ninja

          # Print version information
          python --version
          python -c "import torch; print('PyTorch:', torch.__version__)"
          python -c "import torch; print('CUDA:', torch.version.cuda)"
          python -c "from torch.utils import cpp_extension; print(cpp_extension.CUDA_HOME)"

      - name: Build Wheel
        run: |
          $env:CUDA_PATH = $env:CONDA_PREFIX
          $env:CUDA_HOME = $env:CONDA_PREFIX
          if ($IsLinux) { $env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH }
          $env:TORCH_CUDA_ARCH_LIST = '8.0 8.6 8.9 9.0+PTX'
          python setup.py sdist bdist_wheel

          $wheel_path = Get-ChildItem dist\*.whl | ForEach-Object { $_.Name }
          echo "wheel_path=$wheel_path" >> $env:GITHUB_ENV

      - name: Upload Release Asset
        uses: shogo82148/actions-upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ needs.release.outputs.upload_url }}
          asset_path: ./dist/${{ env.wheel_path }}
          asset_content_type: application/*
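The Install Dependencies step's version printout doubles as a useful preflight check before building locally. A minimal standalone sketch of the same check, assuming a PyTorch install is already present:

# Preflight check mirroring the workflow's "Print version information" block:
# confirms PyTorch sees a CUDA toolkit before attempting to compile kernels.
import torch
from torch.utils import cpp_extension

print("PyTorch:", torch.__version__)
print("CUDA:", torch.version.cuda)            # None on CPU-only builds of torch
print("CUDA_HOME:", cpp_extension.CUDA_HOME)  # None if no CUDA toolkit is found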
setup.py

import os
import torch
from setuptools import setup, find_packages
from distutils.sysconfig import get_python_lib
from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME
def check_dependencies():
    if CUDA_HOME is None:
        raise RuntimeError(
            "Cannot find CUDA_HOME. CUDA must be available to build the package.")

os.environ["CC"] = "g++"
os.environ["CXX"] = "g++"

def get_compute_capabilities():
    # Build for a fixed set of compute capabilities so wheels can be compiled
    # on machines without a GPU attached (e.g. CI runners): Ampere (8.0, 8.6),
    # Ada (8.9) and Hopper (9.0). Capabilities below 8.0 are not supported.
    compute_capabilities = {80, 86, 89, 90}

    capability_flags = []
    for cap in compute_capabilities:
        capability_flags += ["-gencode", f"arch=compute_{cap},code=sm_{cap}"]

    return capability_flags
requirements = [
    "torch>=2.0.0",
    "transformers>=4.32.0",
    "tokenizers>=0.12.1",
    "accelerate",
    "sentencepiece",
    "lm_eval",
    "texttable",
    "toml",
    "attributedict",
    "protobuf",
    "torchvision"
]
# Get environment variables
build_cuda_extension = os.environ.get('BUILD_CUDA_EXT', '1') == '1'

# Setup CUDA extension
include_dirs = []
ext_modules = []

if build_cuda_extension:
    check_dependencies()

    # number of parallel nvcc compilation threads
    n_threads = str(min(os.cpu_count(), 8))

    # final args
    capability_flags = get_compute_capabilities()
    cxx_args = ["-g", "-O3", "-fopenmp", "-lgomp", "-std=c++17"]
    nvcc_args = ["-O3", "-std=c++17", "--threads", n_threads] + capability_flags

    # pick up CUDA headers shipped with the conda/pip nvidia packages, if present
    conda_cuda_include_dir = os.path.join(get_python_lib(), "nvidia/cuda_runtime/include")
    if os.path.isdir(conda_cuda_include_dir):
        include_dirs.append(conda_cuda_include_dir)

    ext_modules.append(
        CUDAExtension(
            name="awq_inference_engine",
            sources=[
                "awq_cuda/pybind.cpp",
                "awq_cuda/quantization/gemm_cuda_gen.cu",
                "awq_cuda/layernorm/layernorm.cu",
                "awq_cuda/position_embedding/pos_encoding_kernels.cu"
            ],
            extra_compile_args={
                "cxx": cxx_args,
                "nvcc": nvcc_args
            },
        )
    )
setup(
    name="autoawq",
    version="0.1.0",
    author="Casper Hansen",
    license="MIT",
    description="AutoAWQ implements the AWQ algorithm for 4-bit quantization with a 2x speedup during inference.",
    long_description=open("README.md", "r", encoding="utf-8").read(),
    long_description_content_type="text/markdown",
    python_requires=">=3.8",
    url="https://github.com/casper-hansen/AutoAWQ",
    keywords=["awq", "autoawq", "quantization", "transformers"],
    classifiers=[
        "Environment :: GPU :: NVIDIA CUDA :: 11.8",
        "Environment :: GPU :: NVIDIA CUDA :: 12",
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: C++",
    ],
    install_requires=requirements,
    include_dirs=include_dirs,
    packages=find_packages(exclude=["examples*"]),
    ext_modules=ext_modules,
    cmdclass={"build_ext": BuildExtension}
)
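Note that the CUDA extension is gated on the BUILD_CUDA_EXT environment variable (default "1"), so a source install can skip kernel compilation with BUILD_CUDA_EXT=0 pip install . . For reference, a standalone sketch of the -gencode flags that get_compute_capabilities() hands to nvcc:

# Illustration only: expand the hardcoded capability set into nvcc -gencode
# flags, exactly as get_compute_capabilities() does above.
compute_capabilities = {80, 86, 89, 90}
flags = []
for cap in sorted(compute_capabilities):
    flags += ["-gencode", f"arch=compute_{cap},code=sm_{cap}"]
print(" ".join(flags))
# -> -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 ...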