Commit b574767f authored by Casper

Improved setup.py structure and build instructions

parent 7e361d16
@@ -22,91 +22,81 @@ jobs:
        run: |
          echo "release_tag=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV

      - name: Create Release
        id: create_release
        uses: "actions/github-script@v6"
        env:
          RELEASE_TAG: ${{ env.release_tag }}
        with:
          github-token: "${{ secrets.GITHUB_TOKEN }}"
          script: |
            const script = require('.github/workflows/scripts/github_create_release.js')
            await script(github, context, core)

  # build AWQ
  build_wheels:
    name: Build AWQ
    runs-on: ${{ matrix.os }}
    needs: release

    strategy:
      matrix:
        os: [ubuntu-20.04]
        pyver: ["3.8", "3.9", "3.10", "3.11"]
        cuda: ["11.8"]
    defaults:
      run:
        shell: pwsh
    env:
      CUDA_VERSION: ${{ matrix.cuda }}

    steps:
      - uses: actions/checkout@v3

      - uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.pyver }}

      - name: Setup Miniconda
        uses: conda-incubator/setup-miniconda@v2.2.0
        with:
          activate-environment: "build"
          python-version: ${{ matrix.pyver }}
          mamba-version: "*"
          use-mamba: false
          channels: conda-forge,defaults
          channel-priority: true
          add-pip-as-python-dependency: true
          auto-activate-base: false

      - name: Install Dependencies
        run: |
          conda install cuda-toolkit -c "nvidia/label/cuda-${env:CUDA_VERSION}.0"
          conda install pytorch "pytorch-cuda=${env:CUDA_VERSION}" -c pytorch -c nvidia
          python -m pip install --upgrade build setuptools wheel ninja

          # Print version information
          python --version
          python -c "import torch; print('PyTorch:', torch.__version__)"
          python -c "import torch; print('CUDA:', torch.version.cuda)"
          python -c "from torch.utils import cpp_extension; print(cpp_extension.CUDA_HOME)"

      - name: Build Wheel
        run: |
          $env:CUDA_PATH = $env:CONDA_PREFIX
          $env:CUDA_HOME = $env:CONDA_PREFIX
          if ($IsLinux) { $env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH }
          $env:TORCH_CUDA_ARCH_LIST = '8.0 8.6 8.9 9.0+PTX'
          python setup.py sdist bdist_wheel

          $wheel_path = Get-ChildItem dist\*.whl | ForEach-Object { $_.Name }
          echo "wheel_path=$wheel_path" >> $env:GITHUB_ENV

      - name: Upload Release Asset
        uses: shogo82148/actions-upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ needs.release.outputs.upload_url }}
          asset_path: ./dist/${{ env.wheel_path }}
          asset_content_type: application/*
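The Install Dependencies step's version printout doubles as a useful preflight check before building locally. A minimal standalone sketch of the same check, assuming a PyTorch install is already present:

# Preflight check mirroring the workflow's "Print version information" block:
# confirms PyTorch sees a CUDA toolkit before attempting to compile kernels.
import torch
from torch.utils import cpp_extension

print("PyTorch:", torch.__version__)
print("CUDA:", torch.version.cuda)            # None on CPU-only builds of torch
print("CUDA_HOME:", cpp_extension.CUDA_HOME)  # None if no CUDA toolkit is found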
setup.py

import os
import torch
from setuptools import setup, find_packages
from distutils.sysconfig import get_python_lib
from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME
def check_dependencies():
    if CUDA_HOME is None:
        raise RuntimeError(
            "Cannot find CUDA_HOME. CUDA must be available to build the package.")

os.environ["CC"] = "g++"
os.environ["CXX"] = "g++"

def get_compute_capabilities():
    # Build for a fixed set of compute capabilities so wheels can be compiled
    # on machines without a GPU attached (e.g. CI runners): Ampere (8.0, 8.6),
    # Ada (8.9) and Hopper (9.0). Capabilities below 8.0 are not supported.
    compute_capabilities = {80, 86, 89, 90}

    capability_flags = []
    for cap in compute_capabilities:
        capability_flags += ["-gencode", f"arch=compute_{cap},code=sm_{cap}"]

    return capability_flags
requirements = [
    "torch>=2.0.0",
    "transformers>=4.32.0",
    "tokenizers>=0.12.1",
    "accelerate",
    "sentencepiece",
    "lm_eval",
    "texttable",
    "toml",
    "attributedict",
    "protobuf",
    "torchvision"
]
# Get environment variables
build_cuda_extension = os.environ.get('BUILD_CUDA_EXT', '1') == '1'

# Setup CUDA extension
include_dirs = []
ext_modules = []

if build_cuda_extension:
    check_dependencies()

    # number of parallel nvcc compilation threads
    n_threads = str(min(os.cpu_count(), 8))

    # final args
    capability_flags = get_compute_capabilities()
    cxx_args = ["-g", "-O3", "-fopenmp", "-lgomp", "-std=c++17"]
    nvcc_args = ["-O3", "-std=c++17", "--threads", n_threads] + capability_flags

    # pick up CUDA headers shipped with the conda/pip nvidia packages, if present
    conda_cuda_include_dir = os.path.join(get_python_lib(), "nvidia/cuda_runtime/include")
    if os.path.isdir(conda_cuda_include_dir):
        include_dirs.append(conda_cuda_include_dir)

    ext_modules.append(
        CUDAExtension(
            name="awq_inference_engine",
            sources=[
                "awq_cuda/pybind.cpp",
                "awq_cuda/quantization/gemm_cuda_gen.cu",
                "awq_cuda/layernorm/layernorm.cu",
                "awq_cuda/position_embedding/pos_encoding_kernels.cu"
            ],
            extra_compile_args={
                "cxx": cxx_args,
                "nvcc": nvcc_args
            },
        )
    )
setup(
    name="autoawq",
    version="0.1.0",
    author="Casper Hansen",
    license="MIT",
    description="AutoAWQ implements the AWQ algorithm for 4-bit quantization with a 2x speedup during inference.",
    long_description=open("README.md", "r", encoding="utf-8").read(),
    long_description_content_type="text/markdown",
    python_requires=">=3.8",
    url="https://github.com/casper-hansen/AutoAWQ",
    keywords=["awq", "autoawq", "quantization", "transformers"],
    classifiers=[
        "Environment :: GPU :: NVIDIA CUDA :: 11.8",
        "Environment :: GPU :: NVIDIA CUDA :: 12",
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: C++",
    ],
    install_requires=requirements,
    include_dirs=include_dirs,
    packages=find_packages(exclude=["examples*"]),
    ext_modules=ext_modules,
    cmdclass={"build_ext": BuildExtension}
)
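Note that the CUDA extension is gated on the BUILD_CUDA_EXT environment variable (default "1"), so a source install can skip kernel compilation with BUILD_CUDA_EXT=0 pip install . . For reference, a standalone sketch of the -gencode flags that get_compute_capabilities() hands to nvcc:

# Illustration only: expand the hardcoded capability set into nvcc -gencode
# flags, exactly as get_compute_capabilities() does above.
compute_capabilities = {80, 86, 89, 90}
flags = []
for cap in sorted(compute_capabilities):
    flags += ["-gencode", f"arch=compute_{cap},code=sm_{cap}"]
print(" ".join(flags))
# -> -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 ...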