Commit b574767f authored by Casper's avatar Casper
Browse files

Improved setup.py structure and build instructions

parent 7e361d16
...@@ -22,91 +22,81 @@ jobs: ...@@ -22,91 +22,81 @@ jobs:
run: | run: |
echo "release_tag=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV echo "release_tag=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV
# - name: Create Release - name: Create Release
# id: create_release id: create_release
# uses: "actions/github-script@v6" uses: "actions/github-script@v6"
# env: env:
# RELEASE_TAG: ${{ env.release_tag }} RELEASE_TAG: ${{ env.release_tag }}
# with: with:
# github-token: "${{ secrets.GITHUB_TOKEN }}" github-token: "${{ secrets.GITHUB_TOKEN }}"
# script: | script: |
# const script = require('.github/workflows/scripts/github_create_release.js') const script = require('.github/workflows/scripts/github_create_release.js')
# await script(github, context, core) await script(github, context, core)
# build AWQ # build AWQ
build: build_wheels:
name: Build AWQ name: Build AWQ
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
needs: release needs: release
strategy: strategy:
matrix: matrix:
os: [ubuntu-20.04] os: [ubuntu-20.04]
python-version: ["3.8", "3.9", "3.10", "3.11"] pyver: ["3.8", "3.9", "3.10", "3.11"]
cuda-version: ["11.8"] cuda: ["11.8"]
defaults: defaults:
run: run:
shell: pwsh shell: pwsh
env:
CUDA_VERSION: ${{ matrix.cuda }}
steps: steps:
- name: Checkout code - uses: actions/checkout@v3
uses: actions/checkout@v3
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Setup Miniconda
uses: conda-incubator/setup-miniconda@v2.2.0
with:
activate-environment: "build"
python-version: ${{ matrix.python-version }}
mamba-version: "*"
use-mamba: false
channels: conda-forge,defaults
channel-priority: true
add-pip-as-python-dependency: true
auto-activate-base: false
- name: Install CUDA
run: |
conda install cuda-toolkit -c "nvidia/label/cuda-${{ matrix.cuda-version }}.0"
$env:CUDA_PATH = $env:CONDA_PREFIX
$env:CUDA_HOME = $env:CONDA_PREFIX
echo "$env:CUDA_PATH"
echo "$env:CUDA_HOME"
- name: Install PyTorch-cu${{ matrix.cuda-version }}
run: |
$env:TORCH_CUDA_ARCH_LIST = '8.0 8.6 8.9 9.0+PTX'
if ($IsLinux) { $env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH }
# Install torch
$env:CUDA_VERSION = ${{ matrix.cuda-version }} -replace '\.', ''
pip install --upgrade --no-cache-dir torch==2.0.1+cu$env:CUDA_VERSION --index-url https://download.pytorch.org/whl/cu$env:CUDA_VERSION
python -m pip install --upgrade build setuptools wheel ninja
# Print version information - uses: actions/setup-python@v3
python --version with:
python -c "import torch; print('PyTorch:', torch.__version__)" python-version: ${{ matrix.pyver }}
python -c "import torch; print('CUDA:', torch.version.cuda)"
python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)" - name: Setup Miniconda
uses: conda-incubator/setup-miniconda@v2.2.0
- name: Build Wheel with:
run: | activate-environment: "build"
$env:PYPI_RELEASE = "1" python-version: ${{ matrix.pyver }}
mamba-version: "*"
use-mamba: false
channels: conda-forge,defaults
channel-priority: true
add-pip-as-python-dependency: true
auto-activate-base: false
- name: Install Dependencies
run: |
conda install cuda-toolkit -c "nvidia/label/cuda-${env:CUDA_VERSION}.0"
conda install pytorch "pytorch-cuda=${env:CUDA_VERSION}" -c pytorch -c nvidia
python -m pip install --upgrade build setuptools wheel ninja
# Print version information
python --version
python -c "import torch; print('PyTorch:', torch.__version__)"
python -c "import torch; print('CUDA:', torch.version.cuda)"
python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)"
- name: Build Wheel
run: |
$env:CUDA_PATH = $env:CONDA_PREFIX
$env:CUDA_HOME = $env:CONDA_PREFIX
if ($IsLinux) {$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH}
$env:TORCH_CUDA_ARCH_LIST = '8.0 8.6 8.9 9.0+PTX'
python setup.py sdist bdist_wheel python setup.py sdist bdist_wheel
$wheel_path = Get-ChildItem dist\*.whl | ForEach-Object { $_.Name } $wheel_path = Get-ChildItem dist\*.whl | ForEach-Object { $_.Name }
echo "wheel_path=$wheel_path" >> $env:GITHUB_ENV echo "wheel_path=$wheel_path" >> $env:GITHUB_ENV
- name: Upload Release Asset - name: Upload Release Asset
uses: actions/upload-release-asset@v1 uses: shogo82148/actions-upload-release-asset@v1
env: env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with: with:
upload_url: ${{ needs.release.outputs.upload_url }} upload_url: ${{ needs.release.outputs.upload_url }}
asset_path: ./dist/${{ env.wheel_path }} asset_path: ./dist/${{ env.wheel_path }}
asset_content_type: application/* asset_content_type: application/*
\ No newline at end of file \ No newline at end of file
# Build configuration for AutoAWQ.
#
# Compiles the `awq_inference_engine` extension (pybind11 bindings plus CUDA
# kernels for quantized GEMM, layernorm and rotary position embeddings) and
# packages the Python sources.
import os
from pathlib import Path

from setuptools import setup, find_packages
from distutils.sysconfig import get_python_lib
from torch.utils import cpp_extension

# Force g++ as the host compiler for both the C and C++ toolchain slots.
os.environ["CC"] = "g++"
os.environ["CXX"] = "g++"

# Package metadata; merged with the build kwargs and passed to setup() below.
common_setup_kwargs = {
    "version": "0.0.1",
    "name": "autoawq",
    "author": "Casper Hansen",
    "license": "MIT",
    "python_requires": ">=3.8.0",
    "description": "AutoAWQ implements the AWQ algorithm for 4-bit quantization with a 2x speedup during inference.",
    "long_description": (Path(__file__).parent / "README.md").read_text(encoding="UTF-8"),
    "long_description_content_type": "text/markdown",
    "url": "https://github.com/casper-hansen/AutoAWQ",
    "keywords": ["awq", "autoawq", "quantization", "transformers"],
    "platforms": ["linux"],
    "classifiers": [
        "Environment :: GPU :: NVIDIA CUDA :: 11.8",
        "Environment :: GPU :: NVIDIA CUDA :: 12",
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: C++",
    ],
}

# Runtime dependencies installed alongside the package.
requirements = [
    "torch>=2.0.0",
    "transformers>=4.32.0",
    "tokenizers>=0.12.1",
    "accelerate",
    "sentencepiece",
    "lm_eval",
    "texttable",
    "toml",
    "attributedict",
    "protobuf",
    "torchvision",
]

include_dirs = []

# Pick up the CUDA runtime headers shipped inside the active Python
# environment (conda / pip `nvidia-*` packages), when present.
conda_cuda_include_dir = os.path.join(get_python_lib(), "nvidia/cuda_runtime/include")
if os.path.isdir(conda_cuda_include_dir):
    include_dirs.append(conda_cuda_include_dir)

# FIX: the source list contains .cu files and extra_compile_args carries an
# "nvcc" key, so the extension must be a CUDAExtension — CppExtension never
# invokes nvcc and silently ignores the "nvcc" flags, leaving the CUDA
# kernels uncompiled.
extensions = [
    cpp_extension.CUDAExtension(
        "awq_inference_engine",
        [
            "awq_cuda/pybind.cpp",
            "awq_cuda/quantization/gemm_cuda_gen.cu",
            "awq_cuda/layernorm/layernorm.cu",
            "awq_cuda/position_embedding/pos_encoding_kernels.cu",
        ],
        extra_compile_args={
            "cxx": ["-g", "-O3", "-fopenmp", "-lgomp", "-std=c++17"],
            "nvcc": ["-O3", "-std=c++17"],
        },
    )
]

additional_setup_kwargs = {
    "ext_modules": extensions,
    "cmdclass": {'build_ext': cpp_extension.BuildExtension},
}
common_setup_kwargs.update(additional_setup_kwargs)

setup(
    packages=find_packages(),
    install_requires=requirements,
    include_dirs=include_dirs,
    **common_setup_kwargs
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment