Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
AutoAWQ
Commits
b574767f
Commit
b574767f
authored
Aug 29, 2023
by
Casper
Browse files
Improved setup.py structure and build instructions
parent
7e361d16
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
126 additions
and
159 deletions
+126
-159
.github/workflows/build.yaml
.github/workflows/build.yaml
+61
-71
setup.py
setup.py
+65
-88
No files found.
# .github/workflows/build.yaml — wheel build job (new side of the diff).
# Builds AutoAWQ wheels in a conda environment per Python version, then
# uploads each wheel to the GitHub release created by the `release` job.
build_wheels:
  name: Build AWQ
  runs-on: ${{ matrix.os }}
  needs: release

  strategy:
    matrix:
      os: [ubuntu-20.04]
      pyver: ["3.8", "3.9", "3.10", "3.11"]
      cuda: ["11.8"]

  defaults:
    run:
      shell: pwsh

  env:
    CUDA_VERSION: ${{ matrix.cuda }}

  steps:
    - uses: actions/checkout@v3

    - uses: actions/setup-python@v3
      with:
        python-version: ${{ matrix.pyver }}

    - name: Setup Miniconda
      uses: conda-incubator/setup-miniconda@v2.2.0
      with:
        activate-environment: "build"
        python-version: ${{ matrix.pyver }}
        mamba-version: "*"
        use-mamba: false
        channels: conda-forge,defaults
        channel-priority: true
        add-pip-as-python-dependency: true
        auto-activate-base: false

    - name: Install Dependencies
      run: |
        conda install cuda-toolkit -c "nvidia/label/cuda-${env:CUDA_VERSION}.0"
        conda install pytorch "pytorch-cuda=${env:CUDA_VERSION}" -c pytorch -c nvidia
        python -m pip install --upgrade build setuptools wheel ninja

        # Print version information
        python --version
        python -c "import torch; print('PyTorch:', torch.__version__)"
        python -c "import torch; print('CUDA:', torch.version.cuda)"
        python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)"

    - name: Build Wheel
      run: |
        $env:CUDA_PATH = $env:CONDA_PREFIX
        $env:CUDA_HOME = $env:CONDA_PREFIX
        if ($IsLinux) {$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH}
        $env:TORCH_CUDA_ARCH_LIST = '8.0 8.6 8.9 9.0+PTX'
        python setup.py sdist bdist_wheel
        $wheel_path = Get-ChildItem dist\*.whl | ForEach-Object { $_.Name }
        echo "wheel_path=$wheel_path" >> $env:GITHUB_ENV

    - name: Upload Release Asset
      uses: shogo82148/actions-upload-release-asset@v1
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      with:
        upload_url: ${{ needs.release.outputs.upload_url }}
        asset_path: ./dist/${{ env.wheel_path }}
        asset_content_type: application/*
setup.py
View file @
b574767f
import
os
import
os
import
torch
from
pathlib
import
Path
from
torch.utils
import
cpp_extension
from
setuptools
import
setup
,
find_packages
from
setuptools
import
setup
,
find_packages
from
distutils.sysconfig
import
get_python_lib
from
distutils.sysconfig
import
get_python_lib
from
torch.utils.cpp_extension
import
BuildExtension
,
CUDAExtension
,
CUDA_HOME
def check_dependencies():
    """Fail fast when the CUDA toolchain needed to build the extension is absent.

    Raises:
        RuntimeError: if ``torch.utils.cpp_extension.CUDA_HOME`` is ``None``,
            i.e. no CUDA installation could be located.
    """
    if CUDA_HOME is None:
        # Plain string literal: the original used an f-string with no
        # placeholders, which is misleading and needless.
        raise RuntimeError(
            "Cannot find CUDA_HOME. CUDA must be available to build the package."
        )
def get_compute_capabilities():
    """Return nvcc ``-gencode`` flags for the supported GPU architectures.

    Every visible GPU is validated: compute capability below 8.0 aborts the
    build. The emitted flags always target a fixed architecture set
    (80/86/89/90) so distributed wheels work across Ampere/Ada/Hopper,
    regardless of the build machine's GPUs.

    Returns:
        list[str]: alternating ``"-gencode"`` / ``"arch=compute_XX,code=sm_XX"``
        entries, one pair per target architecture.

    Raises:
        RuntimeError: if any available GPU has compute capability < 8.0.
    """
    # Validate the compute capability of all available GPUs. (The original
    # also collected the detected capabilities into a set, but that set was
    # immediately discarded by the hard-coded overwrite below — dead stores,
    # removed here. Only the < 8.0 check has any effect.)
    for i in range(torch.cuda.device_count()):
        major, _minor = torch.cuda.get_device_capability(i)
        if major < 8:
            raise RuntimeError("GPUs with compute capability less than 8.0 are not supported.")

    # Fixed target architectures for the distributed wheel.
    compute_capabilities = {80, 86, 89, 90}

    capability_flags = []
    for cap in compute_capabilities:
        capability_flags += ["-gencode", f"arch=compute_{cap},code=sm_{cap}"]

    return capability_flags
# Define dependencies: runtime requirements installed alongside the package
# (fed to setup()'s install_requires below).
requirements = [
    "torch>=2.0.0",
    "transformers>=4.32.0",
    "tokenizers>=0.12.1",
    "accelerate",
    "sentencepiece",
    "lm_eval",
    "texttable",
    "toml",
    "attributedict",
    "protobuf",
    "torchvision",
]
# Get environment variables: BUILD_CUDA_EXT=0 lets packagers skip compiling
# the CUDA kernels (e.g. for source-only distribution).
build_cuda_extension = os.environ.get('BUILD_CUDA_EXT', '1') == '1'

# Setup CUDA extension
ext_modules = []

if build_cuda_extension:
    # nvcc parallelism, capped at 8 threads. os.cpu_count() may return None
    # (per its documentation), which would make min() raise TypeError — fall
    # back to 8 in that case.
    n_threads = str(min(os.cpu_count() or 8, 8))

    # Final compiler arguments.
    capability_flags = get_compute_capabilities()
    cxx_args = ["-g", "-O3", "-fopenmp", "-lgomp", "-std=c++17"]
    nvcc_args = ["-O3", "-std=c++17", "--threads", n_threads] + capability_flags

    ext_modules.append(
        CUDAExtension(
            name="awq_inference_engine",
            sources=[
                "awq_cuda/pybind.cpp",
                "awq_cuda/quantization/gemm_cuda_gen.cu",
                "awq_cuda/layernorm/layernorm.cu",
                "awq_cuda/position_embedding/pos_encoding_kernels.cu",
            ],
            extra_compile_args={
                "cxx": cxx_args,
                "nvcc": nvcc_args,
            },
        )
    )
# Find directories to be included in setup: when the CUDA runtime headers
# were installed into site-packages via the `nvidia-cuda-runtime` pip/conda
# layout, expose them to the extension build; otherwise leave the list empty.
conda_cuda_include_dir = os.path.join(
    get_python_lib(), "nvidia/cuda_runtime/include"
)
include_dirs = (
    [conda_cuda_include_dir] if os.path.isdir(conda_cuda_include_dir) else []
)
# Package metadata and build wiring. Interface (distribution name, version,
# dependency list, extension modules) is unchanged.
setup(
    name="autoawq",
    version="0.1.0",
    author="Casper Hansen",
    license="MIT",
    description="AutoAWQ implements the AWQ algorithm for 4-bit quantization with a 2x speedup during inference.",
    # Resolve README relative to this file (not the CWD), read with an
    # explicit encoding, and close the handle — the previous
    # open("README.md", "r").read() leaked the file object and depended on
    # the platform default encoding and working directory.
    long_description=(Path(__file__).parent / "README.md").read_text(encoding="utf-8"),
    long_description_content_type="text/markdown",
    python_requires=">=3.8",
    url="https://github.com/casper-hansen/AutoAWQ",
    keywords=["awq", "autoawq", "quantization", "transformers"],
    classifiers=[
        "Environment :: GPU :: NVIDIA CUDA :: 11.8",
        "Environment :: GPU :: NVIDIA CUDA :: 12",
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: C++",
    ],
    install_requires=requirements,
    include_dirs=include_dirs,
    packages=find_packages(),
    ext_modules=ext_modules,
    # BuildExtension drives nvcc/g++ for the CUDA sources above.
    cmdclass={"build_ext": BuildExtension},
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment