gaoqiong / flash-attention

Commit 2ddeaa40, authored Aug 13, 2023 by Tri Dao

    Fix wheel building

Parent: d8ec6a2f
Showing 13 changed files with 180 additions and 363 deletions (+180 −363)
.github/workflows/cuda/cu102-Linux-env.sh   +0 −9
.github/workflows/cuda/cu102-Linux.sh       +0 −17
.github/workflows/cuda/cu113-Linux-env.sh   +0 −9
.github/workflows/cuda/cu113-Linux.sh       +0 −21
.github/workflows/cuda/cu116-Linux-env.sh   +0 −9
.github/workflows/cuda/cu116-Linux.sh       +0 −18
.github/workflows/cuda/cu117-Linux-env.sh   +0 −9
.github/workflows/cuda/cu117-Linux.sh       +0 −18
.github/workflows/cuda/cu120-Linux-env.sh   +0 −9
.github/workflows/cuda/cu120-Linux.sh       +0 −18
.github/workflows/env.sh                    +0 −53
.github/workflows/publish.yml               +143 −147
setup.py                                    +37 −26
.github/workflows/cuda/cu102-Linux-env.sh (deleted, 100644 → 0)
#!/bin/bash

CUDA_HOME=/usr/local/cuda-10.2
LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
PATH=${CUDA_HOME}/bin:${PATH}

export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5"
export CUDA_HOME=/usr/local/cuda-10.2
.github/workflows/cuda/cu102-Linux.sh (deleted, 100644 → 0)
#!/bin/bash

# Strip the periods from the version number
OS_VERSION=$(echo $(lsb_release -sr) | tr -d .)
OS=ubuntu${OS_VERSION}

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb
sudo dpkg -i cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb
sudo apt-key add /var/cuda-repo-10-2-local-10.2.89-440.33.01/7fa2af80.pub

sudo apt-get -qq update
sudo apt install cuda cuda-nvcc-10-2 cuda-libraries-dev-10-2
sudo apt clean

rm -f https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb
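For reference, this is what the version-stripping pipeline at the top of each installer computes, sketched in Python (the lsb_release output is an assumed example for an ubuntu-20.04 runner):

# Sketch of: OS_VERSION=$(echo $(lsb_release -sr) | tr -d .); OS=ubuntu${OS_VERSION}
lsb_release_sr = "20.04"                      # assumed output of `lsb_release -sr`
os_version = lsb_release_sr.replace(".", "")  # "2004", mirroring `tr -d .`
os_name = f"ubuntu{os_version}"               # "ubuntu2004"
# This string is spliced into the NVIDIA repo URLs above, e.g.:
print(f"https://developer.download.nvidia.com/compute/cuda/repos/{os_name}/x86_64/cuda-{os_name}.pin")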
.github/workflows/cuda/cu113-Linux-env.sh (deleted, 100644 → 0)
#!/bin/bash

CUDA_HOME=/usr/local/cuda-11.3
LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
PATH=${CUDA_HOME}/bin:${PATH}

export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
export CUDA_HOME=/usr/local/cuda-11.3
.github/workflows/cuda/cu113-Linux.sh (deleted, 100644 → 0)
#!/bin/bash

# Strip the periods from the version number
OS_VERSION=$(echo $(lsb_release -sr) | tr -d .)
OS=ubuntu${OS_VERSION}

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb
sudo dpkg -i cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb
# TODO: If on version < 22.04, install via apt-key as below.
# For future versions apt-key is deprecated and the key should be moved into the trusted folder:
# sudo mv /var/cuda-repo-${OS}-11-3-local/7fa2af80.pub /etc/apt/trusted.gpg.d/
sudo apt-key add /var/cuda-repo-${OS}-11-3-local/7fa2af80.pub

sudo apt-get -qq update
sudo apt install cuda cuda-nvcc-11-3 cuda-libraries-dev-11-3
sudo apt clean

rm -f https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb
.github/workflows/cuda/cu116-Linux-env.sh (deleted, 100644 → 0)
#!/bin/bash

CUDA_HOME=/usr/local/cuda-11.6
LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
PATH=${CUDA_HOME}/bin:${PATH}

export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
export CUDA_HOME=/usr/local/cuda-11.6
.github/workflows/cuda/cu116-Linux.sh (deleted, 100644 → 0)
#!/bin/bash

# Strip the periods from the version number
OS_VERSION=$(echo $(lsb_release -sr) | tr -d .)
OS=ubuntu${OS_VERSION}

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb
sudo dpkg -i cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb
sudo apt-key add /var/cuda-repo-${OS}-11-6-local/7fa2af80.pub

sudo apt-get -qq update
sudo apt install cuda cuda-nvcc-11-6 cuda-libraries-dev-11-6
sudo apt clean

rm -f https://developer.download.nvidia.com/compute/cuda/11.5.2/local_installers/cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb
.github/workflows/cuda/cu117-Linux-env.sh (deleted, 100644 → 0)
#!/bin/bash

CUDA_HOME=/usr/local/cuda-11.7
LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
PATH=${CUDA_HOME}/bin:${PATH}

export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
export CUDA_HOME=/usr/local/cuda-11.7
.github/workflows/cuda/cu117-Linux.sh (deleted, 100644 → 0)
#!/bin/bash

# Strip the periods from the version number
OS_VERSION=$(echo $(lsb_release -sr) | tr -d .)
OS=ubuntu${OS_VERSION}

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda-repo-${OS}-11-7-local_11.7.0-515.43.04-1_amd64.deb
sudo dpkg -i cuda-repo-${OS}-11-7-local_11.7.0-515.43.04-1_amd64.deb
sudo cp /var/cuda-repo-${OS}-11-7-local/cuda-*-keyring.gpg /usr/share/keyrings/

sudo apt-get -qq update
sudo apt install cuda cuda-nvcc-11-7 cuda-libraries-dev-11-7
sudo apt clean

rm -f https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda-repo-${OS}-11-7-local_11.7.0-515.43.04-1_amd64.deb
.github/workflows/cuda/cu120-Linux-env.sh (deleted, 100644 → 0)
#!/bin/bash

CUDA_HOME=/usr/local/cuda-12.0
LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
PATH=${CUDA_HOME}/bin:${PATH}

export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
export CUDA_HOME=/usr/local/cuda-12.0
.github/workflows/cuda/cu120-Linux.sh (deleted, 100644 → 0)
#!/bin/bash

# Strip the periods from the version number
OS_VERSION=$(echo $(lsb_release -sr) | tr -d .)
OS=ubuntu${OS_VERSION}

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda-repo-${OS}-12-0-local_12.0.0-525.60.13-1_amd64.deb
sudo dpkg -i cuda-repo-${OS}-12-0-local_12.0.0-525.60.13-1_amd64.deb
sudo cp /var/cuda-repo-${OS}-12-0-local/cuda-*-keyring.gpg /usr/share/keyrings/

sudo apt-get -qq update
sudo apt install cuda cuda-nvcc-12-0 cuda-libraries-dev-12-0
sudo apt clean

rm -f https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda-repo-${OS}-12-0-local_12.0.0-525.60.13-1_amd64.deb
.github/workflows/env.sh (deleted, 100644 → 0)
export LANG C.UTF-8
export OFED_VERSION=5.3-1.0.0.1

sudo apt-get update && \
sudo apt-get install -y --no-install-recommends \
    software-properties-common \

sudo apt-get install -y --no-install-recommends \
    build-essential \
    apt-utils \
    ca-certificates \
    wget \
    git \
    vim \
    libssl-dev \
    curl \
    unzip \
    unrar \
    cmake \
    net-tools \
    sudo \
    autotools-dev \
    rsync \
    jq \
    openssh-server \
    tmux \
    screen \
    htop \
    pdsh \
    openssh-client \
    lshw \
    dmidecode \
    util-linux \
    automake \
    autoconf \
    libtool \
    net-tools \
    pciutils \
    libpci-dev \
    libaio-dev \
    libcap2 \
    libtinfo5 \
    fakeroot \
    devscripts \
    debhelper \
    nfs-common

# wget -O ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
# chmod +x ~/miniconda.sh && \
# ~/miniconda.sh -b -p /opt/conda && \
# rm ~/miniconda.sh
# export PATH=/opt/conda/bin:$PATH
.github/workflows/publish.yml
@@ -7,116 +7,120 @@
 name: Build wheels and deploy

-#on:
-#  create:
-#    tags:
-#      - '**'
 on:
-  push
+  create:
+    tags:
+      - v*

 jobs:
-  # setup_release:
-  #   name: Create Release
-  #   runs-on: ubuntu-latest
-  #   steps:
-  #     - name: Get the tag version
-  #       id: extract_branch
-  #       run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/}
-  #       shell: bash
-  #     - name: Create Release
-  #       id: create_release
-  #       uses: actions/create-release@v1
-  #       env:
-  #         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-  #       with:
-  #         tag_name: ${{ steps.extract_branch.outputs.branch }}
-  #         release_name: ${{ steps.extract_branch.outputs.branch }}
+  setup_release:
+    name: Create Release
+    runs-on: ubuntu-latest
+    steps:
+      - name: Get the tag version
+        id: extract_branch
+        run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/}
+        shell: bash
+      - name: Create Release
+        id: create_release
+        uses: actions/create-release@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          tag_name: ${{ steps.extract_branch.outputs.branch }}
+          release_name: ${{ steps.extract_branch.outputs.branch }}

   build_wheels:
     name: Build Wheel
+    needs: setup_release
     runs-on: ${{ matrix.os }}
-    #needs: setup_release

     strategy:
       fail-fast: false
       matrix:
-          os: [ubuntu-20.04, ubuntu-22.04]
-          #python-version: ['3.7', '3.8', '3.9', '3.10']
-          #torch-version: ['1.11.0', '1.12.0', '1.13.0', '2.0.1']
-          #cuda-version: ['113', '116', '117', '120']
-          python-version: ['3.10']
-          torch-version: ['2.0.1']
-          cuda-version: ['120']
+          # Using ubuntu-20.04 instead of 22.04 for more compatibility (glibc). Ideally we'd use the
+          # manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
+          os: [ubuntu-20.04]
+          python-version: ['3.7', '3.8', '3.9', '3.10']
+          torch-version: ['1.12.1', '1.13.1', '2.0.1', '2.1.0.dev20230731']
+          cuda-version: ['11.6.2', '11.7.1', '11.8.0', '12.1.0']
+          # We need separate wheels that either uses C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
+          # Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.
+          # Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
+          # when building without C++11 ABI and using it on nvcr images.
+          cxx11_abi: ['FALSE', 'TRUE']
          exclude:
-            # Nvidia only supports 11.7+ for ubuntu-22.04
-            - os: ubuntu-22.04
-              cuda-version: '116'
-            - os: ubuntu-22.04
-              cuda-version: '113'
-            # Torch only builds cuda 117 for 1.13.0+
-            - cuda-version: '117'
-              torch-version: '1.11.0'
-            - cuda-version: '117'
-              torch-version: '1.12.0'
-            # Torch only builds cuda 116 for 1.12.0+
-            - cuda-version: '116'
-              torch-version: '1.11.0'
-            # Torch only builds cuda 120 for 2.0.1+
-            - cuda-version: '120'
-              torch-version: '1.11.0'
-            - cuda-version: '120'
-              torch-version: '1.12.0'
-            - cuda-version: '120'
-              torch-version: '1.13.0'
-            # 1.13.0 drops support for cuda 11.3
-            - cuda-version: '113'
-              torch-version: '1.13.0'
-            - cuda-version: '113'
-              torch-version: '2.0.1'
-            # Fails with "Validation Error" on artifact upload
-            - cuda-version: '117'
-              torch-version: '1.13.0'
-              os: ubuntu-20.04
+            # Pytorch >= 2.0 only supports Python >= 3.8
+            - torch-version: '2.0.1'
+              python-version: '3.7'
+            - torch-version: '2.1.0.dev20230731'
+              python-version: '3.7'
+            # Pytorch <= 2.0 only supports CUDA <= 11.8
+            - torch-version: '1.12.1'
+              cuda-version: '12.1.0'
+            - torch-version: '1.13.1'
+              cuda-version: '12.1.0'
+            - torch-version: '2.0.1'
+              cuda-version: '12.1.0'
+            # Pytorch >= 2.1 only supports CUDA 12.1
+            - torch-version: '2.1.0.dev20230731'
+              cuda-version: '11.6.2'
+            - torch-version: '2.1.0.dev20230731'
+              cuda-version: '11.7.1'
+            - torch-version: '2.1.0.dev20230731'
+              cuda-version: '11.8.0'

     steps:
       - name: Checkout
         uses: actions/checkout@v3

       - name: Set up Python
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}

-      - name: Set up Linux Env
-        if: ${{ runner.os == 'Linux' }}
-        run: |
-          bash .github/workflows/env.sh
-          echo ${{ needs.create_release.outputs.upload_url }}
-          echo ${{ needs.steps.extract_branch.outputs.upload_url }}
-        shell: bash
+      - name: Set CUDA and PyTorch versions
+        run: |
+          echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
+          echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.torch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV
+
+      - name: Free up disk space
+        if: ${{ runner.os == 'Linux' }}
+        run: |
+          sudo rm -rf /usr/share/dotnet
+        shell: bash

       - name: Install CUDA ${{ matrix.cuda-version }}
         if: ${{ matrix.cuda-version != 'cpu' }}
-        run: |
-          bash .github/workflows/cuda/cu${{ matrix.cuda-version }}-${{ runner.os }}.sh
-        shell: bash
-
-      - name: Check GPU Env
-        if: ${{ matrix.cuda-version != 'cpu' }}
-        run: |
-          source .github/workflows/cuda/cu${{ matrix.cuda-version }}-${{ runner.os }}-env.sh
-          nvcc --version
-        shell: bash
+        uses: Jimver/cuda-toolkit@v0.2.11
+        id: cuda-toolkit
+        with:
+          cuda: ${{ matrix.cuda-version }}
+          linux-local-args: '["--toolkit"]'
+          # default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1
+          # method: ${{ (matrix.cuda-version == '11.8.0' || matrix.cuda-version == '12.1.0') && 'network' || 'local' }}
+          method: 'network'
+          # We need the cuda libraries (e.g. cuSparse, cuSolver) for compiling PyTorch extensions,
+          # not just nvcc
+          # sub-packages: '["nvcc"]'

       - name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }}
         run: |
-          pip install numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses && conda clean -ya
-          pip install --no-cache-dir torch==${{ matrix.torch-version }}
+          pip install --upgrade pip
+          # If we don't install before installing Pytorch, we get error for torch 2.0.1
+          # ERROR: Could not find a version that satisfies the requirement setuptools>=40.8.0 (from versions: none)
+          pip install lit
+          # We want to figure out the CUDA version to download pytorch
+          # e.g. we can have system CUDA version being 11.7 but if torch==1.12 then we need to download the wheel from cu116
+          # This code is ugly, maybe there's a better way to do this.
+          export TORCH_CUDA_VERSION=$(python -c "import os; minv = {'1.12': 113, '1.13': 116, '2.0': 117, '2.1': 121}[os.environ['MATRIX_TORCH_VERSION']]; maxv = {'1.12': 116, '1.13': 117, '2.0': 118, '2.1': 121}[os.environ['MATRIX_TORCH_VERSION']]; print(max(min(int(os.environ['MATRIX_CUDA_VERSION']), maxv), minv))")
+          if [[ ${{ matrix.torch-version }} == *"dev"* ]]; then
+            pip install --no-cache-dir --pre torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
+          else
+            pip install --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
+          fi
+          nvcc --version
           python --version
           python -c "import torch; print('PyTorch:', torch.__version__)"
           python -c "import torch; print('CUDA:', torch.version.cuda)"
@@ -124,17 +128,26 @@ jobs:
         shell: bash
-      # - name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }}
-      #   run: |
-      #     pip install numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses && conda clean -ya
-      #     pip install --no-index --no-cache-dir torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cu${{ matrix.cuda-version }}/torch_stable.html
-      #     python --version
-      #     python -c "import torch; print('PyTorch:', torch.__version__)"
-      #     python -c "import torch; print('CUDA:', torch.version.cuda)"
-      #     python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)"
-      #   shell:
-      #     bash

+      - name: Build wheel
+        run: |
+          # We want setuptools >= 49.6.0 otherwise we can't compile the extension if system CUDA version is 11.7 and pytorch cuda version is 11.6
+          # https://github.com/pytorch/pytorch/blob/664058fa83f1d8eede5d66418abff6e20bd76ca8/torch/utils/cpp_extension.py#L810
+          # However this still fails so I'm using a newer version of setuptools
+          pip install setuptools==68.0.0
+          pip install ninja packaging wheel
+          export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
+          export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+          # Limit MAX_JOBS otherwise the github runner goes OOM
+          MAX_JOBS=1 FLASH_ATTENTION_FORCE_BUILD="TRUE" FLASH_ATTENTION_FORCE_CXX11_ABI=${{ matrix.cxx11_abi}} python setup.py bdist_wheel --dist-dir=dist
+          tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ matrix.cxx11_abi }}
+          wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
+          ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
+          echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
+
+      - name: Log Built Wheels
+        run: |
+          ls dist
+
+      - name: Get the tag version
+        id: extract_branch
+        run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/}
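The sed expression in the Build wheel step splices a local version tag into the wheel filename by replacing the second "-" (the one right after the package version). A rough Python equivalent, using a hypothetical wheel name:

def tag_wheel_name(wheel: str, tmpname: str) -> str:
    """Replace the 2nd '-' with '+<tmpname>-', like sed "s/-/+$tmpname-/2"."""
    name, version, rest = wheel.split("-", 2)
    return f"{name}-{version}+{tmpname}-{rest}"

# Hypothetical bdist_wheel output:
print(tag_wheel_name("flash_attn-2.0.4-cp310-cp310-linux_x86_64.whl",
                     "cu118torch2.0cxx11abiFALSE"))
# flash_attn-2.0.4+cu118torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl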
@@ -147,62 +160,45 @@ jobs:
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

-      - name: Build wheel
-        run: |
-          export FLASH_ATTENTION_FORCE_BUILD="TRUE"
-          export FORCE_CUDA="1"
-          export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
-          export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
-          export CUDA_INSTALL_DIR=/usr/local/cuda-11.3$CUDA_INSTALL_DIR
-          pip install ninja packaging setuptools wheel
-          python setup.py bdist_wheel --dist-dir=dist
-          tmpname=cu${{ matrix.cuda-version }}torch${{ matrix.torch-version }}
-          wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
-          ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
-          echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
-
-      - name: Log Built Wheels
-        run: |
-          ls dist
-
-      # - name: Upload Release Asset
-      #   id: upload_release_asset
-      #   uses: actions/upload-release-asset@v1
-      #   env:
-      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      #   with:
-      #     upload_url: ${{ steps.get_current_release.outputs.upload_url }}
-      #     asset_path: ./dist/${{env.wheel_name}}
-      #     asset_name: ${{env.wheel_name}}
-      #     asset_content_type: application/*
-      # publish_package:
-      #   name: Publish package
-      #   needs: [build_wheels]
-      #   runs-on: ubuntu-latest
-      #   steps:
-      #     - uses: actions/checkout@v3
-      #     - uses: actions/setup-python@v4
-      #       with:
-      #         python-version: '3.10'
-      #     - name: Install dependencies
-      #       run: |
-      #         pip install ninja packaging setuptools wheel twine
-      #         pip install torch
-      #     - name: Build core package
-      #       env:
-      #         FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE"
-      #       run: |
-      #         python setup.py sdist --dist-dir=dist
-      #     - name: Deploy
-      #       env:
-      #         TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
-      #         TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
-      #       run: |
-      #         python -m twine upload dist/*
+      - name: Upload Release Asset
+        id: upload_release_asset
+        uses: actions/upload-release-asset@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          upload_url: ${{ steps.get_current_release.outputs.upload_url }}
+          asset_path: ./dist/${{env.wheel_name}}
+          asset_name: ${{env.wheel_name}}
+          asset_content_type: application/*
+
+  publish_package:
+    name: Publish package
+    needs: [build_wheels]
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      - name: Install dependencies
+        run: |
+          pip install ninja packaging setuptools wheel twine
+          # We don't want to download anything CUDA-related here
+          pip install torch --index-url https://download.pytorch.org/whl/cpu
+      - name: Build core package
+        env:
+          FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE"
+        run: |
+          python setup.py sdist --dist-dir=dist
+      - name: Deploy
+        env:
+          TWINE_USERNAME: "__token__"
+          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+        run: |
+          python -m twine upload dist/*
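The FLASH_ATTENTION_SKIP_CUDA_BUILD switch set here is consumed by setup.py (see the diff below); the mechanism is a plain string comparison against the environment, sketched here:

import os

# How setup.py reads the CI switches used in this workflow:
FORCE_BUILD = os.getenv("FLASH_ATTENTION_FORCE_BUILD", "FALSE") == "TRUE"
SKIP_CUDA_BUILD = os.getenv("FLASH_ATTENTION_SKIP_CUDA_BUILD", "FALSE") == "TRUE"

if SKIP_CUDA_BUILD:
    # publish_package path: `python setup.py sdist` ships sources only,
    # so the publishing runner needs no CUDA toolkit.
    pass
else:
    # build_wheels path: compile the CUDA extension with nvcc.
    pass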
setup.py
@@ -13,9 +13,10 @@ import subprocess
 import urllib.request
 import urllib.error
+from wheel.bdist_wheel import bdist_wheel as _bdist_wheel

 import torch
 from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension, CUDA_HOME
-from wheel.bdist_wheel import bdist_wheel as _bdist_wheel

 with open("README.md", "r", encoding="utf-8") as fh:
@@ -33,6 +34,8 @@ BASE_WHEEL_URL = "https://github.com/Dao-AILab/flash-attention/releases/download
 # SKIP_CUDA_BUILD: Intended to allow CI to use a simple `python setup.py sdist` run to copy over raw files, without any cuda compilation
 FORCE_BUILD = os.getenv("FLASH_ATTENTION_FORCE_BUILD", "FALSE") == "TRUE"
 SKIP_CUDA_BUILD = os.getenv("FLASH_ATTENTION_SKIP_CUDA_BUILD", "FALSE") == "TRUE"
+# For CI, we want the option to build with C++11 ABI since the nvcr images use C++11 ABI
+FORCE_CXX11_ABI = os.getenv("FLASH_ATTENTION_FORCE_CXX11_ABI", "FALSE") == "TRUE"


 def get_platform():
@@ -101,26 +104,27 @@ if not torch.cuda.is_available():
     print(
         "\nWarning: Torch did not find available GPUs on this system.\n",
         "If your intention is to cross-compile, this is not an error.\n"
-        "By default, Apex will cross-compile for Pascal (compute capabilities 6.0, 6.1, 6.2),\n"
-        "Volta (compute capability 7.0), Turing (compute capability 7.5),\n"
-        "and, if the CUDA version is >= 11.0, Ampere (compute capability 8.0).\n"
+        "By default, FlashAttention will cross-compile for Ampere (compute capability 8.0, 8.6, "
+        "8.9), and, if the CUDA version is >= 11.8, Hopper (compute capability 9.0).\n"
         "If you wish to cross-compile for a single specific architecture,\n"
         'export TORCH_CUDA_ARCH_LIST="compute capability" before running setup.py.\n',
     )
     if os.environ.get("TORCH_CUDA_ARCH_LIST", None) is None and CUDA_HOME is not None:
         _, bare_metal_version = get_cuda_bare_metal_version(CUDA_HOME)
         if bare_metal_version >= Version("11.8"):
-            os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0;8.6;9.0"
-        elif bare_metal_version >= Version("11.1"):
-            os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0;8.6"
-        elif bare_metal_version == Version("11.0"):
-            os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0"
+            os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6;9.0"
+        elif bare_metal_version >= Version("11.4"):
+            os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
         else:
-            os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5"
+            os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"

 cmdclass = {}
 ext_modules = []

+# We want this even if SKIP_CUDA_BUILD because when we run python setup.py sdist we want the .hpp
+# files included in the source distribution, in case the user compiles from source.
+subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"])

 if not SKIP_CUDA_BUILD:
     print("\n\ntorch.__version__  = {}\n\n".format(torch.__version__))
     TORCH_MAJOR = int(torch.__version__.split(".")[0])
@@ -137,8 +141,8 @@ if not SKIP_CUDA_BUILD:
     # Check, if CUDA11 is installed for compute capability 8.0
     cc_flag = []
     _, bare_metal_version = get_cuda_bare_metal_version(CUDA_HOME)
-    if bare_metal_version < Version("11.0"):
-        raise RuntimeError("FlashAttention is only supported on CUDA 11 and above")
+    if bare_metal_version < Version("11.4"):
+        raise RuntimeError("FlashAttention is only supported on CUDA 11.4 and above")
     # cc_flag.append("-gencode")
     # cc_flag.append("arch=compute_75,code=sm_75")
     cc_flag.append("-gencode")
@@ -147,7 +151,11 @@
     cc_flag.append("-gencode")
     cc_flag.append("arch=compute_90,code=sm_90")

-    subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"])
+    # HACK: The compiler flag -D_GLIBCXX_USE_CXX11_ABI is set to be the same as
+    # torch._C._GLIBCXX_USE_CXX11_ABI
+    # https://github.com/pytorch/pytorch/blob/8472c24e3b5b60150096486616d98b7bea01500b/torch/utils/cpp_extension.py#L920
+    if FORCE_CXX11_ABI:
+        torch._C._GLIBCXX_USE_CXX11_ABI = True
     ext_modules.append(
         CUDAExtension(
             name="flash_attn_2_cuda",
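Which wheel flavor a user needs depends on the ABI of the torch build they run against; the flag the HACK above sets is readable at runtime. A quick check, for illustration:

import torch

# True on builds compiled with -D_GLIBCXX_USE_CXX11_ABI=1 (e.g. nvcr images);
# False for the torch wheels on PyPI. Importing a flash-attn wheel built for
# the other ABI fails with:
# undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs
print(torch._C._GLIBCXX_USE_CXX11_ABI)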
@@ -213,6 +221,7 @@
                 Path(this_dir) / 'csrc' / 'cutlass' / 'include',
             ],
         )
     )


 def get_package_version():
@@ -227,30 +236,33 @@ def get_package_version():
 class CachedWheelsCommand(_bdist_wheel):
     """
     The CachedWheelsCommand plugs into the default bdist wheel, which is ran by pip when it cannot
     find an existing wheel (which is currently the case for all flash attention installs). We use
     the environment parameters to detect whether there is already a pre-built version of a compatible
     wheel available and short-circuits the standard full build pipeline.
     """

     def run(self):
         if FORCE_BUILD:
             return super().run()

-        raise_if_cuda_home_none("flash_attn")
-
         # Determine the version numbers that will be used to determine the correct wheel
-        _, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME)
+        # We're using the CUDA version used to build torch, not the one currently installed
+        # _, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME)
+        torch_cuda_version = parse(torch.version.cuda)
         torch_version_raw = parse(torch.__version__)
         python_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
         platform_name = get_platform()
         flash_version = get_package_version()
-        cuda_version = f"{cuda_version_raw.major}{cuda_version_raw.minor}"
-        torch_version = f"{torch_version_raw.major}.{torch_version_raw.minor}.{torch_version_raw.micro}"
+        # cuda_version = f"{cuda_version_raw.major}{cuda_version_raw.minor}"
+        cuda_version = f"{torch_cuda_version.major}{torch_cuda_version.minor}"
+        torch_version = f"{torch_version_raw.major}.{torch_version_raw.minor}"
+        cxx11_abi = str(torch._C._GLIBCXX_USE_CXX11_ABI).upper()

         # Determine wheel URL based on CUDA version, torch version, python version and OS
-        wheel_filename = f'{PACKAGE_NAME}-{flash_version}+cu{cuda_version}torch{torch_version}-{python_version}-{python_version}-{platform_name}.whl'
+        wheel_filename = f'{PACKAGE_NAME}-{flash_version}+cu{cuda_version}torch{torch_version}cxx11abi{cxx11_abi}-{python_version}-{python_version}-{platform_name}.whl'
         wheel_url = BASE_WHEEL_URL.format(tag_name=f"v{flash_version}", wheel_name=wheel_filename
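Putting the pieces together, the cached-wheel lookup under the new naming scheme resolves roughly as follows (all values illustrative; the BASE_WHEEL_URL template is paraphrased from its truncated definition earlier in setup.py):

# Illustrative resolution of the cached-wheel URL (example values only)
PACKAGE_NAME = "flash_attn"
BASE_WHEEL_URL = ("https://github.com/Dao-AILab/flash-attention/releases/"
                  "download/{tag_name}/{wheel_name}")

flash_version, cuda_version, torch_version = "2.0.4", "118", "2.0"
python_version, platform_name, cxx11_abi = "cp310", "linux_x86_64", "FALSE"

wheel_filename = (f"{PACKAGE_NAME}-{flash_version}+cu{cuda_version}"
                  f"torch{torch_version}cxx11abi{cxx11_abi}"
                  f"-{python_version}-{python_version}-{platform_name}.whl")
print(BASE_WHEEL_URL.format(tag_name=f"v{flash_version}", wheel_name=wheel_filename))
# -> .../download/v2.0.4/flash_attn-2.0.4+cu118torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl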
@@ -279,7 +291,6 @@ class CachedWheelsCommand(_bdist_wheel):
 setup(
-    # @pierce - TODO: Revert for official release
     name=PACKAGE_NAME,
     version=get_package_version(),
     packages=find_packages(