gaoqiong / flash-attention · Commits

Commit 2ddeaa40, authored Aug 13, 2023 by Tri Dao
Parent: d8ec6a2f

    Fix wheel building
Showing 13 changed files with 180 additions and 363 deletions (+180 −363)
.github/workflows/cuda/cu102-Linux-env.sh    +0   −9
.github/workflows/cuda/cu102-Linux.sh        +0   −17
.github/workflows/cuda/cu113-Linux-env.sh    +0   −9
.github/workflows/cuda/cu113-Linux.sh        +0   −21
.github/workflows/cuda/cu116-Linux-env.sh    +0   −9
.github/workflows/cuda/cu116-Linux.sh        +0   −18
.github/workflows/cuda/cu117-Linux-env.sh    +0   −9
.github/workflows/cuda/cu117-Linux.sh        +0   −18
.github/workflows/cuda/cu120-Linux-env.sh    +0   −9
.github/workflows/cuda/cu120-Linux.sh        +0   −18
.github/workflows/env.sh                     +0   −53
.github/workflows/publish.yml                +143 −147
setup.py                                     +37  −26
.github/workflows/cuda/cu102-Linux-env.sh (deleted, 100644 → 0)

#!/bin/bash

CUDA_HOME=/usr/local/cuda-10.2
LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
PATH=${CUDA_HOME}/bin:${PATH}

export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5"
export CUDA_HOME=/usr/local/cuda-10.2
\ No newline at end of file
.github/workflows/cuda/cu102-Linux.sh (deleted, 100644 → 0)

#!/bin/bash

# Strip the periods from the version number
OS_VERSION=$(echo $(lsb_release -sr) | tr -d .)
OS=ubuntu${OS_VERSION}

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb
sudo dpkg -i cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb
sudo apt-key add /var/cuda-repo-10-2-local-10.2.89-440.33.01/7fa2af80.pub

sudo apt-get -qq update
sudo apt install cuda cuda-nvcc-10-2 cuda-libraries-dev-10-2
sudo apt clean

rm -f https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb
\ No newline at end of file
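Each of the deleted cu*-Linux.sh installers derives NVIDIA's apt repo path from the Ubuntu release the same way. A minimal Python sketch of that version mangling, assuming Ubuntu 20.04 (values illustrative):

# `lsb_release -sr` prints "20.04"; `tr -d .` strips the dot.
os_version = "20.04".replace(".", "")   # -> "2004"
os_tag = f"ubuntu{os_version}"          # -> "ubuntu2004"
pin_url = (
    "https://developer.download.nvidia.com/compute/cuda/repos/"
    f"{os_tag}/x86_64/cuda-{os_tag}.pin"
)
print(pin_url)  # the .pin file the scripts move into /etc/apt/preferences.d/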
.github/workflows/cuda/cu113-Linux-env.sh (deleted, 100644 → 0)

#!/bin/bash

CUDA_HOME=/usr/local/cuda-11.3
LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
PATH=${CUDA_HOME}/bin:${PATH}

export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
export CUDA_HOME=/usr/local/cuda-11.3
\ No newline at end of file
.github/workflows/cuda/cu113-Linux.sh (deleted, 100644 → 0)

#!/bin/bash

# Strip the periods from the version number
OS_VERSION=$(echo $(lsb_release -sr) | tr -d .)
OS=ubuntu${OS_VERSION}

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb
sudo dpkg -i cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb

# TODO: If on version < 22.04, install via signal-desktop-keyring
# For future versions it's deprecated and should be moved into the trusted folder
# sudo mv /var/cuda-repo-${OS}-11-3-local/7fa2af80.pub /etc/apt/trusted.gpg.d/
sudo apt-key add /var/cuda-repo-${OS}-11-3-local/7fa2af80.pub

sudo apt-get -qq update
sudo apt install cuda cuda-nvcc-11-3 cuda-libraries-dev-11-3
sudo apt clean

rm -f https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb
\ No newline at end of file
.github/workflows/cuda/cu116-Linux-env.sh (deleted, 100644 → 0)

#!/bin/bash

CUDA_HOME=/usr/local/cuda-11.6
LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
PATH=${CUDA_HOME}/bin:${PATH}

export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
export CUDA_HOME=/usr/local/cuda-11.6
\ No newline at end of file
.github/workflows/cuda/cu116-Linux.sh (deleted, 100644 → 0)

#!/bin/bash

# Strip the periods from the version number
OS_VERSION=$(echo $(lsb_release -sr) | tr -d .)
OS=ubuntu${OS_VERSION}

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb
sudo dpkg -i cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb
sudo apt-key add /var/cuda-repo-${OS}-11-6-local/7fa2af80.pub

sudo apt-get -qq update
sudo apt install cuda cuda-nvcc-11-6 cuda-libraries-dev-11-6
sudo apt clean

rm -f https://developer.download.nvidia.com/compute/cuda/11.5.2/local_installers/cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb
\ No newline at end of file
.github/workflows/cuda/cu117-Linux-env.sh (deleted, 100644 → 0)

#!/bin/bash

CUDA_HOME=/usr/local/cuda-11.7
LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
PATH=${CUDA_HOME}/bin:${PATH}

export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
export CUDA_HOME=/usr/local/cuda-11.7
\ No newline at end of file
.github/workflows/cuda/cu117-Linux.sh (deleted, 100644 → 0)

#!/bin/bash

# Strip the periods from the version number
OS_VERSION=$(echo $(lsb_release -sr) | tr -d .)
OS=ubuntu${OS_VERSION}

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda-repo-${OS}-11-7-local_11.7.0-515.43.04-1_amd64.deb
sudo dpkg -i cuda-repo-${OS}-11-7-local_11.7.0-515.43.04-1_amd64.deb
sudo cp /var/cuda-repo-${OS}-11-7-local/cuda-*-keyring.gpg /usr/share/keyrings/

sudo apt-get -qq update
sudo apt install cuda cuda-nvcc-11-7 cuda-libraries-dev-11-7
sudo apt clean

rm -f https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda-repo-${OS}-11-7-local_11.7.0-515.43.04-1_amd64.deb
.github/workflows/cuda/cu120-Linux-env.sh (deleted, 100644 → 0)

#!/bin/bash

CUDA_HOME=/usr/local/cuda-12.0
LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
PATH=${CUDA_HOME}/bin:${PATH}

export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
export CUDA_HOME=/usr/local/cuda-12.0
\ No newline at end of file
.github/workflows/cuda/cu120-Linux.sh (deleted, 100644 → 0)

#!/bin/bash

# Strip the periods from the version number
OS_VERSION=$(echo $(lsb_release -sr) | tr -d .)
OS=ubuntu${OS_VERSION}

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda-repo-${OS}-12-0-local_12.0.0-525.60.13-1_amd64.deb
sudo dpkg -i cuda-repo-${OS}-12-0-local_12.0.0-525.60.13-1_amd64.deb
sudo cp /var/cuda-repo-${OS}-12-0-local/cuda-*-keyring.gpg /usr/share/keyrings/

sudo apt-get -qq update
sudo apt install cuda cuda-nvcc-12-0 cuda-libraries-dev-12-0
sudo apt clean

rm -f https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda-repo-${OS}-12-0-local_12.0.0-525.60.13-1_amd64.deb
.github/workflows/env.sh (deleted, 100644 → 0)

export LANG C.UTF-8
export OFED_VERSION=5.3-1.0.0.1

sudo apt-get update && \
    sudo apt-get install -y --no-install-recommends \
        software-properties-common \

sudo apt-get install -y --no-install-recommends \
        build-essential \
        apt-utils \
        ca-certificates \
        wget \
        git \
        vim \
        libssl-dev \
        curl \
        unzip \
        unrar \
        cmake \
        net-tools \
        sudo \
        autotools-dev \
        rsync \
        jq \
        openssh-server \
        tmux \
        screen \
        htop \
        pdsh \
        openssh-client \
        lshw \
        dmidecode \
        util-linux \
        automake \
        autoconf \
        libtool \
        net-tools \
        pciutils \
        libpci-dev \
        libaio-dev \
        libcap2 \
        libtinfo5 \
        fakeroot \
        devscripts \
        debhelper \
        nfs-common

# wget -O ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
#     chmod +x ~/miniconda.sh && \
#     ~/miniconda.sh -b -p /opt/conda && \
#     rm ~/miniconda.sh
# export PATH=/opt/conda/bin:$PATH
\ No newline at end of file
.github/workflows/publish.yml

@@ -7,116 +7,120 @@
 name: Build wheels and deploy

-#on:
-#  create:
-#    tags:
-#      - '**'
 on:
-  push
+  create:
+    tags:
+      - v*

 jobs:

-  # setup_release:
-  #   name: Create Release
-  #   runs-on: ubuntu-latest
-  #   steps:
-  #     - name: Get the tag version
-  #       id: extract_branch
-  #       run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/}
-  #       shell: bash
-  #     - name: Create Release
-  #       id: create_release
-  #       uses: actions/create-release@v1
-  #       env:
-  #         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-  #       with:
-  #         tag_name: ${{ steps.extract_branch.outputs.branch }}
-  #         release_name: ${{ steps.extract_branch.outputs.branch }}
+  setup_release:
+    name: Create Release
+    runs-on: ubuntu-latest
+    steps:
+      - name: Get the tag version
+        id: extract_branch
+        run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/}
+        shell: bash
+      - name: Create Release
+        id: create_release
+        uses: actions/create-release@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          tag_name: ${{ steps.extract_branch.outputs.branch }}
+          release_name: ${{ steps.extract_branch.outputs.branch }}

   build_wheels:
     name: Build Wheel
+    needs: setup_release
     runs-on: ${{ matrix.os }}
-    #needs: setup_release

     strategy:
       fail-fast: false
       matrix:
-          os: [ubuntu-20.04, ubuntu-22.04]
-          #python-version: ['3.7', '3.8', '3.9', '3.10']
-          #torch-version: ['1.11.0', '1.12.0', '1.13.0', '2.0.1']
-          #cuda-version: ['113', '116', '117', '120']
-          python-version: ['3.10']
-          torch-version: ['2.0.1']
-          cuda-version: ['120']
+          # Using ubuntu-20.04 instead of 22.04 for more compatibility (glibc). Ideally we'd use the
+          # manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
+          os: [ubuntu-20.04]
+          python-version: ['3.7', '3.8', '3.9', '3.10']
+          torch-version: ['1.12.1', '1.13.1', '2.0.1', '2.1.0.dev20230731']
+          cuda-version: ['11.6.2', '11.7.1', '11.8.0', '12.1.0']
+          # We need separate wheels that either uses C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
+          # Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.
+          # Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
+          # when building without C++11 ABI and using it on nvcr images.
+          cxx11_abi: ['FALSE', 'TRUE']
           exclude:
-            # Nvidia only supports 11.7+ for ubuntu-22.04
-            - os: ubuntu-22.04
-              cuda-version: '116'
-            - os: ubuntu-22.04
-              cuda-version: '113'
-            # Torch only builds cuda 117 for 1.13.0+
-            - cuda-version: '117'
-              torch-version: '1.11.0'
-            - cuda-version: '117'
-              torch-version: '1.12.0'
-            # Torch only builds cuda 116 for 1.12.0+
-            - cuda-version: '116'
-              torch-version: '1.11.0'
-            # Torch only builds cuda 120 for 2.0.1+
-            - cuda-version: '120'
-              torch-version: '1.11.0'
-            - cuda-version: '120'
-              torch-version: '1.12.0'
-            - cuda-version: '120'
-              torch-version: '1.13.0'
-            # 1.13.0 drops support for cuda 11.3
-            - cuda-version: '113'
-              torch-version: '1.13.0'
-            - cuda-version: '113'
-              torch-version: '2.0.1'
-            # Fails with "Validation Error" on artifact upload
-            - cuda-version: '117'
-              torch-version: '1.13.0'
-              os: ubuntu-20.04
+            # Pytorch >= 2.0 only supports Python >= 3.8
+            - torch-version: '2.0.1'
+              python-version: '3.7'
+            - torch-version: '2.1.0.dev20230731'
+              python-version: '3.7'
+            # Pytorch <= 2.0 only supports CUDA <= 11.8
+            - torch-version: '1.12.1'
+              cuda-version: '12.1.0'
+            - torch-version: '1.13.1'
+              cuda-version: '12.1.0'
+            - torch-version: '2.0.1'
+              cuda-version: '12.1.0'
+            # Pytorch >= 2.1 only supports CUDA 12.1
+            - torch-version: '2.1.0.dev20230731'
+              cuda-version: '11.6.2'
+            - torch-version: '2.1.0.dev20230731'
+              cuda-version: '11.7.1'
+            - torch-version: '2.1.0.dev20230731'
+              cuda-version: '11.8.0'

     steps:
       - name: Checkout
         uses: actions/checkout@v3

       - name: Set up Python
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}

-      - name: Set up Linux Env
+      - name: Set CUDA and PyTorch versions
+        run: |
+          echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
+          echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.torch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV
+
+      - name: Free up disk space
         if: ${{ runner.os == 'Linux' }}
         run: |
+          sudo rm -rf /usr/share/dotnet
-          bash .github/workflows/env.sh
-          echo ${{ needs.create_release.outputs.upload_url }}
-          echo ${{ needs.steps.extract_branch.outputs.upload_url }}
         shell: bash

       - name: Install CUDA ${{ matrix.cuda-version }}
         if: ${{ matrix.cuda-version != 'cpu' }}
-        run: |
-          bash .github/workflows/cuda/cu${{ matrix.cuda-version }}-${{ runner.os }}.sh
-        shell: bash
-
-      - name: Check GPU Env
-        if: ${{ matrix.cuda-version != 'cpu' }}
-        run: |
-          source .github/workflows/cuda/cu${{ matrix.cuda-version }}-${{ runner.os }}-env.sh
-          nvcc --version
-        shell: bash
+        uses: Jimver/cuda-toolkit@v0.2.11
+        id: cuda-toolkit
+        with:
+          cuda: ${{ matrix.cuda-version }}
+          linux-local-args: '["--toolkit"]'
+          # default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1
+          # method: ${{ (matrix.cuda-version == '11.8.0' || matrix.cuda-version == '12.1.0') && 'network' || 'local' }}
+          method: 'network'
+          # We need the cuda libraries (e.g. cuSparse, cuSolver) for compiling PyTorch extensions,
+          # not just nvcc
+          # sub-packages: '["nvcc"]'

       - name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }}
         run: |
-          pip install numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses && conda clean -ya
-          pip install --no-cache-dir torch==${{ matrix.torch-version }}
+          pip install --upgrade pip
+          # If we don't install before installing Pytorch, we get error for torch 2.0.1
+          # ERROR: Could not find a version that satisfies the requirement setuptools>=40.8.0 (from versions: none)
+          pip install lit
+          # We want to figure out the CUDA version to download pytorch
+          # e.g. we can have system CUDA version being 11.7 but if torch==1.12 then we need to download the wheel from cu116
+          # This code is ugly, maybe there's a better way to do this.
+          export TORCH_CUDA_VERSION=$(python -c "import os; minv = {'1.12': 113, '1.13': 116, '2.0': 117, '2.1': 121}[os.environ['MATRIX_TORCH_VERSION']]; maxv = {'1.12': 116, '1.13': 117, '2.0': 118, '2.1': 121}[os.environ['MATRIX_TORCH_VERSION']]; print(max(min(int(os.environ['MATRIX_CUDA_VERSION']), maxv), minv))")
+          if [[ ${{ matrix.torch-version }} == *"dev"* ]]; then
+            pip install --no-cache-dir --pre torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
+          else
+            pip install --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
+          fi
+          nvcc --version
           python --version
           python -c "import torch; print('PyTorch:', torch.__version__)"
           python -c "import torch; print('CUDA:', torch.version.cuda)"
@@ -124,16 +128,25 @@ jobs:
         shell: bash

-      # - name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }}
-      #   run: |
-      #     pip install numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses && conda clean -ya
-      #     pip install --no-index --no-cache-dir torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cu${{ matrix.cuda-version }}/torch_stable.html
-      #     python --version
-      #     python -c "import torch; print('PyTorch:', torch.__version__)"
-      #     python -c "import torch; print('CUDA:', torch.version.cuda)"
-      #     python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)"
-      #   shell:
-      #     bash
+      - name: Build wheel
+        run: |
+          # We want setuptools >= 49.6.0 otherwise we can't compile the extension if system CUDA version is 11.7 and pytorch cuda version is 11.6
+          # https://github.com/pytorch/pytorch/blob/664058fa83f1d8eede5d66418abff6e20bd76ca8/torch/utils/cpp_extension.py#L810
+          # However this still fails so I'm using a newer version of setuptools
+          pip install setuptools==68.0.0
+          pip install ninja packaging wheel
+          export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
+          export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+          # Limit MAX_JOBS otherwise the github runner goes OOM
+          MAX_JOBS=1 FLASH_ATTENTION_FORCE_BUILD="TRUE" FLASH_ATTENTION_FORCE_CXX11_ABI=${{ matrix.cxx11_abi}} python setup.py bdist_wheel --dist-dir=dist
+          tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ matrix.cxx11_abi }}
+          wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
+          ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
+          echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
+      - name: Log Built Wheels
+        run: |
+          ls dist

       - name: Get the tag version
         id: extract_branch
@@ -147,62 +160,45 @@ jobs:
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

-      - name: Build wheel
-        run: |
-          export FLASH_ATTENTION_FORCE_BUILD="TRUE"
-          export FORCE_CUDA="1"
-          export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
-          export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
-          export CUDA_INSTALL_DIR=/usr/local/cuda-11.3$CUDA_INSTALL_DIR
-          pip install ninja packaging setuptools wheel
-          python setup.py bdist_wheel --dist-dir=dist
-          tmpname=cu${{ matrix.cuda-version }}torch${{ matrix.torch-version }}
-          wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
-          ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
-          echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
-
-      - name: Log Built Wheels
-        run: |
-          ls dist
-
-      # - name: Upload Release Asset
-      #   id: upload_release_asset
-      #   uses: actions/upload-release-asset@v1
-      #   env:
-      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      #   with:
-      #     upload_url: ${{ steps.get_current_release.outputs.upload_url }}
-      #     asset_path: ./dist/${{env.wheel_name}}
-      #     asset_name: ${{env.wheel_name}}
-      #     asset_content_type: application/*
-
-      # publish_package:
-      #   name: Publish package
-      #   needs: [build_wheels]
-      #   runs-on: ubuntu-latest
-      #   steps:
-      #     - uses: actions/checkout@v3
-      #     - uses: actions/setup-python@v4
-      #       with:
-      #         python-version: '3.10'
-      #     - name: Install dependencies
-      #       run: |
-      #         pip install ninja packaging setuptools wheel twine
-      #         pip install torch
-      #     - name: Build core package
-      #       env:
-      #         FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE"
-      #       run: |
-      #         python setup.py sdist --dist-dir=dist
-      #     - name: Deploy
-      #       env:
-      #         TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
-      #         TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
-      #       run: |
-      #         python -m twine upload dist/*
+      - name: Upload Release Asset
+        id: upload_release_asset
+        uses: actions/upload-release-asset@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          upload_url: ${{ steps.get_current_release.outputs.upload_url }}
+          asset_path: ./dist/${{env.wheel_name}}
+          asset_name: ${{env.wheel_name}}
+          asset_content_type: application/*
+
+  publish_package:
+    name: Publish package
+    needs: [build_wheels]
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      - name: Install dependencies
+        run: |
+          pip install ninja packaging setuptools wheel twine
+          # We don't want to download anything CUDA-related here
+          pip install torch --index-url https://download.pytorch.org/whl/cpu
+      - name: Build core package
+        env:
+          FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE"
+        run: |
+          python setup.py sdist --dist-dir=dist
+      - name: Deploy
+        env:
+          TWINE_USERNAME: "__token__"
+          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+        run: |
+          python -m twine upload dist/*
setup.py

@@ -13,9 +13,10 @@ import subprocess
 import urllib.request
 import urllib.error
+from wheel.bdist_wheel import bdist_wheel as _bdist_wheel

 import torch
 from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension, CUDA_HOME
-from wheel.bdist_wheel import bdist_wheel as _bdist_wheel

 with open("README.md", "r", encoding="utf-8") as fh:
@@ -33,6 +34,8 @@ BASE_WHEEL_URL = "https://github.com/Dao-AILab/flash-attention/releases/download
 # SKIP_CUDA_BUILD: Intended to allow CI to use a simple `python setup.py sdist` run to copy over raw files, without any cuda compilation
 FORCE_BUILD = os.getenv("FLASH_ATTENTION_FORCE_BUILD", "FALSE") == "TRUE"
 SKIP_CUDA_BUILD = os.getenv("FLASH_ATTENTION_SKIP_CUDA_BUILD", "FALSE") == "TRUE"
+# For CI, we want the option to build with C++11 ABI since the nvcr images use C++11 ABI
+FORCE_CXX11_ABI = os.getenv("FLASH_ATTENTION_FORCE_CXX11_ABI", "FALSE") == "TRUE"


 def get_platform():
@@ -101,26 +104,27 @@ if not torch.cuda.is_available():
     print(
         "\nWarning: Torch did not find available GPUs on this system.\n",
         "If your intention is to cross-compile, this is not an error.\n"
-        "By default, Apex will cross-compile for Pascal (compute capabilities 6.0, 6.1, 6.2),\n"
-        "Volta (compute capability 7.0), Turing (compute capability 7.5),\n"
-        "and, if the CUDA version is >= 11.0, Ampere (compute capability 8.0).\n"
+        "By default, FlashAttention will cross-compile for Ampere (compute capability 8.0, 8.6, "
+        "8.9), and, if the CUDA version is >= 11.8, Hopper (compute capability 9.0).\n"
         "If you wish to cross-compile for a single specific architecture,\n"
         'export TORCH_CUDA_ARCH_LIST="compute capability" before running setup.py.\n',
     )
     if os.environ.get("TORCH_CUDA_ARCH_LIST", None) is None and CUDA_HOME is not None:
         _, bare_metal_version = get_cuda_bare_metal_version(CUDA_HOME)
         if bare_metal_version >= Version("11.8"):
-            os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0;8.6;9.0"
-        elif bare_metal_version >= Version("11.1"):
-            os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0;8.6"
-        elif bare_metal_version == Version("11.0"):
-            os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0"
+            os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6;9.0"
+        elif bare_metal_version >= Version("11.4"):
+            os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
         else:
-            os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5"
+            os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"


 cmdclass = {}
 ext_modules = []

+# We want this even if SKIP_CUDA_BUILD because when we run python setup.py sdist we want the .hpp
+# files included in the source distribution, in case the user compiles from source.
+subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"])
+
 if not SKIP_CUDA_BUILD:
     print("\n\ntorch.__version__  = {}\n\n".format(torch.__version__))
     TORCH_MAJOR = int(torch.__version__.split(".")[0])
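Isolated from the diff markers, the added arch-list defaulting reduces to a small function of the nvcc version. A sketch mirroring the added branches (Version is the packaging class already used in setup.py; the 11.4 cutoff matches the RuntimeError check later in the file):

from packaging.version import Version

def default_arch_list(bare_metal_version: Version) -> str:
    # Mirrors the added branches: Ampere everywhere, plus Hopper on CUDA >= 11.8.
    if bare_metal_version >= Version("11.8"):
        return "8.0;8.6;9.0"
    elif bare_metal_version >= Version("11.4"):
        return "8.0;8.6"
    else:
        return "8.0;8.6"

print(default_arch_list(Version("12.0")))  # -> 8.0;8.6;9.0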
@@ -137,8 +141,8 @@ if not SKIP_CUDA_BUILD:
     # Check, if CUDA11 is installed for compute capability 8.0
     cc_flag = []
     _, bare_metal_version = get_cuda_bare_metal_version(CUDA_HOME)
-    if bare_metal_version < Version("11.0"):
-        raise RuntimeError("FlashAttention is only supported on CUDA 11 and above")
+    if bare_metal_version < Version("11.4"):
+        raise RuntimeError("FlashAttention is only supported on CUDA 11.4 and above")
     # cc_flag.append("-gencode")
     # cc_flag.append("arch=compute_75,code=sm_75")
     cc_flag.append("-gencode")
@@ -147,7 +151,11 @@
     cc_flag.append("-gencode")
     cc_flag.append("arch=compute_90,code=sm_90")

-    subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"])
+    # HACK: The compiler flag -D_GLIBCXX_USE_CXX11_ABI is set to be the same as
+    # torch._C._GLIBCXX_USE_CXX11_ABI
+    # https://github.com/pytorch/pytorch/blob/8472c24e3b5b60150096486616d98b7bea01500b/torch/utils/cpp_extension.py#L920
+    if FORCE_CXX11_ABI:
+        torch._C._GLIBCXX_USE_CXX11_ABI = True
     ext_modules.append(
         CUDAExtension(
             name="flash_attn_2_cuda",
@@ -213,6 +221,7 @@
                 Path(this_dir) / 'csrc' / 'cutlass' / 'include',
             ],
         )
     )
+

 def get_package_version():
@@ -232,7 +241,6 @@ class CachedWheelsCommand(_bdist_wheel):
     find an existing wheel (which is currently the case for all flash attention installs). We use
     the environment parameters to detect whether there is already a pre-built version of a compatible
     wheel available and short-circuits the standard full build pipeline.
     """
-
     def run(self):
         if FORCE_BUILD:
@@ -241,16 +249,20 @@ class CachedWheelsCommand(_bdist_wheel):
         raise_if_cuda_home_none("flash_attn")

         # Determine the version numbers that will be used to determine the correct wheel
-        _, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME)
+        # We're using the CUDA version used to build torch, not the one currently installed
+        # _, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME)
+        torch_cuda_version = parse(torch.version.cuda)
         torch_version_raw = parse(torch.__version__)
         python_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
         platform_name = get_platform()
         flash_version = get_package_version()
-        cuda_version = f"{cuda_version_raw.major}{cuda_version_raw.minor}"
-        torch_version = f"{torch_version_raw.major}.{torch_version_raw.minor}.{torch_version_raw.micro}"
+        # cuda_version = f"{cuda_version_raw.major}{cuda_version_raw.minor}"
+        cuda_version = f"{torch_cuda_version.major}{torch_cuda_version.minor}"
+        torch_version = f"{torch_version_raw.major}.{torch_version_raw.minor}"
+        cxx11_abi = str(torch._C._GLIBCXX_USE_CXX11_ABI).upper()

         # Determine wheel URL based on CUDA version, torch version, python version and OS
-        wheel_filename = f'{PACKAGE_NAME}-{flash_version}+cu{cuda_version}torch{torch_version}-{python_version}-{python_version}-{platform_name}.whl'
+        wheel_filename = f'{PACKAGE_NAME}-{flash_version}+cu{cuda_version}torch{torch_version}cxx11abi{cxx11_abi}-{python_version}-{python_version}-{platform_name}.whl'
         wheel_url = BASE_WHEEL_URL.format(tag_name=f"v{flash_version}", wheel_name=wheel_filename)
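Taken together, the new CachedWheelsCommand keys the wheel filename (and hence the release download URL) on torch's bundled CUDA version plus the C++11-ABI flag. A hedged sketch with illustrative values; the {tag_name}/{wheel_name} placeholders in BASE_WHEEL_URL are an assumption consistent with the final .format call, since the hunk header truncates the string:

PACKAGE_NAME = "flash_attn"
BASE_WHEEL_URL = ("https://github.com/Dao-AILab/flash-attention/"
                  "releases/download/{tag_name}/{wheel_name}")  # assumed placeholders

flash_version = "2.0.4"      # illustrative get_package_version() result
cuda_version = "118"         # from torch.version.cuda ("11.8"), not from nvcc
torch_version = "2.0"        # major.minor only, so one wheel serves all 2.0.x
cxx11_abi = "FALSE"          # str(torch._C._GLIBCXX_USE_CXX11_ABI).upper()
python_version = "cp310"
platform_name = "linux_x86_64"

wheel_filename = (f"{PACKAGE_NAME}-{flash_version}+cu{cuda_version}"
                  f"torch{torch_version}cxx11abi{cxx11_abi}"
                  f"-{python_version}-{python_version}-{platform_name}.whl")
print(BASE_WHEEL_URL.format(tag_name=f"v{flash_version}", wheel_name=wheel_filename))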
@@ -279,7 +291,6 @@
 setup(
-    # @pierce - TODO: Revert for official release
     name=PACKAGE_NAME,
     version=get_package_version(),
     packages=find_packages(