Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
flash-attention
Commits
2ddeaa40
Commit
2ddeaa40
authored
Aug 13, 2023
by
Tri Dao
Browse files
Fix wheel building
parent
d8ec6a2f
Changes
13
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
180 additions
and
363 deletions
+180
-363
.github/workflows/cuda/cu102-Linux-env.sh
.github/workflows/cuda/cu102-Linux-env.sh
+0
-9
.github/workflows/cuda/cu102-Linux.sh
.github/workflows/cuda/cu102-Linux.sh
+0
-17
.github/workflows/cuda/cu113-Linux-env.sh
.github/workflows/cuda/cu113-Linux-env.sh
+0
-9
.github/workflows/cuda/cu113-Linux.sh
.github/workflows/cuda/cu113-Linux.sh
+0
-21
.github/workflows/cuda/cu116-Linux-env.sh
.github/workflows/cuda/cu116-Linux-env.sh
+0
-9
.github/workflows/cuda/cu116-Linux.sh
.github/workflows/cuda/cu116-Linux.sh
+0
-18
.github/workflows/cuda/cu117-Linux-env.sh
.github/workflows/cuda/cu117-Linux-env.sh
+0
-9
.github/workflows/cuda/cu117-Linux.sh
.github/workflows/cuda/cu117-Linux.sh
+0
-18
.github/workflows/cuda/cu120-Linux-env.sh
.github/workflows/cuda/cu120-Linux-env.sh
+0
-9
.github/workflows/cuda/cu120-Linux.sh
.github/workflows/cuda/cu120-Linux.sh
+0
-18
.github/workflows/env.sh
.github/workflows/env.sh
+0
-53
.github/workflows/publish.yml
.github/workflows/publish.yml
+143
-147
setup.py
setup.py
+37
-26
No files found.
.github/workflows/cuda/cu102-Linux-env.sh
deleted
100644 → 0
View file @
d8ec6a2f
#!/bin/bash
CUDA_HOME
=
/usr/local/cuda-10.2
LD_LIBRARY_PATH
=
${
CUDA_HOME
}
/lib64:
${
LD_LIBRARY_PATH
}
PATH
=
${
CUDA_HOME
}
/bin:
${
PATH
}
export
FORCE_CUDA
=
1
export
TORCH_CUDA_ARCH_LIST
=
"3.5;5.0+PTX;6.0;7.0;7.5"
export
CUDA_HOME
=
/usr/local/cuda-10.2
\ No newline at end of file
.github/workflows/cuda/cu102-Linux.sh
deleted
100644 → 0
View file @
d8ec6a2f
#!/bin/bash
# Strip the periods from the version number
OS_VERSION
=
$(
echo
$(
lsb_release
-sr
)
|
tr
-d
.
)
OS
=
ubuntu
${
OS_VERSION
}
wget
-nv
https://developer.download.nvidia.com/compute/cuda/repos/
${
OS
}
/x86_64/cuda-
${
OS
}
.pin
sudo mv
cuda-
${
OS
}
.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget
-nv
https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda-repo-
${
OS
}
-10-2-local-10
.2.89-440.33.01_1.0-1_amd64.deb
sudo
dpkg
-i
cuda-repo-
${
OS
}
-10-2-local-10
.2.89-440.33.01_1.0-1_amd64.deb
sudo
apt-key add /var/cuda-repo-10-2-local-10.2.89-440.33.01/7fa2af80.pub
sudo
apt-get
-qq
update
sudo
apt
install
cuda cuda-nvcc-10-2 cuda-libraries-dev-10-2
sudo
apt clean
rm
-f
https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda-repo-
${
OS
}
-10-2-local-10
.2.89-440.33.01_1.0-1_amd64.deb
\ No newline at end of file
.github/workflows/cuda/cu113-Linux-env.sh
deleted
100644 → 0
View file @
d8ec6a2f
#!/bin/bash
CUDA_HOME
=
/usr/local/cuda-11.3
LD_LIBRARY_PATH
=
${
CUDA_HOME
}
/lib64:
${
LD_LIBRARY_PATH
}
PATH
=
${
CUDA_HOME
}
/bin:
${
PATH
}
export
FORCE_CUDA
=
1
export
TORCH_CUDA_ARCH_LIST
=
"3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
export
CUDA_HOME
=
/usr/local/cuda-11.3
\ No newline at end of file
.github/workflows/cuda/cu113-Linux.sh
deleted
100644 → 0
View file @
d8ec6a2f
#!/bin/bash
# Strip the periods from the version number
OS_VERSION
=
$(
echo
$(
lsb_release
-sr
)
|
tr
-d
.
)
OS
=
ubuntu
${
OS_VERSION
}
wget
-nv
https://developer.download.nvidia.com/compute/cuda/repos/
${
OS
}
/x86_64/cuda-
${
OS
}
.pin
sudo mv
cuda-
${
OS
}
.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget
-nv
https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda-repo-
${
OS
}
-11-3-local_11
.3.0-465.19.01-1_amd64.deb
sudo
dpkg
-i
cuda-repo-
${
OS
}
-11-3-local_11
.3.0-465.19.01-1_amd64.deb
# TODO: If on version < 22.04, install via signal-desktop-keyring
# For future versions it's deprecated and should be moved into the trusted folder
# sudo mv /var/cuda-repo-${OS}-11-3-local/7fa2af80.pub /etc/apt/trusted.gpg.d/
sudo
apt-key add /var/cuda-repo-
${
OS
}
-11-3-local
/7fa2af80.pub
sudo
apt-get
-qq
update
sudo
apt
install
cuda cuda-nvcc-11-3 cuda-libraries-dev-11-3
sudo
apt clean
rm
-f
https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda-repo-
${
OS
}
-11-3-local_11
.3.0-465.19.01-1_amd64.deb
\ No newline at end of file
.github/workflows/cuda/cu116-Linux-env.sh
deleted
100644 → 0
View file @
d8ec6a2f
#!/bin/bash
CUDA_HOME
=
/usr/local/cuda-11.6
LD_LIBRARY_PATH
=
${
CUDA_HOME
}
/lib64:
${
LD_LIBRARY_PATH
}
PATH
=
${
CUDA_HOME
}
/bin:
${
PATH
}
export
FORCE_CUDA
=
1
export
TORCH_CUDA_ARCH_LIST
=
"3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
export
CUDA_HOME
=
/usr/local/cuda-11.6
\ No newline at end of file
.github/workflows/cuda/cu116-Linux.sh
deleted
100644 → 0
View file @
d8ec6a2f
#!/bin/bash
# Strip the periods from the version number
OS_VERSION
=
$(
echo
$(
lsb_release
-sr
)
|
tr
-d
.
)
OS
=
ubuntu
${
OS_VERSION
}
wget
-nv
https://developer.download.nvidia.com/compute/cuda/repos/
${
OS
}
/x86_64/cuda-
${
OS
}
.pin
sudo mv
cuda-
${
OS
}
.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget
-nv
https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda-repo-
${
OS
}
-11-6-local_11
.6.2-510.47.03-1_amd64.deb
sudo
dpkg
-i
cuda-repo-
${
OS
}
-11-6-local_11
.6.2-510.47.03-1_amd64.deb
sudo
apt-key add /var/cuda-repo-
${
OS
}
-11-6-local
/7fa2af80.pub
sudo
apt-get
-qq
update
sudo
apt
install
cuda cuda-nvcc-11-6 cuda-libraries-dev-11-6
sudo
apt clean
rm
-f
https://developer.download.nvidia.com/compute/cuda/11.5.2/local_installers/cuda-repo-
${
OS
}
-11-6-local_11
.6.2-510.47.03-1_amd64.deb
\ No newline at end of file
.github/workflows/cuda/cu117-Linux-env.sh
deleted
100644 → 0
View file @
d8ec6a2f
#!/bin/bash
CUDA_HOME
=
/usr/local/cuda-11.7
LD_LIBRARY_PATH
=
${
CUDA_HOME
}
/lib64:
${
LD_LIBRARY_PATH
}
PATH
=
${
CUDA_HOME
}
/bin:
${
PATH
}
export
FORCE_CUDA
=
1
export
TORCH_CUDA_ARCH_LIST
=
"3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
export
CUDA_HOME
=
/usr/local/cuda-11.7
\ No newline at end of file
.github/workflows/cuda/cu117-Linux.sh
deleted
100644 → 0
View file @
d8ec6a2f
#!/bin/bash
# Strip the periods from the version number
OS_VERSION
=
$(
echo
$(
lsb_release
-sr
)
|
tr
-d
.
)
OS
=
ubuntu
${
OS_VERSION
}
wget
-nv
https://developer.download.nvidia.com/compute/cuda/repos/
${
OS
}
/x86_64/cuda-
${
OS
}
.pin
sudo mv
cuda-
${
OS
}
.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget
-nv
https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda-repo-
${
OS
}
-11-7-local_11
.7.0-515.43.04-1_amd64.deb
sudo
dpkg
-i
cuda-repo-
${
OS
}
-11-7-local_11
.7.0-515.43.04-1_amd64.deb
sudo cp
/var/cuda-repo-
${
OS
}
-11-7-local
/cuda-
*
-keyring
.gpg /usr/share/keyrings/
sudo
apt-get
-qq
update
sudo
apt
install
cuda cuda-nvcc-11-7 cuda-libraries-dev-11-7
sudo
apt clean
rm
-f
https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda-repo-
${
OS
}
-11-7-local_11
.7.0-515.43.04-1_amd64.deb
.github/workflows/cuda/cu120-Linux-env.sh
deleted
100644 → 0
View file @
d8ec6a2f
#!/bin/bash
CUDA_HOME
=
/usr/local/cuda-12.0
LD_LIBRARY_PATH
=
${
CUDA_HOME
}
/lib64:
${
LD_LIBRARY_PATH
}
PATH
=
${
CUDA_HOME
}
/bin:
${
PATH
}
export
FORCE_CUDA
=
1
export
TORCH_CUDA_ARCH_LIST
=
"3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
export
CUDA_HOME
=
/usr/local/cuda-12.0
\ No newline at end of file
.github/workflows/cuda/cu120-Linux.sh
deleted
100644 → 0
View file @
d8ec6a2f
#!/bin/bash
# Strip the periods from the version number
OS_VERSION
=
$(
echo
$(
lsb_release
-sr
)
|
tr
-d
.
)
OS
=
ubuntu
${
OS_VERSION
}
wget
-nv
https://developer.download.nvidia.com/compute/cuda/repos/
${
OS
}
/x86_64/cuda-
${
OS
}
.pin
sudo mv
cuda-
${
OS
}
.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget
-nv
https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda-repo-
${
OS
}
-12-0-local_12
.0.0-525.60.13-1_amd64.deb
sudo
dpkg
-i
cuda-repo-
${
OS
}
-12-0-local_12
.0.0-525.60.13-1_amd64.deb
sudo cp
/var/cuda-repo-
${
OS
}
-12-0-local
/cuda-
*
-keyring
.gpg /usr/share/keyrings/
sudo
apt-get
-qq
update
sudo
apt
install
cuda cuda-nvcc-12-0 cuda-libraries-dev-12-0
sudo
apt clean
rm
-f
https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda-repo-
${
OS
}
-12-0-local_12
.0.0-525.60.13-1_amd64.deb
.github/workflows/env.sh
deleted
100644 → 0
View file @
d8ec6a2f
export
LANG C.UTF-8
export
OFED_VERSION
=
5.3-1.0.0.1
sudo
apt-get update
&&
\
sudo
apt-get
install
-y
--no-install-recommends
\
software-properties-common
\
sudo
apt-get
install
-y
--no-install-recommends
\
build-essential
\
apt-utils
\
ca-certificates
\
wget
\
git
\
vim
\
libssl-dev
\
curl
\
unzip
\
unrar
\
cmake
\
net-tools
\
sudo
\
autotools-dev
\
rsync
\
jq
\
openssh-server
\
tmux
\
screen
\
htop
\
pdsh
\
openssh-client
\
lshw
\
dmidecode
\
util-linux
\
automake
\
autoconf
\
libtool
\
net-tools
\
pciutils
\
libpci-dev
\
libaio-dev
\
libcap2
\
libtinfo5
\
fakeroot
\
devscripts
\
debhelper
\
nfs-common
# wget -O ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
# chmod +x ~/miniconda.sh && \
# ~/miniconda.sh -b -p /opt/conda && \
# rm ~/miniconda.sh
# export PATH=/opt/conda/bin:$PATH
\ No newline at end of file
.github/workflows/publish.yml
View file @
2ddeaa40
...
@@ -7,116 +7,120 @@
...
@@ -7,116 +7,120 @@
name
:
Build wheels and deploy
name
:
Build wheels and deploy
#on:
# create:
# tags:
# - '**'
on
:
on
:
push
create
:
tags
:
-
v*
jobs
:
jobs
:
# setup_release:
# name: Create Release
setup_release
:
# runs-on: ubuntu-latest
name
:
Create Release
# steps:
runs-on
:
ubuntu-latest
# - name: Get the tag version
steps
:
# id: extract_branch
-
name
:
Get the tag version
# run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/}
id
:
extract_branch
# shell: bash
run
:
echo ::set-output name=branch::${GITHUB_REF#refs/tags/}
shell
:
bash
# - name: Create Release
# id: create_release
-
name
:
Create Release
# uses: actions/create-release@v1
id
:
create_release
# env:
uses
:
actions/create-release@v1
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
env
:
# with:
GITHUB_TOKEN
:
${{ secrets.GITHUB_TOKEN }}
# tag_name: ${{ steps.extract_branch.outputs.branch }}
with
:
# release_name: ${{ steps.extract_branch.outputs.branch }}
tag_name
:
${{ steps.extract_branch.outputs.branch }}
release_name
:
${{ steps.extract_branch.outputs.branch }}
build_wheels
:
build_wheels
:
name
:
Build Wheel
name
:
Build Wheel
needs
:
setup_release
runs-on
:
${{ matrix.os }}
runs-on
:
${{ matrix.os }}
#needs: setup_release
strategy
:
strategy
:
fail-fast
:
false
fail-fast
:
false
matrix
:
matrix
:
os
:
[
ubuntu-20.04
,
ubuntu-22.04
]
# Using ubuntu-20.04 instead of 22.04 for more compatibility (glibc). Ideally we'd use the
#python-version: ['3.7', '3.8', '3.9', '3.10']
# manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
#torch-version: ['1.11.0', '1.12.0', '1.13.0', '2.0.1']
os
:
[
ubuntu-20.04
]
#cuda-version: ['113', '116', '117', '120']
python-version
:
[
'
3.7'
,
'
3.8'
,
'
3.9'
,
'
3.10'
]
python-version
:
[
'
3.10'
]
torch-version
:
[
'
1.12.1'
,
'
1.13.1'
,
'
2.0.1'
,
'
2.1.0.dev20230731'
]
torch-version
:
[
'
2.0.1'
]
cuda-version
:
[
'
11.6.2'
,
'
11.7.1'
,
'
11.8.0'
,
'
12.1.0'
]
cuda-version
:
[
'
120'
]
# We need separate wheels that either uses C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
# Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.
# Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
# when building without C++11 ABI and using it on nvcr images.
cxx11_abi
:
[
'
FALSE'
,
'
TRUE'
]
exclude
:
exclude
:
# Nvidia only supports 11.7+ for ubuntu-22.04
# Pytorch >= 2.0 only supports Python >= 3.8
-
os
:
ubuntu-22.04
-
torch-version
:
'
2.0.1'
cuda-version
:
'
116'
python-version
:
'
3.7'
-
os
:
ubuntu-22.04
-
torch-version
:
'
2.1.0.dev20230731'
cuda-version
:
'
113'
python-version
:
'
3.7'
# Torch only builds cuda 117 for 1.13.0+
# Pytorch <= 2.0 only supports CUDA <= 11.8
-
cuda-version
:
'
117'
-
torch-version
:
'
1.12.1'
torch-version
:
'
1.11.0'
cuda-version
:
'
12.1.0'
-
cuda-version
:
'
117'
-
torch-version
:
'
1.13.1'
torch-version
:
'
1.12.0'
cuda-version
:
'
12.1.0'
# Torch only builds cuda 116 for 1.12.0+
-
torch-version
:
'
2.0.1'
-
cuda-version
:
'
116'
cuda-version
:
'
12.1.0'
torch-version
:
'
1.11.0'
# Pytorch >= 2.1 only supports CUDA 12.1
# Torch only builds cuda 120 for 2.0.1+
-
torch-version
:
'
2.1.0.dev20230731'
-
cuda-version
:
'
120'
cuda-version
:
'
11.6.2'
torch-version
:
'
1.11.0'
-
torch-version
:
'
2.1.0.dev20230731'
-
cuda-version
:
'
120'
cuda-version
:
'
11.7.1'
torch-version
:
'
1.12.0'
-
torch-version
:
'
2.1.0.dev20230731'
-
cuda-version
:
'
120'
cuda-version
:
'
11.8.0'
torch-version
:
'
1.13.0'
# 1.13.0 drops support for cuda 11.3
-
cuda-version
:
'
113'
torch-version
:
'
1.13.0'
-
cuda-version
:
'
113'
torch-version
:
'
2.0.1'
# Fails with "Validation Error" on artifact upload
-
cuda-version
:
'
117'
torch-version
:
'
1.13.0'
os
:
ubuntu-20.04
steps
:
steps
:
-
name
:
Checkout
-
name
:
Checkout
uses
:
actions/checkout@v3
uses
:
actions/checkout@v3
-
name
:
Set up Python
-
name
:
Set up Python
uses
:
actions/setup-python@v
3
uses
:
actions/setup-python@v
4
with
:
with
:
python-version
:
${{ matrix.python-version }}
python-version
:
${{ matrix.python-version }}
-
name
:
Set up Linux Env
-
name
:
Set CUDA and PyTorch versions
run
:
|
echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.torch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV
-
name
:
Free up disk space
if
:
${{ runner.os == 'Linux' }}
if
:
${{ runner.os == 'Linux' }}
run
:
|
run
:
|
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/share/dotnet
bash .github/workflows/env.sh
echo ${{ needs.create_release.outputs.upload_url }}
echo ${{ needs.steps.extract_branch.outputs.upload_url }}
shell
:
bash
-
name
:
Install CUDA ${{ matrix.cuda-version }}
-
name
:
Install CUDA ${{ matrix.cuda-version }}
if
:
${{ matrix.cuda-version != 'cpu' }}
if
:
${{ matrix.cuda-version != 'cpu' }}
run
:
|
uses
:
Jimver/cuda-toolkit@v0.2.11
bash .github/workflows/cuda/cu${{ matrix.cuda-version }}-${{ runner.os }}.sh
id
:
cuda-toolkit
shell
:
with
:
bash
cuda
:
${{ matrix.cuda-version }}
linux-local-args
:
'
["--toolkit"]'
-
name
:
Check GPU Env
# default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1
if
:
${{ matrix.cuda-version != 'cpu' }}
# method: ${{ (matrix.cuda-version == '11.8.0' || matrix.cuda-version == '12.1.0') && 'network' || 'local' }}
run
:
|
method
:
'
network'
source .github/workflows/cuda/cu${{ matrix.cuda-version }}-${{ runner.os }}-env.sh
# We need the cuda libraries (e.g. cuSparse, cuSolver) for compiling PyTorch extensions,
nvcc --version
# not just nvcc
shell
:
# sub-packages: '["nvcc"]'
bash
-
name
:
Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }}
-
name
:
Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }}
run
:
|
run
:
|
pip install numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses && conda clean -ya
pip install --upgrade pip
pip install --no-cache-dir torch==${{ matrix.torch-version }}
# If we don't install before installing Pytorch, we get error for torch 2.0.1
# ERROR: Could not find a version that satisfies the requirement setuptools>=40.8.0 (from versions: none)
pip install lit
# We want to figure out the CUDA version to download pytorch
# e.g. we can have system CUDA version being 11.7 but if torch==1.12 then we need to download the wheel from cu116
# This code is ugly, maybe there's a better way to do this.
export TORCH_CUDA_VERSION=$(python -c "import os; minv = {'1.12': 113, '1.13': 116, '2.0': 117, '2.1': 121}[os.environ['MATRIX_TORCH_VERSION']]; maxv = {'1.12': 116, '1.13': 117, '2.0': 118, '2.1': 121}[os.environ['MATRIX_TORCH_VERSION']]; print(max(min(int(os.environ['MATRIX_CUDA_VERSION']), maxv), minv))")
if [[ ${{ matrix.torch-version }} == *"dev"* ]]; then
pip install --no-cache-dir --pre torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
else
pip install --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
fi
nvcc --version
python --version
python --version
python -c "import torch; print('PyTorch:', torch.__version__)"
python -c "import torch; print('PyTorch:', torch.__version__)"
python -c "import torch; print('CUDA:', torch.version.cuda)"
python -c "import torch; print('CUDA:', torch.version.cuda)"
...
@@ -124,16 +128,25 @@ jobs:
...
@@ -124,16 +128,25 @@ jobs:
shell
:
shell
:
bash
bash
# - name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }}
-
name
:
Build wheel
# run: |
run
:
|
# pip install numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses && conda clean -ya
# We want setuptools >= 49.6.0 otherwise we can't compile the extension if system CUDA version is 11.7 and pytorch cuda version is 11.6
# pip install --no-index --no-cache-dir torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cu${{ matrix.cuda-version }}/torch_stable.html
# https://github.com/pytorch/pytorch/blob/664058fa83f1d8eede5d66418abff6e20bd76ca8/torch/utils/cpp_extension.py#L810
# python --version
# However this still fails so I'm using a newer version of setuptools
# python -c "import torch; print('PyTorch:', torch.__version__)"
pip install setuptools==68.0.0
# python -c "import torch; print('CUDA:', torch.version.cuda)"
pip install ninja packaging wheel
# python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)"
export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
# shell:
export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
# bash
# Limit MAX_JOBS otherwise the github runner goes OOM
MAX_JOBS=1 FLASH_ATTENTION_FORCE_BUILD="TRUE" FLASH_ATTENTION_FORCE_CXX11_ABI=${{ matrix.cxx11_abi}} python setup.py bdist_wheel --dist-dir=dist
tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ matrix.cxx11_abi }}
wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
-
name
:
Log Built Wheels
run
:
|
ls dist
-
name
:
Get the tag version
-
name
:
Get the tag version
id
:
extract_branch
id
:
extract_branch
...
@@ -147,62 +160,45 @@ jobs:
...
@@ -147,62 +160,45 @@ jobs:
env
:
env
:
GITHUB_TOKEN
:
${{ secrets.GITHUB_TOKEN }}
GITHUB_TOKEN
:
${{ secrets.GITHUB_TOKEN }}
-
name
:
Build wheel
-
name
:
Upload Release Asset
id
:
upload_release_asset
uses
:
actions/upload-release-asset@v1
env
:
GITHUB_TOKEN
:
${{ secrets.GITHUB_TOKEN }}
with
:
upload_url
:
${{ steps.get_current_release.outputs.upload_url }}
asset_path
:
./dist/${{env.wheel_name}}
asset_name
:
${{env.wheel_name}}
asset_content_type
:
application/*
publish_package
:
name
:
Publish package
needs
:
[
build_wheels
]
runs-on
:
ubuntu-latest
steps
:
-
uses
:
actions/checkout@v3
-
uses
:
actions/setup-python@v4
with
:
python-version
:
'
3.10'
-
name
:
Install dependencies
run
:
|
run
:
|
export FLASH_ATTENTION_FORCE_BUILD="TRUE"
pip install ninja packaging setuptools wheel twine
export FORCE_CUDA="1"
# We don't want to download anything CUDA-related here
export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
pip install torch --index-url https://download.pytorch.org/whl/cpu
export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
export CUDA_INSTALL_DIR=/usr/local/cuda-11.3$CUDA_INSTALL_DIR
pip install ninja packaging setuptools wheel
python setup.py bdist_wheel --dist-dir=dist
tmpname=cu${{ matrix.cuda-version }}torch${{ matrix.torch-version }}
wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
-
name
:
Log Built Wheels
-
name
:
Build core package
env
:
FLASH_ATTENTION_SKIP_CUDA_BUILD
:
"
TRUE"
run
:
|
run
:
|
ls
dist
python setup.py sdist --dist-dir=
dist
# - name: Upload Release Asset
-
name
:
Deploy
# id: upload_release_asset
env
:
# uses: actions/upload-release-asset@v1
TWINE_USERNAME
:
"
__token__"
# env:
TWINE_PASSWORD
:
${{ secrets.PYPI_API_TOKEN }}
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run
:
|
# with:
python -m twine upload dist/*
# upload_url: ${{ steps.get_current_release.outputs.upload_url }}
# asset_path: ./dist/${{env.wheel_name}}
# asset_name: ${{env.wheel_name}}
# asset_content_type: application/*
# publish_package:
# name: Publish package
# needs: [build_wheels]
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v3
# - uses: actions/setup-python@v4
# with:
# python-version: '3.10'
# - name: Install dependencies
# run: |
# pip install ninja packaging setuptools wheel twine
# pip install torch
# - name: Build core package
# env:
# FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE"
# run: |
# python setup.py sdist --dist-dir=dist
# - name: Deploy
# env:
# TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
# TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
# run: |
# python -m twine upload dist/*
setup.py
View file @
2ddeaa40
...
@@ -13,9 +13,10 @@ import subprocess
...
@@ -13,9 +13,10 @@ import subprocess
import
urllib.request
import
urllib.request
import
urllib.error
import
urllib.error
from
wheel.bdist_wheel
import
bdist_wheel
as
_bdist_wheel
import
torch
import
torch
from
torch.utils.cpp_extension
import
BuildExtension
,
CppExtension
,
CUDAExtension
,
CUDA_HOME
from
torch.utils.cpp_extension
import
BuildExtension
,
CppExtension
,
CUDAExtension
,
CUDA_HOME
from
wheel.bdist_wheel
import
bdist_wheel
as
_bdist_wheel
with
open
(
"README.md"
,
"r"
,
encoding
=
"utf-8"
)
as
fh
:
with
open
(
"README.md"
,
"r"
,
encoding
=
"utf-8"
)
as
fh
:
...
@@ -33,6 +34,8 @@ BASE_WHEEL_URL = "https://github.com/Dao-AILab/flash-attention/releases/download
...
@@ -33,6 +34,8 @@ BASE_WHEEL_URL = "https://github.com/Dao-AILab/flash-attention/releases/download
# SKIP_CUDA_BUILD: Intended to allow CI to use a simple `python setup.py sdist` run to copy over raw files, without any cuda compilation
# SKIP_CUDA_BUILD: Intended to allow CI to use a simple `python setup.py sdist` run to copy over raw files, without any cuda compilation
FORCE_BUILD
=
os
.
getenv
(
"FLASH_ATTENTION_FORCE_BUILD"
,
"FALSE"
)
==
"TRUE"
FORCE_BUILD
=
os
.
getenv
(
"FLASH_ATTENTION_FORCE_BUILD"
,
"FALSE"
)
==
"TRUE"
SKIP_CUDA_BUILD
=
os
.
getenv
(
"FLASH_ATTENTION_SKIP_CUDA_BUILD"
,
"FALSE"
)
==
"TRUE"
SKIP_CUDA_BUILD
=
os
.
getenv
(
"FLASH_ATTENTION_SKIP_CUDA_BUILD"
,
"FALSE"
)
==
"TRUE"
# For CI, we want the option to build with C++11 ABI since the nvcr images use C++11 ABI
FORCE_CXX11_ABI
=
os
.
getenv
(
"FLASH_ATTENTION_FORCE_CXX11_ABI"
,
"FALSE"
)
==
"TRUE"
def
get_platform
():
def
get_platform
():
...
@@ -101,26 +104,27 @@ if not torch.cuda.is_available():
...
@@ -101,26 +104,27 @@ if not torch.cuda.is_available():
print
(
print
(
"
\n
Warning: Torch did not find available GPUs on this system.
\n
"
,
"
\n
Warning: Torch did not find available GPUs on this system.
\n
"
,
"If your intention is to cross-compile, this is not an error.
\n
"
"If your intention is to cross-compile, this is not an error.
\n
"
"By default, Apex will cross-compile for Pascal (compute capabilities 6.0, 6.1, 6.2),
\n
"
"By default, FlashAttention will cross-compile for Ampere (compute capability 8.0, 8.6, "
"Volta (compute capability 7.0), Turing (compute capability 7.5),
\n
"
"8.9), and, if the CUDA version is >= 11.8, Hopper (compute capability 9.0).
\n
"
"and, if the CUDA version is >= 11.0, Ampere (compute capability 8.0).
\n
"
"If you wish to cross-compile for a single specific architecture,
\n
"
"If you wish to cross-compile for a single specific architecture,
\n
"
'export TORCH_CUDA_ARCH_LIST="compute capability" before running setup.py.
\n
'
,
'export TORCH_CUDA_ARCH_LIST="compute capability" before running setup.py.
\n
'
,
)
)
if
os
.
environ
.
get
(
"TORCH_CUDA_ARCH_LIST"
,
None
)
is
None
and
CUDA_HOME
is
not
None
:
if
os
.
environ
.
get
(
"TORCH_CUDA_ARCH_LIST"
,
None
)
is
None
and
CUDA_HOME
is
not
None
:
_
,
bare_metal_version
=
get_cuda_bare_metal_version
(
CUDA_HOME
)
_
,
bare_metal_version
=
get_cuda_bare_metal_version
(
CUDA_HOME
)
if
bare_metal_version
>=
Version
(
"11.8"
):
if
bare_metal_version
>=
Version
(
"11.8"
):
os
.
environ
[
"TORCH_CUDA_ARCH_LIST"
]
=
"6.0;6.1;6.2;7.0;7.5;8.0;8.6;9.0"
os
.
environ
[
"TORCH_CUDA_ARCH_LIST"
]
=
"8.0;8.6;9.0"
elif
bare_metal_version
>=
Version
(
"11.1"
):
elif
bare_metal_version
>=
Version
(
"11.4"
):
os
.
environ
[
"TORCH_CUDA_ARCH_LIST"
]
=
"6.0;6.1;6.2;7.0;7.5;8.0;8.6"
os
.
environ
[
"TORCH_CUDA_ARCH_LIST"
]
=
"8.0;8.6"
elif
bare_metal_version
==
Version
(
"11.0"
):
os
.
environ
[
"TORCH_CUDA_ARCH_LIST"
]
=
"6.0;6.1;6.2;7.0;7.5;8.0"
else
:
else
:
os
.
environ
[
"TORCH_CUDA_ARCH_LIST"
]
=
"
6
.0;
6.1;6.2;7.0;7.5
"
os
.
environ
[
"TORCH_CUDA_ARCH_LIST"
]
=
"
8
.0;
8.6
"
cmdclass
=
{}
cmdclass
=
{}
ext_modules
=
[]
ext_modules
=
[]
# We want this even if SKIP_CUDA_BUILD because when we run python setup.py sdist we want the .hpp
# files included in the source distribution, in case the user compiles from source.
subprocess
.
run
([
"git"
,
"submodule"
,
"update"
,
"--init"
,
"csrc/cutlass"
])
if
not
SKIP_CUDA_BUILD
:
if
not
SKIP_CUDA_BUILD
:
print
(
"
\n\n
torch.__version__ = {}
\n\n
"
.
format
(
torch
.
__version__
))
print
(
"
\n\n
torch.__version__ = {}
\n\n
"
.
format
(
torch
.
__version__
))
TORCH_MAJOR
=
int
(
torch
.
__version__
.
split
(
"."
)[
0
])
TORCH_MAJOR
=
int
(
torch
.
__version__
.
split
(
"."
)[
0
])
...
@@ -137,8 +141,8 @@ if not SKIP_CUDA_BUILD:
...
@@ -137,8 +141,8 @@ if not SKIP_CUDA_BUILD:
# Check, if CUDA11 is installed for compute capability 8.0
# Check, if CUDA11 is installed for compute capability 8.0
cc_flag
=
[]
cc_flag
=
[]
_
,
bare_metal_version
=
get_cuda_bare_metal_version
(
CUDA_HOME
)
_
,
bare_metal_version
=
get_cuda_bare_metal_version
(
CUDA_HOME
)
if
bare_metal_version
<
Version
(
"11.
0
"
):
if
bare_metal_version
<
Version
(
"11.
4
"
):
raise
RuntimeError
(
"FlashAttention is only supported on CUDA 11 and above"
)
raise
RuntimeError
(
"FlashAttention is only supported on CUDA 11
.4
and above"
)
# cc_flag.append("-gencode")
# cc_flag.append("-gencode")
# cc_flag.append("arch=compute_75,code=sm_75")
# cc_flag.append("arch=compute_75,code=sm_75")
cc_flag
.
append
(
"-gencode"
)
cc_flag
.
append
(
"-gencode"
)
...
@@ -147,7 +151,11 @@ if not SKIP_CUDA_BUILD:
...
@@ -147,7 +151,11 @@ if not SKIP_CUDA_BUILD:
cc_flag
.
append
(
"-gencode"
)
cc_flag
.
append
(
"-gencode"
)
cc_flag
.
append
(
"arch=compute_90,code=sm_90"
)
cc_flag
.
append
(
"arch=compute_90,code=sm_90"
)
subprocess
.
run
([
"git"
,
"submodule"
,
"update"
,
"--init"
,
"csrc/cutlass"
])
# HACK: The compiler flag -D_GLIBCXX_USE_CXX11_ABI is set to be the same as
# torch._C._GLIBCXX_USE_CXX11_ABI
# https://github.com/pytorch/pytorch/blob/8472c24e3b5b60150096486616d98b7bea01500b/torch/utils/cpp_extension.py#L920
if
FORCE_CXX11_ABI
:
torch
.
_C
.
_GLIBCXX_USE_CXX11_ABI
=
True
ext_modules
.
append
(
ext_modules
.
append
(
CUDAExtension
(
CUDAExtension
(
name
=
"flash_attn_2_cuda"
,
name
=
"flash_attn_2_cuda"
,
...
@@ -213,6 +221,7 @@ if not SKIP_CUDA_BUILD:
...
@@ -213,6 +221,7 @@ if not SKIP_CUDA_BUILD:
Path
(
this_dir
)
/
'csrc'
/
'cutlass'
/
'include'
,
Path
(
this_dir
)
/
'csrc'
/
'cutlass'
/
'include'
,
],
],
)
)
)
def
get_package_version
():
def
get_package_version
():
...
@@ -232,7 +241,6 @@ class CachedWheelsCommand(_bdist_wheel):
...
@@ -232,7 +241,6 @@ class CachedWheelsCommand(_bdist_wheel):
find an existing wheel (which is currently the case for all flash attention installs). We use
find an existing wheel (which is currently the case for all flash attention installs). We use
the environment parameters to detect whether there is already a pre-built version of a compatible
the environment parameters to detect whether there is already a pre-built version of a compatible
wheel available and short-circuits the standard full build pipeline.
wheel available and short-circuits the standard full build pipeline.
"""
"""
def
run
(
self
):
def
run
(
self
):
if
FORCE_BUILD
:
if
FORCE_BUILD
:
...
@@ -241,16 +249,20 @@ class CachedWheelsCommand(_bdist_wheel):
...
@@ -241,16 +249,20 @@ class CachedWheelsCommand(_bdist_wheel):
raise_if_cuda_home_none
(
"flash_attn"
)
raise_if_cuda_home_none
(
"flash_attn"
)
# Determine the version numbers that will be used to determine the correct wheel
# Determine the version numbers that will be used to determine the correct wheel
_
,
cuda_version_raw
=
get_cuda_bare_metal_version
(
CUDA_HOME
)
# We're using the CUDA version used to build torch, not the one currently installed
# _, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME)
torch_cuda_version
=
parse
(
torch
.
version
.
cuda
)
torch_version_raw
=
parse
(
torch
.
__version__
)
torch_version_raw
=
parse
(
torch
.
__version__
)
python_version
=
f
"cp
{
sys
.
version_info
.
major
}{
sys
.
version_info
.
minor
}
"
python_version
=
f
"cp
{
sys
.
version_info
.
major
}{
sys
.
version_info
.
minor
}
"
platform_name
=
get_platform
()
platform_name
=
get_platform
()
flash_version
=
get_package_version
()
flash_version
=
get_package_version
()
cuda_version
=
f
"
{
cuda_version_raw
.
major
}{
cuda_version_raw
.
minor
}
"
# cuda_version = f"{cuda_version_raw.major}{cuda_version_raw.minor}"
torch_version
=
f
"
{
torch_version_raw
.
major
}
.
{
torch_version_raw
.
minor
}
.
{
torch_version_raw
.
micro
}
"
cuda_version
=
f
"
{
torch_cuda_version
.
major
}{
torch_cuda_version
.
minor
}
"
torch_version
=
f
"
{
torch_version_raw
.
major
}
.
{
torch_version_raw
.
minor
}
"
cxx11_abi
=
str
(
torch
.
_C
.
_GLIBCXX_USE_CXX11_ABI
).
upper
()
# Determine wheel URL based on CUDA version, torch version, python version and OS
# Determine wheel URL based on CUDA version, torch version, python version and OS
wheel_filename
=
f
'
{
PACKAGE_NAME
}
-
{
flash_version
}
+cu
{
cuda_version
}
torch
{
torch_version
}
-
{
python_version
}
-
{
python_version
}
-
{
platform_name
}
.whl'
wheel_filename
=
f
'
{
PACKAGE_NAME
}
-
{
flash_version
}
+cu
{
cuda_version
}
torch
{
torch_version
}
cxx11abi
{
cxx11_abi
}
-
{
python_version
}
-
{
python_version
}
-
{
platform_name
}
.whl'
wheel_url
=
BASE_WHEEL_URL
.
format
(
wheel_url
=
BASE_WHEEL_URL
.
format
(
tag_name
=
f
"v
{
flash_version
}
"
,
tag_name
=
f
"v
{
flash_version
}
"
,
wheel_name
=
wheel_filename
wheel_name
=
wheel_filename
...
@@ -279,7 +291,6 @@ class CachedWheelsCommand(_bdist_wheel):
...
@@ -279,7 +291,6 @@ class CachedWheelsCommand(_bdist_wheel):
setup
(
setup
(
# @pierce - TODO: Revert for official release
name
=
PACKAGE_NAME
,
name
=
PACKAGE_NAME
,
version
=
get_package_version
(),
version
=
get_package_version
(),
packages
=
find_packages
(
packages
=
find_packages
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment