Commit 2ddeaa40 authored by Tri Dao's avatar Tri Dao
Browse files

Fix wheel building

parent d8ec6a2f
#!/bin/bash
# Points the build at the CUDA 10.2 toolkit under /usr/local/cuda-10.2.
#
# Sets and exports:
#   CUDA_HOME            - toolkit root
#   PATH                 - ${CUDA_HOME}/bin prepended
#   LD_LIBRARY_PATH      - ${CUDA_HOME}/lib64 prepended
#   FORCE_CUDA           - "1"
#   TORCH_CUDA_ARCH_LIST - compute capabilities to compile for
#
# No `set -e` / `set -u` here on purpose: shell options set in this file would
# leak into any shell that sources it.
export CUDA_HOME=/usr/local/cuda-10.2
# Add the ":" separator only when LD_LIBRARY_PATH already has a value. A
# trailing ":" is an empty entry, which the dynamic loader reads as the
# current directory.
export LD_LIBRARY_PATH="${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export PATH="${CUDA_HOME}/bin${PATH:+:${PATH}}"
export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5"
\ No newline at end of file
#!/bin/bash
# Installs the CUDA 10.2 toolkit on Ubuntu from NVIDIA's local-repo .deb.
# Needs lsb_release, wget, sudo and network access. The pin file and the
# installer are downloaded into the current directory.
set -euo pipefail

# Strip the periods from the version number
OS_VERSION=$(lsb_release -sr | tr -d .)
OS="ubuntu${OS_VERSION}"
CUDA_DEB="cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb"

wget -nv "https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin"
sudo mv "cuda-${OS}.pin" /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv "https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/${CUDA_DEB}"
sudo dpkg -i "${CUDA_DEB}"
sudo apt-key add /var/cuda-repo-10-2-local-10.2.89-440.33.01/7fa2af80.pub
sudo apt-get -qq update
# -y: a non-interactive run has nobody to answer apt's confirmation prompt.
sudo apt-get install -y cuda cuda-nvcc-10-2 cuda-libraries-dev-10-2
sudo apt-get clean
# Delete the downloaded installer. The file on disk is named after the last
# URL component, so remove that, not the URL.
rm -f -- "${CUDA_DEB}"
\ No newline at end of file
#!/bin/bash
# Points the build at the CUDA 11.3 toolkit under /usr/local/cuda-11.3.
#
# Sets and exports:
#   CUDA_HOME            - toolkit root
#   PATH                 - ${CUDA_HOME}/bin prepended
#   LD_LIBRARY_PATH      - ${CUDA_HOME}/lib64 prepended
#   FORCE_CUDA           - "1"
#   TORCH_CUDA_ARCH_LIST - compute capabilities to compile for
#
# No `set -e` / `set -u` here on purpose: shell options set in this file would
# leak into any shell that sources it.
export CUDA_HOME=/usr/local/cuda-11.3
# Add the ":" separator only when LD_LIBRARY_PATH already has a value. A
# trailing ":" is an empty entry, which the dynamic loader reads as the
# current directory.
export LD_LIBRARY_PATH="${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export PATH="${CUDA_HOME}/bin${PATH:+:${PATH}}"
export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
\ No newline at end of file
#!/bin/bash
# Installs the CUDA 11.3 toolkit on Ubuntu from NVIDIA's local-repo .deb.
# Needs lsb_release, wget, sudo and network access. The pin file and the
# installer are downloaded into the current directory.
set -euo pipefail

# Strip the periods from the version number
OS_VERSION=$(lsb_release -sr | tr -d .)
OS="ubuntu${OS_VERSION}"
CUDA_DEB="cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb"

wget -nv "https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin"
sudo mv "cuda-${OS}.pin" /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv "https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/${CUDA_DEB}"
sudo dpkg -i "${CUDA_DEB}"
# TODO: apt-key is deprecated on newer Ubuntu releases; for those the key
# should be moved into the trusted folder instead:
# sudo mv /var/cuda-repo-${OS}-11-3-local/7fa2af80.pub /etc/apt/trusted.gpg.d/
sudo apt-key add "/var/cuda-repo-${OS}-11-3-local/7fa2af80.pub"
sudo apt-get -qq update
# -y: a non-interactive run has nobody to answer apt's confirmation prompt.
sudo apt-get install -y cuda cuda-nvcc-11-3 cuda-libraries-dev-11-3
sudo apt-get clean
# Delete the downloaded installer. The file on disk is named after the last
# URL component, so remove that, not the URL.
rm -f -- "${CUDA_DEB}"
\ No newline at end of file
#!/bin/bash
# Points the build at the CUDA 11.6 toolkit under /usr/local/cuda-11.6.
#
# Sets and exports:
#   CUDA_HOME            - toolkit root
#   PATH                 - ${CUDA_HOME}/bin prepended
#   LD_LIBRARY_PATH      - ${CUDA_HOME}/lib64 prepended
#   FORCE_CUDA           - "1"
#   TORCH_CUDA_ARCH_LIST - compute capabilities to compile for
#
# No `set -e` / `set -u` here on purpose: shell options set in this file would
# leak into any shell that sources it.
export CUDA_HOME=/usr/local/cuda-11.6
# Add the ":" separator only when LD_LIBRARY_PATH already has a value. A
# trailing ":" is an empty entry, which the dynamic loader reads as the
# current directory.
export LD_LIBRARY_PATH="${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export PATH="${CUDA_HOME}/bin${PATH:+:${PATH}}"
export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
\ No newline at end of file
#!/bin/bash
# Installs the CUDA 11.6 toolkit on Ubuntu from NVIDIA's local-repo .deb.
# Needs lsb_release, wget, sudo and network access. The pin file and the
# installer are downloaded into the current directory.
set -euo pipefail

# Strip the periods from the version number
OS_VERSION=$(lsb_release -sr | tr -d .)
OS="ubuntu${OS_VERSION}"
CUDA_DEB="cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb"

wget -nv "https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin"
sudo mv "cuda-${OS}.pin" /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv "https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/${CUDA_DEB}"
sudo dpkg -i "${CUDA_DEB}"
sudo apt-key add "/var/cuda-repo-${OS}-11-6-local/7fa2af80.pub"
sudo apt-get -qq update
# -y: a non-interactive run has nobody to answer apt's confirmation prompt.
sudo apt-get install -y cuda cuda-nvcc-11-6 cuda-libraries-dev-11-6
sudo apt-get clean
# Delete the downloaded installer. The file on disk is named after the last
# URL component, so remove that, not the URL.
rm -f -- "${CUDA_DEB}"
\ No newline at end of file
#!/bin/bash
# Points the build at the CUDA 11.7 toolkit under /usr/local/cuda-11.7.
#
# Sets and exports:
#   CUDA_HOME            - toolkit root
#   PATH                 - ${CUDA_HOME}/bin prepended
#   LD_LIBRARY_PATH      - ${CUDA_HOME}/lib64 prepended
#   FORCE_CUDA           - "1"
#   TORCH_CUDA_ARCH_LIST - compute capabilities to compile for
#
# No `set -e` / `set -u` here on purpose: shell options set in this file would
# leak into any shell that sources it.
export CUDA_HOME=/usr/local/cuda-11.7
# Add the ":" separator only when LD_LIBRARY_PATH already has a value. A
# trailing ":" is an empty entry, which the dynamic loader reads as the
# current directory.
export LD_LIBRARY_PATH="${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export PATH="${CUDA_HOME}/bin${PATH:+:${PATH}}"
export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
\ No newline at end of file
#!/bin/bash
# Installs the CUDA 11.7 toolkit on Ubuntu from NVIDIA's local-repo .deb.
# Needs lsb_release, wget, sudo and network access. The pin file and the
# installer are downloaded into the current directory.
set -euo pipefail

# Strip the periods from the version number
OS_VERSION=$(lsb_release -sr | tr -d .)
OS="ubuntu${OS_VERSION}"
CUDA_DEB="cuda-repo-${OS}-11-7-local_11.7.0-515.43.04-1_amd64.deb"

wget -nv "https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin"
sudo mv "cuda-${OS}.pin" /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv "https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/${CUDA_DEB}"
sudo dpkg -i "${CUDA_DEB}"
# The glob is left unquoted so the shell expands it to the keyring file
# shipped inside the local repo directory.
sudo cp "/var/cuda-repo-${OS}-11-7-local/"cuda-*-keyring.gpg /usr/share/keyrings/
sudo apt-get -qq update
# -y: a non-interactive run has nobody to answer apt's confirmation prompt.
sudo apt-get install -y cuda cuda-nvcc-11-7 cuda-libraries-dev-11-7
sudo apt-get clean
# Delete the downloaded installer. The file on disk is named after the last
# URL component, so remove that, not the URL.
rm -f -- "${CUDA_DEB}"
#!/bin/bash
# Points the build at the CUDA 12.0 toolkit under /usr/local/cuda-12.0.
#
# Sets and exports:
#   CUDA_HOME            - toolkit root
#   PATH                 - ${CUDA_HOME}/bin prepended
#   LD_LIBRARY_PATH      - ${CUDA_HOME}/lib64 prepended
#   FORCE_CUDA           - "1"
#   TORCH_CUDA_ARCH_LIST - compute capabilities to compile for
#
# No `set -e` / `set -u` here on purpose: shell options set in this file would
# leak into any shell that sources it.
export CUDA_HOME=/usr/local/cuda-12.0
# Add the ":" separator only when LD_LIBRARY_PATH already has a value. A
# trailing ":" is an empty entry, which the dynamic loader reads as the
# current directory.
export LD_LIBRARY_PATH="${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export PATH="${CUDA_HOME}/bin${PATH:+:${PATH}}"
export FORCE_CUDA=1
# 3.5 is left out on purpose: CUDA 12 dropped Kepler (sm_35/sm_37), and nvcc
# rejects that architecture.
export TORCH_CUDA_ARCH_LIST="5.0+PTX;6.0;7.0;7.5;8.0;8.6"
\ No newline at end of file
#!/bin/bash
# Installs the CUDA 12.0 toolkit on Ubuntu from NVIDIA's local-repo .deb.
# Needs lsb_release, wget, sudo and network access. The pin file and the
# installer are downloaded into the current directory.
set -euo pipefail

# Strip the periods from the version number
OS_VERSION=$(lsb_release -sr | tr -d .)
OS="ubuntu${OS_VERSION}"
CUDA_DEB="cuda-repo-${OS}-12-0-local_12.0.0-525.60.13-1_amd64.deb"

wget -nv "https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin"
sudo mv "cuda-${OS}.pin" /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv "https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/${CUDA_DEB}"
sudo dpkg -i "${CUDA_DEB}"
# The glob is left unquoted so the shell expands it to the keyring file
# shipped inside the local repo directory.
sudo cp "/var/cuda-repo-${OS}-12-0-local/"cuda-*-keyring.gpg /usr/share/keyrings/
sudo apt-get -qq update
# -y: a non-interactive run has nobody to answer apt's confirmation prompt.
sudo apt-get install -y cuda cuda-nvcc-12-0 cuda-libraries-dev-12-0
sudo apt-get clean
# Delete the downloaded installer. The file on disk is named after the last
# URL component, so remove that, not the URL.
rm -f -- "${CUDA_DEB}"
# Installs the base OS packages used on the build machine via apt-get.
# Needs sudo and network access.

# `export NAME VALUE` would try to export a variable literally named
# "C.UTF-8" and fail; an assignment is required.
export LANG=C.UTF-8
export OFED_VERSION=5.3-1.0.0.1

# Three chained steps; each runs only if the previous one succeeded. The
# "&&" after software-properties-common matters: without it the words
# "sudo apt-get install ..." are read as package names for the first install.
sudo apt-get update && \
  sudo apt-get install -y --no-install-recommends \
    software-properties-common && \
  sudo apt-get install -y --no-install-recommends \
    build-essential \
    apt-utils \
    ca-certificates \
    wget \
    git \
    vim \
    libssl-dev \
    curl \
    unzip \
    unrar \
    cmake \
    net-tools \
    sudo \
    autotools-dev \
    rsync \
    jq \
    openssh-server \
    tmux \
    screen \
    htop \
    pdsh \
    openssh-client \
    lshw \
    dmidecode \
    util-linux \
    automake \
    autoconf \
    libtool \
    pciutils \
    libpci-dev \
    libaio-dev \
    libcap2 \
    libtinfo5 \
    fakeroot \
    devscripts \
    debhelper \
    nfs-common
# wget -O ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
# chmod +x ~/miniconda.sh && \
# ~/miniconda.sh -b -p /opt/conda && \
# rm ~/miniconda.sh
# export PATH=/opt/conda/bin:$PATH
\ No newline at end of file
......@@ -7,116 +7,120 @@
name: Build wheels and deploy
#on:
# create:
# tags:
# - '**'
on:
push
create:
tags:
- v*
jobs:
# setup_release:
# name: Create Release
# runs-on: ubuntu-latest
# steps:
# - name: Get the tag version
# id: extract_branch
# run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/}
# shell: bash
# - name: Create Release
# id: create_release
# uses: actions/create-release@v1
# env:
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# with:
# tag_name: ${{ steps.extract_branch.outputs.branch }}
# release_name: ${{ steps.extract_branch.outputs.branch }}
setup_release:
name: Create Release
runs-on: ubuntu-latest
steps:
- name: Get the tag version
id: extract_branch
run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/}
shell: bash
- name: Create Release
id: create_release
uses: actions/create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
tag_name: ${{ steps.extract_branch.outputs.branch }}
release_name: ${{ steps.extract_branch.outputs.branch }}
build_wheels:
name: Build Wheel
needs: setup_release
runs-on: ${{ matrix.os }}
#needs: setup_release
strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04, ubuntu-22.04]
#python-version: ['3.7', '3.8', '3.9', '3.10']
#torch-version: ['1.11.0', '1.12.0', '1.13.0', '2.0.1']
#cuda-version: ['113', '116', '117', '120']
python-version: ['3.10']
torch-version: ['2.0.1']
cuda-version: ['120']
# Using ubuntu-20.04 instead of 22.04 for more compatibility (glibc). Ideally we'd use the
# manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
os: [ubuntu-20.04]
python-version: ['3.7', '3.8', '3.9', '3.10']
torch-version: ['1.12.1', '1.13.1', '2.0.1', '2.1.0.dev20230731']
cuda-version: ['11.6.2', '11.7.1', '11.8.0', '12.1.0']
# We need separate wheels that either uses C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
# Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.
# Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
# when building without C++11 ABI and using it on nvcr images.
cxx11_abi: ['FALSE', 'TRUE']
exclude:
# Nvidia only supports 11.7+ for ubuntu-22.04
- os: ubuntu-22.04
cuda-version: '116'
- os: ubuntu-22.04
cuda-version: '113'
# Torch only builds cuda 117 for 1.13.0+
- cuda-version: '117'
torch-version: '1.11.0'
- cuda-version: '117'
torch-version: '1.12.0'
# Torch only builds cuda 116 for 1.12.0+
- cuda-version: '116'
torch-version: '1.11.0'
# Torch only builds cuda 120 for 2.0.1+
- cuda-version: '120'
torch-version: '1.11.0'
- cuda-version: '120'
torch-version: '1.12.0'
- cuda-version: '120'
torch-version: '1.13.0'
# 1.13.0 drops support for cuda 11.3
- cuda-version: '113'
torch-version: '1.13.0'
- cuda-version: '113'
torch-version: '2.0.1'
# Fails with "Validation Error" on artifact upload
- cuda-version: '117'
torch-version: '1.13.0'
os: ubuntu-20.04
# Pytorch >= 2.0 only supports Python >= 3.8
- torch-version: '2.0.1'
python-version: '3.7'
- torch-version: '2.1.0.dev20230731'
python-version: '3.7'
# Pytorch <= 2.0 only supports CUDA <= 11.8
- torch-version: '1.12.1'
cuda-version: '12.1.0'
- torch-version: '1.13.1'
cuda-version: '12.1.0'
- torch-version: '2.0.1'
cuda-version: '12.1.0'
# Pytorch >= 2.1 only supports CUDA 12.1
- torch-version: '2.1.0.dev20230731'
cuda-version: '11.6.2'
- torch-version: '2.1.0.dev20230731'
cuda-version: '11.7.1'
- torch-version: '2.1.0.dev20230731'
cuda-version: '11.8.0'
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v3
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Set up Linux Env
- name: Set CUDA and PyTorch versions
run: |
# Derive short version tags from the matrix values and append them to
# $GITHUB_ENV so later steps can read them as environment variables.
# MATRIX_CUDA_VERSION: first two dot-separated fields joined, e.g. 11.7.1 -> 117
echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
# MATRIX_TORCH_VERSION: first two fields as major.minor, e.g. 2.1.0.dev20230731 -> 2.1
echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.torch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV
- name: Free up disk space
if: ${{ runner.os == 'Linux' }}
run: |
sudo rm -rf /usr/share/dotnet
bash .github/workflows/env.sh
echo ${{ needs.create_release.outputs.upload_url }}
echo ${{ needs.steps.extract_branch.outputs.upload_url }}
shell:
bash
- name: Install CUDA ${{ matrix.cuda-version }}
if: ${{ matrix.cuda-version != 'cpu' }}
run: |
bash .github/workflows/cuda/cu${{ matrix.cuda-version }}-${{ runner.os }}.sh
shell:
bash
- name: Check GPU Env
if: ${{ matrix.cuda-version != 'cpu' }}
run: |
source .github/workflows/cuda/cu${{ matrix.cuda-version }}-${{ runner.os }}-env.sh
nvcc --version
shell:
bash
uses: Jimver/cuda-toolkit@v0.2.11
id: cuda-toolkit
with:
cuda: ${{ matrix.cuda-version }}
linux-local-args: '["--toolkit"]'
# default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1
# method: ${{ (matrix.cuda-version == '11.8.0' || matrix.cuda-version == '12.1.0') && 'network' || 'local' }}
method: 'network'
# We need the cuda libraries (e.g. cuSparse, cuSolver) for compiling PyTorch extensions,
# not just nvcc
# sub-packages: '["nvcc"]'
- name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }}
run: |
pip install numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses && conda clean -ya
pip install --no-cache-dir torch==${{ matrix.torch-version }}
pip install --upgrade pip
# If we don't install before installing Pytorch, we get error for torch 2.0.1
# ERROR: Could not find a version that satisfies the requirement setuptools>=40.8.0 (from versions: none)
pip install lit
# We want to figure out the CUDA version to download pytorch
# e.g. we can have system CUDA version being 11.7 but if torch==1.12 then we need to download the wheel from cu116
# This code is ugly, maybe there's a better way to do this.
export TORCH_CUDA_VERSION=$(python -c "import os; minv = {'1.12': 113, '1.13': 116, '2.0': 117, '2.1': 121}[os.environ['MATRIX_TORCH_VERSION']]; maxv = {'1.12': 116, '1.13': 117, '2.0': 118, '2.1': 121}[os.environ['MATRIX_TORCH_VERSION']]; print(max(min(int(os.environ['MATRIX_CUDA_VERSION']), maxv), minv))")
if [[ ${{ matrix.torch-version }} == *"dev"* ]]; then
pip install --no-cache-dir --pre torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
else
pip install --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
fi
nvcc --version
python --version
python -c "import torch; print('PyTorch:', torch.__version__)"
python -c "import torch; print('CUDA:', torch.version.cuda)"
......@@ -124,16 +128,25 @@ jobs:
shell:
bash
# - name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }}
# run: |
# pip install numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses && conda clean -ya
# pip install --no-index --no-cache-dir torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cu${{ matrix.cuda-version }}/torch_stable.html
# python --version
# python -c "import torch; print('PyTorch:', torch.__version__)"
# python -c "import torch; print('CUDA:', torch.version.cuda)"
# python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)"
# shell:
# bash
- name: Build wheel
run: |
# We want setuptools >= 49.6.0 otherwise we can't compile the extension if system CUDA version is 11.7 and pytorch cuda version is 11.6
# https://github.com/pytorch/pytorch/blob/664058fa83f1d8eede5d66418abff6e20bd76ca8/torch/utils/cpp_extension.py#L810
# However this still fails so I'm using a newer version of setuptools
pip install setuptools==68.0.0
pip install ninja packaging wheel
export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
# Limit MAX_JOBS otherwise the github runner goes OOM
MAX_JOBS=1 FLASH_ATTENTION_FORCE_BUILD="TRUE" FLASH_ATTENTION_FORCE_CXX11_ABI=${{ matrix.cxx11_abi}} python setup.py bdist_wheel --dist-dir=dist
# Tag identifying the build configuration, e.g. cu117torch2.0cxx11abiFALSE
tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ matrix.cxx11_abi }}
# Replace the second "-" in the wheel's file name with "+<tag>-", i.e. append
# the tag to the version field: name-version-... -> name-version+<tag>-...
wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
# Rename the built wheel to the tagged name (assumes dist/ holds exactly one wheel)
ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
# Appended to $GITHUB_ENV so later steps can reference it as env.wheel_name
echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
- name: Log Built Wheels
run: |
ls dist
- name: Get the tag version
id: extract_branch
......@@ -147,62 +160,45 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Build wheel
- name: Upload Release Asset
id: upload_release_asset
uses: actions/upload-release-asset@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ steps.get_current_release.outputs.upload_url }}
asset_path: ./dist/${{env.wheel_name}}
asset_name: ${{env.wheel_name}}
asset_content_type: application/*
publish_package:
name: Publish package
needs: [build_wheels]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install dependencies
run: |
export FLASH_ATTENTION_FORCE_BUILD="TRUE"
export FORCE_CUDA="1"
export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
export CUDA_INSTALL_DIR=/usr/local/cuda-11.3$CUDA_INSTALL_DIR
pip install ninja packaging setuptools wheel
python setup.py bdist_wheel --dist-dir=dist
tmpname=cu${{ matrix.cuda-version }}torch${{ matrix.torch-version }}
wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
pip install ninja packaging setuptools wheel twine
# We don't want to download anything CUDA-related here
pip install torch --index-url https://download.pytorch.org/whl/cpu
- name: Log Built Wheels
- name: Build core package
env:
FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE"
run: |
ls dist
python setup.py sdist --dist-dir=dist
# - name: Upload Release Asset
# id: upload_release_asset
# uses: actions/upload-release-asset@v1
# env:
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# with:
# upload_url: ${{ steps.get_current_release.outputs.upload_url }}
# asset_path: ./dist/${{env.wheel_name}}
# asset_name: ${{env.wheel_name}}
# asset_content_type: application/*
# publish_package:
# name: Publish package
# needs: [build_wheels]
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v3
# - uses: actions/setup-python@v4
# with:
# python-version: '3.10'
# - name: Install dependencies
# run: |
# pip install ninja packaging setuptools wheel twine
# pip install torch
# - name: Build core package
# env:
# FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE"
# run: |
# python setup.py sdist --dist-dir=dist
# - name: Deploy
# env:
# TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
# TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
# run: |
# python -m twine upload dist/*
- name: Deploy
env:
TWINE_USERNAME: "__token__"
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
run: |
python -m twine upload dist/*
......@@ -13,9 +13,10 @@ import subprocess
import urllib.request
import urllib.error
from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
import torch
from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension, CUDA_HOME
from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
with open("README.md", "r", encoding="utf-8") as fh:
......@@ -33,6 +34,8 @@ BASE_WHEEL_URL = "https://github.com/Dao-AILab/flash-attention/releases/download
# SKIP_CUDA_BUILD: Intended to allow CI to use a simple `python setup.py sdist` run to copy over raw files, without any cuda compilation
FORCE_BUILD = os.getenv("FLASH_ATTENTION_FORCE_BUILD", "FALSE") == "TRUE"
SKIP_CUDA_BUILD = os.getenv("FLASH_ATTENTION_SKIP_CUDA_BUILD", "FALSE") == "TRUE"
# For CI, we want the option to build with C++11 ABI since the nvcr images use C++11 ABI
FORCE_CXX11_ABI = os.getenv("FLASH_ATTENTION_FORCE_CXX11_ABI", "FALSE") == "TRUE"
def get_platform():
......@@ -101,26 +104,27 @@ if not torch.cuda.is_available():
print(
"\nWarning: Torch did not find available GPUs on this system.\n",
"If your intention is to cross-compile, this is not an error.\n"
"By default, Apex will cross-compile for Pascal (compute capabilities 6.0, 6.1, 6.2),\n"
"Volta (compute capability 7.0), Turing (compute capability 7.5),\n"
"and, if the CUDA version is >= 11.0, Ampere (compute capability 8.0).\n"
"By default, FlashAttention will cross-compile for Ampere (compute capability 8.0, 8.6, "
"8.9), and, if the CUDA version is >= 11.8, Hopper (compute capability 9.0).\n"
"If you wish to cross-compile for a single specific architecture,\n"
'export TORCH_CUDA_ARCH_LIST="compute capability" before running setup.py.\n',
)
if os.environ.get("TORCH_CUDA_ARCH_LIST", None) is None and CUDA_HOME is not None:
_, bare_metal_version = get_cuda_bare_metal_version(CUDA_HOME)
if bare_metal_version >= Version("11.8"):
os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0;8.6;9.0"
elif bare_metal_version >= Version("11.1"):
os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0;8.6"
elif bare_metal_version == Version("11.0"):
os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0"
os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6;9.0"
elif bare_metal_version >= Version("11.4"):
os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
else:
os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5"
os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
cmdclass = {}
ext_modules = []
# We want this even if SKIP_CUDA_BUILD because when we run python setup.py sdist we want the .hpp
# files included in the source distribution, in case the user compiles from source.
subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"])
if not SKIP_CUDA_BUILD:
print("\n\ntorch.__version__ = {}\n\n".format(torch.__version__))
TORCH_MAJOR = int(torch.__version__.split(".")[0])
......@@ -137,8 +141,8 @@ if not SKIP_CUDA_BUILD:
# Check, if CUDA11 is installed for compute capability 8.0
cc_flag = []
_, bare_metal_version = get_cuda_bare_metal_version(CUDA_HOME)
if bare_metal_version < Version("11.0"):
raise RuntimeError("FlashAttention is only supported on CUDA 11 and above")
if bare_metal_version < Version("11.4"):
raise RuntimeError("FlashAttention is only supported on CUDA 11.4 and above")
# cc_flag.append("-gencode")
# cc_flag.append("arch=compute_75,code=sm_75")
cc_flag.append("-gencode")
......@@ -147,7 +151,11 @@ if not SKIP_CUDA_BUILD:
cc_flag.append("-gencode")
cc_flag.append("arch=compute_90,code=sm_90")
subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"])
# HACK: The compiler flag -D_GLIBCXX_USE_CXX11_ABI is set to be the same as
# torch._C._GLIBCXX_USE_CXX11_ABI
# https://github.com/pytorch/pytorch/blob/8472c24e3b5b60150096486616d98b7bea01500b/torch/utils/cpp_extension.py#L920
if FORCE_CXX11_ABI:
torch._C._GLIBCXX_USE_CXX11_ABI = True
ext_modules.append(
CUDAExtension(
name="flash_attn_2_cuda",
......@@ -213,6 +221,7 @@ if not SKIP_CUDA_BUILD:
Path(this_dir) / 'csrc' / 'cutlass' / 'include',
],
)
)
def get_package_version():
......@@ -232,7 +241,6 @@ class CachedWheelsCommand(_bdist_wheel):
find an existing wheel (which is currently the case for all flash attention installs). We use
the environment parameters to detect whether there is already a pre-built version of a compatible
wheel available and short-circuits the standard full build pipeline.
"""
def run(self):
if FORCE_BUILD:
......@@ -241,16 +249,20 @@ class CachedWheelsCommand(_bdist_wheel):
raise_if_cuda_home_none("flash_attn")
# Determine the version numbers that will be used to determine the correct wheel
_, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME)
# We're using the CUDA version used to build torch, not the one currently installed
# _, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME)
torch_cuda_version = parse(torch.version.cuda)
torch_version_raw = parse(torch.__version__)
python_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
platform_name = get_platform()
flash_version = get_package_version()
cuda_version = f"{cuda_version_raw.major}{cuda_version_raw.minor}"
torch_version = f"{torch_version_raw.major}.{torch_version_raw.minor}.{torch_version_raw.micro}"
# cuda_version = f"{cuda_version_raw.major}{cuda_version_raw.minor}"
cuda_version = f"{torch_cuda_version.major}{torch_cuda_version.minor}"
torch_version = f"{torch_version_raw.major}.{torch_version_raw.minor}"
cxx11_abi = str(torch._C._GLIBCXX_USE_CXX11_ABI).upper()
# Determine wheel URL based on CUDA version, torch version, python version and OS
wheel_filename = f'{PACKAGE_NAME}-{flash_version}+cu{cuda_version}torch{torch_version}-{python_version}-{python_version}-{platform_name}.whl'
wheel_filename = f'{PACKAGE_NAME}-{flash_version}+cu{cuda_version}torch{torch_version}cxx11abi{cxx11_abi}-{python_version}-{python_version}-{platform_name}.whl'
wheel_url = BASE_WHEEL_URL.format(
tag_name=f"v{flash_version}",
wheel_name=wheel_filename
......@@ -279,7 +291,6 @@ class CachedWheelsCommand(_bdist_wheel):
setup(
# @pierce - TODO: Revert for official release
name=PACKAGE_NAME,
version=get_package_version(),
packages=find_packages(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment