Commit 2ddeaa40 authored by Tri Dao

Fix wheel building

parent d8ec6a2f
#!/bin/bash
# Build environment for CUDA 10.2. Meant to be sourced; every variable is
# exported so it propagates to child processes (nvcc, pip) afterwards.
export CUDA_HOME=/usr/local/cuda-10.2
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
export PATH=${CUDA_HOME}/bin:${PATH}
export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5"
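These env files are meant to be sourced rather than executed, so the exports land in the calling shell. A minimal usage sketch; the cu102-Linux-env.sh name is an assumption, following the cu${VERSION}-${OS}-env.sh pattern referenced by the old workflow in the diff further down:

# Hypothetical usage: load the CUDA 10.2 environment, then sanity-check it.
source .github/workflows/cuda/cu102-Linux-env.sh
which nvcc       # expect /usr/local/cuda-10.2/bin/nvcc
nvcc --version   # expect "release 10.2"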
#!/bin/bash
# Strip the periods from the version number, e.g. 18.04 -> 1804
OS_VERSION=$(lsb_release -sr | tr -d .)
OS=ubuntu${OS_VERSION}

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb
sudo dpkg -i cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb
sudo apt-key add /var/cuda-repo-10-2-local-10.2.89-440.33.01/7fa2af80.pub
sudo apt-get -qq update
sudo apt-get install -y cuda cuda-nvcc-10-2 cuda-libraries-dev-10-2
sudo apt-get clean
# rm takes a local path, not a URL: delete the downloaded installer package
rm -f cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb
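After an installer like the one above runs, a quick sanity check can confirm the toolkit is usable before the build starts; a sketch assuming the default install prefix:

# Verify the toolkit landed where the env script expects it.
ls /usr/local/cuda-10.2/bin/nvcc
/usr/local/cuda-10.2/bin/nvcc --version   # expect "release 10.2, V10.2.89"
dpkg -l | grep cuda-nvcc-10-2             # confirm the apt package installed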
#!/bin/bash
# Build environment for CUDA 11.3; see the CUDA 10.2 variant above for notes.
export CUDA_HOME=/usr/local/cuda-11.3
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
export PATH=${CUDA_HOME}/bin:${PATH}
export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
#!/bin/bash
# Strip the periods from the version number, e.g. 20.04 -> 2004
OS_VERSION=$(lsb_release -sr | tr -d .)
OS=ubuntu${OS_VERSION}

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb
sudo dpkg -i cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb
# TODO: apt-key still works on Ubuntu < 22.04 but is deprecated; on newer
# releases the key should instead be moved into the trusted keyring folder:
# sudo mv /var/cuda-repo-${OS}-11-3-local/7fa2af80.pub /etc/apt/trusted.gpg.d/
sudo apt-key add /var/cuda-repo-${OS}-11-3-local/7fa2af80.pub
sudo apt-get -qq update
sudo apt-get install -y cuda cuda-nvcc-11-3 cuda-libraries-dev-11-3
sudo apt-get clean
# rm takes a local path, not a URL: delete the downloaded installer package
rm -f cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb
#!/bin/bash
# Build environment for CUDA 11.6; see the CUDA 10.2 variant above for notes.
export CUDA_HOME=/usr/local/cuda-11.6
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
export PATH=${CUDA_HOME}/bin:${PATH}
export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
#!/bin/bash
# Strip the periods from the version number, e.g. 20.04 -> 2004
OS_VERSION=$(lsb_release -sr | tr -d .)
OS=ubuntu${OS_VERSION}

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb
sudo dpkg -i cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb
sudo apt-key add /var/cuda-repo-${OS}-11-6-local/7fa2af80.pub
sudo apt-get -qq update
sudo apt-get install -y cuda cuda-nvcc-11-6 cuda-libraries-dev-11-6
sudo apt-get clean
# rm takes a local path, not a URL (the original also pointed at a mismatched
# 11.5.2 URL): delete the downloaded installer package
rm -f cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb
#!/bin/bash
# Build environment for CUDA 11.7; see the CUDA 10.2 variant above for notes.
export CUDA_HOME=/usr/local/cuda-11.7
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
export PATH=${CUDA_HOME}/bin:${PATH}
export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
#!/bin/bash
# Strip the periods from the version number, e.g. 20.04 -> 2004
OS_VERSION=$(lsb_release -sr | tr -d .)
OS=ubuntu${OS_VERSION}

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda-repo-${OS}-11-7-local_11.7.0-515.43.04-1_amd64.deb
sudo dpkg -i cuda-repo-${OS}-11-7-local_11.7.0-515.43.04-1_amd64.deb
sudo cp /var/cuda-repo-${OS}-11-7-local/cuda-*-keyring.gpg /usr/share/keyrings/
sudo apt-get -qq update
sudo apt-get install -y cuda cuda-nvcc-11-7 cuda-libraries-dev-11-7
sudo apt-get clean
# rm takes a local path, not a URL: delete the downloaded installer package
rm -f cuda-repo-${OS}-11-7-local_11.7.0-515.43.04-1_amd64.deb
#!/bin/bash
# Build environment for CUDA 12.0; see the CUDA 10.2 variant above for notes.
export CUDA_HOME=/usr/local/cuda-12.0
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
export PATH=${CUDA_HOME}/bin:${PATH}
export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
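TORCH_CUDA_ARCH_LIST controls which -gencode flags PyTorch's cpp_extension passes to nvcc. A rough sketch of the expansion for two of the entries above (the file names are illustrative only):

# "8.0;8.6" compiles SASS for sm_80 and sm_86; a "+PTX" suffix such as
# "5.0+PTX" additionally embeds PTX so newer GPUs can JIT-compile the kernel.
nvcc -gencode arch=compute_80,code=sm_80 \
     -gencode arch=compute_86,code=sm_86 \
     -c flash_kernel.cu -o flash_kernel.o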
#!/bin/bash
# Strip the periods from the version number, e.g. 20.04 -> 2004
OS_VERSION=$(lsb_release -sr | tr -d .)
OS=ubuntu${OS_VERSION}

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda-repo-${OS}-12-0-local_12.0.0-525.60.13-1_amd64.deb
sudo dpkg -i cuda-repo-${OS}-12-0-local_12.0.0-525.60.13-1_amd64.deb
sudo cp /var/cuda-repo-${OS}-12-0-local/cuda-*-keyring.gpg /usr/share/keyrings/
sudo apt-get -qq update
sudo apt-get install -y cuda cuda-nvcc-12-0 cuda-libraries-dev-12-0
sudo apt-get clean
# rm takes a local path, not a URL: delete the downloaded installer package
rm -f cuda-repo-${OS}-12-0-local_12.0.0-525.60.13-1_amd64.deb
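The five installer scripts above differ only in version strings and installer URLs. A hedged sketch of how they could be collapsed into one parameterized helper; the function name and argument order are illustrative, not part of the repo:

#!/bin/bash
# Illustrative helper (not in the repo): install a CUDA toolkit from NVIDIA's
# local apt repository. Usage: install_cuda <dashed-version> <deb-url>
# e.g. install_cuda 11-7 https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda-repo-ubuntu2004-11-7-local_11.7.0-515.43.04-1_amd64.deb
install_cuda() {
    local ver=$1 deb_url=$2
    local os="ubuntu$(lsb_release -sr | tr -d .)"
    local deb
    deb=$(basename "$deb_url")
    wget -nv "https://developer.download.nvidia.com/compute/cuda/repos/${os}/x86_64/cuda-${os}.pin"
    sudo mv "cuda-${os}.pin" /etc/apt/preferences.d/cuda-repository-pin-600
    wget -nv "$deb_url"
    sudo dpkg -i "$deb"
    # Newer repos ship a keyring file; older ones need apt-key (deprecated).
    sudo cp /var/cuda-repo-*/cuda-*-keyring.gpg /usr/share/keyrings/ 2>/dev/null || \
        sudo apt-key add /var/cuda-repo-*/7fa2af80.pub
    sudo apt-get -qq update
    sudo apt-get install -y cuda-nvcc-${ver} cuda-libraries-dev-${ver}
    sudo apt-get clean && rm -f "$deb"
}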
export LANG=C.UTF-8
export OFED_VERSION=5.3-1.0.0.1

sudo apt-get update
sudo apt-get install -y --no-install-recommends software-properties-common
sudo apt-get install -y --no-install-recommends \
    build-essential \
    apt-utils \
    ca-certificates \
    wget \
    git \
    vim \
    libssl-dev \
    curl \
    unzip \
    unrar \
    cmake \
    net-tools \
    sudo \
    autotools-dev \
    rsync \
    jq \
    openssh-server \
    tmux \
    screen \
    htop \
    pdsh \
    openssh-client \
    lshw \
    dmidecode \
    util-linux \
    automake \
    autoconf \
    libtool \
    pciutils \
    libpci-dev \
    libaio-dev \
    libcap2 \
    libtinfo5 \
    fakeroot \
    devscripts \
    debhelper \
    nfs-common

# wget -O ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
# chmod +x ~/miniconda.sh && \
# ~/miniconda.sh -b -p /opt/conda && \
# rm ~/miniconda.sh
# export PATH=/opt/conda/bin:$PATH
@@ -7,116 +7,120 @@
 name: Build wheels and deploy

-#on:
-#  create:
-#    tags:
-#      - '**'
 on:
-  push
+  create:
+    tags:
+      - v*

 jobs:
-  # setup_release:
-  #   name: Create Release
-  #   runs-on: ubuntu-latest
-  #   steps:
-  #     - name: Get the tag version
-  #       id: extract_branch
-  #       run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/}
-  #       shell: bash
-  #     - name: Create Release
-  #       id: create_release
-  #       uses: actions/create-release@v1
-  #       env:
-  #         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-  #       with:
-  #         tag_name: ${{ steps.extract_branch.outputs.branch }}
-  #         release_name: ${{ steps.extract_branch.outputs.branch }}
+  setup_release:
+    name: Create Release
+    runs-on: ubuntu-latest
+    steps:
+      - name: Get the tag version
+        id: extract_branch
+        run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/}
+        shell: bash
+
+      - name: Create Release
+        id: create_release
+        uses: actions/create-release@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          tag_name: ${{ steps.extract_branch.outputs.branch }}
+          release_name: ${{ steps.extract_branch.outputs.branch }}

   build_wheels:
     name: Build Wheel
+    needs: setup_release
     runs-on: ${{ matrix.os }}
-    #needs: setup_release

     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-20.04, ubuntu-22.04]
-        #python-version: ['3.7', '3.8', '3.9', '3.10']
-        #torch-version: ['1.11.0', '1.12.0', '1.13.0', '2.0.1']
-        #cuda-version: ['113', '116', '117', '120']
-        python-version: ['3.10']
-        torch-version: ['2.0.1']
-        cuda-version: ['120']
+        # Using ubuntu-20.04 instead of 22.04 for more compatibility (glibc). Ideally we'd use the
+        # manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
+        os: [ubuntu-20.04]
+        python-version: ['3.7', '3.8', '3.9', '3.10']
+        torch-version: ['1.12.1', '1.13.1', '2.0.1', '2.1.0.dev20230731']
+        cuda-version: ['11.6.2', '11.7.1', '11.8.0', '12.1.0']
+        # We need separate wheels that either uses C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
+        # Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.
+        # Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
+        # when building without C++11 ABI and using it on nvcr images.
+        cxx11_abi: ['FALSE', 'TRUE']
         exclude:
-          # Nvidia only supports 11.7+ for ubuntu-22.04
-          - os: ubuntu-22.04
-            cuda-version: '116'
-          - os: ubuntu-22.04
-            cuda-version: '113'
-          # Torch only builds cuda 117 for 1.13.0+
-          - cuda-version: '117'
-            torch-version: '1.11.0'
-          - cuda-version: '117'
-            torch-version: '1.12.0'
-          # Torch only builds cuda 116 for 1.12.0+
-          - cuda-version: '116'
-            torch-version: '1.11.0'
-          # Torch only builds cuda 120 for 2.0.1+
-          - cuda-version: '120'
-            torch-version: '1.11.0'
-          - cuda-version: '120'
-            torch-version: '1.12.0'
-          - cuda-version: '120'
-            torch-version: '1.13.0'
-          # 1.13.0 drops support for cuda 11.3
-          - cuda-version: '113'
-            torch-version: '1.13.0'
-          - cuda-version: '113'
-            torch-version: '2.0.1'
-          # Fails with "Validation Error" on artifact upload
-          - cuda-version: '117'
-            torch-version: '1.13.0'
-            os: ubuntu-20.04
+          # Pytorch >= 2.0 only supports Python >= 3.8
+          - torch-version: '2.0.1'
+            python-version: '3.7'
+          - torch-version: '2.1.0.dev20230731'
+            python-version: '3.7'
+          # Pytorch <= 2.0 only supports CUDA <= 11.8
+          - torch-version: '1.12.1'
+            cuda-version: '12.1.0'
+          - torch-version: '1.13.1'
+            cuda-version: '12.1.0'
+          - torch-version: '2.0.1'
+            cuda-version: '12.1.0'
+          # Pytorch >= 2.1 only supports CUDA 12.1
+          - torch-version: '2.1.0.dev20230731'
+            cuda-version: '11.6.2'
+          - torch-version: '2.1.0.dev20230731'
+            cuda-version: '11.7.1'
+          - torch-version: '2.1.0.dev20230731'
+            cuda-version: '11.8.0'

     steps:
       - name: Checkout
         uses: actions/checkout@v3

       - name: Set up Python
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}

-      - name: Set up Linux Env
+      - name: Set CUDA and PyTorch versions
+        run: |
+          echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
+          echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.torch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV
+
+      - name: Free up disk space
         if: ${{ runner.os == 'Linux' }}
         run: |
           sudo rm -rf /usr/share/dotnet
-          bash .github/workflows/env.sh
-          echo ${{ needs.create_release.outputs.upload_url }}
-          echo ${{ needs.steps.extract_branch.outputs.upload_url }}
-        shell:
-          bash

       - name: Install CUDA ${{ matrix.cuda-version }}
         if: ${{ matrix.cuda-version != 'cpu' }}
-        run: |
-          bash .github/workflows/cuda/cu${{ matrix.cuda-version }}-${{ runner.os }}.sh
-        shell:
-          bash
-
-      - name: Check GPU Env
-        if: ${{ matrix.cuda-version != 'cpu' }}
-        run: |
-          source .github/workflows/cuda/cu${{ matrix.cuda-version }}-${{ runner.os }}-env.sh
-          nvcc --version
-        shell:
-          bash
+        uses: Jimver/cuda-toolkit@v0.2.11
+        id: cuda-toolkit
+        with:
+          cuda: ${{ matrix.cuda-version }}
+          linux-local-args: '["--toolkit"]'
+          # default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1
+          # method: ${{ (matrix.cuda-version == '11.8.0' || matrix.cuda-version == '12.1.0') && 'network' || 'local' }}
+          method: 'network'
+          # We need the cuda libraries (e.g. cuSparse, cuSolver) for compiling PyTorch extensions,
+          # not just nvcc
+          # sub-packages: '["nvcc"]'

       - name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }}
         run: |
-          pip install numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses && conda clean -ya
-          pip install --no-cache-dir torch==${{ matrix.torch-version }}
+          pip install --upgrade pip
+          # If we don't install before installing Pytorch, we get error for torch 2.0.1
+          # ERROR: Could not find a version that satisfies the requirement setuptools>=40.8.0 (from versions: none)
+          pip install lit
+          # We want to figure out the CUDA version to download pytorch
+          # e.g. we can have system CUDA version being 11.7 but if torch==1.12 then we need to download the wheel from cu116
+          # This code is ugly, maybe there's a better way to do this.
+          export TORCH_CUDA_VERSION=$(python -c "import os; minv = {'1.12': 113, '1.13': 116, '2.0': 117, '2.1': 121}[os.environ['MATRIX_TORCH_VERSION']]; maxv = {'1.12': 116, '1.13': 117, '2.0': 118, '2.1': 121}[os.environ['MATRIX_TORCH_VERSION']]; print(max(min(int(os.environ['MATRIX_CUDA_VERSION']), maxv), minv))")
+          if [[ ${{ matrix.torch-version }} == *"dev"* ]]; then
+            pip install --no-cache-dir --pre torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
+          else
+            pip install --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
+          fi
+          nvcc --version
           python --version
           python -c "import torch; print('PyTorch:', torch.__version__)"
           python -c "import torch; print('CUDA:', torch.version.cuda)"
@@ -124,16 +128,25 @@ jobs:
         shell:
           bash

-      # - name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }}
-      #   run: |
-      #     pip install numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses && conda clean -ya
-      #     pip install --no-index --no-cache-dir torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cu${{ matrix.cuda-version }}/torch_stable.html
-      #     python --version
-      #     python -c "import torch; print('PyTorch:', torch.__version__)"
-      #     python -c "import torch; print('CUDA:', torch.version.cuda)"
-      #     python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)"
-      #   shell:
-      #     bash
+      - name: Build wheel
+        run: |
+          # We want setuptools >= 49.6.0 otherwise we can't compile the extension if system CUDA version is 11.7 and pytorch cuda version is 11.6
+          # https://github.com/pytorch/pytorch/blob/664058fa83f1d8eede5d66418abff6e20bd76ca8/torch/utils/cpp_extension.py#L810
+          # However this still fails so I'm using a newer version of setuptools
+          pip install setuptools==68.0.0
+          pip install ninja packaging wheel
+          export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
+          export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+          # Limit MAX_JOBS otherwise the github runner goes OOM
+          MAX_JOBS=1 FLASH_ATTENTION_FORCE_BUILD="TRUE" FLASH_ATTENTION_FORCE_CXX11_ABI=${{ matrix.cxx11_abi }} python setup.py bdist_wheel --dist-dir=dist
+          tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ matrix.cxx11_abi }}
+          wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
+          ls dist/*whl | xargs -I {} mv {} dist/${wheel_name}
+          echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
+
+      - name: Log Built Wheels
+        run: |
+          ls dist

       - name: Get the tag version
         id: extract_branch
@@ -147,62 +160,45 @@ jobs:
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

-      - name: Build wheel
-        run: |
-          export FLASH_ATTENTION_FORCE_BUILD="TRUE"
-          export FORCE_CUDA="1"
-          export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
-          export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
-          export CUDA_INSTALL_DIR=/usr/local/cuda-11.3$CUDA_INSTALL_DIR
-          pip install ninja packaging setuptools wheel
-          python setup.py bdist_wheel --dist-dir=dist
-          tmpname=cu${{ matrix.cuda-version }}torch${{ matrix.torch-version }}
-          wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
-          ls dist/*whl | xargs -I {} mv {} dist/${wheel_name}
-          echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
-
-      - name: Log Built Wheels
-        run: |
-          ls dist
-
-      # - name: Upload Release Asset
-      #   id: upload_release_asset
-      #   uses: actions/upload-release-asset@v1
-      #   env:
-      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      #   with:
-      #     upload_url: ${{ steps.get_current_release.outputs.upload_url }}
-      #     asset_path: ./dist/${{env.wheel_name}}
-      #     asset_name: ${{env.wheel_name}}
-      #     asset_content_type: application/*
-
-      # publish_package:
-      #   name: Publish package
-      #   needs: [build_wheels]
-      #   runs-on: ubuntu-latest
-      #   steps:
-      #     - uses: actions/checkout@v3
-      #     - uses: actions/setup-python@v4
-      #       with:
-      #         python-version: '3.10'
-      #     - name: Install dependencies
-      #       run: |
-      #         pip install ninja packaging setuptools wheel twine
-      #         pip install torch
-      #     - name: Build core package
-      #       env:
-      #         FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE"
-      #       run: |
-      #         python setup.py sdist --dist-dir=dist
-      #     - name: Deploy
-      #       env:
-      #         TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
-      #         TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
-      #       run: |
-      #         python -m twine upload dist/*
+      - name: Upload Release Asset
+        id: upload_release_asset
+        uses: actions/upload-release-asset@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          upload_url: ${{ steps.get_current_release.outputs.upload_url }}
+          asset_path: ./dist/${{env.wheel_name}}
+          asset_name: ${{env.wheel_name}}
+          asset_content_type: application/*
+
+  publish_package:
+    name: Publish package
+    needs: [build_wheels]
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      - name: Install dependencies
+        run: |
+          pip install ninja packaging setuptools wheel twine
+          # We don't want to download anything CUDA-related here
+          pip install torch --index-url https://download.pytorch.org/whl/cpu
+      - name: Build core package
+        env:
+          FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE"
+        run: |
+          python setup.py sdist --dist-dir=dist
+      - name: Deploy
+        env:
+          TWINE_USERNAME: "__token__"
+          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+        run: |
+          python -m twine upload dist/*
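The inline python -c in the "Install PyTorch" step above clamps the system CUDA version into the range of CUDA builds each torch release actually ships. The same logic written out in plain bash, as a sketch assuming MATRIX_TORCH_VERSION (e.g. "2.0") and MATRIX_CUDA_VERSION (e.g. 118) are set as in the workflow:

# Sketch only: readable re-expression of the TORCH_CUDA_VERSION one-liner.
declare -A minv=([1.12]=113 [1.13]=116 [2.0]=117 [2.1]=121)
declare -A maxv=([1.12]=116 [1.13]=117 [2.0]=118 [2.1]=121)
v=${MATRIX_CUDA_VERSION}
lo=${minv[$MATRIX_TORCH_VERSION]}
hi=${maxv[$MATRIX_TORCH_VERSION]}
(( v < lo )) && v=$lo   # e.g. torch 2.0 + system CUDA 11.6 -> cu117
(( v > hi )) && v=$hi   # e.g. torch 1.12 + system CUDA 11.7 -> cu116
export TORCH_CUDA_VERSION=$v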
@@ -13,9 +13,10 @@ import subprocess
 import urllib.request
 import urllib.error
-from wheel.bdist_wheel import bdist_wheel as _bdist_wheel

 import torch
 from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension, CUDA_HOME
+from wheel.bdist_wheel import bdist_wheel as _bdist_wheel

 with open("README.md", "r", encoding="utf-8") as fh:
@@ -33,6 +34,8 @@ BASE_WHEEL_URL = "https://github.com/Dao-AILab/flash-attention/releases/download
 # SKIP_CUDA_BUILD: Intended to allow CI to use a simple `python setup.py sdist` run to copy over raw files, without any cuda compilation
 FORCE_BUILD = os.getenv("FLASH_ATTENTION_FORCE_BUILD", "FALSE") == "TRUE"
 SKIP_CUDA_BUILD = os.getenv("FLASH_ATTENTION_SKIP_CUDA_BUILD", "FALSE") == "TRUE"
+# For CI, we want the option to build with C++11 ABI since the nvcr images use C++11 ABI
+FORCE_CXX11_ABI = os.getenv("FLASH_ATTENTION_FORCE_CXX11_ABI", "FALSE") == "TRUE"

 def get_platform():
@@ -101,26 +104,27 @@ if not torch.cuda.is_available():
     print(
         "\nWarning: Torch did not find available GPUs on this system.\n",
         "If your intention is to cross-compile, this is not an error.\n"
-        "By default, Apex will cross-compile for Pascal (compute capabilities 6.0, 6.1, 6.2),\n"
-        "Volta (compute capability 7.0), Turing (compute capability 7.5),\n"
-        "and, if the CUDA version is >= 11.0, Ampere (compute capability 8.0).\n"
+        "By default, FlashAttention will cross-compile for Ampere (compute capability 8.0, 8.6, "
+        "8.9), and, if the CUDA version is >= 11.8, Hopper (compute capability 9.0).\n"
         "If you wish to cross-compile for a single specific architecture,\n"
         'export TORCH_CUDA_ARCH_LIST="compute capability" before running setup.py.\n',
     )
     if os.environ.get("TORCH_CUDA_ARCH_LIST", None) is None and CUDA_HOME is not None:
         _, bare_metal_version = get_cuda_bare_metal_version(CUDA_HOME)
         if bare_metal_version >= Version("11.8"):
-            os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0;8.6;9.0"
-        elif bare_metal_version >= Version("11.1"):
-            os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0;8.6"
-        elif bare_metal_version == Version("11.0"):
-            os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0"
+            os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6;9.0"
+        elif bare_metal_version >= Version("11.4"):
+            os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
         else:
-            os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5"
+            os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"

 cmdclass = {}
 ext_modules = []

+# We want this even if SKIP_CUDA_BUILD because when we run python setup.py sdist we want the .hpp
+# files included in the source distribution, in case the user compiles from source.
+subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"])
+
 if not SKIP_CUDA_BUILD:
     print("\n\ntorch.__version__ = {}\n\n".format(torch.__version__))
     TORCH_MAJOR = int(torch.__version__.split(".")[0])
@@ -137,8 +141,8 @@ if not SKIP_CUDA_BUILD:
     # Check, if CUDA11 is installed for compute capability 8.0
     cc_flag = []
     _, bare_metal_version = get_cuda_bare_metal_version(CUDA_HOME)
-    if bare_metal_version < Version("11.0"):
-        raise RuntimeError("FlashAttention is only supported on CUDA 11 and above")
+    if bare_metal_version < Version("11.4"):
+        raise RuntimeError("FlashAttention is only supported on CUDA 11.4 and above")
     # cc_flag.append("-gencode")
     # cc_flag.append("arch=compute_75,code=sm_75")
     cc_flag.append("-gencode")
@@ -147,7 +151,11 @@ if not SKIP_CUDA_BUILD:
     cc_flag.append("-gencode")
     cc_flag.append("arch=compute_90,code=sm_90")

-    subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"])
+    # HACK: The compiler flag -D_GLIBCXX_USE_CXX11_ABI is set to be the same as
+    # torch._C._GLIBCXX_USE_CXX11_ABI
+    # https://github.com/pytorch/pytorch/blob/8472c24e3b5b60150096486616d98b7bea01500b/torch/utils/cpp_extension.py#L920
+    if FORCE_CXX11_ABI:
+        torch._C._GLIBCXX_USE_CXX11_ABI = True
     ext_modules.append(
         CUDAExtension(
             name="flash_attn_2_cuda",
@@ -213,6 +221,7 @@ if not SKIP_CUDA_BUILD:
                 Path(this_dir) / 'csrc' / 'cutlass' / 'include',
             ],
         )
+    )

 def get_package_version():
@@ -232,7 +241,6 @@ class CachedWheelsCommand(_bdist_wheel):
     find an existing wheel (which is currently the case for all flash attention installs). We use
     the environment parameters to detect whether there is already a pre-built version of a compatible
     wheel available and short-circuits the standard full build pipeline.
-
     """

     def run(self):
         if FORCE_BUILD:
@@ -241,16 +249,20 @@ class CachedWheelsCommand(_bdist_wheel):
         raise_if_cuda_home_none("flash_attn")
         # Determine the version numbers that will be used to determine the correct wheel
-        _, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME)
+        # We're using the CUDA version used to build torch, not the one currently installed
+        # _, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME)
+        torch_cuda_version = parse(torch.version.cuda)
         torch_version_raw = parse(torch.__version__)
         python_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
         platform_name = get_platform()
         flash_version = get_package_version()
-        cuda_version = f"{cuda_version_raw.major}{cuda_version_raw.minor}"
-        torch_version = f"{torch_version_raw.major}.{torch_version_raw.minor}.{torch_version_raw.micro}"
+        # cuda_version = f"{cuda_version_raw.major}{cuda_version_raw.minor}"
+        cuda_version = f"{torch_cuda_version.major}{torch_cuda_version.minor}"
+        torch_version = f"{torch_version_raw.major}.{torch_version_raw.minor}"
+        cxx11_abi = str(torch._C._GLIBCXX_USE_CXX11_ABI).upper()

         # Determine wheel URL based on CUDA version, torch version, python version and OS
-        wheel_filename = f'{PACKAGE_NAME}-{flash_version}+cu{cuda_version}torch{torch_version}-{python_version}-{python_version}-{platform_name}.whl'
+        wheel_filename = f'{PACKAGE_NAME}-{flash_version}+cu{cuda_version}torch{torch_version}cxx11abi{cxx11_abi}-{python_version}-{python_version}-{platform_name}.whl'
         wheel_url = BASE_WHEEL_URL.format(
             tag_name=f"v{flash_version}",
             wheel_name=wheel_filename
@@ -279,7 +291,6 @@ class CachedWheelsCommand(_bdist_wheel):
 setup(
-    # @pierce - TODO: Revert for official release
     name=PACKAGE_NAME,
     version=get_package_version(),
     packages=find_packages(
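Concretely, the cached-wheel lookup above composes a filename from the torch CUDA version, the torch major.minor version, the C++11 ABI flag, and the Python tag. A sketch that prints the filename the command would search for in the current environment; the flash_attn version 2.0.0 and the linux_x86_64 platform tag are placeholders, and a CUDA-enabled torch build is assumed:

# Sketch: mimic CachedWheelsCommand's wheel_filename for the local interpreter.
python -c "
import sys, torch
from packaging.version import parse
cuda = parse(torch.version.cuda)   # assumes a CUDA build of torch
tv = parse(torch.__version__)
abi = str(torch._C._GLIBCXX_USE_CXX11_ABI).upper()
py = f'cp{sys.version_info.major}{sys.version_info.minor}'
print(f'flash_attn-2.0.0+cu{cuda.major}{cuda.minor}torch{tv.major}.{tv.minor}'
      f'cxx11abi{abi}-{py}-{py}-linux_x86_64.whl')
"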