Unverified Commit 01d2318d authored by Jinze Xue's avatar Jinze Xue Committed by GitHub
Browse files

Fast Build CUAEV (#566)

* Fast Build CUAEV

* fix

* --cuaev for fastbuild, --cuaev-all-sms for all

* update readme

* fix

* fix

* fix

* fix
parent 6a3dd807
......@@ -5,11 +5,15 @@ from setuptools import setup, find_packages
from distutils import log
import sys
BUILD_CUAEV = '--cuaev' in sys.argv
if BUILD_CUAEV:
BUILD_CUAEV_ALL_SM = '--cuaev-all-sms' in sys.argv
if BUILD_CUAEV_ALL_SM:
sys.argv.remove('--cuaev-all-sms')
FAST_BUILD_CUAEV = '--cuaev' in sys.argv
if FAST_BUILD_CUAEV:
sys.argv.remove('--cuaev')
if not BUILD_CUAEV:
if not BUILD_CUAEV_ALL_SM and not FAST_BUILD_CUAEV:
log.warn("Will not install cuaev") # type: ignore
with open("README.md", "r") as fh:
......@@ -42,20 +46,41 @@ def maybe_download_cub():
return [os.path.abspath("./include")]
def cuda_extension():
def cuda_extension(build_all=False):
import torch
from torch.utils.cpp_extension import CUDAExtension
SMs = None
if not build_all:
SMs = []
devices = torch.cuda.device_count()
print('FAST_BUILD_CUAEV: ON')
print('This build will only support the following devices or the devices with same cuda capability: ')
for i in range(devices):
d = 'cuda:{}'.format(i)
sm = torch.cuda.get_device_capability(i)
sm = int(f'{sm[0]}{sm[1]}')
if sm >= 50:
print('{}: {}'.format(i, torch.cuda.get_device_name(d)))
print(' {}'.format(torch.cuda.get_device_properties(i)))
if sm not in SMs and sm >= 50:
SMs.append(sm)
nvcc_args = ["-gencode=arch=compute_50,code=sm_50", "-gencode=arch=compute_60,code=sm_60",
"-gencode=arch=compute_61,code=sm_61", "-gencode=arch=compute_70,code=sm_70",
"-Xptxas=-v", '--expt-extended-lambda', '-use_fast_math']
cuda_version = float(torch.version.cuda)
if cuda_version >= 10:
nvcc_args.append("-gencode=arch=compute_75,code=sm_75")
if cuda_version >= 11:
nvcc_args.append("-gencode=arch=compute_80,code=sm_80")
if cuda_version >= 11.1:
nvcc_args.append("-gencode=arch=compute_86,code=sm_86")
nvcc_args = ["-Xptxas=-v", '--expt-extended-lambda', '-use_fast_math']
if SMs:
for sm in SMs:
nvcc_args.append(f"-gencode=arch=compute_{sm},code=sm_{sm}")
else:
nvcc_args.append("-gencode=arch=compute_60,code=sm_60")
nvcc_args.append("-gencode=arch=compute_61,code=sm_61")
nvcc_args.append("-gencode=arch=compute_70,code=sm_70")
cuda_version = float(torch.version.cuda)
if cuda_version >= 10:
nvcc_args.append("-gencode=arch=compute_75,code=sm_75")
if cuda_version >= 11:
nvcc_args.append("-gencode=arch=compute_80,code=sm_80")
if cuda_version >= 11.1:
nvcc_args.append("-gencode=arch=compute_86,code=sm_86")
print("nvcc_args: ", nvcc_args)
return CUDAExtension(
name='torchani.cuaev',
pkg='torchani.cuaev',
......@@ -65,7 +90,7 @@ def cuda_extension():
def cuaev_kwargs():
if not BUILD_CUAEV:
if not BUILD_CUAEV_ALL_SM and not FAST_BUILD_CUAEV:
return dict(
provides=['torchani']
)
......@@ -76,7 +101,7 @@ def cuaev_kwargs():
'torchani.cuaev',
],
ext_modules=[
cuda_extension()
cuda_extension(BUILD_CUAEV_ALL_SM)
],
cmdclass={
'build_ext': BuildExtension,
......
......@@ -2,16 +2,27 @@
CUDA Extension for AEV calculation.
Performance improvement is expected to be ~3X for AEV computation and ~1.5X for overall training workflow.
## Requirement
CUAEV requires the nightly version of [pytorch](https://pytorch.org/) to work.
If you use conda, you can install it with
```
conda install pytorch torchvision torchaudio cudatoolkit={YOUR_CUDA_VERSION} -c pytorch-nightly
```
## Install
In most cases, if the `gcc` and `cuda` environments are well configured, running the following commands in the `torchani` directory will install torchani and cuaev together.
```bash
git clone git@github.com:aiqm/torchani.git
cd torchani
# install by
python setup.py install --cuaev
# choose one option below
# use --cuaev-all-sms if you are building in a SLURM environment and there are multiple different gpus in a node
# use --cuaev to build only for the detected gpus
python setup.py install --cuaev-all-sms # build for all sms
python setup.py install --cuaev # only build for detected gpus
# or for development
# `pip install -e . && ` is only needed for the very first install (because of https://github.com/pypa/pip/issues/1883)
pip install -e . && pip install -e . --global-option="--cuaev"
pip install -e . && pip install -v -e . --global-option="--cuaev-all-sms" # build for all sms
pip install -e . && pip install -v -e . --global-option="--cuaev" # only build for detected gpus
```
<del>Notes for install on Hipergator</del> (Currently not working because Pytorch dropped the official build for cuda/10.0)
......@@ -20,7 +31,7 @@ srun -p gpu --gpus=geforce:1 --time=01:00:00 --mem=10gb --pty -u bash -i # com
conda install pytorch torchvision cudatoolkit=10.0 -c pytorch # make sure it's cudatoolkit=10.0
module load cuda/10.0.130
module load gcc/7.3.0
python setup.py install --cuaev
python setup.py install --cuaev-all-sms
```
## Usage
......@@ -47,6 +58,7 @@ Benchmark of [torchani/tools/training-aev-benchmark.py](https://github.com/aiqm/
## Test
```bash
cd torchani
./download.sh
python tests/test_cuaev.py
```
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment