Fast Build CUAEV (#566)

* Fast Build CUAEV * fix * --cuaev for fastbuild, --cuaev-all-sms for all * update readme * fix * fix * fix * fix

Fast Build CUAEV (#566)
* Fast Build CUAEV * fix * --cuaev for fastbuild, --cuaev-all-sms for all * update readme * fix * fix * fix * fix
01d2318d · Jinze Xue · GitHub · 6a3dd807 · 01d2318d · 01d2318d
Unverified Commit 01d2318d authored Jan 30, 2021 by Jinze Xue Committed by GitHub Jan 30, 2021
Hide whitespace changes
Inline Side-by-side

Showing with 57 additions and 20 deletions

setup.py setup.py +41 -16

torchani/cuaev/README.md torchani/cuaev/README.md +16 -4

No files found.
--- a/setup.py
+++ b/setup.py
@@ -5,11 +5,15 @@ from setuptools import setup, find_packages
 from distutils import log
 import sys

-BUILD_CUAEV = '--cuaev' in sys.argv
-if BUILD_CUAEV:
+BUILD_CUAEV_ALL_SM = '--cuaev-all-sms' in sys.argv
+if BUILD_CUAEV_ALL_SM:
+    sys.argv.remove('--cuaev-all-sms')
+
+FAST_BUILD_CUAEV = '--cuaev' in sys.argv
+if FAST_BUILD_CUAEV:
    sys.argv.remove('--cuaev')

-if not BUILD_CUAEV:
+if not BUILD_CUAEV_ALL_SM and not FAST_BUILD_CUAEV:
    log.warn("Will not install cuaev")  # type: ignore

 with open("README.md", "r") as fh:
@@ -42,20 +46,41 @@ def maybe_download_cub():
    return [os.path.abspath("./include")]


-def cuda_extension():
+def cuda_extension(build_all=False):
    import torch
    from torch.utils.cpp_extension import CUDAExtension
+    SMs = None
+    if not build_all:
+        SMs = []
+        devices = torch.cuda.device_count()
+        print('FAST_BUILD_CUAEV: ON')
+        print('This build will only support the following devices or the devices with same cuda capability: ')
+        for i in range(devices):
+            d = 'cuda:{}'.format(i)
+            sm = torch.cuda.get_device_capability(i)
+            sm = int(f'{sm[0]}{sm[1]}')
+            if sm >= 50:
+                print('{}: {}'.format(i, torch.cuda.get_device_name(d)))
+                print('   {}'.format(torch.cuda.get_device_properties(i)))
+            if sm not in SMs and sm >= 50:
+                SMs.append(sm)

-    nvcc_args = ["-gencode=arch=compute_50,code=sm_50", "-gencode=arch=compute_60,code=sm_60",
-                 "-gencode=arch=compute_61,code=sm_61", "-gencode=arch=compute_70,code=sm_70",
-                 "-Xptxas=-v", '--expt-extended-lambda', '-use_fast_math']
-    cuda_version = float(torch.version.cuda)
-    if cuda_version >= 10:
-        nvcc_args.append("-gencode=arch=compute_75,code=sm_75")
-    if cuda_version >= 11:
-        nvcc_args.append("-gencode=arch=compute_80,code=sm_80")
-    if cuda_version >= 11.1:
-        nvcc_args.append("-gencode=arch=compute_86,code=sm_86")
+    nvcc_args = ["-Xptxas=-v", '--expt-extended-lambda', '-use_fast_math']
+    if SMs:
+        for sm in SMs:
+            nvcc_args.append(f"-gencode=arch=compute_{sm},code=sm_{sm}")
+    else:
+        nvcc_args.append("-gencode=arch=compute_60,code=sm_60")
+        nvcc_args.append("-gencode=arch=compute_61,code=sm_61")
+        nvcc_args.append("-gencode=arch=compute_70,code=sm_70")
+        cuda_version = float(torch.version.cuda)
+        if cuda_version >= 10:
+            nvcc_args.append("-gencode=arch=compute_75,code=sm_75")
+        if cuda_version >= 11:
+            nvcc_args.append("-gencode=arch=compute_80,code=sm_80")
+        if cuda_version >= 11.1:
+            nvcc_args.append("-gencode=arch=compute_86,code=sm_86")
+    print("nvcc_args: ", nvcc_args)
    return CUDAExtension(
        name='torchani.cuaev',
        pkg='torchani.cuaev',
@@ -65,7 +90,7 @@ def cuda_extension():


 def cuaev_kwargs():
-    if not BUILD_CUAEV:
+    if not BUILD_CUAEV_ALL_SM and not FAST_BUILD_CUAEV:
        return dict(
            provides=['torchani']
        )
@@ -76,7 +101,7 @@ def cuaev_kwargs():
            'torchani.cuaev',
        ],
        ext_modules=[
-            cuda_extension()
+            cuda_extension(BUILD_CUAEV_ALL_SM)
        ],
        cmdclass={
            'build_ext': BuildExtension,

--- a/torchani/cuaev/README.md
+++ b/torchani/cuaev/README.md
@@ -2,16 +2,27 @@
 CUDA Extension for AEV calculation.
 Performance improvement is expected to be ~3X for AEV computation and ~1.5X for overall training workflow.

+## Requirement
+CUAEV requires the nightly version of [pytorch](https://pytorch.org/) to work.
+If you use conda, you can install it with
+```
+conda install pytorch torchvision torchaudio cudatoolkit={YOUR_CUDA_VERSION} -c pytorch-nightly
+```
+
 ## Install
 In most cases, if the `gcc` and `cuda` environments are well configured, running the following commands in the `torchani` directory will install torchani and cuaev together.
 ```bash
 git clone git@github.com:aiqm/torchani.git
 cd torchani
-# install by
-python setup.py install --cuaev
+# choose one option below
+# use --cuaev-all-sms if you are building in a SLURM environment and a node has multiple different gpus
+# use --cuaev to build only for the detected gpus
+python setup.py install --cuaev-all-sms  # build for all sms
+python setup.py install --cuaev          # only build for detected gpus
 # or for development
 # `pip install -e . && ` is only needed for the very first install (because of https://github.com/pypa/pip/issues/1883)
-pip install -e . && pip install -e . --global-option="--cuaev"
+pip install -e . && pip install -v -e . --global-option="--cuaev-all-sms"  # build for all sms
+pip install -e . && pip install -v -e . --global-option="--cuaev"          # only build for detected gpus
 ```

 <del>Notes for install on Hipergator</del> (Currently not working because Pytorch dropped the official build for cuda/10.0)
@@ -20,7 +31,7 @@ srun -p gpu --gpus=geforce:1 --time=01:00:00 --mem=10gb --pty -u bash -i   # com
 conda install pytorch torchvision cudatoolkit=10.0 -c pytorch              # make sure it's cudatoolkit=10.0
 module load cuda/10.0.130
 module load gcc/7.3.0
-python setup.py install --cuaev
+python setup.py install --cuaev-all-sms
 ```

 ## Usage
@@ -47,6 +58,7 @@ Benchmark of [torchani/tools/training-aev-benchmark.py](https://github.com/aiqm/
 ## Test
 ```bash
 cd torchani
+./download.sh
 python tests/test_cuaev.py
 ```