Unverified Commit 656aa037 authored by akkamesh's avatar akkamesh Committed by GitHub
Browse files

CUDA port for AEV computation (#516)



* cuda port for aev computation

* benchmark, setup and import

* fix bug

* fix flake8 and jit

* fix collections.abc for python>3.7

* setup gitingore

* format code with formatter

* ignore more

* Use torch API to set streams

* Use pytorch's caching allocator

* empty line

* fix

* cuaev correctness test, install instructions in readme

* readme

* fix readme

* fix readme

* fix readme

* fix readme

* add usage in readme

* fix readme

* add test in readme

* fix readme

* -std=c++14

* bug fix - add async data copy

* bug fix - add missing stream sync

* code refactor and cosmetic changes

* aev benchmark for big protein

* remove mdtraj

* remove print

* move pdb to dataset folder

* cosmetic changes

* Move torchani/extensions -> torchani/cuaev

* clang-format -i

* cleanup

* return aev from cuComputeAEV

* Update aev.py

* Update aev.py

* fix flake8

* fix LGTM unused local variable

* clang-format

* fix

* save

* install change
Co-authored-by: default avatarrichard <yueyericardo@gmail.com>
Co-authored-by: default avatarXiang Gao <qasdfgtyuiop@gmail.com>
parent 57dd26bf
...@@ -5,7 +5,9 @@ a.out ...@@ -5,7 +5,9 @@ a.out
/test.py /test.py
/.vscode /.vscode
/build* /build*
/.eggs .eggs
*.egg-info
*.ninja
/torchani.egg-info /torchani.egg-info
/*.h5 /*.h5
/*.hdf5 /*.hdf5
...@@ -19,6 +21,9 @@ benchmark_xyz ...@@ -19,6 +21,9 @@ benchmark_xyz
/*.ipt /*.ipt
/*.params /*.params
/*.dat /*.dat
*.o
*.so
*.ninja_log
/tmp /tmp
*_cache *_cache
datacache datacache
......
...@@ -61,6 +61,7 @@ To run the tests and examples, you must manually download a data package ...@@ -61,6 +61,7 @@ To run the tests and examples, you must manually download a data package
./download.sh ./download.sh
``` ```
(Optional) To install AEV CUDA Extension (speedup for AEV computation), please follow the instruction at [torchani/cuaev](https://github.com/aiqm/torchani/tree/master/torchani/cuaev).
# Citation # Citation
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -59,7 +59,7 @@ def cuda_extension(): ...@@ -59,7 +59,7 @@ def cuda_extension():
return CUDAExtension( return CUDAExtension(
name='torchani.cuaev', name='torchani.cuaev',
pkg='torchani.cuaev', pkg='torchani.cuaev',
sources=glob.glob('torchani/cuaev/*'), sources=glob.glob('torchani/cuaev/*.cu'),
include_dirs=maybe_download_cub(), include_dirs=maybe_download_cub(),
extra_compile_args={'cxx': ['-std=c++14'], 'nvcc': nvcc_args}) extra_compile_args={'cxx': ['-std=c++14'], 'nvcc': nvcc_args})
......
import os
import torch
import torchani import torchani
import unittest import unittest
import torch import pickle
import os
from torchani.testing import TestCase, make_tensor from torchani.testing import TestCase, make_tensor
path = os.path.dirname(os.path.realpath(__file__))
skipIfNoGPU = unittest.skipIf(not torch.cuda.is_available(), skipIfNoGPU = unittest.skipIf(not torch.cuda.is_available(),
'There is no device to run this test') 'There is no device to run this test')
skipIfNoCUAEV = unittest.skipIf(not torchani.aev.has_cuaev, "only valid when cuaev is installed")
@unittest.skipIf(not torchani.aev.has_cuaev, "only valid when cuaev is installed") @skipIfNoCUAEV
class TestCUAEVNoGPU(TestCase): class TestCUAEVNoGPU(TestCase):
def testSimple(self): def testSimple(self):
...@@ -29,11 +34,65 @@ class TestCUAEVNoGPU(TestCase): ...@@ -29,11 +34,65 @@ class TestCUAEVNoGPU(TestCase):
self.assertIn("cuaev::cuComputeAEV", str(s.graph_for((species, coordinates)))) self.assertIn("cuaev::cuComputeAEV", str(s.graph_for((species, coordinates))))
@unittest.skipIf(not torchani.aev.has_cuaev, "only valid when cuaev is installed")
@skipIfNoGPU @skipIfNoGPU
@skipIfNoCUAEV
class TestCUAEV(TestCase): class TestCUAEV(TestCase):
def setUp(self):
    """Create twin AEVComputers — the reference Python implementation and the
    cuaev CUDA extension — sharing identical ANI-1x style constants, so every
    test can compare their outputs element-wise."""
    self.tolerance = 5e-5
    self.device = 'cuda'
    radial_cutoff = 5.2000e+00
    angular_cutoff = 3.5000e+00
    eta_r = torch.tensor([1.6000000e+01], device=self.device)
    shf_r = torch.tensor([9.0000000e-01, 1.1687500e+00, 1.4375000e+00, 1.7062500e+00, 1.9750000e+00, 2.2437500e+00, 2.5125000e+00, 2.7812500e+00, 3.0500000e+00, 3.3187500e+00, 3.5875000e+00, 3.8562500e+00, 4.1250000e+00, 4.3937500e+00, 4.6625000e+00, 4.9312500e+00], device=self.device)
    zeta = torch.tensor([3.2000000e+01], device=self.device)
    shf_z = torch.tensor([1.9634954e-01, 5.8904862e-01, 9.8174770e-01, 1.3744468e+00, 1.7671459e+00, 2.1598449e+00, 2.5525440e+00, 2.9452431e+00], device=self.device)
    eta_a = torch.tensor([8.0000000e+00], device=self.device)
    shf_a = torch.tensor([9.0000000e-01, 1.5500000e+00, 2.2000000e+00, 2.8500000e+00], device=self.device)
    num_species = 4
    shared_args = (radial_cutoff, angular_cutoff, eta_r, shf_r, eta_a, zeta, shf_a, shf_z, num_species)
    self.aev_computer = torchani.AEVComputer(*shared_args)
    self.cuaev_computer = torchani.AEVComputer(*shared_args, use_cuda_extension=True)
def testSimple(self):
    """AEVs from the CUDA extension must match the reference Python
    implementation on a small hand-crafted batch of two 5-atom molecules
    (the second molecule includes a padding atom, species -1)."""
    coordinates = torch.tensor([
        [[0.03192167, 0.00638559, 0.01301679],
         [-0.83140486, 0.39370209, -0.26395324],
         [-0.66518241, -0.84461308, 0.20759389],
         [0.45554739, 0.54289633, 0.81170881],
         [0.66091919, -0.16799635, -0.91037834]],
        [[-4.1862600, 0.0575700, -0.0381200],
         [-3.1689400, 0.0523700, 0.0200000],
         [-4.4978600, 0.8211300, 0.5604100],
         [-4.4978700, -0.8000100, 0.4155600],
         [0.00000000, -0.00000000, -0.00000000]]
    ], requires_grad=True, device=self.device)
    species = torch.tensor([[1, 0, 0, 0, 0], [2, 0, 0, 0, -1]], device=self.device)
    batch = (species, coordinates)
    _, reference_aev = self.aev_computer(batch)
    _, extension_aev = self.cuaev_computer(batch)
    self.assertEqual(extension_aev, reference_aev)
def testTripeptideMD(self):
    """Cross-check cuaev against the Python AEV on 100 tripeptide MD frames.

    Each ``test_data/tripeptide-md/{i}.dat`` file is a pickled 8-tuple whose
    first two entries are numpy coordinates and species; the remaining six
    fields are unused here.
    """
    for i in range(100):
        datafile = os.path.join(path, 'test_data/tripeptide-md/{}.dat'.format(i))
        with open(datafile, 'rb') as f:
            coordinates, species, _, _, _, _, _, _ = pickle.load(f)
        # unsqueeze(0) adds the batch dimension expected by AEVComputer
        coordinates = torch.from_numpy(coordinates).float().unsqueeze(0).to(self.device)
        species = torch.from_numpy(species).unsqueeze(0).to(self.device)
        _, aev = self.aev_computer((species, coordinates))
        _, cu_aev = self.cuaev_computer((species, coordinates))
        self.assertEqual(cu_aev, aev)
def testNIST(self):
    """Cross-check cuaev against the Python AEV on the pickled NIST set.

    ``test_data/NIST/all`` holds a pickled list of 6-tuples whose first two
    entries are numpy coordinates and species; the remaining four fields are
    unused here. Each entry is already batched (no unsqueeze needed).
    """
    datafile = os.path.join(path, 'test_data/NIST/all')
    with open(datafile, 'rb') as f:
        data = pickle.load(f)
    for coordinates, species, _, _, _, _ in data:
        coordinates = torch.from_numpy(coordinates).to(torch.float).to(self.device)
        species = torch.from_numpy(species).to(self.device)
        _, aev = self.aev_computer((species, coordinates))
        _, cu_aev = self.cuaev_computer((species, coordinates))
        self.assertEqual(cu_aev, aev)
if __name__ == '__main__': if __name__ == '__main__':
......
import time
import torch
import torchani
import pynvml
import gc
import os
from ase.io import read
import argparse
def checkgpu(device=None):
    """Print the GPU memory usage reported by NVML (same view as nvidia-smi).

    Args:
        device: logical CUDA device index; defaults to the current device.
    """
    # `is not None` so an explicit index 0 is honored; the previous plain
    # truthiness test silently fell back to torch.cuda.current_device().
    i = device if device is not None else torch.cuda.current_device()
    # CUDA_VISIBLE_DEVICES remaps logical device i onto a physical GPU while
    # NVML enumerates physical GPUs, so translate the index before querying.
    # (Previously only the first character of the variable was read, which
    # broke for multi-digit IDs and for any logical index other than 0.)
    if 'CUDA_VISIBLE_DEVICES' in os.environ:
        real_i = int(os.environ['CUDA_VISIBLE_DEVICES'].split(',')[i])
    else:
        real_i = i
    pynvml.nvmlInit()
    h = pynvml.nvmlDeviceGetHandleByIndex(real_i)
    info = pynvml.nvmlDeviceGetMemoryInfo(h)
    name = pynvml.nvmlDeviceGetName(h)
    # NOTE(review): .decode() assumes pynvml returns bytes here; recent pynvml
    # releases return str, which would raise AttributeError — confirm the
    # pinned pynvml version.
    print('  GPU Memory Used (nvidia-smi): {:7.1f}MB / {:.1f}MB ({})'.format(info.used / 1024 / 1024, info.total / 1024 / 1024, name.decode()))
def alert(text):
    """Print *text* wrapped in an ANSI escape so it shows in red."""
    colored = '\033[91m{}\33[0m'.format(text)
    print(colored)
def info(text):
    """Print *text* wrapped in an ANSI escape so it shows in green."""
    colored = '\033[32m{}\33[0m'.format(text)
    print(colored)
def benchmark(speciesPositions, aev_comp, N, check_gpu_mem):
    """Time N forward passes of `aev_comp` on a fixed batch.

    Returns the AEV tensor from the final pass and the elapsed wall time in
    seconds. When `check_gpu_mem` is truthy, GPU memory is reported once
    after the warm-up iterations.
    """
    # Start from a clean allocator state so runs are comparable.
    torch.cuda.empty_cache()
    gc.collect()
    torch.cuda.synchronize()
    t0 = time.time()
    for iteration in range(N):
        aev = aev_comp(speciesPositions).aevs
        if check_gpu_mem and iteration == 2:
            checkgpu()
    # Kernels are asynchronous — wait for completion before stopping the clock.
    torch.cuda.synchronize()
    delta = time.time() - t0
    print(f'  Duration: {delta:.2f} s')
    print(f'  Speed: {delta/N*1000:.2f} ms/it')
    return aev, delta
def check_speedup_error(aev, aev_ref, speed, speed_ref):
    """Report the speedup of `speed` over `speed_ref` (green when faster,
    red when slower) and assert the max element-wise AEV deviation from the
    reference stays below 0.02."""
    ratio = speed_ref / speed
    if ratio > 1:
        info(f'  Speed up: {ratio:.2f} X\n')
    else:
        alert(f'  Speed up (slower): {ratio:.2f} X\n')
    max_deviation = (aev - aev_ref).abs().max()
    assert max_deviation < 0.02, f'  Error: {max_deviation:.1e}\n'
if __name__ == "__main__":
    # CLI: -c/--check_gpu_mem stores 1 when the flag is given, 0 otherwise.
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--check_gpu_mem',
                        dest='check_gpu_mem',
                        action='store_const',
                        const=1)
    parser.set_defaults(check_gpu_mem=0)
    # NOTE: `parser` is rebound to the parsed argparse.Namespace from here on.
    parser = parser.parse_args()
    path = os.path.dirname(os.path.realpath(__file__))
    check_gpu_mem = parser.check_gpu_mem
    device = torch.device('cuda')
    # Benchmark molecules of increasing size (small molecule -> big protein).
    files = ['small.pdb', '1hz5.pdb', '6W8H.pdb']
    for file in files:
        datafile = os.path.join(path, f'../dataset/pdb/{file}')
        mol = read(datafile)  # ase.io.read
        species = torch.tensor([mol.get_atomic_numbers()], device=device)
        positions = torch.tensor([mol.get_positions()], dtype=torch.float32, requires_grad=False, device=device)
        print(f'File: {file}, Molecule size: {species.shape[-1]}\n')
        nnp = torchani.models.ANI2x(periodic_table_index=True, model_index=None).to(device)
        speciesPositions = nnp.species_converter((species, positions))
        aev_computer = nnp.aev_computer
        N = 500  # iterations per benchmark run
        print('Original TorchANI:')
        aev_ref, delta_ref = benchmark(speciesPositions, aev_computer, N, check_gpu_mem)
        print()
        print('CUaev:')
        # Flips the SAME AEVComputer instance onto the CUDA-extension path;
        # the reference run above has already completed, so this is safe.
        nnp.aev_computer.use_cuda_extension = True
        cuaev_computer = nnp.aev_computer
        aev, delta = benchmark(speciesPositions, cuaev_computer, N, check_gpu_mem)
        check_speedup_error(aev, aev_ref, delta, delta_ref)
        print('-' * 70 + '\n')
import torch
import torchani
import time
import timeit
import argparse
import pkbar
import gc
import pynvml
import os
from torchani.units import hartree2kcalmol
def build_network():
    """Build the per-element atomic networks for [H, C, N, O].

    Every network maps a 384-long AEV through three CELU-activated hidden
    layers down to a single atomic-energy output; only the widths of the
    first two hidden layers differ between elements.
    """
    def _atomic_net(hidden1, hidden2):
        # One element's energy head: 384 -> hidden1 -> hidden2 -> 96 -> 1.
        return torch.nn.Sequential(
            torch.nn.Linear(384, hidden1),
            torch.nn.CELU(0.1),
            torch.nn.Linear(hidden1, hidden2),
            torch.nn.CELU(0.1),
            torch.nn.Linear(hidden2, 96),
            torch.nn.CELU(0.1),
            torch.nn.Linear(96, 1)
        )

    # Hidden widths per element, in species order H, C, N, O.
    widths = [(160, 128), (144, 112), (128, 112), (128, 112)]
    return [_atomic_net(h1, h2) for h1, h2 in widths]
def checkgpu(device=None):
    """Print GPU memory usage from both PyTorch's caching allocator and NVML.

    The PyTorch line shows memory reserved by the caching allocator; the
    nvidia-smi line shows the total process footprint as NVML sees it.

    Args:
        device: logical CUDA device index; defaults to the current device.
    """
    # `is not None` so an explicit index 0 is honored; the previous plain
    # truthiness test silently fell back to torch.cuda.current_device().
    i = device if device is not None else torch.cuda.current_device()
    t = torch.cuda.get_device_properties(i).total_memory
    c = torch.cuda.memory_reserved(i)
    name = torch.cuda.get_device_properties(i).name
    print('  GPU Memory Cached (pytorch) : {:7.1f}MB / {:.1f}MB ({})'.format(c / 1024 / 1024, t / 1024 / 1024, name))
    # CUDA_VISIBLE_DEVICES remaps logical device i onto a physical GPU while
    # NVML enumerates physical GPUs, so translate the index before querying.
    # (Previously only the first character of the variable was read, which
    # broke for multi-digit IDs and for any logical index other than 0.)
    if 'CUDA_VISIBLE_DEVICES' in os.environ:
        real_i = int(os.environ['CUDA_VISIBLE_DEVICES'].split(',')[i])
    else:
        real_i = i
    pynvml.nvmlInit()
    h = pynvml.nvmlDeviceGetHandleByIndex(real_i)
    info = pynvml.nvmlDeviceGetMemoryInfo(h)
    name = pynvml.nvmlDeviceGetName(h)
    # NOTE(review): .decode() assumes pynvml returns bytes here; recent pynvml
    # releases return str, which would raise AttributeError — confirm the
    # pinned pynvml version.
    print('  GPU Memory Used (nvidia-smi): {:7.1f}MB / {:.1f}MB ({})'.format(info.used / 1024 / 1024, info.total / 1024 / 1024, name.decode()))
def alert(text):
    """Print *text* wrapped in an ANSI escape so it shows in red."""
    colored = '\033[91m{}\33[0m'.format(text)
    print(colored)
def sync_cuda(sync):
    """Block until all queued CUDA work finishes, but only when `sync` is truthy."""
    if not sync:
        return
    torch.cuda.synchronize()
def benchmark(parser, dataset, use_cuda_extension, force_training=False):
    """Train an ANI-style model over `dataset` and print per-stage timings.

    Internals of torchani.aev, the model's two stages, and optimizer.step are
    monkey-patched with timing wrappers; at the end, per-epoch times for AEV,
    forward, backward and optimizer are reported.

    Args:
        parser: parsed argparse.Namespace (uses .device, .synchronize, .num_epochs).
        dataset: iterable of property-dict batches (species/coordinates/energies/forces).
        use_cuda_extension (bool): forwarded to AEVComputer (cuaev vs Python AEV).
        force_training (bool): also fit forces via an extra autograd.grad pass.
    """
    synchronize = True if parser.synchronize else False
    timers = {}

    def time_func(key, func):
        # Wrap `func` so its cumulative wall time accumulates in timers[key].
        timers[key] = 0

        def wrapper(*args, **kwargs):
            start = timeit.default_timer()
            ret = func(*args, **kwargs)
            # Optionally wait for async CUDA kernels so the timing is honest.
            if synchronize:
                torch.cuda.synchronize()
            end = timeit.default_timer()
            timers[key] += end - start
            return ret

        return wrapper

    # ANI-1x style AEV hyperparameters (must match build_network's 384 input).
    Rcr = 5.2000e+00
    Rca = 3.5000e+00
    EtaR = torch.tensor([1.6000000e+01], device=parser.device)
    ShfR = torch.tensor([9.0000000e-01, 1.1687500e+00, 1.4375000e+00, 1.7062500e+00, 1.9750000e+00, 2.2437500e+00, 2.5125000e+00, 2.7812500e+00, 3.0500000e+00, 3.3187500e+00, 3.5875000e+00, 3.8562500e+00, 4.1250000e+00, 4.3937500e+00, 4.6625000e+00, 4.9312500e+00], device=parser.device)
    Zeta = torch.tensor([3.2000000e+01], device=parser.device)
    ShfZ = torch.tensor([1.9634954e-01, 5.8904862e-01, 9.8174770e-01, 1.3744468e+00, 1.7671459e+00, 2.1598449e+00, 2.5525440e+00, 2.9452431e+00], device=parser.device)
    EtaA = torch.tensor([8.0000000e+00], device=parser.device)
    ShfA = torch.tensor([9.0000000e-01, 1.5500000e+00, 2.2000000e+00, 2.8500000e+00], device=parser.device)
    num_species = 4
    aev_computer = torchani.AEVComputer(Rcr, Rca, EtaR, ShfR, EtaA, Zeta, ShfA, ShfZ, num_species, use_cuda_extension)
    nn = torchani.ANIModel(build_network())
    model = torch.nn.Sequential(aev_computer, nn).to(parser.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.000001)
    mse = torch.nn.MSELoss(reduction='none')

    # enable timers — monkey-patch the module-level AEV helpers and the two
    # model stages so each component's share of the epoch can be reported.
    torchani.aev.cutoff_cosine = time_func('torchani.aev.cutoff_cosine', torchani.aev.cutoff_cosine)
    torchani.aev.radial_terms = time_func('torchani.aev.radial_terms', torchani.aev.radial_terms)
    torchani.aev.angular_terms = time_func('torchani.aev.angular_terms', torchani.aev.angular_terms)
    torchani.aev.compute_shifts = time_func('torchani.aev.compute_shifts', torchani.aev.compute_shifts)
    torchani.aev.neighbor_pairs = time_func('torchani.aev.neighbor_pairs', torchani.aev.neighbor_pairs)
    torchani.aev.neighbor_pairs_nopbc = time_func('torchani.aev.neighbor_pairs_nopbc', torchani.aev.neighbor_pairs_nopbc)
    torchani.aev.triu_index = time_func('torchani.aev.triu_index', torchani.aev.triu_index)
    torchani.aev.cumsum_from_zero = time_func('torchani.aev.cumsum_from_zero', torchani.aev.cumsum_from_zero)
    torchani.aev.triple_by_molecule = time_func('torchani.aev.triple_by_molecule', torchani.aev.triple_by_molecule)
    torchani.aev.compute_aev = time_func('torchani.aev.compute_aev', torchani.aev.compute_aev)
    model[0].forward = time_func('total', model[0].forward)      # AEV stage
    model[1].forward = time_func('forward', model[1].forward)    # NN stage
    optimizer.step = time_func('optimizer.step', optimizer.step)

    print('=> start training')
    start = time.time()
    loss_time = 0  # cumulative backward() wall time
    for epoch in range(0, parser.num_epochs):
        print('Epoch: %d/%d' % (epoch + 1, parser.num_epochs))
        progbar = pkbar.Kbar(target=len(dataset) - 1, width=8)
        for i, properties in enumerate(dataset):
            species = properties['species'].to(parser.device)
            # grad on coordinates is only needed when forces are trained
            coordinates = properties['coordinates'].to(parser.device).float().requires_grad_(force_training)
            true_energies = properties['energies'].to(parser.device).float()
            num_atoms = (species >= 0).sum(dim=1, dtype=true_energies.dtype)
            _, predicted_energies = model((species, coordinates))
            # TODO add sync after aev is done
            sync_cuda(synchronize)
            # per-molecule MSE scaled by 1/sqrt(num_atoms)
            energy_loss = (mse(predicted_energies, true_energies) / num_atoms.sqrt()).mean()
            if force_training:
                force_coefficient = 0.1
                true_forces = properties['forces'].to(parser.device).float()
                try:
                    forces = -torch.autograd.grad(predicted_energies.sum(), coordinates, create_graph=True, retain_graph=True)[0]
                except Exception as e:
                    alert('Error: {}'.format(e))
                    return
                force_loss = (mse(true_forces, forces).sum(dim=(1, 2)) / num_atoms).mean()
                loss = energy_loss + force_coefficient * force_loss
            else:
                loss = energy_loss
            rmse = hartree2kcalmol((mse(predicted_energies, true_energies)).mean()).detach().cpu().numpy()
            sync_cuda(synchronize)
            loss_start = time.time()
            # NOTE(review): there is no optimizer.zero_grad() — gradients
            # accumulate across batches. Presumably acceptable for a timing
            # benchmark, but verify this is intentional.
            loss.backward()
            sync_cuda(synchronize)
            loss_stop = time.time()
            loss_time += loss_stop - loss_start
            optimizer.step()
            sync_cuda(synchronize)
            progbar.update(i, values=[("rmse", rmse)])
        checkgpu()
    sync_cuda(synchronize)
    stop = time.time()

    print('=> More detail about benchmark PER EPOCH')
    for k in timers:
        if k.startswith('torchani.'):
            print('  {} - {:.1f}s'.format(k, timers[k] / parser.num_epochs))
    total_time = (stop - start) / parser.num_epochs
    loss_time = loss_time / parser.num_epochs
    opti_time = timers['optimizer.step'] / parser.num_epochs
    forward_time = timers['forward'] / parser.num_epochs
    aev_time = timers['total'] / parser.num_epochs  # 'total' wraps the AEV stage
    print('Total AEV - {:.1f}s'.format(aev_time))
    print('Forward - {:.1f}s'.format(forward_time))
    print('Backward - {:.1f}s'.format(loss_time))
    print('Optimizer - {:.1f}s'.format(opti_time))
    print('Others - {:.1f}s'.format(total_time - loss_time - aev_time - forward_time - opti_time))
    print('Epoch time - {:.1f}s'.format(total_time))
if __name__ == "__main__":
    # parse command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset_path',
                        help='Path of the dataset, can a hdf5 file \
or a directory containing hdf5 files')
    parser.add_argument('-d', '--device',
                        help='Device of modules and tensors',
                        default=('cuda' if torch.cuda.is_available() else 'cpu'))
    parser.add_argument('-b', '--batch_size',
                        help='Number of conformations of each batch',
                        default=2560, type=int)
    parser.add_argument('-y', '--synchronize',
                        action='store_true',
                        help='whether to insert torch.cuda.synchronize() at the end of each function')
    parser.add_argument('-n', '--num_epochs',
                        help='epochs',
                        default=1, type=int)
    # NOTE: `parser` is rebound to the parsed argparse.Namespace from here on.
    parser = parser.parse_args()

    print('=> loading dataset...')
    shifter = torchani.EnergyShifter(None)
    # parser.batch_size = 1280
    dataset = torchani.data.load(parser.dataset_path, additional_properties=('forces',)).subtract_self_energies(shifter).species_to_indices()
    # Materialize both orderings once so every benchmark run sees identical data.
    print('=> Caching shuffled dataset...')
    dataset_shuffled = list(dataset.shuffle().collate(parser.batch_size))
    print('=> Caching non-shuffled dataset...')
    dataset = list(dataset.collate(parser.batch_size))

    print("=> CUDA info:")
    devices = torch.cuda.device_count()
    print('Total devices: {}'.format(devices))
    for i in range(devices):
        d = 'cuda:{}'.format(i)
        print('{}: {}'.format(i, torch.cuda.get_device_name(d)))
        print('  {}'.format(torch.cuda.get_device_properties(i)))
        checkgpu(i)

    # Eight runs: {shuffled, non-shuffled} x {cuaev, python} x {energy, energy+force}.
    # Cache and GC are cleared between runs so memory measurements are comparable.
    print("\n\n=> Test 1/8: Shuffled Dataset, USE cuda extension, Energy training")
    torch.cuda.empty_cache()
    gc.collect()
    benchmark(parser, dataset_shuffled, use_cuda_extension=True, force_training=False)
    print("\n\n=> Test 2/8: Shuffled Dataset, NO cuda extension, Energy training")
    torch.cuda.empty_cache()
    gc.collect()
    benchmark(parser, dataset_shuffled, use_cuda_extension=False, force_training=False)
    print("\n\n=> Test 3/8: Non-Shuffled Dataset, USE cuda extension, Energy training")
    torch.cuda.empty_cache()
    gc.collect()
    benchmark(parser, dataset, use_cuda_extension=True, force_training=False)
    print("\n\n=> Test 4/8: Non-Shuffled Dataset, NO cuda extension, Energy training")
    torch.cuda.empty_cache()
    gc.collect()
    benchmark(parser, dataset, use_cuda_extension=False, force_training=False)
    print("\n\n=> Test 5/8: Shuffled Dataset, USE cuda extension, Force and Energy training")
    torch.cuda.empty_cache()
    gc.collect()
    benchmark(parser, dataset_shuffled, use_cuda_extension=True, force_training=True)
    print("\n\n=> Test 6/8: Shuffled Dataset, NO cuda extension, Force and Energy training")
    torch.cuda.empty_cache()
    gc.collect()
    benchmark(parser, dataset_shuffled, use_cuda_extension=False, force_training=True)
    print("\n\n=> Test 7/8: Non-Shuffled Dataset, USE cuda extension, Force and Energy training")
    torch.cuda.empty_cache()
    gc.collect()
    benchmark(parser, dataset, use_cuda_extension=True, force_training=True)
    print("\n\n=> Test 8/8: Non-Shuffled Dataset, NO cuda extension, Force and Energy training")
    torch.cuda.empty_cache()
    gc.collect()
    benchmark(parser, dataset, use_cuda_extension=False, force_training=True)
...@@ -359,6 +359,7 @@ class AEVComputer(torch.nn.Module): ...@@ -359,6 +359,7 @@ class AEVComputer(torch.nn.Module):
ShfZ (:class:`torch.Tensor`): The 1D tensor of :math:`\theta_s` in ShfZ (:class:`torch.Tensor`): The 1D tensor of :math:`\theta_s` in
equation (4) in the `ANI paper`_. equation (4) in the `ANI paper`_.
num_species (int): Number of supported atom types. num_species (int): Number of supported atom types.
use_cuda_extension (bool): Whether to use cuda extension for faster calculation (needs cuaev installed).
.. _ANI paper: .. _ANI paper:
http://pubs.rsc.org/en/Content/ArticleLanding/2017/SC/C6SC05720A#!divAbstract http://pubs.rsc.org/en/Content/ArticleLanding/2017/SC/C6SC05720A#!divAbstract
......
# CUAEV
CUDA Extension for AEV calculation.
Performance improvement is expected to be ~3X for AEV computation and ~1.5X for overall training workflow.
## Install
In most cases, if the `gcc` and `cuda` environments are well configured, running the following command in the `torchani` directory will install torchani and cuaev together.
```bash
git clone git@github.com:aiqm/torchani.git
cd torchani
# install by
python setup.py install --cuaev
# or for development
pip install -e . --global-option="--cuaev"
```
Notes for install on Hipergator
```bash
srun -p gpu --gpus=geforce:1 --time=01:00:00 --mem=10gb --pty -u bash -i # compilation may fail because of low memory (when memory is less than 5gb)
conda install pytorch torchvision cudatoolkit=10.0 -c pytorch # make sure it's cudatoolkit=10.0
module load cuda/10.0.130
module load gcc/7.3.0
python setup.py install --cuaev
```
## Usage
Pass `use_cuda_extension=True` when constructing the aev_computer, for example:
```python
cuaev_computer = torchani.AEVComputer(Rcr, Rca, EtaR, ShfR, EtaA, Zeta, ShfA, ShfZ, num_species, use_cuda_extension=True)
```
## Limitations
The current implementation of CUAEV does not support PBC or force calculation.
## Benchmark
Benchmark of [torchani/tools/training-aev-benchmark.py](https://github.com/aiqm/torchani/tree/master/torchani/tools/training-aev-benchmark.py) on RTX 2080 Ti:
| ANI-1x | Without Shuffle | Shuffle |
|:-----------------------:|:-----------------------:|:-----------------------:|
| Time per Epoch / Memory | AEV / Total / GPU Mem | AEV / Total/ GPU Mem |
| aev cuda extension | 7.7s / 26.3s / 2289 MB | 8.5s / 27.6s / 2425 MB |
| aev python code | 21.1s / 40.0s / 7361 MB | 28.7s / 47.8s / 3475 MB |
| improvements | 2.74 / 1.52 / 3.22 | 3.38 / 1.73 / 1.43 |
## Test
```bash
cd torchani
python tools/training-aev-benchmark.py download/dataset/ani-1x/sample.h5 -y
python tests/test_cuaev.py
```
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment