Unverified Commit f149f6e1 authored by Gao, Xiang's avatar Gao, Xiang Committed by GitHub
Browse files

Tools for COMP6 benchmark (#196)

parent 5bac472d
...@@ -6,7 +6,7 @@ import unittest ...@@ -6,7 +6,7 @@ import unittest
path = os.path.dirname(os.path.realpath(__file__)) path = os.path.dirname(os.path.realpath(__file__))
iptpath = os.path.join(path, 'test_data/inputtrain.ipt') iptpath = os.path.join(path, 'test_data/inputtrain.ipt')
dspath = os.path.join(path, '../dataset/ani_gdb_s01.h5') dspath = os.path.join(path, '../dataset/ani1-up_to_gdb4/ani_gdb_s01.h5')
class TestNeuroChem(unittest.TestCase): class TestNeuroChem(unittest.TestCase):
......
import os
import torch
import torchani
from torchani.data._pyanitools import anidataloader
import argparse
import math
import tqdm
# Unit conversion factor: 1 Hartree = 627.509 kcal/mol.
HARTREE2KCAL = 627.509
dtype = torch.float32

# parse command line arguments
parser = argparse.ArgumentParser()
parser.add_argument('dir', help='Path to the COMP6 directory')
parser.add_argument('-b', '--batchatoms', type=int, default=512,
                    help='Maximum number of ATOMs in each batch')
parser.add_argument('-d', '--device',
                    help='Device of modules and tensors',
                    default=('cuda' if torch.cuda.is_available() else 'cpu'))
# NOTE: ``parser`` is rebound to the parsed Namespace and used as a global
# settings object (``parser.dir``, ``parser.batchatoms``, ``parser.device``)
# throughout this script.
parser = parser.parse_args()

# run benchmark
ani1x = torchani.models.ANI1x().to(dtype).to(parser.device)
def recursive_h5_files(base):
    """Stream dataset entries from every ``.h5`` file under *base*.

    Walks the directory tree rooted at *base* and, for each HDF5 file
    found, yields every entry produced by :class:`anidataloader`.

    :param base: root directory to search recursively.
    """
    # os.walk performs the directory recursion that the original version
    # implemented by hand with os.listdir + explicit recursion.
    for root, _dirs, files in os.walk(base):
        for name in files:
            if name.endswith(".h5"):
                yield from anidataloader(os.path.join(root, name))
def by_batch(species, coordinates, model):
    """Evaluate *model* over a conformer set in atom-limited batches.

    The (conformers, atoms) inputs are split into chunks containing at
    most ``parser.batchatoms`` atoms each; energies and coordinate
    gradients are computed per chunk and concatenated.

    Returns a ``(energies, forces)`` pair of detached tensors.
    """
    n_conformers, n_atoms = species.shape
    chunk = max(1, parser.batchatoms // n_atoms)
    # Re-create coordinates as a fresh leaf tensor so autograd can
    # differentiate the energies with respect to it.
    coords = coordinates.clone().detach().requires_grad_(True)
    species_chunks = torch.split(species, chunk)
    coord_chunks = torch.split(coords, chunk)
    energy_parts = []
    force_parts = []
    progress = tqdm.tqdm(zip(species_chunks, coord_chunks),
                         total=len(species_chunks), position=1,
                         desc="batch of {}x{}".format(n_conformers, n_atoms))
    for s, c in progress:
        _, e = model((s, c))
        # Gradient of the summed energies w.r.t. coordinates; callers
        # treat this as the "forces" tensor (NOTE(review): physical
        # forces would be the negative gradient — confirm convention).
        f, = torch.autograd.grad(e.sum(), c)
        energy_parts.append(e)
        force_parts.append(f)
    return torch.cat(energy_parts).detach(), torch.cat(force_parts).detach()
class Averager:
    """Accumulate a running mean over batches of 1-D tensors.

    ``update`` folds in one batch of scalar values; ``compute`` returns
    the mean of everything seen so far.
    """

    def __init__(self):
        self.count = 0   # number of scalar values accumulated
        self.cumsum = 0  # running sum of those values

    def update(self, new):
        """Add a 1-D tensor of values to the running statistics.

        :raises ValueError: if *new* is not one-dimensional.
        """
        # Raise explicitly instead of ``assert``: asserts are stripped
        # under ``python -O`` and would silently corrupt the statistics.
        if len(new.shape) != 1:
            raise ValueError('Averager.update expects a 1-D tensor')
        self.count += new.shape[0]
        self.cumsum += new.sum().item()

    def compute(self):
        """Return the mean of all accumulated values.

        Raises ``ZeroDivisionError`` if ``update`` was never called.
        """
        return self.cumsum / self.count
def relative_energies(energies):
    """Return all pairwise differences ``e_i - e_j`` for ``i < j``.

    :param energies: 1-D tensor of conformer energies.
    :return: 1-D tensor of length ``n * (n - 1) / 2``.
    """
    pairs = torch.combinations(energies, r=2)
    return pairs[:, 0] - pairs[:, 1]
def do_benchmark(model):
    """Benchmark *model* against every ``.h5`` file under ``parser.dir``.

    Prints MAE and RMSE (converted from Hartree to kcal/mol) for
    absolute energies, relative (pairwise-difference) energies, and
    flattened force components.
    """
    dataset = recursive_h5_files(parser.dir)
    # One accumulator per reported metric: MAE averages |diff|,
    # RMSE averages diff**2 (square-rooted at the end).
    mae_averager_energy = Averager()
    mae_averager_relative_energy = Averager()
    mae_averager_force = Averager()
    rmse_averager_energy = Averager()
    rmse_averager_relative_energy = Averager()
    rmse_averager_force = Averager()
    for i in tqdm.tqdm(dataset, position=0, desc="dataset"):
        # read the reference data for this molecule
        coordinates = torch.tensor(
            i['coordinates'], dtype=dtype, device=parser.device)
        # species is per-molecule; broadcast one row across all conformers
        species = model.species_to_tensor(i['species']) \
            .unsqueeze(0).expand(coordinates.shape[0], -1)
        energies = torch.tensor(i['energies'], dtype=dtype,
                                device=parser.device)
        forces = torch.tensor(i['forces'], dtype=dtype,
                              device=parser.device)
        # compute the model's predictions
        energies2, forces2 = by_batch(species, coordinates, model)
        ediff = energies - energies2
        relative_ediff = relative_energies(energies) - \
            relative_energies(energies2)
        fdiff = forces.flatten() - forces2.flatten()
        # update the running error statistics
        mae_averager_energy.update(ediff.abs())
        mae_averager_relative_energy.update(relative_ediff.abs())
        mae_averager_force.update(fdiff.abs())
        rmse_averager_energy.update(ediff ** 2)
        rmse_averager_relative_energy.update(relative_ediff ** 2)
        rmse_averager_force.update(fdiff ** 2)
    # convert Hartree -> kcal/mol for reporting
    mae_energy = mae_averager_energy.compute() * HARTREE2KCAL
    rmse_energy = math.sqrt(rmse_averager_energy.compute()) * HARTREE2KCAL
    mae_relative_energy = mae_averager_relative_energy.compute() * HARTREE2KCAL
    rmse_relative_energy = math.sqrt(rmse_averager_relative_energy.compute()) \
        * HARTREE2KCAL
    mae_force = mae_averager_force.compute() * HARTREE2KCAL
    rmse_force = math.sqrt(rmse_averager_force.compute()) * HARTREE2KCAL
    print("Energy:", mae_energy, rmse_energy)
    print("Relative Energy:", mae_relative_energy, rmse_relative_energy)
    print("Forces:", mae_force, rmse_force)
do_benchmark(ani1x)
...@@ -11,7 +11,7 @@ neurochem = NeuroChem() ...@@ -11,7 +11,7 @@ neurochem = NeuroChem()
mol_count = 0 mol_count = 0
for i in [1, 2, 3, 4]: for i in [1, 2, 3, 4]:
data_file = os.path.join( data_file = os.path.join(
path, '../../dataset/ani_gdb_s0{}.h5'.format(i)) path, '../../dataset/ani1-up_to_gdb4/ani_gdb_s0{}.h5'.format(i))
adl = pyanitools.anidataloader(data_file) adl = pyanitools.anidataloader(data_file)
for data in tqdm.tqdm(adl, desc='ANI1: {} heavy atoms'.format(i)): for data in tqdm.tqdm(adl, desc='ANI1: {} heavy atoms'.format(i)):
coordinates = data['coordinates'][:10, :] coordinates = data['coordinates'][:10, :]
......
# Written by Roman Zubatyuk and Justin S. Smith # Written by Roman Zubatyuk and Justin S. Smith
import h5py import h5py
import numpy as np import numpy as np
import platform
import os import os
PY_VERSION = int(platform.python_version().split('.')[0]) > 3 class datapacker:
class datapacker(object):
def __init__(self, store_file, mode='w-', complib='gzip', complevel=6): def __init__(self, store_file, mode='w-', complib='gzip', complevel=6):
"""Wrapper to store arrays within HFD5 file """Wrapper to store arrays within HFD5 file
""" """
...@@ -40,7 +36,7 @@ class datapacker(object): ...@@ -40,7 +36,7 @@ class datapacker(object):
self.store.close() self.store.close()
class anidataloader(object): class anidataloader:
''' Contructor ''' ''' Contructor '''
......
...@@ -37,7 +37,6 @@ class BuiltinModels(torch.nn.Module): ...@@ -37,7 +37,6 @@ class BuiltinModels(torch.nn.Module):
self.aev_computer = self.builtins.aev_computer self.aev_computer = self.builtins.aev_computer
self.neural_networks = self.builtins.models self.neural_networks = self.builtins.models
self.energy_shifter = self.builtins.energy_shifter self.energy_shifter = self.builtins.energy_shifter
self.species_to_tensor = self.builtins.consts.species_to_tensor
def forward(self, species_coordinates): def forward(self, species_coordinates):
species_aevs = self.aev_computer(species_coordinates) species_aevs = self.aev_computer(species_coordinates)
...@@ -73,6 +72,13 @@ class BuiltinModels(torch.nn.Module): ...@@ -73,6 +72,13 @@ class BuiltinModels(torch.nn.Module):
self.neural_networks, self.energy_shifter, self.neural_networks, self.energy_shifter,
**kwargs) **kwargs)
def species_to_tensor(self, *args, **kwargs):
"""Convert species from strings to tensor.
See also :method:`torchani.neurochem.Constant.species_to_tensor`"""
return self.builtins.consts.species_to_tensor(*args, **kwargs) \
.to(self.aev_computer.ShfR.device)
class ANI1x(BuiltinModels): class ANI1x(BuiltinModels):
"""The ANI-1x model as in `ani-1x_8x on GitHub`_ and """The ANI-1x model as in `ani-1x_8x on GitHub`_ and
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment