command line tool to precompute aevs (#83)

83248cf1 · Gao, Xiang · GitHub · a9a792a2 · 83248cf1 · 83248cf1
Unverified commit 83248cf1, authored Aug 31, 2018 by Gao, Xiang; committed by GitHub on Aug 31, 2018
15 changed files
--- a/codefresh.yml
+++ b/codefresh.yml
@@ -33,6 +33,7 @@ steps:
      - python examples/neurochem-test.py ./dataset/ani_gdb_s01.h5
      - python examples/inference-benchmark.py examples/xyz_files/CH4-5.xyz
      - python -m torchani.neurochem.trainer tests/test_data/inputtrain.ipt  dataset/ani_gdb_s01.h5 dataset/ani_gdb_s01.h5
+      - python -m torchani.data.cache-aev tmp dataset/ani_gdb_s01.h5 256
  Docs:
    image: '${{BuildTorchANI}}'

--- a/docs/index.rst
+++ b/docs/index.rst
@@ -38,7 +38,7 @@ NeuroChem Utils
 .. autofunction:: torchani.neurochem.load_atomic_network
 .. autofunction:: torchani.neurochem.load_model
 .. autofunction:: torchani.neurochem.load_model_ensemble
-.. autoclass:: torchani.neurochem.Buildins
+.. autoclass:: torchani.neurochem.Builtins
 .. autoclass:: torchani.neurochem.Trainer
    :members:
 .. automodule:: torchani.neurochem.trainer

--- a/examples/inference-benchmark.py
+++ b/examples/inference-benchmark.py
@@ -18,11 +18,11 @@ parser = parser.parse_args()
 # set up benchmark
 device = torch.device(parser.device)
-buildins = torchani.neurochem.Buildins()
+builtins = torchani.neurochem.Builtins()
 nnp = torch.nn.Sequential(
-    buildins.aev_computer,
+    builtins.aev_computer,
-    buildins.models[0],
+    builtins.models[0],
-    buildins.energy_shifter
+    builtins.energy_shifter
 ).to(device)
@@ -54,7 +54,7 @@ class XYZ:
                atom_count -= 1
                if atom_count == 0:
                    state = 'ready'
-                    species = buildins.consts.species_to_tensor(species) \
+                    species = builtins.consts.species_to_tensor(species) \
                                      .to(device)
                    coordinates = torch.tensor(coordinates, device=device)
                    self.mols.append((species, coordinates))

--- a/examples/model.py
+++ b/examples/model.py
@@ -3,10 +3,10 @@ import torchani
 import os
-buildins = torchani.neurochem.Buildins()
+builtins = torchani.neurochem.Builtins()
-consts = buildins.consts
+consts = builtins.consts
-aev_computer = buildins.aev_computer
+aev_computer = builtins.aev_computer
-shift_energy = buildins.energy_shifter
+shift_energy = builtins.energy_shifter
 def atomic():

--- a/examples/neurochem-test.py
+++ b/examples/neurochem-test.py
@@ -6,7 +6,7 @@ import pickle
 import argparse
-buildins = torchani.neurochem.Buildins()
+builtins = torchani.neurochem.Builtins()
 # parse command line arguments
 parser = argparse.ArgumentParser()
@@ -22,13 +22,13 @@ parser.add_argument('--batch_size',
                    default=1024, type=int)
 parser.add_argument('--const_file',
                    help='File storing constants',
-                    default=buildins.const_file)
+                    default=builtins.const_file)
 parser.add_argument('--sae_file',
                    help='File storing self atomic energies',
-                    default=buildins.sae_file)
+                    default=builtins.sae_file)
 parser.add_argument('--network_dir',
                    help='Directory or prefix of directories storing networks',
-                    default=buildins.ensemble_prefix + '0/networks')
+                    default=builtins.ensemble_prefix + '0/networks')
 parser = parser.parse_args()
 # load modules and datasets

--- a/tests/test_aev.py
+++ b/tests/test_aev.py
@@ -11,8 +11,8 @@ N = 97
 class TestAEV(unittest.TestCase):
    def setUp(self):
-        buildins = torchani.neurochem.Buildins()
+        builtins = torchani.neurochem.Builtins()
-        self.aev_computer = buildins.aev_computer
+        self.aev_computer = builtins.aev_computer
        self.radial_length = self.aev_computer.radial_length()
        self.tolerance = 1e-5

--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -6,8 +6,8 @@ import unittest
 path = os.path.dirname(os.path.realpath(__file__))
 dataset_path = os.path.join(path, '../dataset')
 batch_size = 256
-buildins = torchani.neurochem.Buildins()
+builtins = torchani.neurochem.Builtins()
-consts = buildins.consts
+consts = builtins.consts
 class TestData(unittest.TestCase):

--- a/tests/test_energies.py
+++ b/tests/test_energies.py
@@ -13,10 +13,10 @@ class TestEnergies(unittest.TestCase):
    def setUp(self):
        self.tolerance = 5e-5
-        buildins = torchani.neurochem.Buildins()
+        builtins = torchani.neurochem.Builtins()
-        aev_computer = buildins.aev_computer
+        aev_computer = builtins.aev_computer
-        nnp = buildins.models[0]
+        nnp = builtins.models[0]
-        shift_energy = buildins.energy_shifter
+        shift_energy = builtins.energy_shifter
        self.model = torch.nn.Sequential(aev_computer, nnp, shift_energy)
    def testIsomers(self):

--- a/tests/test_ensemble.py
+++ b/tests/test_ensemble.py
@@ -15,10 +15,10 @@ class TestEnsemble(unittest.TestCase):
        self.conformations = 20
    def _test_molecule(self, coordinates, species):
-        buildins = torchani.neurochem.Buildins()
+        builtins = torchani.neurochem.Builtins()
        coordinates = torch.tensor(coordinates, requires_grad=True)
-        aev = buildins.aev_computer
+        aev = builtins.aev_computer
-        ensemble = buildins.models
+        ensemble = builtins.models
        models = [torch.nn.Sequential(aev, m) for m in ensemble]
        ensemble = torch.nn.Sequential(aev, ensemble)

--- a/tests/test_forces.py
+++ b/tests/test_forces.py
@@ -12,9 +12,9 @@ class TestForce(unittest.TestCase):
    def setUp(self):
        self.tolerance = 1e-5
-        buildins = torchani.neurochem.Buildins()
+        builtins = torchani.neurochem.Builtins()
-        aev_computer = buildins.aev_computer
+        aev_computer = builtins.aev_computer
-        nnp = buildins.models[0]
+        nnp = builtins.models[0]
        self.model = torch.nn.Sequential(aev_computer, nnp)
    def testIsomers(self):

--- a/tests/test_ignite.py
+++ b/tests/test_ignite.py
@@ -16,12 +16,12 @@ threshold = 1e-5
 class TestIgnite(unittest.TestCase):
    def testIgnite(self):
-        buildins = torchani.neurochem.Buildins()
+        builtins = torchani.neurochem.Builtins()
-        aev_computer = buildins.aev_computer
+        aev_computer = builtins.aev_computer
-        nnp = copy.deepcopy(buildins.models[0])
+        nnp = copy.deepcopy(builtins.models[0])
-        shift_energy = buildins.energy_shifter
+        shift_energy = builtins.energy_shifter
        ds = torchani.data.BatchedANIDataset(
-            path, buildins.consts.species_to_tensor, batchsize,
+            path, builtins.consts.species_to_tensor, batchsize,
            transform=[shift_energy.subtract_from_dataset])
        ds = torch.utils.data.Subset(ds, [0])

--- a/torchani/data.py
+++ b/torchani/data.py
@@ -6,7 +6,7 @@ from os.path import join, isfile, isdir
 import os
 from ._pyanitools import anidataloader
 import torch
-from . import utils
+from .. import utils
 def chunk_counts(counts, split):

--- a/torchani/_pyanitools.py
+++ b/torchani/_pyanitools.py
--- a/torchani/data/cache-aev.py
+++ b/torchani/data/cache-aev.py
+import os
+import torch
+from .. import aev, neurochem
+from . import BatchedANIDataset
+import pickle
+if __name__ == '__main__':
+    # Command line entry point: compute the AEVs of every batch of a dataset
+    # and pickle each batch's result into its own file, named by the batch
+    # index, inside the output directory.
+    #
+    # Positional arguments: output directory, dataset path, batch size.
+    # Options: --constfile, -d/--device, --no-shuffle, --no-tqdm.
+    import argparse
+    builtin = neurochem.Builtins()
+    parser = argparse.ArgumentParser()
+    parser.add_argument('output',
+                        help='Path of the output directory')
+    # Adjacent literals instead of a backslash continuation inside the
+    # string, so the source indentation does not end up in the help text.
+    parser.add_argument('dataset',
+                        help='Path of the dataset, can be a hdf5 file '
+                             'or a directory containing hdf5 files')
+    parser.add_argument('batchsize', help='batch size', type=int)
+    parser.add_argument('--constfile',
+                        help='Path of the constant file `.params`',
+                        default=builtin.const_file)
+    default_device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    # This script trains nothing; the device is only used to compute AEVs.
+    parser.add_argument('-d', '--device', help='Device for computing AEVs',
+                        default=default_device)
+    # Both flags below are `store_false`: passing them turns the feature off.
+    parser.add_argument('--no-shuffle', help='Do not shuffle the dataset',
+                        dest='shuffle', action='store_false')
+    parser.add_argument('--no-tqdm', dest='tqdm', action='store_false',
+                        help='Do not use tqdm to display progress')
+    # Keep the parsed namespace under its own name rather than rebinding
+    # `parser`, so the ArgumentParser object is not shadowed.
+    args = parser.parse_args()
+
+    # Create the output directory if needed; `exist_ok` avoids the
+    # check-then-create race of a separate `os.path.exists` test.
+    os.makedirs(args.output, exist_ok=True)
+
+    device = torch.device(args.device)
+    consts = neurochem.Constants(args.constfile)
+    aev_computer = aev.AEVComputer(**consts).to(device)
+    # `properties=[]`: no dataset properties are requested, and the second
+    # element of each dataset item is discarded below.
+    dataset = BatchedANIDataset(args.dataset, consts.species_to_tensor,
+                                args.batchsize, shuffle=args.shuffle,
+                                properties=[], device=device)
+
+    # tqdm is imported lazily so that `--no-tqdm` works without it installed.
+    if args.tqdm:
+        import tqdm
+        indices = tqdm.trange(len(dataset))
+    else:
+        indices = range(len(dataset))
+
+    for i in indices:
+        input_, _ = dataset[i]
+        # Move each result pair to the CPU as soon as it is computed, so a
+        # whole batch of device tensors is not held before pickling.
+        aevs = []
+        for chunk in input_:
+            x, y = aev_computer(chunk)
+            aevs.append((x.cpu(), y.cpu()))
+        # One pickle file per batch, named by the batch index.
+        filename = os.path.join(args.output, str(i))
+        with open(filename, 'wb') as f:
+            pickle.dump(aevs, f)
--- a/torchani/neurochem/__init__.py
+++ b/torchani/neurochem/__init__.py
@@ -263,7 +263,7 @@ def load_model_ensemble(species, prefix, count):
    return Ensemble(models)
-class Buildins:
+class Builtins:
    """Container for all builtin stuffs.
    Attributes: