Document cache_aev (#84)

af50a84f · Gao, Xiang · GitHub · 83248cf1 · af50a84f · af50a84f
Unverified Commit af50a84f authored Sep 02, 2018 by Gao, Xiang Committed by GitHub Sep 02, 2018
6 changed files
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,4 @@ benchmark_xyz
 /*.ipt
 /*.params
 /*.dat
+/tmp
\ No newline at end of file
--- a/codefresh.yml
+++ b/codefresh.yml
@@ -31,9 +31,9 @@ steps:
      - python examples/training-benchmark.py ./dataset/ani_gdb_s01.h5  # run twice to test if checkpoint is working
      - python examples/energy_force.py
      - python examples/neurochem-test.py ./dataset/ani_gdb_s01.h5
-      - python examples/inference-benchmark.py examples/xyz_files/CH4-5.xyz
-      - python -m torchani.neurochem.trainer tests/test_data/inputtrain.ipt  dataset/ani_gdb_s01.h5 dataset/ani_gdb_s01.h5
-      - python -m torchani.data.cache-aev tmp dataset/ani_gdb_s01.h5 256
+      - python examples/inference-benchmark.py --tqdm examples/xyz_files/CH4-5.xyz
+      - python -m torchani.neurochem.trainer --tqdm tests/test_data/inputtrain.ipt  dataset/ani_gdb_s01.h5 dataset/ani_gdb_s01.h5
+      - python -m torchani.data.cache_aev tmp dataset/ani_gdb_s01.h5 256

  Docs:
    image: '${{BuildTorchANI}}'

--- a/docs/index.rst
+++ b/docs/index.rst
@@ -17,6 +17,7 @@ Datasets

 .. automodule:: torchani.data
 .. autoclass:: torchani.data.BatchedANIDataset
+.. automodule:: torchani.data.cache_aev


 Utilities

--- a/examples/inference-benchmark.py
+++ b/examples/inference-benchmark.py
@@ -3,6 +3,7 @@ import torchani
 import torch
 import os
 import timeit
+import tqdm


 path = os.path.dirname(os.path.realpath(__file__))
@@ -14,6 +15,8 @@ parser.add_argument('filename',
 parser.add_argument('-d', '--device',
                    help='Device of modules and tensors',
                    default=('cuda' if torch.cuda.is_available() else 'cpu'))
+parser.add_argument('--tqdm', dest='tqdm', action='store_true',
+                    help='Whether to use tqdm to display progress')
 parser = parser.parse_args()

 # set up benchmark
@@ -88,6 +91,8 @@ print()
 # test single mode
 print('[Single mode]')
 start = timeit.default_timer()
+if parser.tqdm:
+    xyz = tqdm.tqdm(xyz)
 for species, coordinates in xyz:
    species = species.unsqueeze(0)
    coordinates = torch.tensor(coordinates.unsqueeze(0), requires_grad=True)

--- a/torchani/data/__init__.py
+++ b/torchani/data/__init__.py
@@ -132,11 +132,7 @@ class BatchedANIDataset(Dataset):
                 shuffle=True, properties=['energies'], transform=(),
                 dtype=torch.get_default_dtype(), device=torch.device('cpu')):
        super(BatchedANIDataset, self).__init__()
-        self.path = path
-        self.batch_size = batch_size
-        self.shuffle = shuffle
        self.properties = properties
-        self.dtype = dtype
        self.device = device

        # get name of files storing data

--- a/torchani/data/cache-aev.py
+++ b/torchani/data/cache-aev.py
+# -*- coding: utf-8 -*-
+"""AEVs for a dataset can be precomputed by invoking
+``python -m torchani.data.cache_aev``. This dumps the dataset and the
+computed AEVs. Use the ``-h`` option for help.
+"""
+
 import os
 import torch
 from .. import aev, neurochem
@@ -18,6 +24,12 @@ if __name__ == '__main__':
    parser.add_argument('--constfile',
                        help='Path of the constant file `.params`',
                        default=builtin.const_file)
+    parser.add_argument('--properties', nargs='+',
+                        help='Output properties to load.`',
+                        default=['energies'])
+    parser.add_argument('--dtype',
+                        help='Data type',
+                        default=str(torch.get_default_dtype()).split('.')[1])
    default_device = 'cuda' if torch.cuda.is_available() else 'cpu'
    parser.add_argument('-d', '--device', help='Device for training',
                        default=default_device)
@@ -36,13 +48,19 @@ if __name__ == '__main__':
    aev_computer = aev.AEVComputer(**consts).to(device)
    dataset = BatchedANIDataset(parser.dataset, consts.species_to_tensor,
                                parser.batchsize, shuffle=parser.shuffle,
-                                properties=[], device=device)
+                                properties=parser.properties, device=device,
+                                dtype=getattr(torch, parser.dtype))
+
+    # dump out the dataset
+    filename = os.path.join(parser.output, 'dataset')
+    with open(filename, 'wb') as f:
+        pickle.dump(dataset, f)
+
    if parser.tqdm:
        import tqdm
        indices = tqdm.trange(len(dataset))
    else:
        indices = range(len(dataset))
-
    for i in indices:
        input_, _ = dataset[i]
        aevs = [aev_computer(j) for j in input_]