Unverified commit c059adbf authored by Gao, Xiang, committed by GitHub

Add tutorials (#86)

parent b4bb2393
-*.txt
*.prof
__pycache__
/data
@@ -6,7 +5,7 @@ __pycache__
a.out
/test.py
/.vscode
-/build
/build*
/.eggs
/torchani.egg-info
/*.h5
@@ -21,4 +20,4 @@ benchmark_xyz
/*.ipt
/*.params
/*.dat
/tmp
\ No newline at end of file
FROM zasdfgbnm/pytorch-master
-RUN pacman -Sy --noconfirm python-sphinx python2-sphinx python-tqdm python2-tqdm flake8
RUN pacman -Sy --noconfirm python-sphinx python2-sphinx python-tqdm python2-tqdm python2-matplotlib python-matplotlib python-pillow python2-pillow flake8
-RUN pip install tensorboardX && pip2 install tensorboardX
RUN pip install tensorboardX sphinx-gallery && pip2 install tensorboardX sphinx-gallery
COPY . /torchani
RUN cd torchani && pip install .
RUN cd torchani && pip2 install .
@@ -27,11 +27,10 @@ steps:
- rm -rf *.pt
- python examples/nnp_training.py dataset/ani_gdb_s01.h5 dataset/ani_gdb_s01.h5
- python examples/nnp_training.py dataset/ani_gdb_s01.h5 dataset/ani_gdb_s01.h5 # run twice to test if checkpoint is working
-- python examples/training-benchmark.py ./dataset/ani_gdb_s01.h5
-- python examples/training-benchmark.py ./dataset/ani_gdb_s01.h5 # run twice to test if checkpoint is working
- python examples/energy_force.py
-- python examples/neurochem-test.py ./dataset/ani_gdb_s01.h5
-- python examples/inference-benchmark.py --tqdm examples/xyz_files/CH4-5.xyz
- python tools/training-benchmark.py ./dataset/ani_gdb_s01.h5
- python tools/neurochem-test.py ./dataset/ani_gdb_s01.h5
- python tools/inference-benchmark.py --tqdm ./xyz_files/CH4-5.xyz
- python -m torchani.neurochem.trainer --tqdm tests/test_data/inputtrain.ipt dataset/ani_gdb_s01.h5 dataset/ani_gdb_s01.h5
- python -m torchani.data.cache_aev tmp dataset/ani_gdb_s01.h5 256
......
examples
\ No newline at end of file
TorchANI
========

.. autoclass:: torchani.AEVComputer
    :members:
.. autoclass:: torchani.ANIModel
.. autoclass:: torchani.Ensemble
.. autoclass:: torchani.EnergyShifter
    :members:
.. autoclass:: torchani.nn.Gaussian

Datasets
========

.. automodule:: torchani.data
.. autoclass:: torchani.data.BatchedANIDataset
.. automodule:: torchani.data.cache_aev

Utilities
=========

.. automodule:: torchani.utils
.. autofunction:: torchani.utils.pad_and_batch
.. autofunction:: torchani.utils.present_species
.. autofunction:: torchani.utils.strip_redundant_padding

NeuroChem
=========

.. automodule:: torchani.neurochem
.. autoclass:: torchani.neurochem.Constants
    :members:
.. autofunction:: torchani.neurochem.load_sae
.. autofunction:: torchani.neurochem.load_atomic_network
.. autofunction:: torchani.neurochem.load_model
.. autofunction:: torchani.neurochem.load_model_ensemble
.. autoclass:: torchani.neurochem.Builtins
.. autoclass:: torchani.neurochem.Trainer
    :members:
.. automodule:: torchani.neurochem.trainer

Ignite Helpers
==============

.. automodule:: torchani.ignite
.. autoclass:: torchani.ignite.Container
    :members:
.. autoclass:: torchani.ignite.DictLoss
.. autoclass:: torchani.ignite.PerAtomDictLoss
.. autoclass:: torchani.ignite.TransformedLoss
.. autofunction:: torchani.ignite.MSELoss
.. autoclass:: torchani.ignite.DictMetric
.. autofunction:: torchani.ignite.RMSEMetric
.. autoclass:: torchani.ignite.MaxAbsoluteError
.. autofunction:: torchani.ignite.MAEMetric
@@ -14,6 +14,7 @@ extensions = [
    'sphinx.ext.intersphinx',
    'sphinx.ext.mathjax',
    'sphinx.ext.viewcode',
    'sphinx_gallery.gen_gallery',
]

templates_path = ['_templates']
@@ -26,6 +27,12 @@ html_theme = 'sphinx_rtd_theme'
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
htmlhelp_basename = 'TorchANIdoc'

sphinx_gallery_conf = {
    'examples_dirs': '../examples',
    'gallery_dirs': 'examples',
    'filename_pattern': r'.*\.py'
}

intersphinx_mapping = {
    'python': ('https://docs.python.org/', None),
    'numpy': ('http://docs.scipy.org/doc/numpy/', None),
......
-TorchANI
-========
====================================
Welcome to TorchANI's documentation!
====================================
Precompute AEVs to Improve Training Performance
.. automodule:: torchani

-.. autoclass:: torchani.AEVComputer
-    :members:
-.. autoclass:: torchani.ANIModel
-.. autoclass:: torchani.Ensemble
-.. autoclass:: torchani.EnergyShifter
-    :members:
-.. autoclass:: torchani.nn.Gaussian
-Datasets
-========
-.. automodule:: torchani.data
-.. autoclass:: torchani.data.BatchedANIDataset
-.. automodule:: torchani.data.cache_aev
-Utilities
-=========
-.. automodule:: torchani.utils
-.. autofunction:: torchani.utils.pad_and_batch
-.. autofunction:: torchani.utils.present_species
-.. autofunction:: torchani.utils.strip_redundant_padding
-NeuroChem Utils
-===============
-.. automodule:: torchani.neurochem
-.. autoclass:: torchani.neurochem.Constants
-    :members:
-.. autofunction:: torchani.neurochem.load_sae
-.. autofunction:: torchani.neurochem.load_atomic_network
-.. autofunction:: torchani.neurochem.load_model
-.. autofunction:: torchani.neurochem.load_model_ensemble
-.. autoclass:: torchani.neurochem.Builtins
-.. autoclass:: torchani.neurochem.Trainer
-    :members:
-.. automodule:: torchani.neurochem.trainer
-Ignite Helpers
-==============
-.. automodule:: torchani.ignite
-.. autoclass:: torchani.ignite.Container
-    :members:
-.. autoclass:: torchani.ignite.DictLoss
-.. autoclass:: torchani.ignite.PerAtomDictLoss
-.. autoclass:: torchani.ignite.TransformedLoss
-.. autofunction:: torchani.ignite.MSELoss
-.. autoclass:: torchani.ignite.DictMetric
-.. autofunction:: torchani.ignite.RMSEMetric
-.. autoclass:: torchani.ignite.MaxAbsoluteError
-.. autofunction:: torchani.ignite.MAEMetric

.. toctree::
    :maxdepth: 2
    :caption: Getting Started

    start

.. toctree::
    :maxdepth: 2
    :caption: Examples

    examples/energy_force
    examples/nnp_training
    examples/neurochem_trainer

.. toctree::
    :maxdepth: 2
    :caption: TorchANI's API

    api

Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
Installation
============

Installing TorchANI is very simple: clone the git repository and run
``pip install .``, just as you would for other Python packages. Keep in mind
that TorchANI requires the master branch of PyTorch. If you do not have a
master-branch build of PyTorch installed, you should install it before
installing TorchANI; refer to PyTorch's `official instruction`_ on how to
compile and install PyTorch from source.

.. _official instruction:
    https://github.com/pytorch/pytorch#from-source
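
As a quick, illustrative check (a suggestion added here, not part of the
original instructions), you can verify that the installation worked by
importing both packages from Python::

    import torch
    import torchani

    # if both imports succeed, TorchANI sees your PyTorch build
    print(torch.__version__)
    print(torchani.__file__)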
data
runs
\ No newline at end of file
========
Examples
========
# -*- coding: utf-8 -*-
"""
Computing Energy and Force Using Builtin Models
===============================================
TorchANI has a model ensemble trained by NeuroChem on the `ANI-1x dataset`_.
These models are shipped with TorchANI and can be used directly.
.. _ANI-1x dataset:
https://aip.scitation.org/doi/abs/10.1063/1.5023802
"""
###############################################################################
# To begin with, let's first import the modules we will use:
import torch
-import os
import torchani

###############################################################################
# Let's now manually specify the device we want TorchANI to run on:
device = torch.device('cpu')
-path = os.path.dirname(os.path.realpath(__file__))
-const_file = os.path.join(path, '../torchani/resources/ani-1x_dft_x8ens/rHCNO-5.2R_16-3.5A_a4-8.params')  # noqa: E501
-sae_file = os.path.join(path, '../torchani/resources/ani-1x_dft_x8ens/sae_linfit.dat')  # noqa: E501
-network_dir = os.path.join(path, '../torchani/resources/ani-1x_dft_x8ens/train')  # noqa: E501
-ensemble = 8
-consts = torchani.neurochem.Constants(const_file)
-aev_computer = torchani.AEVComputer(**consts)
-nn = torchani.neurochem.load_model_ensemble(consts.species, network_dir,
-                                            ensemble)
-shift_energy = torchani.neurochem.load_sae(sae_file)
-model = torch.nn.Sequential(aev_computer, nn, shift_energy)
###############################################################################
# Let's now load the built-in models and create a pipeline of AEV computer,
# neural networks, and energy shifter. This pipeline will first compute AEVs,
# then use all models in the ensemble to compute molecular energies, and take
# the average of these energies as the final output. The reason we need an
# energy shifter at the end is that these networks do not output the total
# energy directly, but the total energy minus the self energy of each atom.
builtin = torchani.neurochem.Builtins()
model = torch.nn.Sequential(
    builtin.aev_computer,
    builtin.models,
    builtin.energy_shifter
)
###############################################################################
# Now let's define the coordinates and species. If you just want to compute the
# energy and force for a single structure, as in this example, the coordinate
# tensor must have shape ``(1, Na, 3)`` and the species tensor shape
# ``(1, Na)``, where ``Na`` is the number of atoms in the molecule. The leading
# ``1`` in the shape is there to support batch processing, as in training. If
# you have ``N`` different structures to compute, make that dimension ``N``
# (see the short batching sketch at the end of this example).
coordinates = torch.tensor([[[0.03192167, 0.00638559, 0.01301679],
                             [-0.83140486, 0.39370209, -0.26395324],
                             [-0.66518241, -0.84461308, 0.20759389],
                             [0.45554739, 0.54289633, 0.81170881],
                             [0.66091919, -0.16799635, -0.91037834]]],
-                           requires_grad=True)
                            requires_grad=True, device=device)
-species = consts.species_to_tensor('CHHHH').to(device).unsqueeze(0)
species = builtin.consts.species_to_tensor('CHHHH').to(device).unsqueeze(0)
###############################################################################
# Now let's compute energy and force:
_, energy = model((species, coordinates))
derivative = torch.autograd.grad(energy.sum(), coordinates)[0]
force = -derivative
###############################################################################
# And print to see the result:
print('Energy:', energy.item())
-print('Force:', force.squeeze().numpy())
print('Force:', force.squeeze())
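
###############################################################################
# As a short, hedged illustration of the batch dimension discussed above (this
# sketch is an addition to the tutorial, not part of the original example):
# two conformers of the same molecule can be stacked along the first axis,
# giving tensors of shape ``(2, 5, 3)`` and ``(2, 5)``, and the pipeline then
# returns one energy per structure.
batched_coordinates = torch.cat([coordinates, coordinates], dim=0)
batched_species = torch.cat([species, species], dim=0)
_, batched_energies = model((batched_species, batched_coordinates))
print('Batched energies:', batched_energies)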
import torch
import torchani
import os
builtins = torchani.neurochem.Builtins()
consts = builtins.consts
aev_computer = builtins.aev_computer
shift_energy = builtins.energy_shifter
def atomic():
    model = torch.nn.Sequential(
        torch.nn.Linear(384, 128),
        torch.nn.CELU(0.1),
        torch.nn.Linear(128, 128),
        torch.nn.CELU(0.1),
        torch.nn.Linear(128, 64),
        torch.nn.CELU(0.1),
        torch.nn.Linear(64, 1)
    )
    return model
def get_or_create_model(filename, device=torch.device('cpu')):
    model = torchani.ANIModel([atomic() for _ in range(4)])

    class Flatten(torch.nn.Module):
        def forward(self, x):
            return x[0], x[1].flatten()

    model = torch.nn.Sequential(aev_computer, model, Flatten())
    if os.path.isfile(filename):
        model.load_state_dict(torch.load(filename))
    else:
        torch.save(model.state_dict(), filename)
    return model.to(device)
# -*- coding: utf-8 -*-
"""
Train Neural Network Potential From NeuroChem Input File
========================================================
This example shows how to use TorchANI's NeuroChem trainer to read and run
NeuroChem's training config file to train a neural network potential.
"""
###############################################################################
# To begin with, let's first import the modules we will use:
import torchani
import torch
import os
import sys
import tqdm
###############################################################################
# Now let's set up the paths for the dataset and the NeuroChem input file. Note
# that these paths assume the user runs this script from the ``examples``
# directory of TorchANI's repository. If you download this script, you should
# manually set the paths of these files on your system before the script can
# run successfully. Also note that, for demo purposes, we use the small
# ``ani_gdb_s01.h5`` from TorchANI's repository as both the training and the
# validation set, so that the program finishes very quickly. This is wrong and
# should be avoided for any serious training.
try:
    path = os.path.dirname(os.path.realpath(__file__))
except NameError:
    path = os.getcwd()
cfg_path = os.path.join(path, '../tests/test_data/inputtrain.ipt')
training_path = os.path.join(path, '../dataset/ani_gdb_s01.h5')
validation_path = os.path.join(path, '../dataset/ani_gdb_s01.h5')
###############################################################################
# We also need to set the device to run the training:
device_str = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_str)
trainer = torchani.neurochem.Trainer(cfg_path, device, True, 'runs')
trainer.load_data(training_path, validation_path)
###############################################################################
# Once everything is set up, running NeuroChem is very easy: we simply need to
# call ``trainer.run()``. But here, in order for sphinx-gallery to be able to
# capture the output of tqdm, let's do some hacking first to make tqdm print
# its progress bar to stdout.
def my_tqdm(*args, **kwargs):
    return tqdm.tqdm(*args, **kwargs, file=sys.stdout)
trainer.tqdm = my_tqdm
###############################################################################
# Now, let's go!
trainer.run()
###############################################################################
# Alternatively, you can run the NeuroChem trainer directly from the command
# line; no programming is needed. Run ``python -m torchani.neurochem.trainer -h``
# for usage information. For this demo, the equivalent command is:
cmd = ['python', '-m', 'torchani.neurochem.trainer', '-d', device_str,
       '--tqdm', '--tensorboard', 'runs', cfg_path, training_path,
       validation_path]
print(' '.join(cmd))
###############################################################################
# Now let's invoke this command to see what we get. Again, we redirect stderr
# to stdout, simply so that sphinx-gallery is able to capture it when
# generating this document:
from subprocess import Popen, PIPE # noqa: E402
print(Popen(cmd, stderr=PIPE).stderr.read().decode('utf-8'))
# -*- coding: utf-8 -*-
"""
Train Your Own Neural Network Potential
=======================================
This example shows how to use TorchANI to train your own neural network
potential.
"""
###############################################################################
# To begin with, let's first import the modules we will use:
import torch
import ignite
import torchani
-import model
import tqdm
import timeit
import tensorboardX
-import math
-import argparse
-import json
import os
import sys

-# parse command line arguments
-parser = argparse.ArgumentParser()
-parser.add_argument('training_path',
-                    help='Path of the training set, can be a hdf5 file \
-                          or a directory containing hdf5 files')
-parser.add_argument('validation_path',
-                    help='Path of the validation set, can be a hdf5 file \
-                          or a directory containing hdf5 files')
-parser.add_argument('--model_checkpoint',
-                    help='Checkpoint file for model',
-                    default='model.pt')
-parser.add_argument('-m', '--max_epochs',
-                    help='Maximum number of epoches',
-                    default=300, type=int)
-parser.add_argument('--training_rmse_every',
-                    help='Compute training RMSE every epoches',
-                    default=20, type=int)
-parser.add_argument('-d', '--device',
-                    help='Device of modules and tensors',
-                    default=('cuda' if torch.cuda.is_available() else 'cpu'))
-parser.add_argument('--batch_size',
-                    help='Number of conformations of each batch',
-                    default=1024, type=int)
-parser.add_argument('--log',
-                    help='Log directory for tensorboardX',
-                    default=None)
-parser.add_argument('--optimizer',
-                    help='Optimizer used to train the model',
-                    default='Adam')
-parser.add_argument('--optim_args',
-                    help='Arguments to optimizers, in the format of json',
-                    default='{}')
-parser.add_argument('--early_stopping',
-                    help='Stop after epoches of no improvements',
-                    default=math.inf, type=int)
-parser = parser.parse_args()
-# set up the training
-device = torch.device(parser.device)
-writer = tensorboardX.SummaryWriter(log_dir=parser.log)
-start = timeit.default_timer()
-nnp = model.get_or_create_model(parser.model_checkpoint, device=device)

###############################################################################
# Now let's set up the training hyperparameters. Note that, for demo purposes,
# we use the small ``ani_gdb_s01.h5`` from TorchANI's repository as both the
# training and the validation set, so that this program finishes very quickly.
# This is wrong and should be avoided for any serious training. The paths below
# assume the user runs this script from the ``examples`` directory of
# TorchANI's repository. If you download this script, you should manually set
# the paths of these files on your system before the script can run
# successfully.

# training and validation set
try:
    path = os.path.dirname(os.path.realpath(__file__))
except NameError:
    path = os.getcwd()
training_path = os.path.join(path, '../dataset/ani_gdb_s01.h5')
validation_path = os.path.join(path, '../dataset/ani_gdb_s01.h5')

# checkpoint file to save model when validation RMSE improves
model_checkpoint = 'model.pt'

# max epochs to run the training
max_epochs = 20

# Compute the training RMSE every this many epochs. Since the training set is
# usually huge and the loss function does not directly give us the RMSE, we
# periodically check the training RMSE to watch for overfitting.
training_rmse_every = 5

# device to run the training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# batch size
batch_size = 1024

# log directory for tensorboardX
log = 'runs'

###############################################################################
# Now let's read our constants and self energies from the constant files and
# construct the AEV computer.
const_file = os.path.join(path, '../torchani/resources/ani-1x_dft_x8ens/rHCNO-5.2R_16-3.5A_a4-8.params')  # noqa: E501
sae_file = os.path.join(path, '../torchani/resources/ani-1x_dft_x8ens/sae_linfit.dat') # noqa: E501
consts = torchani.neurochem.Constants(const_file)
aev_computer = torchani.AEVComputer(**consts)
energy_shifter = torchani.neurochem.load_sae(sae_file)
###############################################################################
# Now let's define the atomic neural networks. In this demo, we use neural
# networks of the same size for all atom types, but this is not required.
def atomic():
    model = torch.nn.Sequential(
        torch.nn.Linear(384, 128),
        torch.nn.CELU(0.1),
        torch.nn.Linear(128, 128),
        torch.nn.CELU(0.1),
        torch.nn.Linear(128, 64),
        torch.nn.CELU(0.1),
        torch.nn.Linear(64, 1)
    )
    return model
model = torchani.ANIModel([atomic() for _ in range(4)])
print(model)
###############################################################################
# The output energy tensor has shape ``(N, 1)`` where ``N`` is the number of
# different structures in each minibatch. However, in the dataset, the label
# has shape ``(N,)``. To make it possible to subtract these two tensors, we
# need to flatten the output tensor.
class Flatten(torch.nn.Module):
    def forward(self, x):
        return x[0], x[1].flatten()
model = torch.nn.Sequential(aev_computer, model, Flatten())
###############################################################################
# If a checkpoint from previous training exists, load it; otherwise save the
# freshly initialized parameters as the initial checkpoint.
if os.path.isfile(model_checkpoint):
    model.load_state_dict(torch.load(model_checkpoint))
else:
    torch.save(model.state_dict(), model_checkpoint)
model.to(device)
###############################################################################
# Now set up tensorboardX.
writer = tensorboardX.SummaryWriter(log_dir=log)
###############################################################################
# Now load the training and validation datasets into memory. Note that we need
# to subtract the self energies of all atoms from the energy of each molecule.
# This keeps the energies in a reasonable range. The second argument defines
# how to convert species, given as a list of strings, to a tensor, that is,
# which of the supported chemical symbols corresponds to ``0``, which
# corresponds to ``1``, and so on.
training = torchani.data.BatchedANIDataset(
-    parser.training_path, model.consts.species_to_tensor,
-    parser.batch_size, device=device,
-    transform=[model.shift_energy.subtract_from_dataset])
    training_path, consts.species_to_tensor, batch_size, device=device,
    transform=[energy_shifter.subtract_from_dataset])

-validation = torchani.data.BatchedANIDataset(
-    parser.validation_path, model.consts.species_to_tensor,
-    parser.batch_size, device=device,
-    transform=[model.shift_energy.subtract_from_dataset])
-container = torchani.ignite.Container({'energies': nnp})
-parser.optim_args = json.loads(parser.optim_args)
-optimizer = getattr(torch.optim, parser.optimizer)
-optimizer = optimizer(nnp.parameters(), **parser.optim_args)

validation = torchani.data.BatchedANIDataset(
    validation_path, consts.species_to_tensor, batch_size, device=device,
    transform=[energy_shifter.subtract_from_dataset])
###############################################################################
# When iterating over the dataset, we get pairs of input and output
# ``(species_coordinates, properties)``, where ``species_coordinates`` is the
# input and ``properties`` is the output.
#
# ``species_coordinates`` is a list of species-coordinate pairs, with shapes
# ``(N, Na)`` and ``(N, Na, 3)``. The reason we get this type is that, when the
# dataset is loaded and minibatches are generated, the whole dataset is
# shuffled, so each minibatch contains structures of molecules with a wide
# range of numbers of atoms. Molecules with different numbers of atoms are
# batched into a single tensor by padding. Padding works by adding ghost atoms
# with species 'X' and doing computations as if they were normal atoms; when
# computing AEVs, atoms with species 'X' are ignored. To avoid wasting
# computation on padding atoms, minibatches are further split into chunks. Each
# chunk contains structures of molecules of similar size, which minimizes the
# total number of padding atoms that need to be added. The input list
# ``species_coordinates`` contains the chunks of the current minibatch.
# Batching and chunking happen automatically, so the user does not need to
# worry about how to construct chunks, but the user does need to compute the
# energies for each chunk and concatenate them into a single tensor (see the
# short sketch below).
#
# The output ``properties`` is a dictionary holding each property. This allows
# us to extend TorchANI in the future to also train on forces and other
# properties.
# We have tools in :attr:`torchani.ignite` to deal with these data types, which
# allow us to easily combine the dataset with PyTorch Ignite. These tools can
# be used as follows:
container = torchani.ignite.Container({'energies': model})
optimizer = torch.optim.Adam(model.parameters())
trainer = ignite.engine.create_supervised_trainer(
    container, optimizer, torchani.ignite.MSELoss('energies'))
evaluator = ignite.engine.create_supervised_evaluator(container, metrics={
@@ -73,19 +163,12 @@ evaluator = ignite.engine.create_supervised_evaluator(container, metrics={
})

-def hartree2kcal(x):
-    return 627.509 * x
-@trainer.on(ignite.engine.Events.STARTED)
-def initialize(trainer):
-    trainer.state.best_validation_rmse = math.inf
-    trainer.state.no_improve_count = 0

###############################################################################
# Now let's register some event handlers to work with tqdm to display progress:
@trainer.on(ignite.engine.Events.EPOCH_STARTED)
def init_tqdm(trainer):
-    trainer.state.tqdm = tqdm.tqdm(total=len(training), desc='epoch')
    trainer.state.tqdm = tqdm.tqdm(total=len(training),
                                   file=sys.stdout, desc='epoch')


@trainer.on(ignite.engine.Events.ITERATION_COMPLETED)
@@ -98,6 +181,12 @@ def finalize_tqdm(trainer):
    trainer.state.tqdm.close()
###############################################################################
# And some event handlers to compute validation and training metrics:
def hartree2kcal(x):
    return 627.509 * x
@trainer.on(ignite.engine.Events.EPOCH_STARTED)
def validation_and_checkpoint(trainer):
    def evaluate(dataset, name):
@@ -111,30 +200,18 @@ def validation_and_checkpoint(trainer):
        metrics = evaluator.state.metrics
        rmse = hartree2kcal(metrics['RMSE'])
        writer.add_scalar(name, rmse, trainer.state.epoch)
-        return rmse

    # compute validation RMSE
-    rmse = evaluate(validation, 'validation_rmse_vs_epoch')
    evaluate(validation, 'validation_rmse_vs_epoch')

    # compute training RMSE
-    if trainer.state.epoch % parser.training_rmse_every == 1:
    if trainer.state.epoch % training_rmse_every == 1:
        evaluate(training, 'training_rmse_vs_epoch')
-    # handle best validation RMSE
-    if rmse < trainer.state.best_validation_rmse:
-        trainer.state.no_improve_count = 0
-        trainer.state.best_validation_rmse = rmse
-        writer.add_scalar('best_validation_rmse_vs_epoch', rmse,
-                          trainer.state.epoch)
-        torch.save(nnp.state_dict(), parser.model_checkpoint)
-    else:
-        trainer.state.no_improve_count += 1
-    writer.add_scalar('no_improve_count_vs_epoch',
-                      trainer.state.no_improve_count,
-                      trainer.state.epoch)
-    if trainer.state.no_improve_count > parser.early_stopping:
-        trainer.terminate()

###############################################################################
# Also some handlers to log the elapsed time:
start = timeit.default_timer()


@trainer.on(ignite.engine.Events.EPOCH_STARTED)
@@ -143,10 +220,14 @@ def log_time(trainer):
    writer.add_scalar('time_vs_epoch', elapsed, trainer.state.epoch)


###############################################################################
# Also log the loss per iteration:
@trainer.on(ignite.engine.Events.ITERATION_COMPLETED)
-def log_loss_and_time(trainer):
def log_loss(trainer):
    iteration = trainer.state.iteration
    writer.add_scalar('loss_vs_iteration', trainer.state.output, iteration)

-trainer.run(training, max_epochs=parser.max_epochs)

###############################################################################
# And finally, we are ready to run:
trainer.run(training, max_epochs)
import argparse
import torchani
import torch
-import os
import timeit
import tqdm

-path = os.path.dirname(os.path.realpath(__file__))

# parse command line arguments
parser = argparse.ArgumentParser()
parser.add_argument('filename',
......
...@@ -2,7 +2,6 @@ import torch ...@@ -2,7 +2,6 @@ import torch
import ignite import ignite
import torchani import torchani
import timeit import timeit
import model
import tqdm import tqdm
import argparse import argparse
...@@ -21,11 +20,39 @@ parser = parser.parse_args() ...@@ -21,11 +20,39 @@ parser = parser.parse_args()
# set up benchmark # set up benchmark
device = torch.device(parser.device) device = torch.device(parser.device)
nnp = model.get_or_create_model('/tmp/model.pt', device=device) builtins = torchani.neurochem.Builtins()
consts = builtins.consts
aev_computer = builtins.aev_computer
shift_energy = builtins.energy_shifter
def atomic():
model = torch.nn.Sequential(
torch.nn.Linear(384, 128),
torch.nn.CELU(0.1),
torch.nn.Linear(128, 128),
torch.nn.CELU(0.1),
torch.nn.Linear(128, 64),
torch.nn.CELU(0.1),
torch.nn.Linear(64, 1)
)
return model
model = torchani.ANIModel([atomic() for _ in range(4)])
class Flatten(torch.nn.Module):
def forward(self, x):
return x[0], x[1].flatten()
nnp = torch.nn.Sequential(aev_computer, model, Flatten()).to(device)
dataset = torchani.data.BatchedANIDataset( dataset = torchani.data.BatchedANIDataset(
parser.dataset_path, model.consts.species_to_tensor, parser.dataset_path, consts.species_to_tensor,
parser.batch_size, device=device, parser.batch_size, device=device,
transform=[model.shift_energy.subtract_from_dataset]) transform=[shift_energy.subtract_from_dataset])
container = torchani.ignite.Container({'energies': nnp}) container = torchani.ignite.Container({'energies': nnp})
optimizer = torch.optim.Adam(nnp.parameters()) optimizer = torch.optim.Adam(nnp.parameters())
......
@@ -90,10 +90,10 @@ class BatchedANIDataset(Dataset):
    into chunks according to some heuristics, so that each chunk would only
    have molecules of similar size, to minimize the padding required.

-    So, when iterating on this dataset, a tuple will be yeilded. The first
    So, when iterating on this dataset, a tuple will be yielded. The first
    element of this tuple is a list of (species, coordinates) pairs. Each pair
    is a chunk of molecules of similar size. The second element of this tuple
-    would be a dictonary, where the keys are those specified in the argument
    would be a dictionary, where the keys are those specified in the argument
    :attr:`properties`, and values are a single tensor of the whole batch
    (properties are not split into chunks).
......