"src/libtorchaudio/pybind/pybind.cpp" did not exist on "f2da58619b803bc12fc737edf903fd67219bc2d3"
Unverified Commit 1b2faf43 authored by Gao, Xiang, committed by GitHub

Add ANI model zoo (#169)

* add ANI model zoo

* Update cache_aev.py
parent b3c7e0e3
......@@ -10,6 +10,16 @@ TorchANI
.. autoclass:: torchani.nn.Gaussian
Model Zoo
=========
.. automodule:: torchani.models
.. autoclass:: torchani.models.ANI1x
    :members:

.. autoclass:: torchani.models.ANI1ccx
    :members:
Datasets
========
......@@ -54,7 +64,6 @@ ASE Interface
.. autoclass:: torchani.ase.NeighborList
    :members:
.. autoclass:: torchani.ase.Calculator
    :members:
Ignite Helpers
==============
......
......@@ -15,10 +15,11 @@ Welcome to TorchANI's documentation!
:caption: Examples
examples/energy_force
examples/ase_interface
examples/load_from_neurochem
examples/nnp_training
examples/cache_aev
examples/neurochem_trainer
examples/ase_interface
.. toctree::
:maxdepth: 2
......
......@@ -30,9 +30,7 @@ print(len(atoms), "atoms in the cell")
###############################################################################
# Now let's create a calculator from builtin models:
builtin = torchani.neurochem.Builtins()
calculator = torchani.ase.Calculator(builtin.species, builtin.aev_computer,
builtin.models, builtin.energy_shifter)
calculator = torchani.models.ANI1ccx().ase()
atoms.set_calculator(calculator)
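###############################################################################
# As a quick sanity check (an editor's addition, not part of the original
# diff), the attached calculator behaves like any other ASE calculator:
print(atoms.get_potential_energy())  # total energy in eV
print(atoms.get_forces())            # forces in eV/Angstrom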
###############################################################################
......
......@@ -52,8 +52,8 @@ log = 'runs'
###############################################################################
# Here, there is no need to manually construct the AEV computer and energy
# shifter, but we do need to generate a disk cache for the datasets.
const_file = os.path.join(path, '../torchani/resources/ani-1x_dft_x8ens/rHCNO-5.2R_16-3.5A_a4-8.params') # noqa: E501
sae_file = os.path.join(path, '../torchani/resources/ani-1x_dft_x8ens/sae_linfit.dat') # noqa: E501
const_file = os.path.join(path, '../torchani/resources/ani-1x_8x/rHCNO-5.2R_16-3.5A_a4-8.params') # noqa: E501
sae_file = os.path.join(path, '../torchani/resources/ani-1x_8x/sae_linfit.dat') # noqa: E501
training_cache = './training_cache'
validation_cache = './validation_cache'
......
# -*- coding: utf-8 -*-
"""
Computing Energy and Force Using Builtin Models
===============================================
Computing Energy and Force Using Models from the Model Zoo
==========================================================
TorchANI has a model ensemble trained by NeuroChem on the `ANI-1x dataset`_.
These models are shipped with TorchANI and can be used directly.
.. _ANI-1x dataset:
https://aip.scitation.org/doi/abs/10.1063/1.5023802
TorchANI has a model zoo trained by NeuroChem. These models are shipped with
TorchANI and can be used directly.
"""
###############################################################################
......@@ -20,18 +17,12 @@ import torchani
device = torch.device('cpu')
###############################################################################
# Let's now load the built-in models and create a pipeline of AEV computer,
# neural networks, and energy shifter. This pipeline will first compute AEV,
# then use all models in the ensemble to compute molecular energies, and take
# the average of these energies to obtain a final output. The reason we need an
# energy shifter in the end is that the output of these networks is not the
# total energy but the total energy subtracted by a self energy for each atom.
builtin = torchani.neurochem.Builtins()
model = torch.nn.Sequential(
builtin.aev_computer,
builtin.models,
builtin.energy_shifter
)
# Let's now load the built-in ANI-1ccx models. The builtin ANI-1ccx contains 8
# models trained with different initializations. Predicting the energy and
# force using the average of the 8 models outperforms using a single model, so
# it is always recommended to use an ensemble, unless the speed of computation
# is an issue in your application.
model = torchani.models.ANI1ccx()
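# A single network of the ensemble can also be selected by indexing, as the
# torchani.models docstring further down illustrates (shown here only as an
# aside; the rest of this example keeps the full ensemble):
model0 = model[0]  # first network of the ANI-1ccx ensemble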
###############################################################################
# Now let's define the coordinate and species. If you just want to compute the
......@@ -47,7 +38,7 @@ coordinates = torch.tensor([[[0.03192167, 0.00638559, 0.01301679],
[0.45554739, 0.54289633, 0.81170881],
[0.66091919, -0.16799635, -0.91037834]]],
requires_grad=True, device=device)
species = builtin.consts.species_to_tensor('CHHHH').to(device).unsqueeze(0)
species = model.species_to_tensor('CHHHH').to(device).unsqueeze(0)
###############################################################################
# Now let's compute energy and force:
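# (The collapsed lines below do roughly the following; this is a sketch, not
# the verbatim file contents.)
_, energy = model((species, coordinates))
derivative = torch.autograd.grad(energy.sum(), coordinates)[0]
force = -derivative
print('Energy:', energy.item())
print('Force:', force.squeeze())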
......
# -*- coding: utf-8 -*-
"""
Construct Model From NeuroChem Files
====================================
This tutorial illustrates how to manually load models from `NeuroChem files`_.
.. _NeuroChem files:
https://github.com/isayev/ASE_ANI/tree/master/ani_models
"""
###############################################################################
# To begin with, let's first import the modules we will use:
import os
import torch
import torchani
import ase
###############################################################################
# Now let's read the constants from the constants file and construct the AEV
# computer.
try:
    path = os.path.dirname(os.path.realpath(__file__))
except NameError:
    path = os.getcwd()
const_file = os.path.join(path, '../torchani/resources/ani-1x_8x/rHCNO-5.2R_16-3.5A_a4-8.params') # noqa: E501
consts = torchani.neurochem.Constants(const_file)
aev_computer = torchani.AEVComputer(**consts)
###############################################################################
# Now let's read self energies and construct energy shifter.
sae_file = os.path.join(path, '../torchani/resources/ani-1x_8x/sae_linfit.dat') # noqa: E501
energy_shifter = torchani.neurochem.load_sae(sae_file)
###############################################################################
# Now let's read a whole ensemble of models.
model_prefix = os.path.join(path, '../torchani/resources/ani-1x_8x/train') # noqa: E501
ensemble = torchani.neurochem.load_model_ensemble(consts.species, model_prefix, 8) # noqa: E501
###############################################################################
# Or alternatively a single model.
model_dir = os.path.join(path, '../torchani/resources/ani-1x_8x/train0/networks') # noqa: E501
model = torchani.neurochem.load_model(consts.species, model_dir)
###############################################################################
# You can create the pipeline for computing energies:
# (Coordinates) -[AEVComputer]-> (AEV) -[Neural Network]->
# (Raw energies) -[EnergyShifter]-> (Final energies)
# using either the ensemble or a single model:
nnp1 = torch.nn.Sequential(aev_computer, ensemble, energy_shifter)
nnp2 = torch.nn.Sequential(aev_computer, model, energy_shifter)
print(nnp1)
print(nnp2)
###############################################################################
# You can also create an ASE calculator using the ensemble or single model:
calculator1 = torchani.ase.Calculator(consts.species, aev_computer,
ensemble, energy_shifter)
calculator2 = torchani.ase.Calculator(consts.species, aev_computer,
model, energy_shifter)
print(calculator1)
print(calculator2)
###############################################################################
# Now let's define a methane molecule
coordinates = torch.tensor([[[0.03192167, 0.00638559, 0.01301679],
[-0.83140486, 0.39370209, -0.26395324],
[-0.66518241, -0.84461308, 0.20759389],
[0.45554739, 0.54289633, 0.81170881],
[0.66091919, -0.16799635, -0.91037834]]],
requires_grad=True)
species = consts.species_to_tensor('CHHHH').unsqueeze(0)
methane = ase.Atoms('CHHHH', positions=coordinates.squeeze().detach().numpy())
###############################################################################
# Now let's compute energies using the ensemble directly:
_, energy = nnp1((species, coordinates))
derivative = torch.autograd.grad(energy.sum(), coordinates)[0]
force = -derivative
print('Energy:', energy.item())
print('Force:', force.squeeze())
###############################################################################
# And using the ASE interface of the ensemble:
methane.set_calculator(calculator1)
print('Energy:', methane.get_potential_energy() / ase.units.Hartree)
print('Force:', methane.get_forces() / ase.units.Hartree)
###############################################################################
# We can do the same thing with the single model:
_, energy = nnp2((species, coordinates))
derivative = torch.autograd.grad(energy.sum(), coordinates)[0]
force = -derivative
print('Energy:', energy.item())
print('Force:', force.squeeze())
methane.set_calculator(calculator2)
print('Energy:', methane.get_potential_energy() / ase.units.Hartree)
print('Force:', methane.get_forces() / ase.units.Hartree)
......@@ -61,8 +61,8 @@ log = 'runs'
###############################################################################
# Now let's read our constants and self energies from constant files and
# construct AEV computer.
const_file = os.path.join(path, '../torchani/resources/ani-1x_dft_x8ens/rHCNO-5.2R_16-3.5A_a4-8.params') # noqa: E501
sae_file = os.path.join(path, '../torchani/resources/ani-1x_dft_x8ens/sae_linfit.dat') # noqa: E501
const_file = os.path.join(path, '../torchani/resources/ani-1x_8x/rHCNO-5.2R_16-3.5A_a4-8.params') # noqa: E501
sae_file = os.path.join(path, '../torchani/resources/ani-1x_8x/sae_linfit.dat') # noqa: E501
consts = torchani.neurochem.Constants(const_file)
aev_computer = torchani.AEVComputer(**consts)
energy_shifter = torchani.neurochem.load_sae(sae_file)
......@@ -94,6 +94,8 @@ if os.path.isfile(model_checkpoint):
else:
    torch.save(nn.state_dict(), model_checkpoint)
###############################################################################
# Let's now create a pipeline of AEV Computer --> Neural Networks.
model = torch.nn.Sequential(aev_computer, nn).to(device)
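# The assembled pipeline takes a (species, coordinates) tuple and returns a
# (species, energies) tuple, just like the builtin models, e.g. (a sketch,
# variable names hypothetical):
#
#     _, predicted_energies = model((species, coordinates))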
###############################################################################
......
......@@ -31,6 +31,7 @@ from . import ignite
from . import utils
from . import neurochem
from . import data
from . import models
from pkg_resources import get_distribution, DistributionNotFound
try:
......@@ -40,7 +41,7 @@ except DistributionNotFound:
    pass
__all__ = ['AEVComputer', 'EnergyShifter', 'ANIModel', 'Ensemble',
'ignite', 'utils', 'neurochem', 'data']
'ignite', 'utils', 'neurochem', 'data', 'models']
try:
    from . import ase  # noqa: F401
......
# -*- coding: utf-8 -*-
"""The ANI model zoo that stores public ANI models.
Currently the model zoo has two models: ANI-1x and ANI-1ccx. The corresponding
classes of these two models are :class:`ANI1x` and :class:`ANI1ccx`. These
classes share the same API. To use a builtin model, simply create an object of
the corresponding class. These classes are subclasses of
:class:`torch.nn.Module` and can be used directly. Below is an example of
how to use these models:
.. code:: python

    model = torchani.models.ANI1x()
    # compute energy using ANI-1x model ensemble
    _, energies = model((species, coordinates))
    model.ase()  # get ASE Calculator using this ensemble
    # convert atom species from string to long tensor
    model.species_to_tensor('CHHHH')

    model0 = model[0]  # get the first model in the ensemble
    # compute energy using the first model in the ANI-1x model ensemble
    _, energies = model0((species, coordinates))
    model0.ase()  # get ASE Calculator using this model
    # convert atom species from string to long tensor
    model0.species_to_tensor('CHHHH')
"""
import torch
from . import neurochem
class BuiltinModels(torch.nn.Module):

    def __init__(self, builtin_class):
        super(BuiltinModels, self).__init__()
        self.builtins = builtin_class()
        self.aev_computer = self.builtins.aev_computer
        self.neural_networks = self.builtins.models
        self.energy_shifter = self.builtins.energy_shifter
        self.species_to_tensor = self.builtins.consts.species_to_tensor

    def forward(self, species_coordinates):
        # (species, coordinates) -> (species, AEVs) -> (species, raw energies)
        # -> (species, shifted energies)
        species_aevs = self.aev_computer(species_coordinates)
        species_energies = self.neural_networks(species_aevs)
        return self.energy_shifter(species_energies)

    def __getitem__(self, index):
        # Indexing returns a pipeline built around a single network of the
        # ensemble instead of the whole ensemble.
        ret = torch.nn.Sequential(
            self.aev_computer,
            self.neural_networks[index],
            self.energy_shifter
        )

        def ase():
            from . import ase
            return ase.Calculator(self.builtins.species,
                                  self.aev_computer,
                                  self.neural_networks[index],
                                  self.energy_shifter)

        # Attach helpers so the single-model pipeline mirrors the ensemble API.
        ret.ase = ase
        ret.species_to_tensor = self.builtins.consts.species_to_tensor
        return ret

    def __len__(self):
        return len(self.neural_networks)

    def ase(self):
        """Get an ASE Calculator using this model"""
        from . import ase
        return ase.Calculator(self.builtins.species, self.aev_computer,
                              self.neural_networks, self.energy_shifter)
class ANI1x(BuiltinModels):
    """The ANI-1x model as in `ani-1x_8x on GitHub`_ and
    `Active Learning Paper`_.

    .. _ani-1x_8x on GitHub:
        https://github.com/isayev/ASE_ANI/tree/master/ani_models/ani-1x_8x

    .. _Active Learning Paper:
        https://aip.scitation.org/doi/abs/10.1063/1.5023802
    """

    def __init__(self):
        super(ANI1x, self).__init__(neurochem.Builtins)
class ANI1ccx(BuiltinModels):
    """The ANI-1ccx model as in `ani-1ccx_8x on GitHub`_ and
    `Transfer Learning Paper`_.

    .. _ani-1ccx_8x on GitHub:
        https://github.com/isayev/ASE_ANI/tree/master/ani_models/ani-1ccx_8x

    .. _Transfer Learning Paper:
        https://doi.org/10.26434/chemrxiv.6744440.v1
    """

    def __init__(self):
        super(ANI1ccx, self).__init__(neurochem.BuiltinsANI1CCX)
......@@ -327,11 +327,11 @@ class Builtins(BuiltinsAbstract):
"""
def __init__(self):
parent_name = '.'.join(__name__.split('.')[:-1])
const_file_path = 'resources/ani-1x_dft_x8ens'\
const_file_path = 'resources/ani-1x_8x'\
'/rHCNO-5.2R_16-3.5A_a4-8.params'
sae_file_path = 'resources/ani-1x_dft_x8ens/sae_linfit.dat'
sae_file_path = 'resources/ani-1x_8x/sae_linfit.dat'
ensemble_size = 8
ensemble_prefix_path = 'resources/ani-1x_dft_x8ens/train'
ensemble_prefix_path = 'resources/ani-1x_8x/train'
super(Builtins, self).__init__(
parent_name,
const_file_path,
......