Unverified commit 3a043d45 authored by Ignacio Pickering, committed by GitHub

Refactor builtinnet (#474)

* Refactor BuiltinNet into BuiltinModel and BuiltinEnsemble

* Change test name

* flake8 whitespace

* Get rid of redundant comparison

* remove unused import

* Fix info file bug

* flake8

* don't use nonlocal on get_resource

* Revert "don't use nonlocal on get_resource"

This reverts commit efe221bb4f7ce4a1ccb8c3c73b886bdbad019ab0.

* Fix get_resource bug

* Add missing init call

* Fix species_to_tensor bug

* Add species attribute

* Add some extra attributes

* Fix bug in getitem

* Always assign species_converter

* Species to tensor is also an entry point

* Let's not make it an entry point for now

* delete duplicated definition
parent 7059e9a6
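A quick orientation before the diff: after this refactor, ANI1x and ANI1ccx are no longer classes deriving from BuiltinNet but factory functions that build a BuiltinEnsemble from the packaged NeuroChem resources. The sketch below shows what that means for callers; it is a minimal illustration, and the commented values are only what one would expect, not captured output.

```python
import torchani

# ANI1x is now a factory function returning a BuiltinEnsemble,
# not a subclass of the old BuiltinNet.
model = torchani.models.ANI1x()

print(type(model).__name__)  # expected: BuiltinEnsemble
print(len(model))            # 8 member networks in the ensemble
print(model.species)         # parametrized species, e.g. ['H', 'C', 'N', 'O']
```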
@@ -29,7 +29,7 @@ class TestSpeciesConverterJIT(TestSpeciesConverter):
         self.c = torch.jit.script(self.c)
 
 
-class TestBuiltinNetPeriodicTableIndex(unittest.TestCase):
+class TestBuiltinEnsemblePeriodicTableIndex(unittest.TestCase):
 
     def setUp(self):
         self.model1 = torchani.models.ANI1x()
......
@@ -32,73 +32,26 @@ from torch import Tensor
 from typing import Tuple, Optional
 from pkg_resources import resource_filename
 from . import neurochem
-from .nn import Sequential, SpeciesConverter, SpeciesEnergies
+from .nn import SpeciesConverter, SpeciesEnergies
 from .aev import AEVComputer
 
 
-class BuiltinNet(torch.nn.Module):
-    """Private template for the builtin ANI ensemble models.
-
-    All ANI ensemble models form the ANI models zoo should inherit from this class.
-    This class is a torch module that sequentially calculates
-    AEVs, then energies from a torchani.Ensemble and then uses EnergyShifter
-    to shift those energies. It is essentially a sequential
-    'AEVComputer -> Ensemble -> EnergyShifter'.
-
-    .. note::
-        This class is for internal use only, avoid using it, use ANI1x, ANI1ccx,
-        etc instead. Don't confuse this class with torchani.Ensemble, which
-        is only a container for many ANIModel instances and shouldn't be used
-        directly for calculations.
-
-    Attributes:
-        const_file (:class:`str`): Path to the file with the builtin constants.
-        sae_file (:class:`str`): Path to the file with the Self Atomic Energies.
-        ensemble_prefix (:class:`str`): Prefix of directories.
-        ensemble_size (:class:`int`): Number of models in the ensemble.
-        energy_shifter (:class:`torchani.EnergyShifter`): Energy shifter with
-            builtin Self Atomic Energies.
-        aev_computer (:class:`torchani.AEVComputer`): AEV computer with
-            builtin constants
-        neural_networks (:class:`torchani.Ensemble`): Ensemble of ANIModel networks
-        periodic_table_index (bool): Whether to use element number in periodic table
-            to index species. If set to `False`, then indices must be `0, 1, 2, ..., N - 1`
-            where `N` is the number of parametrized species.
-    """
-
-    def __init__(self, info_file, periodic_table_index=False):
-        super(BuiltinNet, self).__init__()
-        self.periodic_table_index = periodic_table_index
-        package_name = '.'.join(__name__.split('.')[:-1])
-        info_file = 'resources/' + info_file
-        self.info_file = resource_filename(package_name, info_file)
-
-        with open(self.info_file) as f:
-            lines = [x.strip() for x in f.readlines()][:4]
-            const_file_path, sae_file_path, ensemble_prefix_path, ensemble_size = lines
-            const_file_path = 'resources/' + const_file_path
-            sae_file_path = 'resources/' + sae_file_path
-            ensemble_prefix_path = 'resources/' + ensemble_prefix_path
-            ensemble_size = int(ensemble_size)
-            self.const_file = resource_filename(package_name, const_file_path)
-            self.sae_file = resource_filename(package_name, sae_file_path)
-            self.ensemble_prefix = resource_filename(package_name, ensemble_prefix_path)
-            self.ensemble_size = ensemble_size
-
-        self.consts = neurochem.Constants(self.const_file)
-        self.species = self.consts.species
-        self.species_converter = SpeciesConverter(self.species)
-        self.aev_computer = AEVComputer(**self.consts)
-        self.energy_shifter, self.sae_dict = neurochem.load_sae(self.sae_file, return_dict=True)
-        self.neural_networks = neurochem.load_model_ensemble(
-            self.species, self.ensemble_prefix, self.ensemble_size)
-
-    @torch.jit.export
-    def _recast_long_buffers(self):
-        self.species_converter.conv_tensor = self.species_converter.conv_tensor.to(dtype=torch.long)
-        self.aev_computer.triu_index = self.aev_computer.triu_index.to(dtype=torch.long)
+class BuiltinModel(torch.nn.Module):
+    r"""Private template for the builtin ANI models"""
+
+    def __init__(self, species_converter, aev_computer, neural_networks, energy_shifter, species_to_tensor, consts, sae_dict, periodic_table_index):
+        super(BuiltinModel, self).__init__()
+        self.species_converter = species_converter
+        self.aev_computer = aev_computer
+        self.neural_networks = neural_networks
+        self.energy_shifter = energy_shifter
+        self._species_to_tensor = species_to_tensor
+        self.species = consts.species
+        self.periodic_table_index = periodic_table_index
+
+        # a bit useless maybe
+        self.consts = consts
+        self.sae_dict = sae_dict
 
     def forward(self, species_coordinates: Tuple[Tensor, Tensor],
                 cell: Optional[Tensor] = None,
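The new `BuiltinModel.__init__` no longer parses the `.info` resource file itself; it simply stores components that are built elsewhere (the parsing moves into `BuiltinEnsemble._from_neurochem_resources` later in this diff). The `forward` signature is unchanged, so inference still looks like the following sketch; the methane geometry below is illustrative only.

```python
import torch
import torchani

model = torchani.models.ANI1x()

# Rough methane geometry in Angstrom (illustrative values only).
coordinates = torch.tensor([[[0.03, 0.01, 0.01],
                             [-0.83, 0.39, -0.26],
                             [-0.67, -0.84, 0.21],
                             [0.46, 0.54, 0.81],
                             [0.66, -0.17, -0.91]]], requires_grad=True)
species = model.species_to_tensor('CHHHH').unsqueeze(0)

# forward() takes a (species, coordinates) tuple, plus optional cell/pbc.
energy = model((species, coordinates)).energies
forces = -torch.autograd.grad(energy.sum(), coordinates)[0]
```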
@@ -122,55 +75,30 @@ class BuiltinNet(torch.nn.Module):
         species_energies = self.neural_networks(species_aevs)
         return self.energy_shifter(species_energies)
 
-    def __getitem__(self, index):
-        """Get a single 'AEVComputer -> ANIModel -> EnergyShifter' sequential model
-
-        Indexing allows access to a single model inside the ensemble
-        that can be used directly for calculations. The model consists
-        of a sequence AEVComputer -> ANIModel -> EnergyShifter
-        and can return an ase calculator and convert species to tensor.
-
-        Args:
-            index (:class:`int`): Index of the model
-
-        Returns:
-            ret: (:class:`Sequential`): Sequential model ready for
-                calculations
-        """
-        if self.periodic_table_index:
-            ret = Sequential(
-                self.species_converter,
-                self.aev_computer,
-                self.neural_networks[index],
-                self.energy_shifter
-            )
-        else:
-            ret = Sequential(
-                self.aev_computer,
-                self.neural_networks[index],
-                self.energy_shifter
-            )
-
-        def ase(**kwargs):
-            """Attach an ase calculator """
-            from . import ase
-            return ase.Calculator(self.species, ret, **kwargs)
-        ret.ase = ase
-        ret.species_to_tensor = self.consts.species_to_tensor
-        ret.periodic_table_index = self.periodic_table_index
-        return ret
-
-    def __len__(self):
-        """Get the number of networks in the ensemble
-
-        Returns:
-            length (:class:`int`): Number of networks in the ensemble
-        """
-        return len(self.neural_networks)
+    @torch.jit.export
+    def _recast_long_buffers(self):
+        self.species_converter.conv_tensor = self.species_converter.conv_tensor.to(dtype=torch.long)
+        self.aev_computer.triu_index = self.aev_computer.triu_index.to(dtype=torch.long)
+
+    def species_to_tensor(self, *args, **kwargs):
+        """Convert species from strings to tensor.
+
+        See also :method:`torchani.neurochem.Constant.species_to_tensor`
+
+        Arguments:
+            species (:class:`str`): A string of chemical symbols
+
+        Returns:
+            tensor (:class:`torch.Tensor`): A 1D tensor of integers
+        """
+        # The only difference between this and the "raw" private version
+        # _species_to_tensor is that this sends the final tensor to the model
+        # device
+        return self._species_to_tensor(*args, **kwargs) \
+            .to(self.aev_computer.ShfR.device)
 
     def ase(self, **kwargs):
-        """Get an ASE Calculator using this ANI model ensemble
+        """Get an ASE Calculator using this ANI model
 
         Arguments:
             kwargs: ase.Calculator kwargs
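`species_to_tensor` now delegates to the private `_species_to_tensor` passed into the constructor and moves the result onto the model's device, while `ase()` keeps wrapping the model in an ASE calculator. A small usage sketch follows; it requires the `ase` package, and the `atoms.calc` assignment assumes a reasonably recent ASE release.

```python
import torchani
from ase.build import molecule

model = torchani.models.ANI1x()

# species_to_tensor maps chemical symbols to internal indices on the model device.
species = model.species_to_tensor('CHHHH').unsqueeze(0)
print(species)  # e.g. tensor([[1, 0, 0, 0, 0]]) for the H, C, N, O ordering

# ase() returns an ASE calculator backed by this model.
atoms = molecule('CH4')
atoms.calc = model.ase()             # older ASE: atoms.set_calculator(model.ase())
print(atoms.get_potential_energy())  # energy in eV
```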
@@ -181,22 +109,118 @@ class BuiltinNet(torch.nn.Module):
         from . import ase
         return ase.Calculator(self.species, self, **kwargs)
 
-    def species_to_tensor(self, *args, **kwargs):
-        """Convert species from strings to tensor.
-
-        See also :method:`torchani.neurochem.Constant.species_to_tensor`
-
-        Arguments:
-            species (:class:`str`): A string of chemical symbols
-
-        Returns:
-            tensor (:class:`torch.Tensor`): A 1D tensor of integers
-        """
-        return self.consts.species_to_tensor(*args, **kwargs) \
-            .to(self.aev_computer.ShfR.device)
-
-
-class ANI1x(BuiltinNet):
+
+class BuiltinEnsemble(BuiltinModel):
+    """Private template for the builtin ANI ensemble models.
+
+    ANI ensemble models from the ANI model zoo are instances of this class.
+    This class is a torch module that sequentially calculates
+    AEVs, then energies from a torchani.Ensemble and then uses EnergyShifter
+    to shift those energies. It is essentially a sequential
+    'AEVComputer -> Ensemble -> EnergyShifter'
+    (periodic_table_index=False), or a sequential
+    'SpeciesConverter -> AEVComputer -> Ensemble -> EnergyShifter'
+    (periodic_table_index=True).
+
+    .. note::
+        This class is for internal use only; avoid relying on anything from it
+        except the public methods, and always use ANI1x, ANI1ccx, etc. to
+        instantiate the models.
+
+        Also, don't confuse this class with torchani.Ensemble, which is only a
+        container for many ANIModel instances and shouldn't be used directly
+        for calculations.
+
+    Attributes:
+        species_converter (:class:`torchani.nn.SpeciesConverter`): Converts
+            periodic table indices to internal indices. Only used if
+            periodic_table_index is `True`.
+        aev_computer (:class:`torchani.AEVComputer`): AEV computer with
+            builtin constants
+        energy_shifter (:class:`torchani.EnergyShifter`): Energy shifter with
+            builtin Self Atomic Energies.
+        periodic_table_index (bool): Whether to use element number in periodic table
+            to index species. If set to `False`, then indices must be `0, 1, 2, ..., N - 1`
+            where `N` is the number of parametrized species.
+    """
+
+    def __init__(self, species_converter, aev_computer, neural_networks,
+                 energy_shifter, species_to_tensor, consts, sae_dict, periodic_table_index):
+        super(BuiltinEnsemble, self).__init__(species_converter,
+                                              aev_computer,
+                                              neural_networks,
+                                              energy_shifter,
+                                              species_to_tensor,
+                                              consts,
+                                              sae_dict,
+                                              periodic_table_index)
+
+    @classmethod
+    def _from_neurochem_resources(cls, info_file_path, periodic_table_index=False):
+        def get_resource(file_path):
+            package_name = '.'.join(__name__.split('.')[:-1])
+            return resource_filename(package_name, 'resources/' + file_path)
+
+        info_file = get_resource(info_file_path)
+        with open(info_file) as f:
+            # const_file: Path to the file with the builtin constants.
+            # sae_file: Path to the file with the Self Atomic Energies.
+            # ensemble_prefix: Prefix of the neurochem resource directories.
+            lines = [x.strip() for x in f.readlines()][:4]
+            const_file_path, sae_file_path, ensemble_prefix_path, ensemble_size = lines
+            const_file = get_resource(const_file_path)
+            sae_file = get_resource(sae_file_path)
+            ensemble_prefix = get_resource(ensemble_prefix_path)
+            ensemble_size = int(ensemble_size)
+
+        consts = neurochem.Constants(const_file)
+        species_converter = SpeciesConverter(consts.species)
+        aev_computer = AEVComputer(**consts)
+        neural_networks = neurochem.load_model_ensemble(consts.species,
+                                                        ensemble_prefix, ensemble_size)
+        energy_shifter, sae_dict = neurochem.load_sae(sae_file, return_dict=True)
+        species_to_tensor = consts.species_to_tensor
+
+        return cls(species_converter, aev_computer, neural_networks,
+                   energy_shifter, species_to_tensor, consts, sae_dict, periodic_table_index)
+
+    def __getitem__(self, index):
+        """Get a single 'AEVComputer -> ANIModel -> EnergyShifter' sequential model
+
+        Indexing allows access to a single model inside the ensemble
+        that can be used directly for calculations. The model consists
+        of a sequence AEVComputer -> ANIModel -> EnergyShifter
+        and can return an ase calculator and convert species to tensor.
+
+        Args:
+            index (:class:`int`): Index of the model
+
+        Returns:
+            ret: (:class:`torchani.models.BuiltinModel`) Model ready for
+                calculations
+        """
+        ret = BuiltinModel(self.species_converter, self.aev_computer,
+                           self.neural_networks[index], self.energy_shifter,
+                           self._species_to_tensor, self.consts, self.sae_dict,
+                           self.periodic_table_index)
+        return ret
+
+    def __len__(self):
+        """Get the number of networks in the ensemble
+
+        Returns:
+            length (:class:`int`): Number of networks in the ensemble
+        """
+        return len(self.neural_networks)
+
+
+def ANI1x(periodic_table_index=False):
     """The ANI-1x model as in `ani-1x_8x on GitHub`_ and `Active Learning Paper`_.
 
     The ANI-1x model is an ensemble of 8 networks that was trained using
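`__getitem__` now returns a proper `BuiltinModel` that shares the ensemble's AEV computer, energy shifter, and constants, instead of an ad-hoc `Sequential` with attributes patched on. The sketch below shows what that enables; since the torchani Ensemble averages its members, the mean of the member predictions should land close to the full ensemble prediction. Geometry values are illustrative only.

```python
import torch
import torchani

ensemble = torchani.models.ANI1x()
single = ensemble[0]   # BuiltinModel wrapping one of the 8 member networks

coordinates = torch.tensor([[[0.03, 0.01, 0.01],
                             [-0.83, 0.39, -0.26],
                             [-0.67, -0.84, 0.21],
                             [0.46, 0.54, 0.81],
                             [0.66, -0.17, -0.91]]])
species = ensemble.species_to_tensor('CHHHH').unsqueeze(0)

member_energies = torch.stack([ensemble[i]((species, coordinates)).energies
                               for i in range(len(ensemble))])
ensemble_energy = ensemble((species, coordinates)).energies

# The ensemble averages its members, so these should agree closely.
print(member_energies.mean(0), ensemble_energy)
```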
@@ -210,12 +234,10 @@ class ANI1x(BuiltinNet):
     .. _Active Learning Paper:
         https://aip.scitation.org/doi/abs/10.1063/1.5023802
     """
-
-    def __init__(self, *args, **kwargs):
-        super().__init__('ani-1x_8x.info', *args, **kwargs)
+    return BuiltinEnsemble._from_neurochem_resources('ani-1x_8x.info', periodic_table_index)
 
 
-class ANI1ccx(BuiltinNet):
+def ANI1ccx(periodic_table_index=False):
     """The ANI-1ccx model as in `ani-1ccx_8x on GitHub`_ and `Transfer Learning Paper`_.
 
     The ANI-1ccx model is an ensemble of 8 networks that was trained
@@ -230,6 +252,4 @@ class ANI1ccx(BuiltinNet):
     .. _Transfer Learning Paper:
         https://doi.org/10.26434/chemrxiv.6744440.v1
     """
-
-    def __init__(self, *args, **kwargs):
-        super().__init__('ani-1ccx_8x.info', *args, **kwargs)
+    return BuiltinEnsemble._from_neurochem_resources('ani-1ccx_8x.info', periodic_table_index)
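With the model zoo entry points reduced to plain functions, `periodic_table_index=True` is just a constructor flag: the species tensor then holds atomic numbers, and the always-assigned `SpeciesConverter` maps them to internal indices before the AEV computation. A sketch of the two indexing modes; the geometry is the same illustrative methane as above, and the energies come out in Hartree.

```python
import torch
import torchani

coordinates = torch.tensor([[[0.03, 0.01, 0.01],
                             [-0.83, 0.39, -0.26],
                             [-0.67, -0.84, 0.21],
                             [0.46, 0.54, 0.81],
                             [0.66, -0.17, -0.91]]])

# Internal indices (default): species must be 0..N-1 in the builtin ordering.
model = torchani.models.ANI1ccx()
species = model.species_to_tensor('CHHHH').unsqueeze(0)
e_internal = model((species, coordinates)).energies

# Periodic-table indices: species are atomic numbers (C=6, H=1).
model_pt = torchani.models.ANI1ccx(periodic_table_index=True)
species_pt = torch.tensor([[6, 1, 1, 1, 1]])
e_pt = model_pt((species_pt, coordinates)).energies

print(e_internal, e_pt)  # the two modes should give matching energies
```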