Unverified commit 3a043d45 authored by Ignacio Pickering, committed by GitHub

Refactor builtinnet (#474)

* Refactor BuiltinNet into BuiltinModel and BuiltinEnsemble

* Change test name

* flake8 whitespace

* Get rid of redundant comparison

* remove unused import

* Fix info file bug

* flake8

* don't use nonlocal on get_resource

* Revert "don't use nonlocal on get_resource"

This reverts commit efe221bb4f7ce4a1ccb8c3c73b886bdbad019ab0.

* Fix get_resource bug

* Add missing init call

* Fix species_to_tensor bug

* Add species attribute

* Add some extra attributes

* Fix bug in getitem

* Always assign species_converter

* Species to tensor is also an entry point

* Let's not make it an entry point for now

* delete duplicated definition
parent 7059e9a6
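A quick orientation before the diff: after this refactor, ANI1x and ANI1ccx are no longer classes deriving from BuiltinNet but factory functions that build a BuiltinEnsemble from the packaged NeuroChem resources. The sketch below shows what that means for callers; it is a minimal illustration, and the commented values are only what one would expect, not captured output.

```python
import torchani

# ANI1x is now a factory function returning a BuiltinEnsemble,
# not a subclass of the old BuiltinNet.
model = torchani.models.ANI1x()

print(type(model).__name__)  # expected: BuiltinEnsemble
print(len(model))            # 8 member networks in the ensemble
print(model.species)         # parametrized species, e.g. ['H', 'C', 'N', 'O']
```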
@@ -29,7 +29,7 @@ class TestSpeciesConverterJIT(TestSpeciesConverter):
         self.c = torch.jit.script(self.c)
 
 
-class TestBuiltinNetPeriodicTableIndex(unittest.TestCase):
+class TestBuiltinEnsemblePeriodicTableIndex(unittest.TestCase):
 
     def setUp(self):
         self.model1 = torchani.models.ANI1x()
......
@@ -32,73 +32,26 @@ from torch import Tensor
 from typing import Tuple, Optional
 from pkg_resources import resource_filename
 from . import neurochem
-from .nn import Sequential, SpeciesConverter, SpeciesEnergies
+from .nn import SpeciesConverter, SpeciesEnergies
 from .aev import AEVComputer
 
 
-class BuiltinNet(torch.nn.Module):
-    """Private template for the builtin ANI ensemble models.
-
-    All ANI ensemble models form the ANI models zoo should inherit from this class.
-    This class is a torch module that sequentially calculates
-    AEVs, then energies from a torchani.Ensemble and then uses EnergyShifter
-    to shift those energies. It is essentially a sequential
-    'AEVComputer -> Ensemble -> EnergyShifter'.
-
-    .. note::
-        This class is for internal use only, avoid using it, use ANI1x, ANI1ccx,
-        etc instead. Don't confuse this class with torchani.Ensemble, which
-        is only a container for many ANIModel instances and shouldn't be used
-        directly for calculations.
-
-    Attributes:
-        const_file (:class:`str`): Path to the file with the builtin constants.
-        sae_file (:class:`str`): Path to the file with the Self Atomic Energies.
-        ensemble_prefix (:class:`str`): Prefix of directories.
-        ensemble_size (:class:`int`): Number of models in the ensemble.
-        energy_shifter (:class:`torchani.EnergyShifter`): Energy shifter with
-            builtin Self Atomic Energies.
-        aev_computer (:class:`torchani.AEVComputer`): AEV computer with
-            builtin constants
-        neural_networks (:class:`torchani.Ensemble`): Ensemble of ANIModel networks
-        periodic_table_index (bool): Whether to use element number in periodic table
-            to index species. If set to `False`, then indices must be `0, 1, 2, ..., N - 1`
-            where `N` is the number of parametrized species.
-    """
-
-    def __init__(self, info_file, periodic_table_index=False):
-        super(BuiltinNet, self).__init__()
-        self.periodic_table_index = periodic_table_index
-        package_name = '.'.join(__name__.split('.')[:-1])
-        info_file = 'resources/' + info_file
-        self.info_file = resource_filename(package_name, info_file)
-
-        with open(self.info_file) as f:
-            lines = [x.strip() for x in f.readlines()][:4]
-            const_file_path, sae_file_path, ensemble_prefix_path, ensemble_size = lines
-            const_file_path = 'resources/' + const_file_path
-            sae_file_path = 'resources/' + sae_file_path
-            ensemble_prefix_path = 'resources/' + ensemble_prefix_path
-            ensemble_size = int(ensemble_size)
-            self.const_file = resource_filename(package_name, const_file_path)
-            self.sae_file = resource_filename(package_name, sae_file_path)
-            self.ensemble_prefix = resource_filename(package_name, ensemble_prefix_path)
-            self.ensemble_size = ensemble_size
-
-        self.consts = neurochem.Constants(self.const_file)
-        self.species = self.consts.species
-        self.species_converter = SpeciesConverter(self.species)
-        self.aev_computer = AEVComputer(**self.consts)
-        self.energy_shifter, self.sae_dict = neurochem.load_sae(self.sae_file, return_dict=True)
-        self.neural_networks = neurochem.load_model_ensemble(
-            self.species, self.ensemble_prefix, self.ensemble_size)
-
-    @torch.jit.export
-    def _recast_long_buffers(self):
-        self.species_converter.conv_tensor = self.species_converter.conv_tensor.to(dtype=torch.long)
-        self.aev_computer.triu_index = self.aev_computer.triu_index.to(dtype=torch.long)
+class BuiltinModel(torch.nn.Module):
+    r"""Private template for the builtin ANI models"""
+
+    def __init__(self, species_converter, aev_computer, neural_networks, energy_shifter, species_to_tensor, consts, sae_dict, periodic_table_index):
+        super(BuiltinModel, self).__init__()
+        self.species_converter = species_converter
+        self.aev_computer = aev_computer
+        self.neural_networks = neural_networks
+        self.energy_shifter = energy_shifter
+        self._species_to_tensor = species_to_tensor
+        self.species = consts.species
+        self.periodic_table_index = periodic_table_index
+
+        # a bit useless maybe
+        self.consts = consts
+        self.sae_dict = sae_dict
 
     def forward(self, species_coordinates: Tuple[Tensor, Tensor],
                 cell: Optional[Tensor] = None,
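The new `BuiltinModel.__init__` no longer parses the `.info` resource file itself; it simply stores components that are built elsewhere (the parsing moves into `BuiltinEnsemble._from_neurochem_resources` later in this diff). The `forward` signature is unchanged, so inference still looks like the following sketch; the methane geometry below is illustrative only.

```python
import torch
import torchani

model = torchani.models.ANI1x()

# Rough methane geometry in Angstrom (illustrative values only).
coordinates = torch.tensor([[[0.03, 0.01, 0.01],
                             [-0.83, 0.39, -0.26],
                             [-0.67, -0.84, 0.21],
                             [0.46, 0.54, 0.81],
                             [0.66, -0.17, -0.91]]], requires_grad=True)
species = model.species_to_tensor('CHHHH').unsqueeze(0)

# forward() takes a (species, coordinates) tuple, plus optional cell/pbc.
energy = model((species, coordinates)).energies
forces = -torch.autograd.grad(energy.sum(), coordinates)[0]
```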
@@ -122,55 +75,30 @@ class BuiltinNet(torch.nn.Module):
         species_energies = self.neural_networks(species_aevs)
         return self.energy_shifter(species_energies)
 
-    def __getitem__(self, index):
-        """Get a single 'AEVComputer -> ANIModel -> EnergyShifter' sequential model
-
-        Indexing allows access to a single model inside the ensemble
-        that can be used directly for calculations. The model consists
-        of a sequence AEVComputer -> ANIModel -> EnergyShifter
-        and can return an ase calculator and convert species to tensor.
-
-        Args:
-            index (:class:`int`): Index of the model
-
-        Returns:
-            ret: (:class:`Sequential`): Sequential model ready for
-                calculations
-        """
-        if self.periodic_table_index:
-            ret = Sequential(
-                self.species_converter,
-                self.aev_computer,
-                self.neural_networks[index],
-                self.energy_shifter
-            )
-        else:
-            ret = Sequential(
-                self.aev_computer,
-                self.neural_networks[index],
-                self.energy_shifter
-            )
-
-        def ase(**kwargs):
-            """Attach an ase calculator """
-            from . import ase
-            return ase.Calculator(self.species, ret, **kwargs)
-        ret.ase = ase
-        ret.species_to_tensor = self.consts.species_to_tensor
-        ret.periodic_table_index = self.periodic_table_index
-        return ret
-
-    def __len__(self):
-        """Get the number of networks in the ensemble
-
-        Returns:
-            length (:class:`int`): Number of networks in the ensemble
-        """
-        return len(self.neural_networks)
+    @torch.jit.export
+    def _recast_long_buffers(self):
+        self.species_converter.conv_tensor = self.species_converter.conv_tensor.to(dtype=torch.long)
+        self.aev_computer.triu_index = self.aev_computer.triu_index.to(dtype=torch.long)
+
+    def species_to_tensor(self, *args, **kwargs):
+        """Convert species from strings to tensor.
+
+        See also :method:`torchani.neurochem.Constant.species_to_tensor`
+
+        Arguments:
+            species (:class:`str`): A string of chemical symbols
+
+        Returns:
+            tensor (:class:`torch.Tensor`): A 1D tensor of integers
+        """
+        # The only difference between this and the "raw" private version
+        # _species_to_tensor is that this sends the final tensor to the model
+        # device
+        return self._species_to_tensor(*args, **kwargs) \
+            .to(self.aev_computer.ShfR.device)
 
     def ase(self, **kwargs):
-        """Get an ASE Calculator using this ANI model ensemble
+        """Get an ASE Calculator using this ANI model
 
         Arguments:
             kwargs: ase.Calculator kwargs
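`species_to_tensor` now delegates to the private `_species_to_tensor` passed into the constructor and moves the result onto the model's device, while `ase()` keeps wrapping the model in an ASE calculator. A small usage sketch follows; it requires the `ase` package, and the `atoms.calc` assignment assumes a reasonably recent ASE release.

```python
import torchani
from ase.build import molecule

model = torchani.models.ANI1x()

# species_to_tensor maps chemical symbols to internal indices on the model device.
species = model.species_to_tensor('CHHHH').unsqueeze(0)
print(species)  # e.g. tensor([[1, 0, 0, 0, 0]]) for the H, C, N, O ordering

# ase() returns an ASE calculator backed by this model.
atoms = molecule('CH4')
atoms.calc = model.ase()             # older ASE: atoms.set_calculator(model.ase())
print(atoms.get_potential_energy())  # energy in eV
```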
@@ -181,22 +109,118 @@ class BuiltinNet(torch.nn.Module):
         from . import ase
         return ase.Calculator(self.species, self, **kwargs)
 
-    def species_to_tensor(self, *args, **kwargs):
-        """Convert species from strings to tensor.
-
-        See also :method:`torchani.neurochem.Constant.species_to_tensor`
-
-        Arguments:
-            species (:class:`str`): A string of chemical symbols
-
-        Returns:
-            tensor (:class:`torch.Tensor`): A 1D tensor of integers
-        """
-        return self.consts.species_to_tensor(*args, **kwargs) \
-            .to(self.aev_computer.ShfR.device)
-
-
-class ANI1x(BuiltinNet):
+
+class BuiltinEnsemble(BuiltinModel):
+    """Private template for the builtin ANI ensemble models.
+
+    ANI ensemble models from the ANI model zoo are instances of this class.
+    This class is a torch module that sequentially calculates
+    AEVs, then energies from a torchani.Ensemble and then uses EnergyShifter
+    to shift those energies. It is essentially a sequential
+    'AEVComputer -> Ensemble -> EnergyShifter'
+    (periodic_table_index=False), or a sequential
+    'SpeciesConverter -> AEVComputer -> Ensemble -> EnergyShifter'
+    (periodic_table_index=True).
+
+    .. note::
+        This class is for internal use only; avoid relying on anything from it
+        except the public methods, and always use ANI1x, ANI1ccx, etc. to
+        instantiate the models.
+
+        Also, don't confuse this class with torchani.Ensemble, which is only a
+        container for many ANIModel instances and shouldn't be used directly
+        for calculations.
+
+    Attributes:
+        species_converter (:class:`torchani.nn.SpeciesConverter`): Converts
+            periodic table indices to internal indices. Only used if
+            periodic_table_index is `True`.
+        aev_computer (:class:`torchani.AEVComputer`): AEV computer with
+            builtin constants
+        energy_shifter (:class:`torchani.EnergyShifter`): Energy shifter with
+            builtin Self Atomic Energies.
+        periodic_table_index (bool): Whether to use element number in periodic table
+            to index species. If set to `False`, then indices must be `0, 1, 2, ..., N - 1`
+            where `N` is the number of parametrized species.
+    """
+
+    def __init__(self, species_converter, aev_computer, neural_networks,
+                 energy_shifter, species_to_tensor, consts, sae_dict, periodic_table_index):
+        super(BuiltinEnsemble, self).__init__(species_converter,
+                                              aev_computer,
+                                              neural_networks,
+                                              energy_shifter,
+                                              species_to_tensor,
+                                              consts,
+                                              sae_dict,
+                                              periodic_table_index)
+
+    @classmethod
+    def _from_neurochem_resources(cls, info_file_path, periodic_table_index=False):
+        def get_resource(file_path):
+            package_name = '.'.join(__name__.split('.')[:-1])
+            return resource_filename(package_name, 'resources/' + file_path)
+
+        info_file = get_resource(info_file_path)
+        with open(info_file) as f:
+            # const_file: Path to the file with the builtin constants.
+            # sae_file: Path to the file with the Self Atomic Energies.
+            # ensemble_prefix: Prefix of the neurochem resource directories.
+            lines = [x.strip() for x in f.readlines()][:4]
+            const_file_path, sae_file_path, ensemble_prefix_path, ensemble_size = lines
+            const_file = get_resource(const_file_path)
+            sae_file = get_resource(sae_file_path)
+            ensemble_prefix = get_resource(ensemble_prefix_path)
+            ensemble_size = int(ensemble_size)
+
+        consts = neurochem.Constants(const_file)
+        species_converter = SpeciesConverter(consts.species)
+        aev_computer = AEVComputer(**consts)
+        neural_networks = neurochem.load_model_ensemble(consts.species,
+                                                        ensemble_prefix, ensemble_size)
+        energy_shifter, sae_dict = neurochem.load_sae(sae_file, return_dict=True)
+        species_to_tensor = consts.species_to_tensor
+
+        return cls(species_converter, aev_computer, neural_networks,
+                   energy_shifter, species_to_tensor, consts, sae_dict, periodic_table_index)
+
+    def __getitem__(self, index):
+        """Get a single 'AEVComputer -> ANIModel -> EnergyShifter' sequential model
+
+        Indexing allows access to a single model inside the ensemble
+        that can be used directly for calculations. The model consists
+        of a sequence AEVComputer -> ANIModel -> EnergyShifter
+        and can return an ase calculator and convert species to tensor.
+
+        Args:
+            index (:class:`int`): Index of the model
+
+        Returns:
+            ret: (:class:`torchani.models.BuiltinModel`) Model ready for
+                calculations
+        """
+        ret = BuiltinModel(self.species_converter, self.aev_computer,
+                           self.neural_networks[index], self.energy_shifter,
+                           self._species_to_tensor, self.consts, self.sae_dict,
+                           self.periodic_table_index)
+        return ret
+
+    def __len__(self):
+        """Get the number of networks in the ensemble
+
+        Returns:
+            length (:class:`int`): Number of networks in the ensemble
+        """
+        return len(self.neural_networks)
+
+
+def ANI1x(periodic_table_index=False):
     """The ANI-1x model as in `ani-1x_8x on GitHub`_ and `Active Learning Paper`_.
 
     The ANI-1x model is an ensemble of 8 networks that was trained using
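`__getitem__` now returns a proper `BuiltinModel` that shares the ensemble's AEV computer, energy shifter, and constants, instead of an ad-hoc `Sequential` with attributes patched on. The sketch below shows what that enables; since the torchani Ensemble averages its members, the mean of the member predictions should land close to the full ensemble prediction. Geometry values are illustrative only.

```python
import torch
import torchani

ensemble = torchani.models.ANI1x()
single = ensemble[0]   # BuiltinModel wrapping one of the 8 member networks

coordinates = torch.tensor([[[0.03, 0.01, 0.01],
                             [-0.83, 0.39, -0.26],
                             [-0.67, -0.84, 0.21],
                             [0.46, 0.54, 0.81],
                             [0.66, -0.17, -0.91]]])
species = ensemble.species_to_tensor('CHHHH').unsqueeze(0)

member_energies = torch.stack([ensemble[i]((species, coordinates)).energies
                               for i in range(len(ensemble))])
ensemble_energy = ensemble((species, coordinates)).energies

# The ensemble averages its members, so these should agree closely.
print(member_energies.mean(0), ensemble_energy)
```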
@@ -210,12 +234,10 @@ class ANI1x(BuiltinNet):
     .. _Active Learning Paper:
         https://aip.scitation.org/doi/abs/10.1063/1.5023802
     """
-
-    def __init__(self, *args, **kwargs):
-        super().__init__('ani-1x_8x.info', *args, **kwargs)
+    return BuiltinEnsemble._from_neurochem_resources('ani-1x_8x.info', periodic_table_index)
 
 
-class ANI1ccx(BuiltinNet):
+def ANI1ccx(periodic_table_index=False):
     """The ANI-1ccx model as in `ani-1ccx_8x on GitHub`_ and `Transfer Learning Paper`_.
 
     The ANI-1ccx model is an ensemble of 8 networks that was trained
@@ -230,6 +252,4 @@ class ANI1ccx(BuiltinNet):
     .. _Transfer Learning Paper:
         https://doi.org/10.26434/chemrxiv.6744440.v1
     """
-
-    def __init__(self, *args, **kwargs):
-        super().__init__('ani-1ccx_8x.info', *args, **kwargs)
+    return BuiltinEnsemble._from_neurochem_resources('ani-1ccx_8x.info', periodic_table_index)
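With the model zoo entry points reduced to plain functions, `periodic_table_index=True` is just a constructor flag: the species tensor then holds atomic numbers, and the always-assigned `SpeciesConverter` maps them to internal indices before the AEV computation. A sketch of the two indexing modes; the geometry is the same illustrative methane as above, and the energies come out in Hartree.

```python
import torch
import torchani

coordinates = torch.tensor([[[0.03, 0.01, 0.01],
                             [-0.83, 0.39, -0.26],
                             [-0.67, -0.84, 0.21],
                             [0.46, 0.54, 0.81],
                             [0.66, -0.17, -0.91]]])

# Internal indices (default): species must be 0..N-1 in the builtin ordering.
model = torchani.models.ANI1ccx()
species = model.species_to_tensor('CHHHH').unsqueeze(0)
e_internal = model((species, coordinates)).energies

# Periodic-table indices: species are atomic numbers (C=6, H=1).
model_pt = torchani.models.ANI1ccx(periodic_table_index=True)
species_pt = torch.tensor([[6, 1, 1, 1, 1]])
e_pt = model_pt((species_pt, coordinates)).energies

print(e_internal, e_pt)  # the two modes should give matching energies
```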