models.py 12.2 KB
Newer Older
Gao, Xiang's avatar
Gao, Xiang committed
1
2
3
# -*- coding: utf-8 -*-
"""The ANI model zoo that stores public ANI models.

4
5
6
7
8
Currently the model zoo has two models: ANI-1x and ANI-1ccx. They are
created with the factory functions :func:`ANI1x` and :func:`ANI1ccx`,
which return instances of :class:`torch.nn.Module` subclasses.
To use the models, just call these functions and either
directly calculate energies or get an ASE calculator. For example:
Gao, Xiang's avatar
Gao, Xiang committed
9

Ignacio Pickering's avatar
Ignacio Pickering committed
10
11
.. code-block:: python

12
    ani1x = torchani.models.ANI1x()
Gao, Xiang's avatar
Gao, Xiang committed
13
    # compute energy using ANI-1x model ensemble
14
15
    _, energies = ani1x((species, coordinates))
    ani1x.ase()  # get ASE Calculator using this ensemble
Gao, Xiang's avatar
Gao, Xiang committed
16
    # convert atom species from string to long tensor
17
    ani1x.species_to_tensor('CHHHH')
Gao, Xiang's avatar
Gao, Xiang committed
18

19
    model0 = ani1x[0]  # get the first model in the ensemble
Gao, Xiang's avatar
Gao, Xiang committed
20
21
22
23
24
    # compute energy using the first model in the ANI-1x model ensemble
    _, energies = model0((species, coordinates))
    model0.ase()  # get ASE Calculator using this model
    # convert atom species from string to long tensor
    model0.species_to_tensor('CHHHH')
25
26
27

Note that the class BuiltinModels can be accessed but it is deprecated and
shouldn't be used anymore.
Gao, Xiang's avatar
Gao, Xiang committed
28
"""
29
import os
Gao, Xiang's avatar
Gao, Xiang committed
30
import torch
31
from torch import Tensor
32
from typing import Tuple, Optional
33
from pkg_resources import resource_filename
Gao, Xiang's avatar
Gao, Xiang committed
34
from . import neurochem
Ignacio Pickering's avatar
Ignacio Pickering committed
35
from .nn import SpeciesConverter, SpeciesEnergies
36
from .aev import AEVComputer
Gao, Xiang's avatar
Gao, Xiang committed
37
38


Ignacio Pickering's avatar
Ignacio Pickering committed
39
40
class BuiltinModel(torch.nn.Module):
    r"""Private template for the builtin ANI models.

    The module chains the submodules it is given. :meth:`forward` runs
    ``aev_computer -> neural_networks -> energy_shifter``, preceded by
    ``species_converter`` when ``periodic_table_index`` is true.

    Attributes:
        species_converter: Applied to the input of :meth:`forward` only when
            ``periodic_table_index`` is true.
        aev_computer: Computes AEVs from ``(species, coordinates)``.
        neural_networks: Maps the AEVs to energies.
        energy_shifter: Applied last, to the output of ``neural_networks``.
        species: Copy of ``consts.species``.
        periodic_table_index (bool): Whether :meth:`forward` runs
            ``species_converter`` on its input first.
        consts: The constants object the model was built from.
        sae_dict: Dictionary returned by ``neurochem.load_sae`` together with
            the energy shifter.
    """

    def __init__(self, species_converter, aev_computer, neural_networks, energy_shifter, species_to_tensor, consts, sae_dict, periodic_table_index):
        super().__init__()
        # The assignment order of the submodules is kept as-is, since it
        # determines the order in which torch registers them.
        self.species_converter = species_converter
        self.aev_computer = aev_computer
        self.neural_networks = neural_networks
        self.energy_shifter = energy_shifter
        # "Raw" converter; the public species_to_tensor() wraps it and moves
        # the result to the model device.
        self._species_to_tensor = species_to_tensor
        self.species = consts.species
        self.periodic_table_index = periodic_table_index

        # a bit useless maybe
        self.consts = consts
        self.sae_dict = sae_dict

    @classmethod
    def _from_neurochem_resources(cls, info_file_path, periodic_table_index=False, model_index=0):
        """Build a single model of an ensemble from packaged NeuroChem files.

        Arguments:
            info_file_path: Name of the info file, relative to the package
                ``resources`` directory.
            periodic_table_index (bool): Stored on the model, see the class
                docstring.
            model_index (int): Which member of the ensemble to load
                (model 0 by default).

        Returns:
            An instance of ``cls`` wrapping the selected network.

        Raises:
            ValueError: If ``model_index`` is negative or not smaller than
                the ensemble size declared in the info file.
        """
        consts, sae_file, ensemble_prefix, ensemble_size = cls._parse_neurochem_resources(info_file_path)
        # Negative indices are rejected too: they would otherwise be formatted
        # into a directory name such as "<prefix>-1" and only fail later with
        # an unrelated error.
        if not 0 <= model_index < ensemble_size:
            raise ValueError("The ensemble size is only {}, model {} can't be loaded".format(ensemble_size, model_index))

        species_converter = SpeciesConverter(consts.species)
        aev_computer = AEVComputer(**consts)
        energy_shifter, sae_dict = neurochem.load_sae(sae_file, return_dict=True)
        species_to_tensor = consts.species_to_tensor

        # Networks of member i live in "<ensemble_prefix><i>/networks".
        network_dir = os.path.join('{}{}'.format(ensemble_prefix, model_index), 'networks')
        neural_networks = neurochem.load_model(consts.species, network_dir)

        return cls(species_converter, aev_computer, neural_networks,
                   energy_shifter, species_to_tensor, consts, sae_dict, periodic_table_index)

    @staticmethod
    def _parse_neurochem_resources(info_file_path):
        """Read an info file and resolve the resources it points to.

        The first four lines of the info file are, in order: the constants
        file, the Self Atomic Energies file, the prefix of the per-model
        resource directories and the ensemble size. Paths are relative to the
        package ``resources`` directory.

        Arguments:
            info_file_path: Name of the info file, relative to the package
                ``resources`` directory.

        Returns:
            Tuple ``(consts, sae_file, ensemble_prefix, ensemble_size)`` where
            ``consts`` is a loaded ``neurochem.Constants``, ``sae_file`` and
            ``ensemble_prefix`` are resolved paths and ``ensemble_size`` is an
            ``int``.

        Raises:
            ValueError: If the info file has fewer than four lines or the
                ensemble size is not an integer.
        """
        def get_resource(file_path):
            # Resources are shipped in the parent package of this module.
            package_name = '.'.join(__name__.split('.')[:-1])
            return resource_filename(package_name, 'resources/' + file_path)

        info_file = get_resource(info_file_path)

        # Only the first four lines are meaningful; the file is closed before
        # any other resource is touched.
        with open(info_file) as f:
            lines = [x.strip() for x in f.readlines()][:4]
        if len(lines) < 4:
            raise ValueError("Expected 4 lines in info file {}, found only {}".format(info_file, len(lines)))

        # const_file: Path to the file with the builtin constants.
        # sae_file: Path to the file with the Self Atomic Energies.
        # ensemble_prefix: Prefix of the neurochem resource directories.
        const_file_path, sae_file_path, ensemble_prefix_path, ensemble_size = lines
        const_file = get_resource(const_file_path)
        sae_file = get_resource(sae_file_path)
        ensemble_prefix = get_resource(ensemble_prefix_path)
        ensemble_size = int(ensemble_size)
        consts = neurochem.Constants(const_file)
        return consts, sae_file, ensemble_prefix, ensemble_size

    def forward(self, species_coordinates: Tuple[Tensor, Tensor],
                cell: Optional[Tensor] = None,
                pbc: Optional[Tensor] = None) -> SpeciesEnergies:
        """Calculates predicted properties for minibatch of configurations

        Args:
            species_coordinates: minibatch of configurations, as a
                ``(species, coordinates)`` tuple
            cell: the cell used in PBC computation, set to None if PBC is not enabled
            pbc: the bool tensor indicating which direction PBC is enabled, set to None if PBC is not enabled

        Returns:
            species_energies: energies for the given configurations

        .. note:: The coordinates, and cell are in Angstrom, and the energies
            will be in Hartree.
        """
        if self.periodic_table_index:
            # Run the species converter first, see the class docstring.
            species_coordinates = self.species_converter(species_coordinates)
        species_aevs = self.aev_computer(species_coordinates, cell=cell, pbc=pbc)
        species_energies = self.neural_networks(species_aevs)
        return self.energy_shifter(species_energies)

    @torch.jit.export
    def _recast_long_buffers(self):
        """Cast the index buffers of the submodules back to ``torch.long``.

        Reassigns ``species_converter.conv_tensor`` and
        ``aev_computer.triu_index`` with ``dtype=torch.long``.
        NOTE(review): presumably meant to be called after a dtype cast of the
        whole model changed these buffers; confirm with callers.
        """
        self.species_converter.conv_tensor = self.species_converter.conv_tensor.to(dtype=torch.long)
        self.aev_computer.triu_index = self.aev_computer.triu_index.to(dtype=torch.long)

    def species_to_tensor(self, *args, **kwargs):
        """Convert species from strings to tensor.

        See also :meth:`torchani.neurochem.Constants.species_to_tensor`

        Arguments:
            species (:class:`str`): A string of chemical symbols

        Returns:
            tensor (:class:`torch.Tensor`): A 1D tensor of integers
        """
        # The only difference between this and the "raw" private version
        # _species_to_tensor is that this sends the final tensor to the model
        # device (taken from the AEV computer's ShfR tensor).
        return self._species_to_tensor(*args, **kwargs) \
            .to(self.aev_computer.ShfR.device)

    def ase(self, **kwargs):
        """Get an ASE Calculator using this ANI model

        Arguments:
            kwargs: ase.Calculator kwargs

        Returns:
            calculator (:class:`torchani.ase.Calculator`): A calculator to be
                used with ASE
        """
        # Imported here rather than at module level.
        # NOTE(review): presumably so that ASE is only needed when a
        # calculator is actually requested; confirm.
        from . import ase
        return ase.Calculator(self.species, self, **kwargs)
Gao, Xiang's avatar
Gao, Xiang committed
150

Gao, Xiang's avatar
Gao, Xiang committed
151

Ignacio Pickering's avatar
Ignacio Pickering committed
152
153
class BuiltinEnsemble(BuiltinModel):
    """Private template for the builtin ANI ensemble models.

    ANI ensemble models from the ANI model zoo are instances of this class.
    This class is a torch module that sequentially calculates
    AEVs, then energies from a torchani.Ensemble and then uses EnergyShifter
    to shift those energies. It is essentially a sequential

    'AEVComputer -> Ensemble -> EnergyShifter'

    (periodic_table_index=False), or a sequential

    'SpeciesConverter -> AEVComputer -> Ensemble -> EnergyShifter'

    (periodic_table_index=True).

    .. note::
        This class is for internal use only, avoid relying on anything from it
        except the public methods, always use ANI1x, ANI1ccx, etc to instance
        the models.
        Also, don't confuse this class with torchani.Ensemble, which is only a
        container for many ANIModel instances and shouldn't be used directly
        for calculations.

    Attributes:
        species_converter (:class:`torchani.nn.SpeciesConverter`): Converts periodic table index to
            internal indices. Always built, but only applied to the input if
            periodic_table_index is `True`.
        aev_computer (:class:`torchani.AEVComputer`): AEV computer with
            builtin constants
        energy_shifter (:class:`torchani.EnergyShifter`): Energy shifter with
            builtin Self Atomic Energies.
        periodic_table_index (bool): Whether to use element number in periodic table
            to index species. If set to `False`, then indices must be `0, 1, 2, ..., N - 1`
            where `N` is the number of parametrized species.
    """

    def __init__(self, species_converter, aev_computer, neural_networks,
                 energy_shifter, species_to_tensor, consts, sae_dict, periodic_table_index):
        # Same construction as the single-model template; only the kind of
        # ``neural_networks`` passed in differs.
        super().__init__(species_converter,
                         aev_computer,
                         neural_networks,
                         energy_shifter,
                         species_to_tensor,
                         consts,
                         sae_dict,
                         periodic_table_index)

    @classmethod
    def _from_neurochem_resources(cls, info_file_path, periodic_table_index=False):
        """Build the full ensemble model from packaged NeuroChem files.

        Arguments:
            info_file_path: Name of the info file, relative to the package
                ``resources`` directory.
            periodic_table_index (bool): See the class docstring.

        Returns:
            An instance of ``cls`` wrapping every network of the ensemble.
        """
        # Unlike the single-model loader, this loads all ``ensemble_size``
        # members of the ensemble.
        consts, sae_file, ensemble_prefix, ensemble_size = cls._parse_neurochem_resources(info_file_path)

        species_converter = SpeciesConverter(consts.species)
        aev_computer = AEVComputer(**consts)
        energy_shifter, sae_dict = neurochem.load_sae(sae_file, return_dict=True)
        species_to_tensor = consts.species_to_tensor
        neural_networks = neurochem.load_model_ensemble(consts.species,
                                                        ensemble_prefix, ensemble_size)

        return cls(species_converter, aev_computer, neural_networks,
                   energy_shifter, species_to_tensor, consts, sae_dict, periodic_table_index)

    def __getitem__(self, index):
        """Get a single 'AEVComputer -> ANIModel -> EnergyShifter' sequential model

        Indexing allows access to a single model inside the ensemble
        that can be used directly for calculations. The model consists
        of a sequence AEVComputer -> ANIModel -> EnergyShifter
        (preceded by SpeciesConverter if periodic_table_index is `True`)
        and can return an ase calculator and convert species to tensor.

        The returned model is built from the ensemble's own submodules, so
        it shares them with the ensemble instead of copying them.

        Args:
            index (:class:`int`): Index of the model

        Returns:
            ret: (:class:`torchani.models.BuiltinModel`) Model ready for
                calculations
        """
        ret = BuiltinModel(self.species_converter, self.aev_computer,
                           self.neural_networks[index], self.energy_shifter,
                           self._species_to_tensor, self.consts, self.sae_dict,
                           self.periodic_table_index)
        return ret

    def __len__(self):
        """Get the number of networks in the ensemble

        Returns:
            length (:class:`int`): Number of networks in the ensemble
        """
        return len(self.neural_networks)
Gao, Xiang's avatar
Gao, Xiang committed
244

Ignacio Pickering's avatar
Ignacio Pickering committed
245

246
def ANI1x(periodic_table_index=False, model_index=None):
    """Build the ANI-1x model (see `ani-1x_8x on GitHub`_ and `Active Learning Paper`_).

    ANI-1x is an ensemble of 8 networks trained with active learning on
    the ANI-1x dataset; the target level of theory is wB97X/6-31G(d).
    It predicts energies for the elements H, C, N and O only and shouldn't
    be used with other atom types.

    Arguments:
        periodic_table_index (bool): Forwarded to the model that is built.
        model_index: ``None`` (the default) builds the whole ensemble;
            otherwise only the ensemble member with this index is built.

    Returns:
        The whole ensemble model, or a single model of it when
        ``model_index`` is given.

    .. _ani-1x_8x on GitHub:
        https://github.com/isayev/ASE_ANI/tree/master/ani_models/ani-1x_8x

    .. _Active Learning Paper:
        https://aip.scitation.org/doi/abs/10.1063/1.5023802
    """
    resource_info = 'ani-1x_8x.info'
    # An explicit index selects a single member of the ensemble.
    if model_index is not None:
        return BuiltinModel._from_neurochem_resources(resource_info, periodic_table_index, model_index)
    return BuiltinEnsemble._from_neurochem_resources(resource_info, periodic_table_index)
264

Gao, Xiang's avatar
Gao, Xiang committed
265

266
def ANI1ccx(periodic_table_index=False, model_index=None):
    """Build the ANI-1ccx model (see `ani-1ccx_8x on GitHub`_ and `Transfer Learning Paper`_).

    ANI-1ccx is an ensemble of 8 networks trained on the ANI-1ccx dataset
    using transfer learning. The target accuracy is CCSD(T)*/CBS
    (CCSD(T) using the DLPNO-CCSD(T) method). It predicts energies for the
    elements H, C, N and O only and shouldn't be used with other atom
    types.

    Arguments:
        periodic_table_index (bool): Forwarded to the model that is built.
        model_index: ``None`` (the default) builds the whole ensemble;
            otherwise only the ensemble member with this index is built.

    Returns:
        The whole ensemble model, or a single model of it when
        ``model_index`` is given.

    .. _ani-1ccx_8x on GitHub:
        https://github.com/isayev/ASE_ANI/tree/master/ani_models/ani-1ccx_8x

    .. _Transfer Learning Paper:
        https://doi.org/10.26434/chemrxiv.6744440.v1
    """
    resource_info = 'ani-1ccx_8x.info'
    # An explicit index selects a single member of the ensemble.
    if model_index is not None:
        return BuiltinModel._from_neurochem_resources(resource_info, periodic_table_index, model_index)
    return BuiltinEnsemble._from_neurochem_resources(resource_info, periodic_table_index)