Cherry-pick roitberg-group#15 (#541)

* Move parsing resource code to neurochem/ (#15) * Move parsing resource code to neurochem/ * fix bug * fix * fix Co-authored-by: Ignacio Pickering <ign.pickering@gmail.com>

Cherry-pick roitberg-group#15 (#541)
* Move parsing resource code to neurochem/ (#15) * Move parsing resource code to neurochem/ * fix bug * fix * fix Co-authored-by: Ignacio Pickering <ign.pickering@gmail.com>
b270d59d · Gao, Xiang · GitHub · ec35c7da · b270d59d · b270d59d
Unverified Commit b270d59d authored Nov 13, 2020 by Gao, Xiang Committed by GitHub Nov 13, 2020
Showing with 82 additions and 53 deletions

torchani/models.py torchani/models.py +7 -52

torchani/neurochem/__init__.py torchani/neurochem/__init__.py +2 -1

torchani/neurochem/parse_resources.py torchani/neurochem/parse_resources.py +73 -0

No files found.
--- a/torchani/models.py
+++ b/torchani/models.py
@@ -29,13 +29,8 @@ directly calculate energies or get an ASE calculator. For example:
    model0.species_to_tensor(['C', 'H', 'H', 'H', 'H'])
 """
 import os
-import io
-import requests
-import zipfile
 import torch
-from distutils import dir_util
 from torch import Tensor
-from . import neurochem
 from typing import Tuple, Optional, NamedTuple
 from .nn import SpeciesConverter, SpeciesEnergies
 from .aev import AEVComputer
@@ -66,11 +61,14 @@ class BuiltinModel(torch.nn.Module):

    @classmethod
    def _from_neurochem_resources(cls, info_file_path, periodic_table_index=False, model_index=0):
+        from . import neurochem  # noqa
+
        # this is used to load only 1 model (by default model 0)
-        consts, sae_file, ensemble_prefix, ensemble_size = cls._parse_neurochem_resources(info_file_path)
+        const_file, sae_file, ensemble_prefix, ensemble_size = neurochem.parse_neurochem_resources(info_file_path)
        if (model_index >= ensemble_size):
            raise ValueError("The ensemble size is only {}, model {} can't be loaded".format(ensemble_size, model_index))

+        consts = neurochem.Constants(const_file)
        species_converter = SpeciesConverter(consts.species)
        aev_computer = AEVComputer(**consts)
        energy_shifter, sae_dict = neurochem.load_sae(sae_file, return_dict=True)
@@ -82,51 +80,6 @@ class BuiltinModel(torch.nn.Module):
        return cls(species_converter, aev_computer, neural_networks,
                   energy_shifter, species_to_tensor, consts, sae_dict, periodic_table_index)

-    @staticmethod
-    def _parse_neurochem_resources(info_file_path):
-        def get_resource(resource_path, file_path):
-            return os.path.join(resource_path, file_path)
-
-        resource_path = os.path.join(os.path.dirname(__file__), 'resources/')
-        local_dir = os.path.expanduser('~/.local/torchani/')
-        repo_name = "ani-model-zoo"
-        tag_name = "ani-2x"
-        extracted_name = '{}-{}'.format(repo_name, tag_name)
-        url = "https://github.com/aiqm/{}/archive/{}.zip".format(repo_name, tag_name)
-
-        if os.stat(get_resource(resource_path, info_file_path)).st_size == 0:
-            if not os.path.isfile(get_resource(local_dir, info_file_path)):
-                print('Downloading ANI model parameters ...')
-                resource_res = requests.get(url)
-                resource_zip = zipfile.ZipFile(io.BytesIO(resource_res.content))
-                try:
-                    resource_zip.extractall(resource_path)
-                except PermissionError:
-                    resource_zip.extractall(local_dir)
-                    resource_path = local_dir
-
-                source = os.path.join(resource_path, extracted_name, "resources")
-                dir_util.copy_tree(source, resource_path)
-                dir_util.remove_tree(os.path.join(resource_path, extracted_name))
-
-            else:
-                resource_path = local_dir
-
-        info_file = get_resource(resource_path, info_file_path)
-
-        with open(info_file) as f:
-            # const_file: Path to the file with the builtin constants.
-            # sae_file: Path to the file with the Self Atomic Energies.
-            # ensemble_prefix: Prefix of the neurochem resource directories.
-            lines = [x.strip() for x in f.readlines()][:4]
-            const_file_path, sae_file_path, ensemble_prefix_path, ensemble_size = lines
-            const_file = get_resource(resource_path, const_file_path)
-            sae_file = get_resource(resource_path, sae_file_path)
-            ensemble_prefix = get_resource(resource_path, ensemble_prefix_path)
-            ensemble_size = int(ensemble_size)
-            consts = neurochem.Constants(const_file)
-        return consts, sae_file, ensemble_prefix, ensemble_size
-
    def forward(self, species_coordinates: Tuple[Tensor, Tensor],
                cell: Optional[Tensor] = None,
                pbc: Optional[Tensor] = None) -> SpeciesEnergies:
@@ -301,9 +254,11 @@ class BuiltinEnsemble(BuiltinModel):

    @classmethod
    def _from_neurochem_resources(cls, info_file_path, periodic_table_index=False):
+        from . import neurochem  # noqa
        # this is used to load only 1 model (by default model 0)
-        consts, sae_file, ensemble_prefix, ensemble_size = cls._parse_neurochem_resources(info_file_path)
+        const_file, sae_file, ensemble_prefix, ensemble_size = neurochem.parse_neurochem_resources(info_file_path)

+        consts = neurochem.Constants(const_file)
        species_converter = SpeciesConverter(consts.species)
        aev_computer = AEVComputer(**consts)
        energy_shifter, sae_dict = neurochem.load_sae(sae_file, return_dict=True)

--- a/torchani/neurochem/__init__.py
+++ b/torchani/neurochem/__init__.py
@@ -14,6 +14,7 @@ import sys
 from ..nn import ANIModel, Ensemble, Gaussian, Sequential
 from ..utils import EnergyShifter, ChemicalSymbolsToInts
 from ..aev import AEVComputer
+from .parse_resources import parse_neurochem_resources
 from torch.optim import AdamW
 from collections import OrderedDict
 from torchani.units import hartree2kcalmol
@@ -644,4 +645,4 @@ if sys.version_info[0] > 2:
                    self.tensorboard.add_scalar('time_vs_epoch', elapsed, AdamW_scheduler.last_epoch)


-__all__ = ['Constants', 'load_sae', 'load_model', 'load_model_ensemble', 'Trainer']
+__all__ = ['Constants', 'load_sae', 'load_model', 'load_model_ensemble', 'Trainer', 'parse_neurochem_resources']
--- a/torchani/neurochem/parse_resources.py
+++ b/torchani/neurochem/parse_resources.py
+import os
+import io
+import requests
+import zipfile
+from distutils import dir_util
+from pathlib import Path
+
+
+__all__ = ['parse_neurochem_resources']
+
+
+# Builtin info files for which parse_neurochem_resources is allowed to
+# download the model parameters when the file is not found on disk; a
+# missing info file that is not listed here results in a ValueError.
+SUPPORTED_INFO_FILES = ['ani-1ccx_8x.info', 'ani-1x_8x.info', 'ani-2x_8x.info']
+
+
+def parse_neurochem_resources(info_file_path):
+    """Locate a NeuroChem info file and return the resources it lists.
+
+    The info file is looked up first in the ``resources/`` directory of the
+    torchani package and then in ``~/.local/torchani/``. If it is found in
+    neither place and it is one of ``SUPPORTED_INFO_FILES``, the model
+    parameters are downloaded from the ``ani-model-zoo`` GitHub repository
+    and unpacked into the package resource directory, or into
+    ``~/.local/torchani/`` if extraction there raises ``PermissionError``.
+
+    Args:
+        info_file_path: Path of the info file, relative to the resource
+            directory.
+
+    Returns:
+        The tuple ``(const_file, sae_file, ensemble_prefix, ensemble_size)``
+        produced by ``_get_resources``.
+
+    Raises:
+        ValueError: If the info file is not found locally and is not one of
+            the supported builtin info files.
+        requests.HTTPError: If the server answers the download request with
+            an error status.
+    """
+    torchani_dir = Path(__file__).resolve().parent.parent.as_posix()
+    resource_path = os.path.join(torchani_dir, 'resources/')
+    local_dir = os.path.expanduser('~/.local/torchani/')
+
+    resource_info = os.path.join(resource_path, info_file_path)
+
+    if os.path.isfile(resource_info) and os.stat(resource_info).st_size > 0:
+        # No action needed if a non-empty info file can be located in the
+        # default path (an empty file is treated the same as a missing one)
+        pass
+
+    elif os.path.isfile(os.path.join(local_dir, info_file_path)):
+        # if the info file is not located in the default path, ~/.local/torchani
+        # is tried as an alternative
+        resource_path = local_dir
+
+    else:
+        # if all else fails files are downloaded and extracted ONLY if a
+        # correct info file path is passed, otherwise an error is raised
+        if info_file_path in SUPPORTED_INFO_FILES:
+            repo_name = "ani-model-zoo"
+            tag_name = "ani-2x"
+            extracted_name = '{}-{}'.format(repo_name, tag_name)
+            url = "https://github.com/aiqm/{}/archive/{}.zip".format(repo_name, tag_name)
+
+            print('Downloading ANI model parameters ...')
+            resource_res = requests.get(url)
+            # Fail with the real HTTP error instead of letting an error page
+            # be handed to ZipFile and reported as a corrupt archive
+            resource_res.raise_for_status()
+            resource_zip = zipfile.ZipFile(io.BytesIO(resource_res.content))
+            try:
+                resource_zip.extractall(resource_path)
+            except PermissionError:
+                # the package directory is not writable, fall back to the
+                # per-user directory
+                resource_zip.extractall(local_dir)
+                resource_path = local_dir
+            # the archive unpacks into <repo>-<tag>/resources; move its
+            # contents up into the resource directory and drop the wrapper
+            source = os.path.join(resource_path, extracted_name, "resources")
+            dir_util.copy_tree(source, resource_path)
+            dir_util.remove_tree(os.path.join(resource_path, extracted_name))
+
+        else:
+            raise ValueError('File {0} could not be found either in {1} or {2}\n'
+                             'It is also not one of the supported builtin info files:'
+                             ' {3}'.format(info_file_path, resource_path, local_dir,
+                                           SUPPORTED_INFO_FILES))
+
+    return _get_resources(resource_path, info_file_path)
+
+
+def _get_resources(resource_path, info_file):
+    """Read an info file and resolve the resource paths it names.
+
+    The first four lines of the info file give, in order, the constants
+    file, the Self Atomic Energies file, the prefix of the neurochem
+    resource directories and the ensemble size. The first three are joined
+    onto ``resource_path``; the last one is converted to ``int``.
+
+    Returns:
+        The tuple ``(const_file, sae_file, ensemble_prefix, ensemble_size)``.
+    """
+    info_path = os.path.join(resource_path, info_file)
+    with open(info_path) as handle:
+        # only the first four (whitespace-stripped) lines are used
+        header = [raw.strip() for raw in handle][:4]
+
+    const_name, sae_name, prefix_name, size_text = header
+    const_file, sae_file, ensemble_prefix = (
+        os.path.join(resource_path, name)
+        for name in (const_name, sae_name, prefix_name)
+    )
+    return const_file, sae_file, ensemble_prefix, int(size_text)