Unverified commit b270d59d, authored by Gao, Xiang and committed by GitHub
Browse files

Cherry-pick roitberg-group#15 (#541)



* Move parsing resource code to neurochem/ (#15)

* Move parsing resource code to neurochem/

* fix bug

* fix

* fix
Co-authored-by: Ignacio Pickering <ign.pickering@gmail.com>
parent ec35c7da
...@@ -29,13 +29,8 @@ directly calculate energies or get an ASE calculator. For example: ...@@ -29,13 +29,8 @@ directly calculate energies or get an ASE calculator. For example:
model0.species_to_tensor(['C', 'H', 'H', 'H', 'H']) model0.species_to_tensor(['C', 'H', 'H', 'H', 'H'])
""" """
import os import os
import io
import requests
import zipfile
import torch import torch
from distutils import dir_util
from torch import Tensor from torch import Tensor
from . import neurochem
from typing import Tuple, Optional, NamedTuple from typing import Tuple, Optional, NamedTuple
from .nn import SpeciesConverter, SpeciesEnergies from .nn import SpeciesConverter, SpeciesEnergies
from .aev import AEVComputer from .aev import AEVComputer
...@@ -66,11 +61,14 @@ class BuiltinModel(torch.nn.Module): ...@@ -66,11 +61,14 @@ class BuiltinModel(torch.nn.Module):
@classmethod @classmethod
def _from_neurochem_resources(cls, info_file_path, periodic_table_index=False, model_index=0): def _from_neurochem_resources(cls, info_file_path, periodic_table_index=False, model_index=0):
from . import neurochem # noqa
# this is used to load only 1 model (by default model 0) # this is used to load only 1 model (by default model 0)
consts, sae_file, ensemble_prefix, ensemble_size = cls._parse_neurochem_resources(info_file_path) const_file, sae_file, ensemble_prefix, ensemble_size = neurochem.parse_neurochem_resources(info_file_path)
if (model_index >= ensemble_size): if (model_index >= ensemble_size):
raise ValueError("The ensemble size is only {}, model {} can't be loaded".format(ensemble_size, model_index)) raise ValueError("The ensemble size is only {}, model {} can't be loaded".format(ensemble_size, model_index))
consts = neurochem.Constants(const_file)
species_converter = SpeciesConverter(consts.species) species_converter = SpeciesConverter(consts.species)
aev_computer = AEVComputer(**consts) aev_computer = AEVComputer(**consts)
energy_shifter, sae_dict = neurochem.load_sae(sae_file, return_dict=True) energy_shifter, sae_dict = neurochem.load_sae(sae_file, return_dict=True)
...@@ -82,51 +80,6 @@ class BuiltinModel(torch.nn.Module): ...@@ -82,51 +80,6 @@ class BuiltinModel(torch.nn.Module):
return cls(species_converter, aev_computer, neural_networks, return cls(species_converter, aev_computer, neural_networks,
energy_shifter, species_to_tensor, consts, sae_dict, periodic_table_index) energy_shifter, species_to_tensor, consts, sae_dict, periodic_table_index)
@staticmethod
def _parse_neurochem_resources(info_file_path):
    """Resolve the NeuroChem resources described by an info file.

    The info file is looked up inside the ``resources/`` directory that
    sits next to this module. If that file is empty, the copy under
    ``~/.local/torchani/`` is used instead, and if that one does not exist
    the ``ani-2x`` archive of the ``ani-model-zoo`` repository is
    downloaded and unpacked first.

    Arguments:
        info_file_path: Path of the info file, relative to the resource
            directory.

    Returns:
        A tuple ``(consts, sae_file, ensemble_prefix, ensemble_size)``
        where ``consts`` is ``neurochem.Constants`` built from the
        constants file named in the info file, ``sae_file`` and
        ``ensemble_prefix`` are paths inside the resource directory and
        ``ensemble_size`` is an ``int``.

    Raises:
        requests.HTTPError: If the archive download returns an error
            status.
    """
    def get_resource(resource_path, file_path):
        return os.path.join(resource_path, file_path)

    resource_path = os.path.join(os.path.dirname(__file__), 'resources/')
    local_dir = os.path.expanduser('~/.local/torchani/')
    repo_name = "ani-model-zoo"
    tag_name = "ani-2x"
    extracted_name = '{}-{}'.format(repo_name, tag_name)
    url = "https://github.com/aiqm/{}/archive/{}.zip".format(repo_name, tag_name)

    if os.stat(get_resource(resource_path, info_file_path)).st_size == 0:
        if not os.path.isfile(get_resource(local_dir, info_file_path)):
            print('Downloading ANI model parameters ...')
            resource_res = requests.get(url)
            # Fail here with a clear HTTP error instead of handing an
            # error page to zipfile below.
            resource_res.raise_for_status()
            with zipfile.ZipFile(io.BytesIO(resource_res.content)) as resource_zip:
                try:
                    resource_zip.extractall(resource_path)
                except PermissionError:
                    # The package directory is not writable: fall back
                    # to the per-user directory.
                    resource_zip.extractall(local_dir)
                    resource_path = local_dir
            # The archive unpacks into "<repo>-<tag>/resources"; move its
            # contents up and drop the now redundant extracted tree.
            source = os.path.join(resource_path, extracted_name, "resources")
            dir_util.copy_tree(source, resource_path)
            dir_util.remove_tree(os.path.join(resource_path, extracted_name))
        else:
            resource_path = local_dir

    info_file = get_resource(resource_path, info_file_path)
    with open(info_file) as f:
        # const_file: Path to the file with the builtin constants.
        # sae_file: Path to the file with the Self Atomic Energies.
        # ensemble_prefix: Prefix of the neurochem resource directories.
        lines = [x.strip() for x in f.readlines()][:4]
        const_file_path, sae_file_path, ensemble_prefix_path, ensemble_size = lines
        const_file = get_resource(resource_path, const_file_path)
        sae_file = get_resource(resource_path, sae_file_path)
        ensemble_prefix = get_resource(resource_path, ensemble_prefix_path)
        ensemble_size = int(ensemble_size)
        consts = neurochem.Constants(const_file)
    return consts, sae_file, ensemble_prefix, ensemble_size
def forward(self, species_coordinates: Tuple[Tensor, Tensor], def forward(self, species_coordinates: Tuple[Tensor, Tensor],
cell: Optional[Tensor] = None, cell: Optional[Tensor] = None,
pbc: Optional[Tensor] = None) -> SpeciesEnergies: pbc: Optional[Tensor] = None) -> SpeciesEnergies:
...@@ -301,9 +254,11 @@ class BuiltinEnsemble(BuiltinModel): ...@@ -301,9 +254,11 @@ class BuiltinEnsemble(BuiltinModel):
@classmethod @classmethod
def _from_neurochem_resources(cls, info_file_path, periodic_table_index=False): def _from_neurochem_resources(cls, info_file_path, periodic_table_index=False):
from . import neurochem # noqa
# this is used to load only 1 model (by default model 0) # this is used to load only 1 model (by default model 0)
consts, sae_file, ensemble_prefix, ensemble_size = cls._parse_neurochem_resources(info_file_path) const_file, sae_file, ensemble_prefix, ensemble_size = neurochem.parse_neurochem_resources(info_file_path)
consts = neurochem.Constants(const_file)
species_converter = SpeciesConverter(consts.species) species_converter = SpeciesConverter(consts.species)
aev_computer = AEVComputer(**consts) aev_computer = AEVComputer(**consts)
energy_shifter, sae_dict = neurochem.load_sae(sae_file, return_dict=True) energy_shifter, sae_dict = neurochem.load_sae(sae_file, return_dict=True)
......
...@@ -14,6 +14,7 @@ import sys ...@@ -14,6 +14,7 @@ import sys
from ..nn import ANIModel, Ensemble, Gaussian, Sequential from ..nn import ANIModel, Ensemble, Gaussian, Sequential
from ..utils import EnergyShifter, ChemicalSymbolsToInts from ..utils import EnergyShifter, ChemicalSymbolsToInts
from ..aev import AEVComputer from ..aev import AEVComputer
from .parse_resources import parse_neurochem_resources
from torch.optim import AdamW from torch.optim import AdamW
from collections import OrderedDict from collections import OrderedDict
from torchani.units import hartree2kcalmol from torchani.units import hartree2kcalmol
...@@ -644,4 +645,4 @@ if sys.version_info[0] > 2: ...@@ -644,4 +645,4 @@ if sys.version_info[0] > 2:
self.tensorboard.add_scalar('time_vs_epoch', elapsed, AdamW_scheduler.last_epoch) self.tensorboard.add_scalar('time_vs_epoch', elapsed, AdamW_scheduler.last_epoch)
__all__ = ['Constants', 'load_sae', 'load_model', 'load_model_ensemble', 'Trainer'] __all__ = ['Constants', 'load_sae', 'load_model', 'load_model_ensemble', 'Trainer', 'parse_neurochem_resources']
import os
import io
import requests
import zipfile
from distutils import dir_util
from pathlib import Path
__all__ = ['parse_neurochem_resources']
SUPPORTED_INFO_FILES = ['ani-1ccx_8x.info', 'ani-1x_8x.info', 'ani-2x_8x.info']


def parse_neurochem_resources(info_file_path):
    """Locate the NeuroChem resources described by an info file.

    The info file is searched for, in order, in:

    1. the ``resources/`` directory of the package (two levels above this
       module), provided the file there is not empty;
    2. ``~/.local/torchani/``.

    If it is found in neither place and its name is one of
    ``SUPPORTED_INFO_FILES``, the ``ani-2x`` archive of the
    ``ani-model-zoo`` repository is downloaded and unpacked into the
    package resource directory, or into ``~/.local/torchani/`` when the
    former is not writable.

    Arguments:
        info_file_path: Path of the info file, relative to the resource
            directory.

    Returns:
        The tuple ``(const_file, sae_file, ensemble_prefix, ensemble_size)``
        produced by ``_get_resources`` for the directory that was selected.

    Raises:
        ValueError: If the info file cannot be found locally and is not
            one of ``SUPPORTED_INFO_FILES``.
        requests.HTTPError: If the archive download returns an error
            status.
    """
    torchani_dir = Path(__file__).resolve().parent.parent.as_posix()
    resource_path = os.path.join(torchani_dir, 'resources/')
    local_dir = os.path.expanduser('~/.local/torchani/')

    resource_info = os.path.join(resource_path, info_file_path)
    if os.path.isfile(resource_info) and os.stat(resource_info).st_size > 0:
        # The info file is available (and not empty) in the default path.
        return _get_resources(resource_path, info_file_path)

    if os.path.isfile(os.path.join(local_dir, info_file_path)):
        # Not in the default path: ~/.local/torchani is the alternative.
        return _get_resources(local_dir, info_file_path)

    # If all else fails, files are downloaded and extracted ONLY if a
    # supported info file name was passed; otherwise an error is raised.
    if info_file_path not in SUPPORTED_INFO_FILES:
        raise ValueError('File {0} could not be found either in {1} or {2}\n'
                         'It is also not one of the supported builtin info files:'
                         ' {3}'.format(info_file_path, resource_path, local_dir,
                                       SUPPORTED_INFO_FILES))

    repo_name = "ani-model-zoo"
    tag_name = "ani-2x"
    extracted_name = '{}-{}'.format(repo_name, tag_name)
    url = "https://github.com/aiqm/{}/archive/{}.zip".format(repo_name, tag_name)

    print('Downloading ANI model parameters ...')
    resource_res = requests.get(url)
    # Fail here with a clear HTTP error instead of handing an error page to
    # zipfile below.
    resource_res.raise_for_status()
    with zipfile.ZipFile(io.BytesIO(resource_res.content)) as resource_zip:
        try:
            resource_zip.extractall(resource_path)
        except PermissionError:
            # The package directory is not writable: fall back to the
            # per-user directory.
            resource_zip.extractall(local_dir)
            resource_path = local_dir

    # The archive unpacks into "<repo>-<tag>/resources"; move its contents up
    # and drop the now redundant extracted tree.
    source = os.path.join(resource_path, extracted_name, "resources")
    dir_util.copy_tree(source, resource_path)
    dir_util.remove_tree(os.path.join(resource_path, extracted_name))

    return _get_resources(resource_path, info_file_path)
def _get_resources(resource_path, info_file):
    """Read an info file and resolve the resources it names.

    Only the first four lines of the info file are used; surrounding
    whitespace on each of them is ignored. In order they hold:

    * const_file: Path to the file with the builtin constants.
    * sae_file: Path to the file with the Self Atomic Energies.
    * ensemble_prefix: Prefix of the neurochem resource directories.
    * ensemble_size: Integer size of the ensemble.

    Arguments:
        resource_path: Directory that contains the info file; the three
            paths read from the file are joined onto it.
        info_file: Path of the info file, relative to ``resource_path``.

    Returns:
        The tuple ``(const_file, sae_file, ensemble_prefix, ensemble_size)``
        with the three paths joined onto ``resource_path`` and
        ``ensemble_size`` converted to ``int``.

    Raises:
        ValueError: If the info file has fewer than four lines or its
            fourth line is not an integer.
    """
    info_path = os.path.join(resource_path, info_file)
    with open(info_path) as f:
        lines = [x.strip() for x in f.readlines()][:4]

    if len(lines) != 4:
        raise ValueError('Info file {} is malformed: expected at least 4 lines, '
                         'found {}'.format(info_path, len(lines)))

    const_file_path, sae_file_path, ensemble_prefix_path, ensemble_size = lines
    try:
        ensemble_size = int(ensemble_size)
    except ValueError as err:
        raise ValueError('Info file {} is malformed: ensemble size {!r} is not '
                         'an integer'.format(info_path, ensemble_size)) from err

    const_file = os.path.join(resource_path, const_file_path)
    sae_file = os.path.join(resource_path, sae_file_path)
    ensemble_prefix = os.path.join(resource_path, ensemble_prefix_path)
    return const_file, sae_file, ensemble_prefix, ensemble_size
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment