Unverified commit b270d59d, authored by Gao, Xiang and committed by GitHub
Browse files

Cherry-pick roitberg-group#15 (#541)



* Move parsing resource code to neurochem/ (#15)

* Move parsing resource code to neurochem/

* fix bug

* fix

* fix
Co-authored-by: Ignacio Pickering <ign.pickering@gmail.com>
parent ec35c7da
......@@ -29,13 +29,8 @@ directly calculate energies or get an ASE calculator. For example:
model0.species_to_tensor(['C', 'H', 'H', 'H', 'H'])
"""
import os
import io
import requests
import zipfile
import torch
from distutils import dir_util
from torch import Tensor
from . import neurochem
from typing import Tuple, Optional, NamedTuple
from .nn import SpeciesConverter, SpeciesEnergies
from .aev import AEVComputer
......@@ -66,11 +61,14 @@ class BuiltinModel(torch.nn.Module):
@classmethod
def _from_neurochem_resources(cls, info_file_path, periodic_table_index=False, model_index=0):
from . import neurochem # noqa
# this is used to load only 1 model (by default model 0)
consts, sae_file, ensemble_prefix, ensemble_size = cls._parse_neurochem_resources(info_file_path)
const_file, sae_file, ensemble_prefix, ensemble_size = neurochem.parse_neurochem_resources(info_file_path)
if (model_index >= ensemble_size):
raise ValueError("The ensemble size is only {}, model {} can't be loaded".format(ensemble_size, model_index))
consts = neurochem.Constants(const_file)
species_converter = SpeciesConverter(consts.species)
aev_computer = AEVComputer(**consts)
energy_shifter, sae_dict = neurochem.load_sae(sae_file, return_dict=True)
......@@ -82,51 +80,6 @@ class BuiltinModel(torch.nn.Module):
return cls(species_converter, aev_computer, neural_networks,
energy_shifter, species_to_tensor, consts, sae_dict, periodic_table_index)
# Legacy resource parser: locates the NeuroChem resource files named by an
# info file, downloading the model archive first when needed, and returns
# (consts, sae_file, ensemble_prefix, ensemble_size).
# NOTE(review): indentation is not preserved in this view; the comments below
# assume the copy/remove steps belong to the download branch -- confirm
# against the original file.
@staticmethod
def _parse_neurochem_resources(info_file_path):
# Helper that joins a resource directory with a path relative to it.
def get_resource(resource_path, file_path):
return os.path.join(resource_path, file_path)
# 'resources/' directory located next to this module.
resource_path = os.path.join(os.path.dirname(__file__), 'resources/')
# Per-user directory used when the package directory cannot be written to.
local_dir = os.path.expanduser('~/.local/torchani/')
repo_name = "ani-model-zoo"
tag_name = "ani-2x"
# Name of the top-level folder expected inside the downloaded archive.
extracted_name = '{}-{}'.format(repo_name, tag_name)
url = "https://github.com/aiqm/{}/archive/{}.zip".format(repo_name, tag_name)
# A zero-size info file in the package directory triggers the lookup in
# local_dir and, failing that, the download.
# NOTE: os.stat raises if the info file does not exist at all in resource_path.
if os.stat(get_resource(resource_path, info_file_path)).st_size == 0:
if not os.path.isfile(get_resource(local_dir, info_file_path)):
print('Downloading ANI model parameters ...')
resource_res = requests.get(url)
# The archive is unpacked straight from memory.
resource_zip = zipfile.ZipFile(io.BytesIO(resource_res.content))
try:
resource_zip.extractall(resource_path)
except PermissionError:
# Package directory is not writable: extract into local_dir instead and
# use it as the resource directory from here on.
resource_zip.extractall(local_dir)
resource_path = local_dir
# Copy the archive's 'resources' folder into resource_path, then delete the
# extracted archive folder.
source = os.path.join(resource_path, extracted_name, "resources")
dir_util.copy_tree(source, resource_path)
dir_util.remove_tree(os.path.join(resource_path, extracted_name))
else:
# The info file is already present in local_dir; read everything from there.
resource_path = local_dir
info_file = get_resource(resource_path, info_file_path)
with open(info_file) as f:
# const_file: Path to the file with the builtin constants.
# sae_file: Path to the file with the Self Atomic Energies.
# ensemble_prefix: Prefix of the neurochem resource directories.
# Only the first four (stripped) lines of the info file are used.
lines = [x.strip() for x in f.readlines()][:4]
const_file_path, sae_file_path, ensemble_prefix_path, ensemble_size = lines
# Paths in the info file are taken relative to the chosen resource directory.
const_file = get_resource(resource_path, const_file_path)
sae_file = get_resource(resource_path, sae_file_path)
ensemble_prefix = get_resource(resource_path, ensemble_prefix_path)
ensemble_size = int(ensemble_size)
# Unlike the paths, the constants are returned as a parsed Constants object.
consts = neurochem.Constants(const_file)
return consts, sae_file, ensemble_prefix, ensemble_size
def forward(self, species_coordinates: Tuple[Tensor, Tensor],
cell: Optional[Tensor] = None,
pbc: Optional[Tensor] = None) -> SpeciesEnergies:
......@@ -301,9 +254,11 @@ class BuiltinEnsemble(BuiltinModel):
@classmethod
def _from_neurochem_resources(cls, info_file_path, periodic_table_index=False):
from . import neurochem # noqa
# this is used to load only 1 model (by default model 0)
consts, sae_file, ensemble_prefix, ensemble_size = cls._parse_neurochem_resources(info_file_path)
const_file, sae_file, ensemble_prefix, ensemble_size = neurochem.parse_neurochem_resources(info_file_path)
consts = neurochem.Constants(const_file)
species_converter = SpeciesConverter(consts.species)
aev_computer = AEVComputer(**consts)
energy_shifter, sae_dict = neurochem.load_sae(sae_file, return_dict=True)
......
......@@ -14,6 +14,7 @@ import sys
from ..nn import ANIModel, Ensemble, Gaussian, Sequential
from ..utils import EnergyShifter, ChemicalSymbolsToInts
from ..aev import AEVComputer
from .parse_resources import parse_neurochem_resources
from torch.optim import AdamW
from collections import OrderedDict
from torchani.units import hartree2kcalmol
......@@ -644,4 +645,4 @@ if sys.version_info[0] > 2:
self.tensorboard.add_scalar('time_vs_epoch', elapsed, AdamW_scheduler.last_epoch)
__all__ = ['Constants', 'load_sae', 'load_model', 'load_model_ensemble', 'Trainer']
__all__ = ['Constants', 'load_sae', 'load_model', 'load_model_ensemble', 'Trainer', 'parse_neurochem_resources']
import os
import io
import requests
import zipfile
from distutils import dir_util
from pathlib import Path
__all__ = ['parse_neurochem_resources']
# Info files for which the model archive may be downloaded automatically.
SUPPORTED_INFO_FILES = ['ani-1ccx_8x.info', 'ani-1x_8x.info', 'ani-2x_8x.info']


def parse_neurochem_resources(info_file_path):
    """Locate the NeuroChem resources described by an info file.

    The info file is looked up, in order, in:

    1. the ``resources/`` directory two levels above this module (the file
       must exist and be non-empty);
    2. ``~/.local/torchani/``;
    3. a freshly downloaded copy of the ``ani-model-zoo`` archive, but only
       when ``info_file_path`` is one of ``SUPPORTED_INFO_FILES``.

    Arguments:
        info_file_path: name of the info file, relative to the resource
            directory.

    Returns:
        The result of ``_get_resources`` for the directory that was selected:
        ``(const_file, sae_file, ensemble_prefix, ensemble_size)``.

    Raises:
        ValueError: if the info file is found in neither directory and is
            not one of the supported builtin info files.
        requests.HTTPError: if the download of the archive fails.
    """
    torchani_dir = Path(__file__).resolve().parent.parent.as_posix()
    resource_path = os.path.join(torchani_dir, 'resources/')
    local_dir = os.path.expanduser('~/.local/torchani/')

    resource_info = os.path.join(resource_path, info_file_path)

    if os.path.isfile(resource_info) and os.stat(resource_info).st_size > 0:
        # No action needed if the info file can be located in the default path
        pass
    elif os.path.isfile(os.path.join(local_dir, info_file_path)):
        # if the info file is not located in the default path, ~/.local/torchani
        # is tried as an alternative
        resource_path = local_dir
    elif info_file_path in SUPPORTED_INFO_FILES:
        # if all else fails files are downloaded and extracted ONLY if a
        # correct info file path is passed, otherwise an error is raised
        repo_name = "ani-model-zoo"
        tag_name = "ani-2x"
        extracted_name = '{}-{}'.format(repo_name, tag_name)
        url = "https://github.com/aiqm/{}/archive/{}.zip".format(repo_name, tag_name)

        print('Downloading ANI model parameters ...')
        resource_res = requests.get(url)
        # Fail loudly on an HTTP error instead of handing an error page to
        # zipfile, which would only report an opaque BadZipFile.
        resource_res.raise_for_status()

        with zipfile.ZipFile(io.BytesIO(resource_res.content)) as resource_zip:
            try:
                resource_zip.extractall(resource_path)
            except PermissionError:
                # The package directory is read-only: fall back to the
                # per-user directory and read the resources from there.
                resource_zip.extractall(local_dir)
                resource_path = local_dir

        # The archive nests everything under "<repo>-<tag>/resources"; move
        # its contents up into the resource directory and drop the rest.
        source = os.path.join(resource_path, extracted_name, "resources")
        dir_util.copy_tree(source, resource_path)
        dir_util.remove_tree(os.path.join(resource_path, extracted_name))
    else:
        raise ValueError('File {0} could not be found either in {1} or {2}\n'
                         'It is also not one of the supported builtin info files:'
                         ' {3}'.format(info_file_path, resource_path, local_dir,
                                       SUPPORTED_INFO_FILES))

    return _get_resources(resource_path, info_file_path)
def _get_resources(resource_path, info_file):
    """Read an info file and resolve the resources it lists.

    The first four lines of the info file are, in order: the constants
    file, the self atomic energies (SAE) file, the prefix of the ensemble
    directories, and the ensemble size. The three paths are joined onto
    ``resource_path``; the size is converted to ``int``.

    Returns:
        ``(const_file, sae_file, ensemble_prefix, ensemble_size)``
    """
    info_path = os.path.join(resource_path, info_file)
    with open(info_path) as handle:
        # Anything after the fourth line is ignored.
        entries = [raw.strip() for raw in handle][:4]
        const_name, sae_name, prefix_name, size_text = entries

    const_file = os.path.join(resource_path, const_name)
    sae_file = os.path.join(resource_path, sae_name)
    ensemble_prefix = os.path.join(resource_path, prefix_name)
    return const_file, sae_file, ensemble_prefix, int(size_text)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment