Unverified Commit f146feca authored by Gao, Xiang's avatar Gao, Xiang Committed by GitHub
Browse files

Refactor codes to put neurochem related codes together (#72)

parent 85a6dd1e
import torch import torch
from ..benchmarked import BenchmarkedModule from . import padding
from .. import padding
class ANIModel(BenchmarkedModule): class ANIModel(torch.nn.Module):
"""Subclass of `torch.nn.Module` for the [xyz]->[aev]->[per_atom_y]->y
pipeline.
Attributes def __init__(self, models, reducer=torch.sum, padding_fill=0):
---------- """
species : list Parameters
Chemical symbol of supported atom species. ----------
suffixes : sequence models : (str, torch.nn.Module)
Different suffixes denote different models in an ensemble. Models for all species. This must be a mapping where the key is
model_<X><suffix> : nn.Module atomic symbol and the value is a module.
Model of suffix <suffix> for species <X>. There should be one such reducer : function
attribute for each supported species. Function of (input, dim)->output that reduce the input tensor along
reducer : function the given dimension to get an output tensor. This function will be
Function of (input, dim)->output that reduce the input tensor along the called with the per atom output tensor with internal shape as input
given dimension to get an output tensor. This function will be called , and desired reduction dimension as dim, and should reduce the
with the per atom output tensor with internal shape as input, and input into the tensor containing desired output.
desired reduction dimension as dim, and should reduce the input into padding_fill : float
the tensor containing desired output. Default value used to fill padding atoms
padding_fill : float """
Default value used to fill padding atoms super(ANIModel, self).__init__()
output_length : int self.species = [s for s, _ in models]
Length of output of each submodel.
timers : dict
Dictionary storing the the benchmark result. It has the following keys:
forward : total time for the forward pass
"""
def __init__(self, species, suffixes, reducer, padding_fill, models,
benchmark=False):
super(ANIModel, self).__init__(benchmark)
self.species = species
self.suffixes = suffixes
self.reducer = reducer self.reducer = reducer
self.padding_fill = padding_fill self.padding_fill = padding_fill
for i in models: for s, m in models:
setattr(self, i, models[i]) setattr(self, 'model_' + s, m)
if benchmark:
self.forward = self._enable_benchmark(self.forward, 'forward')
def forward(self, species_aev): def forward(self, species_aev):
"""Compute output from aev """Compute output from aev
...@@ -69,17 +52,22 @@ class ANIModel(BenchmarkedModule): ...@@ -69,17 +52,22 @@ class ANIModel(BenchmarkedModule):
species_ = species.flatten() species_ = species.flatten()
present_species = padding.present_species(species) present_species = padding.present_species(species)
aev = aev.flatten(0, 1) aev = aev.flatten(0, 1)
outputs = []
for suffix in self.suffixes:
output = torch.full_like(species_, self.padding_fill,
dtype=aev.dtype)
for i in present_species:
s = self.species[i]
model_X = getattr(self, 'model_' + s + suffix)
mask = (species_ == i)
input = aev.index_select(0, mask.nonzero().squeeze())
output[mask] = model_X(input).squeeze()
output = output.view_as(species)
outputs.append(self.reducer(output, dim=1))
output = torch.full_like(species_, self.padding_fill,
dtype=aev.dtype)
for i in present_species:
s = self.species[i]
model_X = getattr(self, 'model_' + s)
mask = (species_ == i)
input = aev.index_select(0, mask.nonzero().squeeze())
output[mask] = model_X(input).squeeze()
output = output.view_as(species)
return species, self.reducer(output, dim=1)
class Ensemble(torch.nn.ModuleList):
    """List of ANIModel members whose predictions are averaged."""

    def forward(self, species_aev):
        """Run every member on the same (species, aev) pair.

        Returns the species tensor unchanged together with the mean of
        the members' scalar outputs.
        """
        species = species_aev[0]
        member_outputs = [member(species_aev)[1] for member in self]
        return species, sum(member_outputs) / len(member_outputs)
from .custom import CustomModel
from .neurochem_nnp import NeuroChemNNP
__all__ = ['CustomModel', 'NeuroChemNNP']
import torch
from .ani_model import ANIModel
class CustomModel(ANIModel):
    def __init__(self, per_species, reducer=torch.sum, padding_fill=0,
                 derivative=False, derivative_graph=False, benchmark=False):
        """Single custom model (no ensemble).

        Parameters
        ----------
        per_species : dict
            Dictionary with supported species as keys and objects of
            `torch.nn.Model` as values, storing the model for each supported
            species. These models will finally become `model_X` attributes.
        reducer : function
            The desired `reducer` attribute.
        """
        # A single model corresponds to one empty suffix.
        models = {'model_' + symbol: module
                  for symbol, module in per_species.items()}
        super(CustomModel, self).__init__(
            list(per_species.keys()), [''], reducer, padding_fill,
            models, benchmark)
import os
import torch
from .ani_model import ANIModel
from .neurochem_atomic_network import NeuroChemAtomicNetwork
from ..env import buildin_network_dir, buildin_model_prefix, buildin_ensemble
class NeuroChemNNP(ANIModel):
    def __init__(self, species, from_=None, ensemble=False, benchmark=False):
        """If from_=None then ensemble must be a boolean. If ensemble=False,
        then use buildin network0, else use buildin network ensemble.
        If from_ != None, ensemble must be either False or an integer
        specifying the number of networks in the ensemble.
        """
        if from_ is None:
            # Fall back to the packaged networks.
            if not isinstance(ensemble, bool):
                raise TypeError('ensemble must be boolean')
            if ensemble:
                from_ = buildin_model_prefix
                ensemble = buildin_ensemble
            else:
                from_ = buildin_network_dir
        elif not (ensemble is False or isinstance(ensemble, int)):
            raise ValueError('invalid argument ensemble')

        if ensemble is False:
            # Single network: one directory, one empty suffix.
            network_dirs = [from_]
            suffixes = ['']
        else:
            assert isinstance(ensemble, int)
            # Directories are named <prefix><i>/networks for each member.
            suffixes = ['{}'.format(i) for i in range(ensemble)]
            network_dirs = [os.path.join(from_ + sfx, 'networks')
                            for sfx in suffixes]

        models = {}
        for directory, sfx in zip(network_dirs, suffixes):
            for symbol in species:
                nnf_path = os.path.join(
                    directory, 'ANN-{}.nnf'.format(symbol))
                models['model_' + symbol + sfx] = \
                    NeuroChemAtomicNetwork(nnf_path)
        super(NeuroChemNNP, self).__init__(species, suffixes, torch.sum,
                                           0, models, benchmark)
from .. import _six # noqa: F401 import pkg_resources
import torch
import os import os
import bz2 import bz2
import lark import lark
import torch
import math
import struct import struct
from collections.abc import Mapping
from .models import ANIModel, Ensemble
class NeuroChemAtomicNetwork(torch.nn.Module):
"""Per atom aev->y transformation, loaded from NeuroChem network dir.
buildin_const_file = pkg_resources.resource_filename(
Attributes __name__, 'resources/ani-1x_dft_x8ens/rHCNO-5.2R_16-3.5A_a4-8.params')
buildin_sae_file = pkg_resources.resource_filename(
__name__, 'resources/ani-1x_dft_x8ens/sae_linfit.dat')
buildin_network_dir = pkg_resources.resource_filename(
__name__, 'resources/ani-1x_dft_x8ens/train0/networks/')
buildin_model_prefix = pkg_resources.resource_filename(
__name__, 'resources/ani-1x_dft_x8ens/train')
buildin_ensemble = 8
class Constants(Mapping):
    """Mapping of AEV hyperparameters parsed from a NeuroChem const file.

    Exposes ``Rcr``/``Rca`` as scalar tensors, the radial/angular shift and
    eta parameters as 1-D tensors, and the supported chemical symbols as
    ``species``. Also provides ``rev_species`` (symbol -> index).
    """

    def __init__(self, filename=buildin_const_file):
        """Parse ``filename`` and set each constant as an attribute.

        Raises
        ------
        ValueError
            If any line of the const file cannot be parsed.
        """
        self.filename = filename
        with open(filename) as f:
            for i in f:
                try:
                    line = [x.strip() for x in i.split('=')]
                    name = line[0]
                    value = line[1]
                    if name == 'Rcr' or name == 'Rca':
                        setattr(self, name, torch.tensor(float(value)))
                    elif name in ['EtaR', 'ShfR', 'Zeta',
                                  'ShfZ', 'EtaA', 'ShfA']:
                        # values look like "[a, b, c]"; strip brackets
                        value = [float(x.strip()) for x in value.replace(
                            '[', '').replace(']', '').split(',')]
                        setattr(self, name, torch.tensor(value))
                    elif name == 'Atyp':
                        value = [x.strip() for x in value.replace(
                            '[', '').replace(']', '').split(',')]
                        self.species = value
                except Exception as e:
                    # chain the original exception so the offending line
                    # is easier to diagnose
                    raise ValueError('unable to parse const file') from e
        # reverse lookup: chemical symbol -> species index
        # NOTE(review): assumes the file contains an 'Atyp' entry,
        # otherwise self.species is never set — confirm with callers
        self.rev_species = {s: i for i, s in enumerate(self.species)}

    def __iter__(self):
        yield 'Rcr'
        yield 'Rca'
        yield 'EtaR'
        yield 'ShfR'
        yield 'EtaA'
        yield 'Zeta'
        yield 'ShfA'
        yield 'ShfZ'
        yield 'species'

    def __len__(self):
        # Must match the number of keys yielded by __iter__ (the original
        # returned 8, breaking the Mapping contract for the 9 keys above).
        return 9

    def __getitem__(self, item):
        return getattr(self, item)

    def species_to_tensor(self, species, device):
        """Convert a sequence of chemical symbols to a long tensor of
        species indices on the given device."""
        rev = [self.rev_species[s] for s in species]
        return torch.tensor(rev, dtype=torch.long, device=device)
def load_sae(filename=buildin_sae_file):
    """Load atomic self energies from a NeuroChem sae file."""
    energies = {}
    with open(filename) as f:
        for raw in f:
            parts = [p.strip() for p in raw.split('=')]
            try:
                symbol = parts[0].split(',')[0].strip()
                energies[symbol] = float(parts[1])
            except Exception:
                continue  # skip lines that are not "symbol,idx = value"
    return energies
def load_atomic_network(filename):
"""Load atomic network from NeuroChem's .nnf, .wparam and .bparam files
Parameters
---------- ----------
layers : int filename : string
Number of layers. The file name for the `.nnf` file that store network
layerN : torch.nn.Linear hyperparameters. The `.bparam` and `.wparam` must be
Linear model for each layer. in the same directory
activation : function
Function for computing the activation for all layers but the Returns
last layer. -------
activation_index : int torch.nn.Sequential
The NeuroChem index for activation. The loaded atomic network
""" """
def __init__(self, filename): def decompress_nnf(buffer):
"""Initialize from NeuroChem network directory.
Parameters
----------
filename : string
The file name for the `.nnf` file that store network
hyperparameters. The `.bparam` and `.wparam` must be
in the same directory
"""
super(NeuroChemAtomicNetwork, self).__init__()
networ_dir = os.path.dirname(filename)
with open(filename, 'rb') as f:
buffer = f.read()
buffer = self._decompress(buffer)
layer_setups = self._parse(buffer)
self._construct(layer_setups, networ_dir)
def _decompress(self, buffer):
"""Decompress the `.nnf` file
Parameters
----------
buffer : bytes
The buffer storing the whole compressed `.nnf` file content.
Returns
-------
string
The string storing the whole decompressed `.nnf` file content.
"""
# decompress nnf file
while buffer[0] != b'='[0]: while buffer[0] != b'='[0]:
buffer = buffer[1:] buffer = buffer[1:]
buffer = buffer[2:] buffer = buffer[2:]
return bz2.decompress(buffer)[:-1].decode('ascii').strip() return bz2.decompress(buffer)[:-1].decode('ascii').strip()
def _parse(self, nnf_file): def parse_nnf(nnf_file):
"""Parse the `.nnf` file
Parameters
----------
nnf_file : string
The string storing the while decompressed `.nnf` file content.
Returns
-------
list of dict
Parsed setups as list of dictionary storing the parsed `.nnf`
file content. Each dictionary in the list is the hyperparameters
for a layer.
"""
# parse input file # parse input file
parser = lark.Lark(r''' parser = lark.Lark(r'''
identifier : CNAME identifier : CNAME
...@@ -150,59 +185,7 @@ class NeuroChemAtomicNetwork(torch.nn.Module): ...@@ -150,59 +185,7 @@ class NeuroChemAtomicNetwork(torch.nn.Module):
layer_setups = TreeExec().transform(tree) layer_setups = TreeExec().transform(tree)
return layer_setups return layer_setups
def _construct(self, setups, dirname): def load_param_file(linear, in_size, out_size, wfn, bfn):
"""Construct model from parsed setups
Parameters
----------
setups : list of dict
Parsed setups as list of dictionary storing the parsed `.nnf`
file content. Each dictionary in the list is the hyperparameters
for a layer.
dirname : string
The directory where network files are stored.
"""
# Activation defined in:
# https://github.com/Jussmith01/NeuroChem/blob/master/src-atomicnnplib/cunetwork/cuannlayer_t.cu#L868
self.activation_index = None
self.activation = None
self.layers = len(setups)
for i in range(self.layers):
s = setups[i]
in_size = s['blocksize']
out_size = s['nodes']
activation = s['activation']
wfn, wsz = s['weights']
bfn, bsz = s['biases']
if i == self.layers-1:
if activation != 6: # no activation
raise ValueError('activation in the last layer must be 6')
else:
if self.activation_index is None:
self.activation_index = activation
if activation == 5: # Gaussian
self.activation = lambda x: torch.exp(-x*x)
elif activation == 9: # CELU
alpha = 0.1
self.activation = lambda x: torch.celu(x, alpha)
else:
raise NotImplementedError(
'Unexpected activation {}'.format(activation))
elif self.activation_index != activation:
raise NotImplementedError(
'''different activation on different
layers are not supported''')
linear = torch.nn.Linear(in_size, out_size)
name = 'layer{}'.format(i)
setattr(self, name, linear)
if in_size * out_size != wsz or out_size != bsz:
raise ValueError('bad parameter shape')
wfn = os.path.join(dirname, wfn)
bfn = os.path.join(dirname, bfn)
self._load_param_file(linear, in_size, out_size, wfn, bfn)
def _load_param_file(self, linear, in_size, out_size, wfn, bfn):
"""Load `.wparam` and `.bparam` files""" """Load `.wparam` and `.bparam` files"""
wsize = in_size * out_size wsize = in_size * out_size
fw = open(wfn, 'rb') fw = open(wfn, 'rb')
...@@ -216,50 +199,79 @@ class NeuroChemAtomicNetwork(torch.nn.Module): ...@@ -216,50 +199,79 @@ class NeuroChemAtomicNetwork(torch.nn.Module):
linear.bias.data = b linear.bias.data = b
fb.close() fb.close()
def get_activations(self, aev, layer): class Gaussian(torch.nn.Module):
"""Compute the activation of the specified layer. def forward(self, x):
return torch.exp(-x*x)
Parameters
---------- networ_dir = os.path.dirname(filename)
aev : torch.Tensor
The pytorch tensor of shape (conformations, aev_length) storing AEV with open(filename, 'rb') as f:
as input to this model. buffer = f.read()
layer : int buffer = decompress_nnf(buffer)
The layer whose activation is desired. The index starts at zero, layer_setups = parse_nnf(buffer)
that is `layer=0` means the `activation(layer0(aev))` instead of
`aev`. If the given layer is larger than the total number of layers = []
layers, then the activation of the last layer will be returned. for s in layer_setups:
# construct linear layer and load parameters
Returns in_size = s['blocksize']
------- out_size = s['nodes']
torch.Tensor wfn, wsz = s['weights']
The pytorch tensor of activations of specified layer. bfn, bsz = s['biases']
""" if in_size * out_size != wsz or out_size != bsz:
y = aev raise ValueError('bad parameter shape')
for j in range(self.layers-1): layer = torch.nn.Linear(in_size, out_size)
linear = getattr(self, 'layer{}'.format(j)) wfn = os.path.join(networ_dir, wfn)
y = linear(y) bfn = os.path.join(networ_dir, bfn)
y = self.activation(y) load_param_file(layer, in_size, out_size, wfn, bfn)
if j == layer: layers.append(layer)
break
if layer >= self.layers-1: # Activation defined in:
linear = getattr(self, 'layer{}'.format(self.layers-1)) # https://github.com/Jussmith01/NeuroChem/blob/master/src-atomicnnplib/cunetwork/cuannlayer_t.cu#L868
y = linear(y) activation = s['activation']
return y if activation == 6:
continue
def forward(self, aev): elif activation == 5: # Gaussian
"""Compute output from aev layers.append(Gaussian())
elif activation == 9: # CELU
Parameters layers.append(torch.nn.CELU(alpha=0.1))
---------- else:
aev : torch.Tensor raise NotImplementedError(
The pytorch tensor of shape (conformations, aev_length) storing 'Unexpected activation {}'.format(activation))
AEV as input to this model.
return torch.nn.Sequential(*layers)
Returns
-------
def load_model(species, from_=None, ensemble=False):
    """Load a NeuroChem model, built-in or from disk.

    When ``from_`` is None, ``ensemble`` must be a bool: False loads the
    built-in single network, True loads the built-in ensemble. When
    ``from_`` is given, ``ensemble`` must be either False (single model)
    or an int giving the number of networks in the ensemble.
    """
    if from_ is None:
        if not isinstance(ensemble, bool):
            raise TypeError('ensemble must be boolean')
        if ensemble:
            from_ = buildin_model_prefix
            ensemble = buildin_ensemble
        else:
            from_ = buildin_network_dir
    elif not (ensemble is False or isinstance(ensemble, int)):
        raise ValueError('invalid argument ensemble')

    def load_single_model(network_dir):
        # one atomic network per supported species
        pairs = []
        for symbol in species:
            nnf_path = os.path.join(network_dir,
                                    'ANN-{}.nnf'.format(symbol))
            pairs.append((symbol, load_atomic_network(nnf_path)))
        return ANIModel(pairs)

    if ensemble is False:
        return load_single_model(from_)
    assert isinstance(ensemble, int)
    members = [load_single_model(
        os.path.join('{}{}'.format(from_, i), 'networks'))
        for i in range(ensemble)]
    return Ensemble(members)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment