Unverified Commit f146feca authored by Gao, Xiang, committed by GitHub

Refactor code to put neurochem-related code together (#72)

parent 85a6dd1e
import torch
from ..benchmarked import BenchmarkedModule
from .. import padding
from . import padding
class ANIModel(BenchmarkedModule):
"""Subclass of `torch.nn.Module` for the [xyz]->[aev]->[per_atom_y]->y
pipeline.
class ANIModel(torch.nn.Module):
Attributes
----------
species : list
Chemical symbols of supported atom species.
suffixes : sequence
Different suffixes denote different models in an ensemble.
model_<X><suffix> : nn.Module
Model of suffix <suffix> for species <X>. There should be one such
attribute for each supported species.
reducer : function
Function of (input, dim)->output that reduces the input tensor along
the given dimension to get an output tensor. It is called with the
per-atom output tensor (in internal shape) as input and the desired
reduction dimension as dim, and should reduce the input to the tensor
containing the desired output.
padding_fill : float
Default value used to fill padding atoms
output_length : int
Length of output of each submodel.
timers : dict
Dictionary storing the benchmark result. It has the following keys:
forward : total time for the forward pass
"""
def __init__(self, species, suffixes, reducer, padding_fill, models,
benchmark=False):
super(ANIModel, self).__init__(benchmark)
self.species = species
self.suffixes = suffixes
def __init__(self, models, reducer=torch.sum, padding_fill=0):
"""
Parameters
----------
models : sequence of (str, torch.nn.Module) pairs
Models for all species. Each pair holds an atomic symbol and the
module used for atoms of that species.
reducer : function
Function of (input, dim)->output that reduces the input tensor along
the given dimension to get an output tensor. It is called with the
per-atom output tensor (in internal shape) as input and the desired
reduction dimension as dim, and should reduce the input to the tensor
containing the desired output.
padding_fill : float
Default value used to fill padding atoms
"""
super(ANIModel, self).__init__()
self.species = [s for s, _ in models]
self.reducer = reducer
self.padding_fill = padding_fill
for i in models:
setattr(self, i, models[i])
if benchmark:
self.forward = self._enable_benchmark(self.forward, 'forward')
for s, m in models:
setattr(self, 'model_' + s, m)
def forward(self, species_aev):
"""Compute output from aev
@@ -69,17 +52,22 @@ class ANIModel(BenchmarkedModule):
species_ = species.flatten()
present_species = padding.present_species(species)
aev = aev.flatten(0, 1)
outputs = []
for suffix in self.suffixes:
output = torch.full_like(species_, self.padding_fill,
dtype=aev.dtype)
for i in present_species:
s = self.species[i]
model_X = getattr(self, 'model_' + s + suffix)
mask = (species_ == i)
input = aev.index_select(0, mask.nonzero().squeeze())
output[mask] = model_X(input).squeeze()
output = output.view_as(species)
outputs.append(self.reducer(output, dim=1))
output = torch.full_like(species_, self.padding_fill,
dtype=aev.dtype)
for i in present_species:
s = self.species[i]
model_X = getattr(self, 'model_' + s)
mask = (species_ == i)
input = aev.index_select(0, mask.nonzero().squeeze())
output[mask] = model_X(input).squeeze()
output = output.view_as(species)
return species, self.reducer(output, dim=1)
class Ensemble(torch.nn.ModuleList):
def forward(self, species_aev):
outputs = [x(species_aev)[1] for x in self]
species, _ = species_aev
return species, sum(outputs) / len(outputs)
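# A minimal usage sketch, assuming toy sub-networks and an aev length of
# 384 (layer sizes here are made-up placeholders): ANIModel takes one
# (symbol, module) pair per supported species, and Ensemble averages the
# predictions of several such models.
def _toy_network(aev_length=384):
    return torch.nn.Sequential(
        torch.nn.Linear(aev_length, 64),
        torch.nn.CELU(alpha=0.1),
        torch.nn.Linear(64, 1))

def _toy_ani_model():
    return ANIModel([(s, _toy_network()) for s in ['H', 'C', 'N', 'O']])

# forward takes a (species, aev) pair, where species is a LongTensor of
# shape (conformations, atoms) and aev has shape (conformations, atoms,
# aev_length), and returns (species, energies)
ensemble_of_two = Ensemble([_toy_ani_model(), _toy_ani_model()])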
from .custom import CustomModel
from .neurochem_nnp import NeuroChemNNP
__all__ = ['CustomModel', 'NeuroChemNNP']
import torch
from .ani_model import ANIModel
class CustomModel(ANIModel):
def __init__(self, per_species, reducer=torch.sum, padding_fill=0,
derivative=False, derivative_graph=False, benchmark=False):
"""Custom single model, no ensemble
Parameters
----------
per_species : dict
Dictionary with supported species as keys and objects of
`torch.nn.Module` as values, storing the model for each supported
species. These models will become the `model_X` attributes.
reducer : function
The desired `reducer` attribute.
"""
suffixes = ['']
models = {}
for i in per_species:
models['model_' + i] = per_species[i]
super(CustomModel, self).__init__(list(per_species.keys()), suffixes,
reducer, padding_fill, models,
benchmark)
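# A minimal usage sketch, assuming toy sub-networks and an aev length of 384:
#     custom = CustomModel({'H': torch.nn.Linear(384, 1),
#                           'O': torch.nn.Linear(384, 1)})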
import os
import torch
from .ani_model import ANIModel
from .neurochem_atomic_network import NeuroChemAtomicNetwork
from ..env import buildin_network_dir, buildin_model_prefix, buildin_ensemble
class NeuroChemNNP(ANIModel):
def __init__(self, species, from_=None, ensemble=False, benchmark=False):
"""If from_=None then ensemble must be a boolean. If ensemble=False,
then use buildin network0, else use buildin network ensemble.
If from_ != None, ensemble must be either False or an integer
specifying the number of networks in the ensemble.
"""
if from_ is None:
if not isinstance(ensemble, bool):
raise TypeError('ensemble must be boolean')
if ensemble:
from_ = buildin_model_prefix
ensemble = buildin_ensemble
else:
from_ = buildin_network_dir
else:
if not (ensemble is False or isinstance(ensemble, int)):
raise ValueError('invalid argument ensemble')
if ensemble is False:
network_dirs = [from_]
suffixes = ['']
else:
assert isinstance(ensemble, int)
network_prefix = from_
network_dirs = []
suffixes = []
for i in range(ensemble):
suffix = '{}'.format(i)
network_dir = os.path.join(
network_prefix+suffix, 'networks')
network_dirs.append(network_dir)
suffixes.append(suffix)
models = {}
for network_dir, suffix in zip(network_dirs, suffixes):
for i in species:
filename = os.path.join(
network_dir, 'ANN-{}.nnf'.format(i))
model_X = NeuroChemAtomicNetwork(filename)
models['model_' + i + suffix] = model_X
super(NeuroChemNNP, self).__init__(species, suffixes, torch.sum,
0, models, benchmark)
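# A minimal usage sketch of the call patterns described in the docstring
# (the custom prefix below is a placeholder path):
#     nnp = NeuroChemNNP(['H', 'C', 'N', 'O'])                 # builtin network0
#     ens = NeuroChemNNP(['H', 'C', 'N', 'O'], ensemble=True)  # builtin 8-network ensemble
#     own = NeuroChemNNP(['H', 'C', 'N', 'O'], from_='/path/to/train', ensemble=4)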
from .. import _six # noqa: F401
import pkg_resources
import torch
import os
import bz2
import lark
import torch
import math
import struct
class NeuroChemAtomicNetwork(torch.nn.Module):
"""Per atom aev->y transformation, loaded from NeuroChem network dir.
Attributes
from collections.abc import Mapping
from .models import ANIModel, Ensemble
buildin_const_file = pkg_resources.resource_filename(
__name__, 'resources/ani-1x_dft_x8ens/rHCNO-5.2R_16-3.5A_a4-8.params')
buildin_sae_file = pkg_resources.resource_filename(
__name__, 'resources/ani-1x_dft_x8ens/sae_linfit.dat')
buildin_network_dir = pkg_resources.resource_filename(
__name__, 'resources/ani-1x_dft_x8ens/train0/networks/')
buildin_model_prefix = pkg_resources.resource_filename(
__name__, 'resources/ani-1x_dft_x8ens/train')
buildin_ensemble = 8
class Constants(Mapping):
def __init__(self, filename=buildin_const_file):
self.filename = filename
with open(filename) as f:
for i in f:
try:
line = [x.strip() for x in i.split('=')]
name = line[0]
value = line[1]
if name == 'Rcr' or name == 'Rca':
setattr(self, name, torch.tensor(float(value)))
elif name in ['EtaR', 'ShfR', 'Zeta',
'ShfZ', 'EtaA', 'ShfA']:
value = [float(x.strip()) for x in value.replace(
'[', '').replace(']', '').split(',')]
setattr(self, name, torch.tensor(value))
elif name == 'Atyp':
value = [x.strip() for x in value.replace(
'[', '').replace(']', '').split(',')]
self.species = value
except Exception:
raise ValueError('unable to parse const file')
self.rev_species = {}
for i in range(len(self.species)):
s = self.species[i]
self.rev_species[s] = i
def __iter__(self):
yield 'Rcr'
yield 'Rca'
yield 'EtaR'
yield 'ShfR'
yield 'EtaA'
yield 'Zeta'
yield 'ShfA'
yield 'ShfZ'
yield 'species'
def __len__(self):
return 9  # eight AEV constants plus `species`, matching __iter__
def __getitem__(self, item):
return getattr(self, item)
def species_to_tensor(self, species, device):
rev = [self.rev_species[s] for s in species]
return torch.tensor(rev, dtype=torch.long, device=device)
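# A minimal usage sketch, assuming the builtin const file shipped with the
# package: Constants acts as a read-only mapping over the eight AEV
# hyperparameters plus `species`, and translates chemical symbols into
# species indices.
consts = Constants()
hyperparams = dict(consts)  # {'Rcr': ..., 'EtaR': ..., ..., 'species': [...]}
methane_idx = consts.species_to_tensor(['C', 'H', 'H', 'H', 'H'],
                                       device=torch.device('cpu'))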
def load_sae(filename=buildin_sae_file):
"""Load self energies from NeuroChem sae file"""
self_energies = {}
with open(filename) as f:
for i in f:
try:
line = [x.strip() for x in i.split('=')]
name = line[0].split(',')[0].strip()
value = float(line[1])
self_energies[name] = value
except Exception:
pass # ignore unrecognizable line
return self_energies
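# A minimal usage sketch: the returned self energies are keyed by atomic
# symbol, so the per-molecule self energy is a plain sum (methane is just
# an example input).
sae = load_sae()
methane_self_energy = sum(sae[s] for s in ['C', 'H', 'H', 'H', 'H'])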
def load_atomic_network(filename):
"""Load atomic network from NeuroChem's .nnf, .wparam and .bparam files
Parameters
----------
layers : int
Number of layers.
layerN : torch.nn.Linear
Linear model for each layer.
activation : function
Function for computing the activation for all layers but the
last layer.
activation_index : int
The NeuroChem index for activation.
filename : string
The file name of the `.nnf` file that stores network
hyperparameters. The `.bparam` and `.wparam` files must be
in the same directory.
Returns
-------
torch.nn.Sequential
The loaded atomic network
"""
def __init__(self, filename):
"""Initialize from NeuroChem network directory.
Parameters
----------
filename : string
The file name of the `.nnf` file that stores network
hyperparameters. The `.bparam` and `.wparam` files must be
in the same directory.
"""
super(NeuroChemAtomicNetwork, self).__init__()
network_dir = os.path.dirname(filename)
with open(filename, 'rb') as f:
buffer = f.read()
buffer = self._decompress(buffer)
layer_setups = self._parse(buffer)
self._construct(layer_setups, network_dir)
def _decompress(self, buffer):
"""Decompress the `.nnf` file
Parameters
----------
buffer : bytes
The buffer storing the whole compressed `.nnf` file content.
Returns
-------
string
The string storing the whole decompressed `.nnf` file content.
"""
# decompress nnf file
def decompress_nnf(buffer):
while buffer[0] != b'='[0]:
buffer = buffer[1:]
buffer = buffer[2:]
return bz2.decompress(buffer)[:-1].decode('ascii').strip()
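# A minimal round-trip sketch of the framing assumed above: everything up
# to and including '=' plus one more byte is skipped, the rest is bz2
# data, and the trailing byte of the decompressed payload is dropped.
payload = bz2.compress(b'network setup text\x00')
fake_nnf = b'HEADER=\n' + payload
assert decompress_nnf(fake_nnf) == 'network setup text'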
def _parse(self, nnf_file):
"""Parse the `.nnf` file
Parameters
----------
nnf_file : string
The string storing the whole decompressed `.nnf` file content.
Returns
-------
list of dict
Parsed setups: a list of dictionaries storing the parsed `.nnf`
file content. Each dictionary holds the hyperparameters for one
layer.
"""
def parse_nnf(nnf_file):
# parse input file
parser = lark.Lark(r'''
identifier : CNAME
@@ -150,59 +185,7 @@ class NeuroChemAtomicNetwork(torch.nn.Module):
layer_setups = TreeExec().transform(tree)
return layer_setups
def _construct(self, setups, dirname):
"""Construct model from parsed setups
Parameters
----------
setups : list of dict
Parsed setups: a list of dictionaries storing the parsed `.nnf`
file content. Each dictionary holds the hyperparameters for one
layer.
dirname : string
The directory where network files are stored.
"""
# Activation defined in:
# https://github.com/Jussmith01/NeuroChem/blob/master/src-atomicnnplib/cunetwork/cuannlayer_t.cu#L868
self.activation_index = None
self.activation = None
self.layers = len(setups)
for i in range(self.layers):
s = setups[i]
in_size = s['blocksize']
out_size = s['nodes']
activation = s['activation']
wfn, wsz = s['weights']
bfn, bsz = s['biases']
if i == self.layers-1:
if activation != 6: # no activation
raise ValueError('activation in the last layer must be 6')
else:
if self.activation_index is None:
self.activation_index = activation
if activation == 5: # Gaussian
self.activation = lambda x: torch.exp(-x*x)
elif activation == 9: # CELU
alpha = 0.1
self.activation = lambda x: torch.celu(x, alpha)
else:
raise NotImplementedError(
'Unexpected activation {}'.format(activation))
elif self.activation_index != activation:
raise NotImplementedError(
'different activations on different '
'layers are not supported')
linear = torch.nn.Linear(in_size, out_size)
name = 'layer{}'.format(i)
setattr(self, name, linear)
if in_size * out_size != wsz or out_size != bsz:
raise ValueError('bad parameter shape')
wfn = os.path.join(dirname, wfn)
bfn = os.path.join(dirname, bfn)
self._load_param_file(linear, in_size, out_size, wfn, bfn)
def _load_param_file(self, linear, in_size, out_size, wfn, bfn):
def load_param_file(linear, in_size, out_size, wfn, bfn):
"""Load `.wparam` and `.bparam` files"""
wsize = in_size * out_size
fw = open(wfn, 'rb')
@@ -216,50 +199,79 @@ class NeuroChemAtomicNetwork(torch.nn.Module):
linear.bias.data = b
fb.close()
def get_activations(self, aev, layer):
"""Compute the activation of the specified layer.
Parameters
----------
aev : torch.Tensor
The pytorch tensor of shape (conformations, aev_length) storing AEV
as input to this model.
layer : int
The layer whose activation is desired. The index starts at zero,
that is, `layer=0` means `activation(layer0(aev))` rather than
`aev`. If the given index is `self.layers - 1` or larger, the output
of the final linear layer is returned (no activation is applied to
the last layer).
Returns
-------
torch.Tensor
The pytorch tensor of activations of specified layer.
"""
y = aev
for j in range(self.layers-1):
linear = getattr(self, 'layer{}'.format(j))
y = linear(y)
y = self.activation(y)
if j == layer:
break
if layer >= self.layers-1:
linear = getattr(self, 'layer{}'.format(self.layers-1))
y = linear(y)
return y
def forward(self, aev):
"""Compute output from aev
Parameters
----------
aev : torch.Tensor
The pytorch tensor of shape (conformations, aev_length) storing
AEV as input to this model.
Returns
-------
torch.Tensor
The pytorch tensor of shape (conformations, output_length) for
output.
"""
return self.get_activations(aev, math.inf)
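# A minimal usage sketch, assuming the builtin H network file and an aev
# length of 384 (both from the shipped rHCNO parameters):
#     net = NeuroChemAtomicNetwork(os.path.join(buildin_network_dir, 'ANN-H.nnf'))
#     hidden0 = net.get_activations(torch.randn(8, 384), layer=0)  # first layer
#     energies = net(torch.randn(8, 384))  # full forward pass, shape (8, 1)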
class Gaussian(torch.nn.Module):
def forward(self, x):
return torch.exp(-x*x)
network_dir = os.path.dirname(filename)
with open(filename, 'rb') as f:
buffer = f.read()
buffer = decompress_nnf(buffer)
layer_setups = parse_nnf(buffer)
layers = []
for s in layer_setups:
# construct linear layer and load parameters
in_size = s['blocksize']
out_size = s['nodes']
wfn, wsz = s['weights']
bfn, bsz = s['biases']
if in_size * out_size != wsz or out_size != bsz:
raise ValueError('bad parameter shape')
layer = torch.nn.Linear(in_size, out_size)
wfn = os.path.join(network_dir, wfn)
bfn = os.path.join(network_dir, bfn)
load_param_file(layer, in_size, out_size, wfn, bfn)
layers.append(layer)
# Activation defined in:
# https://github.com/Jussmith01/NeuroChem/blob/master/src-atomicnnplib/cunetwork/cuannlayer_t.cu#L868
activation = s['activation']
if activation == 6:
continue
elif activation == 5: # Gaussian
layers.append(Gaussian())
elif activation == 9: # CELU
layers.append(torch.nn.CELU(alpha=0.1))
else:
raise NotImplementedError(
'Unexpected activation {}'.format(activation))
return torch.nn.Sequential(*layers)
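# A minimal usage sketch, assuming the builtin network directory defined
# above and the builtin aev length of 384: load the hydrogen network and
# evaluate it on random AEVs.
h_net = load_atomic_network(os.path.join(buildin_network_dir, 'ANN-H.nnf'))
h_energies = h_net(torch.randn(5, 384))  # shape (5, 1)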
def load_model(species, from_=None, ensemble=False):
"""If from_=None then ensemble must be a boolean. If ensemble=False,
then use buildin network0, else use buildin network ensemble.
If from_ != None, ensemble must be either False or an integer
specifying the number of networks in the ensemble.
"""
if from_ is None:
if not isinstance(ensemble, bool):
raise TypeError('ensemble must be boolean')
if ensemble:
from_ = buildin_model_prefix
ensemble = buildin_ensemble
else:
from_ = buildin_network_dir
else:
if not (ensemble is False or isinstance(ensemble, int)):
raise ValueError('invalid argument ensemble')
def load_single_model(from_):
models = []
for i in species:
filename = os.path.join(from_, 'ANN-{}.nnf'.format(i))
models.append((i, load_atomic_network(filename)))
return ANIModel(models)
if ensemble is False:
return load_single_model(from_)
else:
assert isinstance(ensemble, int)
models = []
for i in range(ensemble):
network_dir = os.path.join('{}{}'.format(from_, i), 'networks')
models.append(load_single_model(network_dir))
return Ensemble(models)
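# A minimal usage sketch of the supported call patterns (the custom prefix
# in the last line is a placeholder path):
species = ['H', 'C', 'N', 'O']
single = load_model(species)                   # builtin network0
averaged = load_model(species, ensemble=True)  # builtin 8-network ensemble
# custom = load_model(species, from_='/path/to/train', ensemble=4)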