OpenDAS / torchani · Commit f146feca (unverified)

Refactor codes to put neurochem related codes together (#72)

Authored Aug 21, 2018 by Gao, Xiang; committed by GitHub on Aug 21, 2018
Parent: 85a6dd1e

Changes: 25 · Showing 5 changed files with 350 additions and 81 deletions (+350 −81)
torchani/models.py                  +73   −0
torchani/models/__init__.py         +0    −4
torchani/models/custom.py           +0    −26
torchani/models/neurochem_nnp.py    +0    −51
torchani/neurochem.py               +277  −0
torchani/models/ani_model.py → torchani/models.py  (view file @ f146feca)
 import torch
-from ..benchmarked import BenchmarkedModule
-from .. import padding
+from . import padding


-class ANIModel(BenchmarkedModule):
+class ANIModel(torch.nn.Module):
     """Subclass of `torch.nn.Module` for the [xyz]->[aev]->[per_atom_y]->y
     pipeline.

     Attributes
     ----------
     species : list
         Chemical symbol of supported atom species.
-    suffixes : sequence
-        Different suffixes denote different models in an ensemble.
-    model_<X><suffix> : nn.Module
-        Model of suffix <suffix> for species <X>. There should be one such
-        attribute for each supported species.
     reducer : function
         Function of (input, dim)->output that reduces the input tensor along
         the given dimension to get an output tensor. This function will be
         called with the per atom output tensor with internal shape as input
         and the desired reduction dimension as dim, and should reduce the
         input into the tensor containing the desired output.
     padding_fill : float
         Default value used to fill padding atoms
-    output_length : int
-        Length of output of each submodel.
-    timers : dict
-        Dictionary storing the benchmark result. It has the following keys:
-        forward : total time for the forward pass
     """

-    def __init__(self, species, suffixes, reducer, padding_fill, models,
-                 benchmark=False):
-        super(ANIModel, self).__init__(benchmark)
-        self.species = species
-        self.suffixes = suffixes
+    def __init__(self, models, reducer=torch.sum, padding_fill=0):
+        """
+        Parameters
+        ----------
+        models : (str, torch.nn.Module)
+            Models for all species. This must be a mapping where the key is
+            the atomic symbol and the value is a module.
+        reducer : function
+            Function of (input, dim)->output that reduces the input tensor
+            along the given dimension to get an output tensor. This function
+            will be called with the per atom output tensor with internal
+            shape as input and the desired reduction dimension as dim, and
+            should reduce the input into the tensor containing the desired
+            output.
+        padding_fill : float
+            Default value used to fill padding atoms
+        """
+        super(ANIModel, self).__init__()
+        self.species = [s for s, _ in models]
         self.reducer = reducer
         self.padding_fill = padding_fill
-        for i in models:
-            setattr(self, i, models[i])
-        if benchmark:
-            self.forward = self._enable_benchmark(self.forward, 'forward')
+        for s, m in models:
+            setattr(self, 'model_' + s, m)

     def forward(self, species_aev):
         """Compute output from aev
         ...
         ...

@@ -69,17 +52,22 @@ class ANIModel(BenchmarkedModule):
         species_ = species.flatten()
         present_species = padding.present_species(species)
         aev = aev.flatten(0, 1)

-        outputs = []
-        for suffix in self.suffixes:
-            output = torch.full_like(species_, self.padding_fill,
-                                     dtype=aev.dtype)
-            for i in present_species:
-                s = self.species[i]
-                model_X = getattr(self, 'model_' + s + suffix)
-                mask = (species_ == i)
-                input = aev.index_select(0, mask.nonzero().squeeze())
-                output[mask] = model_X(input).squeeze()
-            output = output.view_as(species)
-            outputs.append(self.reducer(output, dim=1))
+        output = torch.full_like(species_, self.padding_fill,
+                                 dtype=aev.dtype)
+        for i in present_species:
+            s = self.species[i]
+            model_X = getattr(self, 'model_' + s)
+            mask = (species_ == i)
+            input = aev.index_select(0, mask.nonzero().squeeze())
+            output[mask] = model_X(input).squeeze()
+        output = output.view_as(species)
+        return species, self.reducer(output, dim=1)


+class Ensemble(torch.nn.ModuleList):
+
+    def forward(self, species_aev):
+        outputs = [x(species_aev)[1] for x in self]
+        species, _ = species_aev
+        return species, sum(outputs) / len(outputs)
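After this change an ANIModel is assembled from a plain list of (species, module) pairs, and Ensemble simply averages member outputs. A minimal usage sketch, not part of the commit itself; the per-species architecture and the AEV length of 384 are invented placeholders:

    import torch
    from torchani.models import ANIModel, Ensemble

    def make_net(aev_length=384):  # hypothetical per-species network
        return torch.nn.Sequential(
            torch.nn.Linear(aev_length, 128),
            torch.nn.CELU(alpha=0.1),
            torch.nn.Linear(128, 1))

    species = ['H', 'C', 'N', 'O']
    single = ANIModel([(s, make_net()) for s in species])
    # Ensemble is a ModuleList of ANIModels; forward averages their outputs.
    ensemble = Ensemble([single, ANIModel([(s, make_net()) for s in species])])
    # Both take a (species, aev) pair and return (species, reduced_output).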
torchani/models/__init__.py  (deleted; 100644 → 0, view file @ 85a6dd1e)

-from .custom import CustomModel
-from .neurochem_nnp import NeuroChemNNP
-
-__all__ = ['CustomModel', 'NeuroChemNNP']
torchani/models/custom.py  (deleted; 100644 → 0, view file @ 85a6dd1e)

-import torch
-from .ani_model import ANIModel
-
-
-class CustomModel(ANIModel):
-
-    def __init__(self, per_species, reducer=torch.sum, padding_fill=0,
-                 derivative=False, derivative_graph=False, benchmark=False):
-        """Custom single model, no ensemble
-
-        Parameters
-        ----------
-        per_species : dict
-            Dictionary with supported species as keys and objects of
-            `torch.nn.Module` as values, storing the model for each
-            supported species. These models will finally become `model_X`
-            attributes.
-        reducer : function
-            The desired `reducer` attribute.
-        """
-        suffixes = ['']
-        models = {}
-        for i in per_species:
-            models['model_' + i] = per_species[i]
-        super(CustomModel, self).__init__(list(per_species.keys()), suffixes,
-                                          reducer, padding_fill, models,
-                                          benchmark)
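With CustomModel gone, a per-species dictionary maps onto the new ANIModel constructor directly. A hypothetical before/after sketch (net_H and net_O are placeholder modules, with an assumed input width of 384):

    import torch
    from torchani.models import ANIModel

    net_H = torch.nn.Linear(384, 1)  # placeholder per-species networks
    net_O = torch.nn.Linear(384, 1)

    # Before this commit: CustomModel(per_species={'H': net_H, 'O': net_O})
    # After this commit, the equivalent is:
    model = ANIModel([('H', net_H), ('O', net_O)])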
torchani/models/neurochem_nnp.py  (deleted; 100644 → 0, view file @ 85a6dd1e)

-import os
-import torch
-from .ani_model import ANIModel
-from .neurochem_atomic_network import NeuroChemAtomicNetwork
-from ..env import buildin_network_dir, buildin_model_prefix, buildin_ensemble
-
-
-class NeuroChemNNP(ANIModel):
-
-    def __init__(self, species, from_=None, ensemble=False, benchmark=False):
-        """If from_ is None then ensemble must be a boolean: if ensemble is
-        False, use the builtin network0, else use the builtin network
-        ensemble. If from_ is not None, ensemble must be either False or an
-        integer specifying the number of networks in the ensemble.
-        """
-        if from_ is None:
-            if not isinstance(ensemble, bool):
-                raise TypeError('ensemble must be boolean')
-            if ensemble:
-                from_ = buildin_model_prefix
-                ensemble = buildin_ensemble
-            else:
-                from_ = buildin_network_dir
-        else:
-            if not (ensemble is False or isinstance(ensemble, int)):
-                raise ValueError('invalid argument ensemble')
-        if ensemble is False:
-            network_dirs = [from_]
-            suffixes = ['']
-        else:
-            assert isinstance(ensemble, int)
-            network_prefix = from_
-            network_dirs = []
-            suffixes = []
-            for i in range(ensemble):
-                suffix = '{}'.format(i)
-                network_dir = os.path.join(network_prefix + suffix,
-                                           'networks')
-                network_dirs.append(network_dir)
-                suffixes.append(suffix)
-        models = {}
-        for network_dir, suffix in zip(network_dirs, suffixes):
-            for i in species:
-                filename = os.path.join(network_dir, 'ANN-{}.nnf'.format(i))
-                model_X = NeuroChemAtomicNetwork(filename)
-                models['model_' + i + suffix] = model_X
-        super(NeuroChemNNP, self).__init__(species, suffixes, torch.sum, 0,
-                                           models, benchmark)
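The constructor logic deleted here reappears as the load_model function added to torchani/neurochem.py below. A hedged sketch of the corresponding call-site change:

    from torchani import neurochem

    # Before this commit:
    #   from torchani.models import NeuroChemNNP
    #   model = NeuroChemNNP(['H', 'C', 'N', 'O'], ensemble=True)
    # After this commit:
    model = neurochem.load_model(['H', 'C', 'N', 'O'], ensemble=True)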
torchani/models/neurochem_atomic_network.py → torchani/neurochem.py  (view file @ f146feca)
-from .. import _six  # noqa: F401
+import pkg_resources
+import torch
 import os
 import bz2
 import lark
-import torch
 import math
 import struct
-
-
-class NeuroChemAtomicNetwork(torch.nn.Module):
-    """Per atom aev->y transformation, loaded from NeuroChem network dir.
-
-    Attributes
+from collections.abc import Mapping
+from .models import ANIModel, Ensemble
+
+
+buildin_const_file = pkg_resources.resource_filename(
+    __name__,
+    'resources/ani-1x_dft_x8ens/rHCNO-5.2R_16-3.5A_a4-8.params')
+
+buildin_sae_file = pkg_resources.resource_filename(
+    __name__, 'resources/ani-1x_dft_x8ens/sae_linfit.dat')
+
+buildin_network_dir = pkg_resources.resource_filename(
+    __name__, 'resources/ani-1x_dft_x8ens/train0/networks/')
+
+buildin_model_prefix = pkg_resources.resource_filename(
+    __name__, 'resources/ani-1x_dft_x8ens/train')
+
+buildin_ensemble = 8
+
+
+class Constants(Mapping):
+
+    def __init__(self, filename=buildin_const_file):
+        self.filename = filename
+        with open(filename) as f:
+            for i in f:
+                try:
+                    line = [x.strip() for x in i.split('=')]
+                    name = line[0]
+                    value = line[1]
+                    if name == 'Rcr' or name == 'Rca':
+                        setattr(self, name, torch.tensor(float(value)))
+                    elif name in ['EtaR', 'ShfR', 'Zeta', 'ShfZ', 'EtaA',
+                                  'ShfA']:
+                        value = [float(x.strip()) for x in value.replace(
+                            '[', '').replace(']', '').split(',')]
+                        setattr(self, name, torch.tensor(value))
+                    elif name == 'Atyp':
+                        value = [x.strip() for x in value.replace(
+                            '[', '').replace(']', '').split(',')]
+                        self.species = value
+                except Exception:
+                    raise ValueError('unable to parse const file')
+        self.rev_species = {}
+        for i in range(len(self.species)):
+            s = self.species[i]
+            self.rev_species[s] = i
+
+    def __iter__(self):
+        yield 'Rcr'
+        yield 'Rca'
+        yield 'EtaR'
+        yield 'ShfR'
+        yield 'EtaA'
+        yield 'Zeta'
+        yield 'ShfA'
+        yield 'ShfZ'
+        yield 'species'
+
+    def __len__(self):
+        # NOTE: __iter__ above yields nine keys, so len() as written here
+        # disagrees with the iterator by one.
+        return 8
+
+    def __getitem__(self, item):
+        return getattr(self, item)
+
+    def species_to_tensor(self, species, device):
+        rev = [self.rev_species[s] for s in species]
+        return torch.tensor(rev, dtype=torch.long, device=device)
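An aside on the Constants class just added: since it subclasses Mapping, an instance behaves like a read-only dict over its keys while also exposing each constant as an attribute. A small usage sketch, assuming the builtin packaged .params file is available; the splat target f is hypothetical:

    from torchani import neurochem

    consts = neurochem.Constants()        # parse the builtin const file
    print(consts['Rcr'], consts.Rca)      # mapping access and attribute access
    idx = consts.species_to_tensor(['C', 'H', 'H'], device='cpu')
    # Being a Mapping, it can also be splatted into keyword arguments,
    # e.g. f(**consts) for any consumer expecting Rcr, Rca, EtaR, ... kwargs.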
+def load_sae(filename=buildin_sae_file):
+    """Load self energies from NeuroChem sae file"""
+    self_energies = {}
+    with open(filename) as f:
+        for i in f:
+            try:
+                line = [x.strip() for x in i.split('=')]
+                name = line[0].split(',')[0].strip()
+                value = float(line[1])
+                self_energies[name] = value
+            except Exception:
+                pass  # ignore unrecognizable line
+    return self_energies
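Each line of a sae file pairs an atomic symbol (and an index) with a self energy; load_sae keeps only the symbol. A runnable sketch with invented values, following the `symbol,index=value` format the parser above expects:

    import tempfile
    from torchani import neurochem

    with tempfile.NamedTemporaryFile('w', suffix='.dat', delete=False) as f:
        f.write('H,0=-0.500\nO,1=-75.000\n')  # made-up self energies
        path = f.name

    print(neurochem.load_sae(path))  # {'H': -0.5, 'O': -75.0}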
+def load_atomic_network(filename):
+    """Load atomic network from NeuroChem's .nnf, .wparam and .bparam files
+
+    Parameters
     ----------
-    layers : int
-        Number of layers.
-    layerN : torch.nn.Linear
-        Linear model for each layer.
-    activation : function
-        Function for computing the activation for all layers but the
-        last layer.
-    activation_index : int
-        The NeuroChem index for activation.
+    filename : string
+        The file name for the `.nnf` file that stores network
+        hyperparameters. The `.bparam` and `.wparam` files must be
+        in the same directory.
+
+    Returns
+    -------
+    torch.nn.Sequential
+        The loaded atomic network
+    """

-    def __init__(self, filename):
-        """Initialize from NeuroChem network directory.
-
-        Parameters
-        ----------
-        filename : string
-            The file name for the `.nnf` file that stores network
-            hyperparameters. The `.bparam` and `.wparam` files must be
-            in the same directory.
-        """
-        super(NeuroChemAtomicNetwork, self).__init__()
-        network_dir = os.path.dirname(filename)
-        with open(filename, 'rb') as f:
-            buffer = f.read()
-        buffer = self._decompress(buffer)
-        layer_setups = self._parse(buffer)
-        self._construct(layer_setups, network_dir)

-    def _decompress(self, buffer):
-        """Decompress the `.nnf` file
-
-        Parameters
-        ----------
-        buffer : bytes
-            The buffer storing the whole compressed `.nnf` file content.
-
-        Returns
-        -------
-        string
-            The string storing the whole decompressed `.nnf` file content.
-        """
+    # decompress nnf file
+    def decompress_nnf(buffer):
         while buffer[0] != b'='[0]:
             buffer = buffer[1:]
         buffer = buffer[2:]
         return bz2.decompress(buffer)[:-1].decode('ascii').strip()

-    def _parse(self, nnf_file):
-        """Parse the `.nnf` file
-
-        Parameters
-        ----------
-        nnf_file : string
-            The string storing the whole decompressed `.nnf` file content.
-
-        Returns
-        -------
-        list of dict
-            Parsed setups as a list of dictionaries storing the parsed
-            `.nnf` file content. Each dictionary in the list holds the
-            hyperparameters for one layer.
-        """
+    def parse_nnf(nnf_file):
+        # parse input file
         parser = lark.Lark(r'''
         identifier : CNAME
         ...
         ...

@@ -150,59 +185,7 @@ class NeuroChemAtomicNetwork(torch.nn.Module):
         layer_setups = TreeExec().transform(tree)
         return layer_setups

-    def _construct(self, setups, dirname):
-        """Construct model from parsed setups
-
-        Parameters
-        ----------
-        setups : list of dict
-            Parsed setups as a list of dictionaries storing the parsed
-            `.nnf` file content. Each dictionary in the list holds the
-            hyperparameters for one layer.
-        dirname : string
-            The directory where network files are stored.
-        """
-        # Activation defined in:
-        # https://github.com/Jussmith01/NeuroChem/blob/master/src-atomicnnplib/cunetwork/cuannlayer_t.cu#L868
-        self.activation_index = None
-        self.activation = None
-        self.layers = len(setups)
-        for i in range(self.layers):
-            s = setups[i]
-            in_size = s['blocksize']
-            out_size = s['nodes']
-            activation = s['activation']
-            wfn, wsz = s['weights']
-            bfn, bsz = s['biases']
-            if i == self.layers - 1:
-                if activation != 6:  # no activation
-                    raise ValueError(
-                        'activation in the last layer must be 6')
-            else:
-                if self.activation_index is None:
-                    self.activation_index = activation
-                    if activation == 5:  # Gaussian
-                        self.activation = lambda x: torch.exp(-x * x)
-                    elif activation == 9:  # CELU
-                        alpha = 0.1
-                        self.activation = lambda x: torch.celu(x, alpha)
-                    else:
-                        raise NotImplementedError(
-                            'Unexpected activation {}'.format(activation))
-                elif self.activation_index != activation:
-                    raise NotImplementedError('''different activations on
-                        different layers are not supported''')
-            linear = torch.nn.Linear(in_size, out_size)
-            name = 'layer{}'.format(i)
-            setattr(self, name, linear)
-            if in_size * out_size != wsz or out_size != bsz:
-                raise ValueError('bad parameter shape')
-            wfn = os.path.join(dirname, wfn)
-            bfn = os.path.join(dirname, bfn)
-            self._load_param_file(linear, in_size, out_size, wfn, bfn)

-    def _load_param_file(self, linear, in_size, out_size, wfn, bfn):
+    def load_param_file(linear, in_size, out_size, wfn, bfn):
         """Load `.wparam` and `.bparam` files"""
         wsize = in_size * out_size
         fw = open(wfn, 'rb')
         ...
         ...

@@ -216,50 +199,79 @@ class NeuroChemAtomicNetwork(torch.nn.Module):
         linear.bias.data = b
         fb.close()

-    def get_activations(self, aev, layer):
-        """Compute the activation of the specified layer.
-
-        Parameters
-        ----------
-        aev : torch.Tensor
-            The pytorch tensor of shape (conformations, aev_length) storing
-            AEV as input to this model.
-        layer : int
-            The layer whose activation is desired. The index starts at zero,
-            that is, `layer=0` means `activation(layer0(aev))` rather than
-            `aev`. If the given layer is larger than the total number of
-            layers, then the activation of the last layer will be returned.
-
-        Returns
-        -------
-        torch.Tensor
-            The pytorch tensor of activations of the specified layer.
-        """
-        y = aev
-        for j in range(self.layers - 1):
-            linear = getattr(self, 'layer{}'.format(j))
-            y = linear(y)
-            y = self.activation(y)
-            if j == layer:
-                break
-        if layer >= self.layers - 1:
-            linear = getattr(self, 'layer{}'.format(self.layers - 1))
-            y = linear(y)
-        return y

-    def forward(self, aev):
-        """Compute output from aev
-
-        Parameters
-        ----------
-        aev : torch.Tensor
-            The pytorch tensor of shape (conformations, aev_length) storing
-            AEV as input to this model.
-
-        Returns
-        -------
-        torch.Tensor
-            The pytorch tensor of shape (conformations, output_length) for
-            output.
-        """
-        return self.get_activations(aev, math.inf)

+    class Gaussian(torch.nn.Module):
+
+        def forward(self, x):
+            return torch.exp(-x * x)
+
+    network_dir = os.path.dirname(filename)
+    with open(filename, 'rb') as f:
+        buffer = f.read()
+    buffer = decompress_nnf(buffer)
+    layer_setups = parse_nnf(buffer)
+
+    layers = []
+    for s in layer_setups:
+        # construct linear layer and load parameters
+        in_size = s['blocksize']
+        out_size = s['nodes']
+        wfn, wsz = s['weights']
+        bfn, bsz = s['biases']
+        if in_size * out_size != wsz or out_size != bsz:
+            raise ValueError('bad parameter shape')
+        layer = torch.nn.Linear(in_size, out_size)
+        wfn = os.path.join(network_dir, wfn)
+        bfn = os.path.join(network_dir, bfn)
+        load_param_file(layer, in_size, out_size, wfn, bfn)
+        layers.append(layer)
+        # Activation defined in:
+        # https://github.com/Jussmith01/NeuroChem/blob/master/src-atomicnnplib/cunetwork/cuannlayer_t.cu#L868
+        activation = s['activation']
+        if activation == 6:  # no activation
+            continue
+        elif activation == 5:  # Gaussian
+            layers.append(Gaussian())
+        elif activation == 9:  # CELU
+            layers.append(torch.nn.CELU(alpha=0.1))
+        else:
+            raise NotImplementedError(
+                'Unexpected activation {}'.format(activation))
+    return torch.nn.Sequential(*layers)
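The loop above turns each parsed layer setup into a Linear plus an optional activation module. An illustrative sketch of that construction on hand-written setups, skipping the parameter files so weights stay randomly initialized; the layer sizes and activation mix are invented:

    import torch

    class Gaussian(torch.nn.Module):  # same definition as above
        def forward(self, x):
            return torch.exp(-x * x)

    setups = [  # hypothetical three-layer network
        {'blocksize': 384, 'nodes': 128, 'activation': 9},  # 9 = CELU
        {'blocksize': 128, 'nodes': 64, 'activation': 5},   # 5 = Gaussian
        {'blocksize': 64, 'nodes': 1, 'activation': 6},     # 6 = none
    ]
    layers = []
    for s in setups:
        layers.append(torch.nn.Linear(s['blocksize'], s['nodes']))
        if s['activation'] == 5:
            layers.append(Gaussian())
        elif s['activation'] == 9:
            layers.append(torch.nn.CELU(alpha=0.1))
    net = torch.nn.Sequential(*layers)
    print(net(torch.zeros(1, 384)).shape)  # torch.Size([1, 1])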
+def load_model(species, from_=None, ensemble=False):
+    """If from_ is None then ensemble must be a boolean: if ensemble is
+    False, use the builtin network0, else use the builtin network ensemble.
+    If from_ is not None, ensemble must be either False or an integer
+    specifying the number of networks in the ensemble.
+    """
+    if from_ is None:
+        if not isinstance(ensemble, bool):
+            raise TypeError('ensemble must be boolean')
+        if ensemble:
+            from_ = buildin_model_prefix
+            ensemble = buildin_ensemble
+        else:
+            from_ = buildin_network_dir
+    else:
+        if not (ensemble is False or isinstance(ensemble, int)):
+            raise ValueError('invalid argument ensemble')
+
+    def load_single_model(from_):
+        models = []
+        for i in species:
+            filename = os.path.join(from_, 'ANN-{}.nnf'.format(i))
+            models.append((i, load_atomic_network(filename)))
+        return ANIModel(models)
+
+    if ensemble is False:
+        return load_single_model(from_)
+    else:
+        assert isinstance(ensemble, int)
+        models = []
+        for i in range(ensemble):
+            network_dir = os.path.join('{}{}'.format(from_, i), 'networks')
+            models.append(load_single_model(network_dir))
+        return Ensemble(models)
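Taken together, the new module loads either a single builtin network or the eight-network builtin ensemble. A usage sketch, assuming the packaged resource files are installed:

    from torchani import neurochem

    consts = neurochem.Constants()
    shift = neurochem.load_sae()                   # per-species self energies
    single = neurochem.load_model(consts.species)  # builtin network0
    ensemble = neurochem.load_model(consts.species, ensemble=True)  # 8 nets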