Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
torchani
Commits
f146feca
Unverified
Commit
f146feca
authored
Aug 21, 2018
by
Gao, Xiang
Committed by
GitHub
Aug 21, 2018
Browse files
Refactor codes to put neurochem related codes together (#72)
parent
85a6dd1e
Changes
25
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
350 additions
and
81 deletions
+350
-81
torchani/models.py
torchani/models.py
+73
-0
torchani/models/__init__.py
torchani/models/__init__.py
+0
-4
torchani/models/custom.py
torchani/models/custom.py
+0
-26
torchani/models/neurochem_nnp.py
torchani/models/neurochem_nnp.py
+0
-51
torchani/neurochem.py
torchani/neurochem.py
+277
-0
No files found.
torchani/models
/ani_model
.py
→
torchani/models.py
View file @
f146feca
import
torch
import
torch
from
..benchmarked
import
BenchmarkedModule
from
.
import
padding
from
..
import
padding
class
ANIModel
(
BenchmarkedModule
):
class
ANIModel
(
torch
.
nn
.
Module
):
"""Subclass of `torch.nn.Module` for the [xyz]->[aev]->[per_atom_y]->y
pipeline.
Attributes
def
__init__
(
self
,
models
,
reducer
=
torch
.
sum
,
padding_fill
=
0
):
----------
"""
species : list
Parameters
Chemical symbol of supported atom species.
----------
suffixes : sequence
models : (str, torch.nn.Module)
Different suffixes denote different models in an ensemble.
Models for all species. This must be a mapping where the key is
model_<X><suffix> : nn.Module
atomic symbol and the value is a module.
Model of suffix <suffix> for species <X>. There should be one such
reducer : function
attribute for each supported species.
Function of (input, dim)->output that reduce the input tensor along
reducer : function
the given dimension to get an output tensor. This function will be
Function of (input, dim)->output that reduce the input tensor along the
called with the per atom output tensor with internal shape as input
given dimension to get an output tensor. This function will be called
, and desired reduction dimension as dim, and should reduce the
with the per atom output tensor with internal shape as input, and
input into the tensor containing desired output.
desired reduction dimension as dim, and should reduce the input into
padding_fill : float
the tensor containing desired output.
Default value used to fill padding atoms
padding_fill : float
"""
Default value used to fill padding atoms
super
(
ANIModel
,
self
).
__init__
()
output_length : int
self
.
species
=
[
s
for
s
,
_
in
models
]
Length of output of each submodel.
timers : dict
Dictionary storing the the benchmark result. It has the following keys:
forward : total time for the forward pass
"""
def
__init__
(
self
,
species
,
suffixes
,
reducer
,
padding_fill
,
models
,
benchmark
=
False
):
super
(
ANIModel
,
self
).
__init__
(
benchmark
)
self
.
species
=
species
self
.
suffixes
=
suffixes
self
.
reducer
=
reducer
self
.
reducer
=
reducer
self
.
padding_fill
=
padding_fill
self
.
padding_fill
=
padding_fill
for
i
in
models
:
for
s
,
m
in
models
:
setattr
(
self
,
i
,
models
[
i
])
setattr
(
self
,
'model_'
+
s
,
m
)
if
benchmark
:
self
.
forward
=
self
.
_enable_benchmark
(
self
.
forward
,
'forward'
)
def
forward
(
self
,
species_aev
):
def
forward
(
self
,
species_aev
):
"""Compute output from aev
"""Compute output from aev
...
@@ -69,17 +52,22 @@ class ANIModel(BenchmarkedModule):
...
@@ -69,17 +52,22 @@ class ANIModel(BenchmarkedModule):
species_
=
species
.
flatten
()
species_
=
species
.
flatten
()
present_species
=
padding
.
present_species
(
species
)
present_species
=
padding
.
present_species
(
species
)
aev
=
aev
.
flatten
(
0
,
1
)
aev
=
aev
.
flatten
(
0
,
1
)
outputs
=
[]
for
suffix
in
self
.
suffixes
:
output
=
torch
.
full_like
(
species_
,
self
.
padding_fill
,
dtype
=
aev
.
dtype
)
for
i
in
present_species
:
s
=
self
.
species
[
i
]
model_X
=
getattr
(
self
,
'model_'
+
s
+
suffix
)
mask
=
(
species_
==
i
)
input
=
aev
.
index_select
(
0
,
mask
.
nonzero
().
squeeze
())
output
[
mask
]
=
model_X
(
input
).
squeeze
()
output
=
output
.
view_as
(
species
)
outputs
.
append
(
self
.
reducer
(
output
,
dim
=
1
))
output
=
torch
.
full_like
(
species_
,
self
.
padding_fill
,
dtype
=
aev
.
dtype
)
for
i
in
present_species
:
s
=
self
.
species
[
i
]
model_X
=
getattr
(
self
,
'model_'
+
s
)
mask
=
(
species_
==
i
)
input
=
aev
.
index_select
(
0
,
mask
.
nonzero
().
squeeze
())
output
[
mask
]
=
model_X
(
input
).
squeeze
()
output
=
output
.
view_as
(
species
)
return
species
,
self
.
reducer
(
output
,
dim
=
1
)
class Ensemble(torch.nn.ModuleList):
    """Average the predictions of several models evaluated on the same input.

    Every member is called with the same ``species_aev`` argument; element 1
    of whatever each member returns is treated as that member's prediction.
    """

    def forward(self, species_aev):
        """Return the species together with the mean member prediction.

        Parameters
        ----------
        species_aev : tuple
            Pair whose first element is the species. The whole pair is passed
            unchanged to every member of the ensemble.

        Returns
        -------
        tuple
            ``(species, mean)`` where ``mean`` is the arithmetic mean of
            element 1 of each member's return value.

        Raises
        ------
        ValueError
            If the ensemble has no members (the mean would be undefined).
        """
        # Fail with a clear message instead of the ZeroDivisionError that
        # ``sum([]) / len([])`` would otherwise produce.
        if len(self) == 0:
            raise ValueError('cannot evaluate an empty ensemble')
        outputs = [x(species_aev)[1] for x in self]
        species, _ = species_aev
        return species, sum(outputs) / len(outputs)
torchani/models/__init__.py
deleted
100644 → 0
View file @
85a6dd1e
from
.custom
import
CustomModel
from
.neurochem_nnp
import
NeuroChemNNP
__all__
=
[
'CustomModel'
,
'NeuroChemNNP'
]
torchani/models/custom.py
deleted
100644 → 0
View file @
85a6dd1e
import
torch
from
.ani_model
import
ANIModel
class CustomModel(ANIModel):

    def __init__(self, per_species, reducer=torch.sum, padding_fill=0,
                 derivative=False, derivative_graph=False, benchmark=False):
        """Build a single (non-ensemble) model from per-species modules.

        Parameters
        ----------
        per_species : dict
            Maps each supported species to the module used for it. Every
            entry is forwarded to the base class under the name
            ``'model_' + species``.
        reducer : function
            Forwarded to the base class as its ``reducer`` argument.
        padding_fill : float
            Forwarded to the base class as its ``padding_fill`` argument.
        derivative, derivative_graph :
            Accepted but not used by this constructor.
        benchmark : bool
            Forwarded to the base class as its ``benchmark`` argument.
        """
        # A single model is an "ensemble" with one empty suffix.
        named_models = {
            'model_' + symbol: per_species[symbol] for symbol in per_species
        }
        supported = list(per_species.keys())
        super(CustomModel, self).__init__(
            supported, [''], reducer, padding_fill, named_models, benchmark)
torchani/models/neurochem_nnp.py
deleted
100644 → 0
View file @
85a6dd1e
import
os
import
torch
from
.ani_model
import
ANIModel
from
.neurochem_atomic_network
import
NeuroChemAtomicNetwork
from
..env
import
buildin_network_dir
,
buildin_model_prefix
,
buildin_ensemble
class NeuroChemNNP(ANIModel):

    def __init__(self, species, from_=None, ensemble=False, benchmark=False):
        """Load NeuroChem atomic networks for every species.

        If from_=None then ensemble must be a boolean. If ensemble=False,
        then use buildin network0, else use buildin network ensemble.
        If from_ != None, ensemble must be either False or an integer
        specifying the number of networks in the ensemble.

        Raises
        ------
        TypeError
            If ``from_`` is None and ``ensemble`` is not a boolean.
        ValueError
            If ``from_`` is given and ``ensemble`` is neither False nor an
            integer.
        """
        # Validate the arguments and resolve the built-in defaults.
        if from_ is not None:
            if ensemble is not False and not isinstance(ensemble, int):
                raise ValueError('invalid argument ensemble')
        elif not isinstance(ensemble, bool):
            raise TypeError('ensemble must be boolean')
        elif ensemble:
            from_ = buildin_model_prefix
            ensemble = buildin_ensemble
        else:
            from_ = buildin_network_dir

        # One (directory, suffix) pair per network in the ensemble; a single
        # network uses the directory as given and an empty suffix.
        if ensemble is False:
            suffixes = ['']
            network_dirs = [from_]
        else:
            assert isinstance(ensemble, int)
            suffixes = ['{}'.format(index) for index in range(ensemble)]
            network_dirs = [
                os.path.join(from_ + suffix, 'networks') for suffix in suffixes
            ]

        # Load one atomic network per species for each ensemble member.
        models = {}
        for directory, suffix in zip(network_dirs, suffixes):
            for symbol in species:
                nnf_path = os.path.join(
                    directory, 'ANN-{}.nnf'.format(symbol))
                models['model_' + symbol + suffix] = \
                    NeuroChemAtomicNetwork(nnf_path)

        super(NeuroChemNNP, self).__init__(
            species, suffixes, torch.sum, 0, models, benchmark)
torchani/
models/
neurochem
_atomic_network
.py
→
torchani/neurochem.py
View file @
f146feca
from
..
import
_six
# noqa: F401
import
pkg_resources
import
torch
import
os
import
os
import
bz2
import
bz2
import
lark
import
lark
import
torch
import
math
import
struct
import
struct
from
collections.abc
import
Mapping
from
.models
import
ANIModel
,
Ensemble
class
NeuroChemAtomicNetwork
(
torch
.
nn
.
Module
):
"""Per atom aev->y transformation, loaded from NeuroChem network dir.
buildin_const_file
=
pkg_resources
.
resource_filename
(
Attributes
__name__
,
'resources/ani-1x_dft_x8ens/rHCNO-5.2R_16-3.5A_a4-8.params'
)
buildin_sae_file
=
pkg_resources
.
resource_filename
(
__name__
,
'resources/ani-1x_dft_x8ens/sae_linfit.dat'
)
buildin_network_dir
=
pkg_resources
.
resource_filename
(
__name__
,
'resources/ani-1x_dft_x8ens/train0/networks/'
)
buildin_model_prefix
=
pkg_resources
.
resource_filename
(
__name__
,
'resources/ani-1x_dft_x8ens/train'
)
buildin_ensemble
=
8
class Constants(Mapping):
    """Read-only mapping of the constants stored in a NeuroChem const file.

    The file is read line by line as ``name = value`` entries:

    * ``Rcr`` and ``Rca`` are stored as scalar tensors;
    * ``EtaR``, ``ShfR``, ``Zeta``, ``ShfZ``, ``EtaA`` and ``ShfA`` are
      bracketed, comma separated lists stored as tensors of floats;
    * ``Atyp`` is a bracketed, comma separated list of species names stored
      as the ``species`` attribute.

    Entries with any other name are ignored. The mapping interface iterates
    over the nine keys listed in ``_KEYS``; each value is the attribute of
    the same name.
    """

    # Keys exposed through the mapping interface, in iteration order.
    _KEYS = ('Rcr', 'Rca', 'EtaR', 'ShfR', 'EtaA', 'Zeta', 'ShfA', 'ShfZ',
             'species')
    _SCALAR_NAMES = ('Rcr', 'Rca')
    _LIST_NAMES = ('EtaR', 'ShfR', 'Zeta', 'ShfZ', 'EtaA', 'ShfA')

    def __init__(self, filename=buildin_const_file):
        """Parse ``filename`` and store its constants as attributes.

        Raises
        ------
        ValueError
            If any line of the file cannot be parsed; the original error is
            attached as the cause.
        """
        self.filename = filename
        with open(filename) as f:
            for i in f:
                try:
                    line = [x.strip() for x in i.split('=')]
                    name = line[0]
                    value = line[1]
                    if name in self._SCALAR_NAMES:
                        setattr(self, name, torch.tensor(float(value)))
                    elif name in self._LIST_NAMES:
                        numbers = [float(x)
                                   for x in self._split_list(value)]
                        setattr(self, name, torch.tensor(numbers))
                    elif name == 'Atyp':
                        self.species = self._split_list(value)
                except Exception as e:
                    # Keep the underlying error so a bad file can be debugged.
                    raise ValueError('unable to parse const file') from e
        # Reverse lookup: species name -> index in ``self.species``.
        self.rev_species = {s: i for i, s in enumerate(self.species)}

    @staticmethod
    def _split_list(value):
        """Split a ``[a, b, c]`` style string into stripped items."""
        stripped = value.replace('[', '').replace(']', '')
        return [x.strip() for x in stripped.split(',')]

    def __iter__(self):
        return iter(self._KEYS)

    def __len__(self):
        # Must agree with the number of keys produced by ``__iter__``.
        return len(self._KEYS)

    def __getitem__(self, item):
        # ``Mapping.get`` and ``in`` rely on KeyError for missing keys.
        try:
            return getattr(self, item)
        except AttributeError:
            raise KeyError(item) from None

    def species_to_tensor(self, species, device):
        """Convert a sequence of species names to a tensor of indices.

        Parameters
        ----------
        species : iterable
            Species names; each must be a key of ``self.rev_species``.
        device :
            Passed to ``torch.tensor`` as the ``device`` of the result.

        Returns
        -------
        torch.Tensor
            Tensor of dtype ``torch.long`` holding the index of each name.
        """
        rev = [self.rev_species[s] for s in species]
        return torch.tensor(rev, dtype=torch.long, device=device)
def load_sae(filename=buildin_sae_file):
    """Load self energies from NeuroChem sae file.

    Each line is split on ``=``. The text before the first comma of the
    left-hand side is used as the key, and the right-hand side is converted
    to ``float``. Lines that do not fit this pattern are skipped.

    Parameters
    ----------
    filename : str
        Path of the sae file to read.

    Returns
    -------
    dict
        Maps each parsed name to its value as a float.
    """
    self_energies = {}
    with open(filename) as f:
        for i in f:
            line = [x.strip() for x in i.split('=')]
            try:
                name = line[0].split(',')[0].strip()
                value = float(line[1])
            except (IndexError, ValueError):
                # ignore unrecognizable line: no '=' present, or the
                # right-hand side is not a number
                continue
            self_energies[name] = value
    return self_energies
def
load_atomic_network
(
filename
):
"""Load atomic network from NeuroChem's .nnf, .wparam and .bparam files
Parameters
----------
----------
layers :
in
t
filename : str
in
g
Number of layers.
The file name for the `.nnf` file that store network
layerN : torch.nn.Linear
hyperparameters. The `.bparam` and `.wparam` must be
L
in
ear model for each layer.
in
the same directory
activation : function
Function for computing the activation for all layers but the
Returns
last layer.
-------
activation_index : int
torch.nn.Sequential
The
NeuroChem index for activation.
The
loaded atomic network
"""
"""
def
__init__
(
self
,
filename
):
def
decompress_nnf
(
buffer
):
"""Initialize from NeuroChem network directory.
Parameters
----------
filename : string
The file name for the `.nnf` file that store network
hyperparameters. The `.bparam` and `.wparam` must be
in the same directory
"""
super
(
NeuroChemAtomicNetwork
,
self
).
__init__
()
networ_dir
=
os
.
path
.
dirname
(
filename
)
with
open
(
filename
,
'rb'
)
as
f
:
buffer
=
f
.
read
()
buffer
=
self
.
_decompress
(
buffer
)
layer_setups
=
self
.
_parse
(
buffer
)
self
.
_construct
(
layer_setups
,
networ_dir
)
def
_decompress
(
self
,
buffer
):
"""Decompress the `.nnf` file
Parameters
----------
buffer : bytes
The buffer storing the whole compressed `.nnf` file content.
Returns
-------
string
The string storing the whole decompressed `.nnf` file content.
"""
# decompress nnf file
while
buffer
[
0
]
!=
b
'='
[
0
]:
while
buffer
[
0
]
!=
b
'='
[
0
]:
buffer
=
buffer
[
1
:]
buffer
=
buffer
[
1
:]
buffer
=
buffer
[
2
:]
buffer
=
buffer
[
2
:]
return
bz2
.
decompress
(
buffer
)[:
-
1
].
decode
(
'ascii'
).
strip
()
return
bz2
.
decompress
(
buffer
)[:
-
1
].
decode
(
'ascii'
).
strip
()
def
_parse
(
self
,
nnf_file
):
def
parse_nnf
(
nnf_file
):
"""Parse the `.nnf` file
Parameters
----------
nnf_file : string
The string storing the while decompressed `.nnf` file content.
Returns
-------
list of dict
Parsed setups as list of dictionary storing the parsed `.nnf`
file content. Each dictionary in the list is the hyperparameters
for a layer.
"""
# parse input file
# parse input file
parser
=
lark
.
Lark
(
r
'''
parser
=
lark
.
Lark
(
r
'''
identifier : CNAME
identifier : CNAME
...
@@ -150,59 +185,7 @@ class NeuroChemAtomicNetwork(torch.nn.Module):
...
@@ -150,59 +185,7 @@ class NeuroChemAtomicNetwork(torch.nn.Module):
layer_setups
=
TreeExec
().
transform
(
tree
)
layer_setups
=
TreeExec
().
transform
(
tree
)
return
layer_setups
return
layer_setups
def
_construct
(
self
,
setups
,
dirname
):
def
load_param_file
(
linear
,
in_size
,
out_size
,
wfn
,
bfn
):
"""Construct model from parsed setups
Parameters
----------
setups : list of dict
Parsed setups as list of dictionary storing the parsed `.nnf`
file content. Each dictionary in the list is the hyperparameters
for a layer.
dirname : string
The directory where network files are stored.
"""
# Activation defined in:
# https://github.com/Jussmith01/NeuroChem/blob/master/src-atomicnnplib/cunetwork/cuannlayer_t.cu#L868
self
.
activation_index
=
None
self
.
activation
=
None
self
.
layers
=
len
(
setups
)
for
i
in
range
(
self
.
layers
):
s
=
setups
[
i
]
in_size
=
s
[
'blocksize'
]
out_size
=
s
[
'nodes'
]
activation
=
s
[
'activation'
]
wfn
,
wsz
=
s
[
'weights'
]
bfn
,
bsz
=
s
[
'biases'
]
if
i
==
self
.
layers
-
1
:
if
activation
!=
6
:
# no activation
raise
ValueError
(
'activation in the last layer must be 6'
)
else
:
if
self
.
activation_index
is
None
:
self
.
activation_index
=
activation
if
activation
==
5
:
# Gaussian
self
.
activation
=
lambda
x
:
torch
.
exp
(
-
x
*
x
)
elif
activation
==
9
:
# CELU
alpha
=
0.1
self
.
activation
=
lambda
x
:
torch
.
celu
(
x
,
alpha
)
else
:
raise
NotImplementedError
(
'Unexpected activation {}'
.
format
(
activation
))
elif
self
.
activation_index
!=
activation
:
raise
NotImplementedError
(
'''different activation on different
layers are not supported'''
)
linear
=
torch
.
nn
.
Linear
(
in_size
,
out_size
)
name
=
'layer{}'
.
format
(
i
)
setattr
(
self
,
name
,
linear
)
if
in_size
*
out_size
!=
wsz
or
out_size
!=
bsz
:
raise
ValueError
(
'bad parameter shape'
)
wfn
=
os
.
path
.
join
(
dirname
,
wfn
)
bfn
=
os
.
path
.
join
(
dirname
,
bfn
)
self
.
_load_param_file
(
linear
,
in_size
,
out_size
,
wfn
,
bfn
)
def
_load_param_file
(
self
,
linear
,
in_size
,
out_size
,
wfn
,
bfn
):
"""Load `.wparam` and `.bparam` files"""
"""Load `.wparam` and `.bparam` files"""
wsize
=
in_size
*
out_size
wsize
=
in_size
*
out_size
fw
=
open
(
wfn
,
'rb'
)
fw
=
open
(
wfn
,
'rb'
)
...
@@ -216,50 +199,79 @@ class NeuroChemAtomicNetwork(torch.nn.Module):
...
@@ -216,50 +199,79 @@ class NeuroChemAtomicNetwork(torch.nn.Module):
linear
.
bias
.
data
=
b
linear
.
bias
.
data
=
b
fb
.
close
()
fb
.
close
()
def
get_activations
(
self
,
aev
,
layer
):
class
Gaussian
(
torch
.
nn
.
Module
):
"""Compute the activation of the specified layer.
def
forward
(
self
,
x
):
return
torch
.
exp
(
-
x
*
x
)
Parameters
----------
networ_dir
=
os
.
path
.
dirname
(
filename
)
aev : torch.Tensor
The pytorch tensor of shape (conformations, aev_length) storing AEV
with
open
(
filename
,
'rb'
)
as
f
:
as input to this model.
buffer
=
f
.
read
()
layer : int
buffer
=
decompress_nnf
(
buffer
)
The layer whose activation is desired. The index starts at zero,
layer_setups
=
parse_nnf
(
buffer
)
that is `layer=0` means the `activation(layer0(aev))` instead of
`aev`. If the given layer is larger than the total number of
layers
=
[]
layers, then the activation of the last layer will be returned.
for
s
in
layer_setups
:
# construct linear layer and load parameters
Returns
in_size
=
s
[
'blocksize'
]
-------
out_size
=
s
[
'nodes'
]
torch.Tensor
wfn
,
wsz
=
s
[
'weights'
]
The pytorch tensor of activations of specified layer.
bfn
,
bsz
=
s
[
'biases'
]
"""
if
in_size
*
out_size
!=
wsz
or
out_size
!=
bsz
:
y
=
aev
raise
ValueError
(
'bad parameter shape'
)
for
j
in
range
(
self
.
layers
-
1
):
layer
=
torch
.
nn
.
Linear
(
in_size
,
out_size
)
linear
=
getattr
(
self
,
'layer{}'
.
format
(
j
))
wfn
=
os
.
path
.
join
(
networ_dir
,
wfn
)
y
=
linear
(
y
)
bfn
=
os
.
path
.
join
(
networ_dir
,
bfn
)
y
=
self
.
activation
(
y
)
load_param_file
(
layer
,
in_size
,
out_size
,
wfn
,
bfn
)
if
j
==
layer
:
layers
.
append
(
layer
)
break
if
layer
>=
self
.
layers
-
1
:
# Activation defined in:
linear
=
getattr
(
self
,
'layer{}'
.
format
(
self
.
layers
-
1
))
# https://github.com/Jussmith01/NeuroChem/blob/master/src-atomicnnplib/cunetwork/cuannlayer_t.cu#L868
y
=
linear
(
y
)
activation
=
s
[
'activation'
]
return
y
if
activation
==
6
:
continue
def
forward
(
self
,
aev
):
elif
activation
==
5
:
# Gaussian
"""Compute output from aev
layers
.
append
(
Gaussian
())
elif
activation
==
9
:
# CELU
Parameters
layers
.
append
(
torch
.
nn
.
CELU
(
alpha
=
0.1
))
----------
else
:
aev : torch.Tensor
raise
NotImplementedError
(
The pytorch tensor of shape (conformations, aev_length) storing
'Unexpected activation {}'
.
format
(
activation
))
AEV as input to this model.
return
torch
.
nn
.
Sequential
(
*
layers
)
Returns
-------
torch.Tensor
def
load_model
(
species
,
from_
=
None
,
ensemble
=
False
):
The pytorch tensor of shape (conformations, output_length) for
"""If from_=None then ensemble must be a boolean. If ensemble=False,
output.
then use buildin network0, else use buildin network ensemble.
"""
If from_ != None, ensemble must be either False or an integer
return
self
.
get_activations
(
aev
,
math
.
inf
)
specifying the number of networks in the ensemble.
"""
if
from_
is
None
:
if
not
isinstance
(
ensemble
,
bool
):
raise
TypeError
(
'ensemble must be boolean'
)
if
ensemble
:
from_
=
buildin_model_prefix
ensemble
=
buildin_ensemble
else
:
from_
=
buildin_network_dir
else
:
if
not
(
ensemble
is
False
or
isinstance
(
ensemble
,
int
)):
raise
ValueError
(
'invalid argument ensemble'
)
def
load_single_model
(
from_
):
models
=
[]
for
i
in
species
:
filename
=
os
.
path
.
join
(
from_
,
'ANN-{}.nnf'
.
format
(
i
))
models
.
append
((
i
,
load_atomic_network
(
filename
)))
return
ANIModel
(
models
)
if
ensemble
is
False
:
return
load_single_model
(
from_
)
else
:
assert
isinstance
(
ensemble
,
int
)
models
=
[]
for
i
in
range
(
ensemble
):
network_dir
=
os
.
path
.
join
(
'{}{}'
.
format
(
from_
,
i
),
'networks'
)
models
.
append
(
load_single_model
(
network_dir
))
return
Ensemble
(
models
)
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment