Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
torchani
Commits
b83f824a
Commit
b83f824a
authored
Aug 12, 2019
by
Farhad Ramezanghorbani
Committed by
Gao, Xiang
Aug 12, 2019
Browse files
Discard outlier energy conformers (#287)
* outlier removal process fixed * remove outlier energies if exist
parent
0b3e26ee
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
15 additions
and
11 deletions
+15
-11
examples/nnp_training.py
examples/nnp_training.py
+1
-1
examples/nnp_training_force.py
examples/nnp_training_force.py
+2
-2
examples/nnp_training_ignite.py
examples/nnp_training_ignite.py
+1
-1
torchani/data/__init__.py
torchani/data/__init__.py
+9
-5
torchani/neurochem/__init__.py
torchani/neurochem/__init__.py
+2
-2
No files found.
examples/nnp_training.py
View file @
b83f824a
...
@@ -82,7 +82,7 @@ dspath = os.path.join(path, '../dataset/ani1-up_to_gdb4/ani_gdb_s01.h5')
...
@@ -82,7 +82,7 @@ dspath = os.path.join(path, '../dataset/ani1-up_to_gdb4/ani_gdb_s01.h5')
batch_size
=
2560
batch_size
=
2560
training
,
validation
=
torchani
.
data
.
load_ani_dataset
(
training
,
validation
=
torchani
.
data
.
load_ani_dataset
(
dspath
,
species_to_tensor
,
batch_size
,
device
=
device
,
dspath
,
species_to_tensor
,
batch_size
,
rm_outlier
=
True
,
device
=
device
,
transform
=
[
energy_shifter
.
subtract_from_dataset
],
split
=
[
0.8
,
None
])
transform
=
[
energy_shifter
.
subtract_from_dataset
],
split
=
[
0.8
,
None
])
print
(
'Self atomic energies: '
,
energy_shifter
.
self_energies
)
print
(
'Self atomic energies: '
,
energy_shifter
.
self_energies
)
...
...
examples/nnp_training_force.py
View file @
b83f824a
...
@@ -52,8 +52,8 @@ batch_size = 2560
...
@@ -52,8 +52,8 @@ batch_size = 2560
# from hdf5 files.
# from hdf5 files.
training
,
validation
=
torchani
.
data
.
load_ani_dataset
(
training
,
validation
=
torchani
.
data
.
load_ani_dataset
(
dspath
,
species_to_tensor
,
batch_size
,
device
=
device
,
dspath
,
species_to_tensor
,
batch_size
,
rm_outlier
=
True
,
atomic_properties
=
[
'forces'
],
device
=
device
,
atomic_properties
=
[
'forces'
],
transform
=
[
energy_shifter
.
subtract_from_dataset
],
split
=
[
0.8
,
None
])
transform
=
[
energy_shifter
.
subtract_from_dataset
],
split
=
[
0.8
,
None
])
print
(
'Self atomic energies: '
,
energy_shifter
.
self_energies
)
print
(
'Self atomic energies: '
,
energy_shifter
.
self_energies
)
...
...
examples/nnp_training_ignite.py
View file @
b83f824a
...
@@ -102,7 +102,7 @@ writer = torch.utils.tensorboard.SummaryWriter(log_dir=log)
...
@@ -102,7 +102,7 @@ writer = torch.utils.tensorboard.SummaryWriter(log_dir=log)
###############################################################################
###############################################################################
# Now load training and validation datasets into memory.
# Now load training and validation datasets into memory.
training
,
validation
=
torchani
.
data
.
load_ani_dataset
(
training
,
validation
=
torchani
.
data
.
load_ani_dataset
(
dspath
,
consts
.
species_to_tensor
,
batch_size
,
device
=
device
,
dspath
,
consts
.
species_to_tensor
,
batch_size
,
rm_outlier
=
True
,
device
=
device
,
transform
=
[
energy_shifter
.
subtract_from_dataset
],
split
=
[
0.8
,
None
])
transform
=
[
energy_shifter
.
subtract_from_dataset
],
split
=
[
0.8
,
None
])
###############################################################################
###############################################################################
...
...
torchani/data/__init__.py
View file @
b83f824a
...
@@ -309,23 +309,27 @@ def load_ani_dataset(path, species_tensor_converter, batch_size, shuffle=True,
...
@@ -309,23 +309,27 @@ def load_ani_dataset(path, species_tensor_converter, batch_size, shuffle=True,
atomic_properties_
,
properties_
=
t
(
atomic_properties_
,
properties_
)
atomic_properties_
,
properties_
=
t
(
atomic_properties_
,
properties_
)
if
rm_outlier
:
if
rm_outlier
:
# This is how NeuroChem discards outliers
transformed_energies
=
properties_
[
'energies'
]
transformed_energies
=
properties_
[
'energies'
]
num_atoms
=
(
atomic_properties_
[
'species'
]
>=
0
).
to
(
transformed_energies
.
dtype
).
sum
(
dim
=
1
)
num_atoms
=
(
atomic_properties_
[
'species'
]
>=
0
).
to
(
transformed_energies
.
dtype
).
sum
(
dim
=
1
)
scaled_diff
=
transformed_energies
/
num_atoms
.
sqrt
()
scaled_diff
=
transformed_energies
/
num_atoms
.
sqrt
()
mean
=
transformed_energies
.
mean
()
mean
=
scaled_diff
[
torch
.
abs
(
scaled_diff
)
<
15.0
].
mean
()
std
=
transformed_energies
.
std
()
std
=
torch
.
abs
(
scaled_diff
[
torch
.
abs
(
scaled_diff
)
<
15.0
]).
std
()
tol
=
15.0
*
std
+
mean
low_idx
=
(
torch
.
abs
(
scaled_diff
)
<
tol
).
nonzero
().
squeeze
()
# -15 * std - mean < scaled_diff < +11 * std + mean
tol
=
13.0
*
std
+
mean
low_idx
=
(
torch
.
abs
(
scaled_diff
+
2.0
*
std
)
<
tol
).
nonzero
().
squeeze
()
outlier_count
=
molecules
-
low_idx
.
numel
()
outlier_count
=
molecules
-
low_idx
.
numel
()
# discard outlier energy conformers if exist
# discard outlier energy conformers if exist
if
outlier_count
>
0
:
if
outlier_count
>
0
:
print
(
f
'
Note:
{
outlier_count
}
outlier energy conformers have been discarded from dataset
'
)
print
(
"
Note: {} outlier energy conformers have been discarded from dataset
"
.
format
(
outlier_count
)
)
for
key
,
val
in
atomic_properties_
.
items
():
for
key
,
val
in
atomic_properties_
.
items
():
atomic_properties_
[
key
]
=
val
[
low_idx
]
atomic_properties_
[
key
]
=
val
[
low_idx
]
for
key
,
val
in
properties_
.
items
():
for
key
,
val
in
properties_
.
items
():
properties_
[
key
]
=
val
[
low_idx
]
properties_
[
key
]
=
val
[
low_idx
]
molecules
=
low_idx
.
numel
()
# compute size of each subset
# compute size of each subset
split_
=
[]
split_
=
[]
...
...
torchani/neurochem/__init__.py
View file @
b83f824a
...
@@ -567,11 +567,11 @@ if sys.version_info[0] > 2:
...
@@ -567,11 +567,11 @@ if sys.version_info[0] > 2:
else
:
else
:
self
.
training_set
=
self
.
imports
.
load_ani_dataset
(
self
.
training_set
=
self
.
imports
.
load_ani_dataset
(
training_path
,
self
.
consts
.
species_to_tensor
,
training_path
,
self
.
consts
.
species_to_tensor
,
self
.
training_batch_size
,
device
=
self
.
device
,
self
.
training_batch_size
,
rm_outlier
=
True
,
device
=
self
.
device
,
transform
=
[
self
.
shift_energy
.
subtract_from_dataset
])
transform
=
[
self
.
shift_energy
.
subtract_from_dataset
])
self
.
validation_set
=
self
.
imports
.
load_ani_dataset
(
self
.
validation_set
=
self
.
imports
.
load_ani_dataset
(
validation_path
,
self
.
consts
.
species_to_tensor
,
validation_path
,
self
.
consts
.
species_to_tensor
,
self
.
validation_batch_size
,
device
=
self
.
device
,
self
.
validation_batch_size
,
rm_outlier
=
True
,
device
=
self
.
device
,
transform
=
[
self
.
shift_energy
.
subtract_from_dataset
])
transform
=
[
self
.
shift_energy
.
subtract_from_dataset
])
def
evaluate
(
self
,
dataset
):
def
evaluate
(
self
,
dataset
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment