OpenDAS / torchani · Commits · 57dd26bf

Unverified commit 57dd26bf, authored Nov 17, 2020 by Ignacio Pickering, committed by GitHub on Nov 17, 2020

Add code snippet to save validation and training sets in nnp_training (#548)
parent 12422dd1

Showing 2 changed files with 27 additions and 3 deletions (+27 -3)
.gitignore                +1  -0
examples/nnp_training.py  +26 -3
.gitignore

@@ -39,3 +39,4 @@ Untitled.ipynb
 htmlcov/
 /include
 training_outputs/
+examples/dataset.pkl
examples/nnp_training.py

@@ -39,6 +39,7 @@ import os
 import math
 import torch.utils.tensorboard
 import tqdm
+import pickle

 # helper function to convert energy unit from Hartree to kcal/mol
 from torchani.units import hartree2kcalmol

@@ -95,9 +96,31 @@ except NameError:
 dspath = os.path.join(path, '../dataset/ani1-up_to_gdb4/ani_gdb_s01.h5')
 batch_size = 2560

-training, validation = torchani.data.load(dspath).subtract_self_energies(energy_shifter, species_order).species_to_indices(species_order).shuffle().split(0.8, None)
-training = training.collate(batch_size).cache()
-validation = validation.collate(batch_size).cache()
+pickled_dataset_path = 'dataset.pkl'
+
+# We pickle the dataset after loading to ensure we use the same validation set
+# each time we restart training, otherwise we risk mixing the validation and
+# training sets on each restart.
+if os.path.isfile(pickled_dataset_path):
+    print(f'Unpickling preprocessed dataset found in {pickled_dataset_path}')
+    with open(pickled_dataset_path, 'rb') as f:
+        dataset = pickle.load(f)
+    training = dataset['training'].collate(batch_size).cache()
+    validation = dataset['validation'].collate(batch_size).cache()
+    energy_shifter.self_energies = dataset['self_energies'].to(device)
+else:
+    print(f'Processing dataset in {dspath}')
+    training, validation = torchani.data.load(dspath)\
+        .subtract_self_energies(energy_shifter, species_order)\
+        .species_to_indices(species_order)\
+        .shuffle()\
+        .split(0.8, None)
+    with open(pickled_dataset_path, 'wb') as f:
+        pickle.dump({'training': training,
+                     'validation': validation,
+                     'self_energies': energy_shifter.self_energies.cpu()}, f)
+    training = training.collate(batch_size).cache()
+    validation = validation.collate(batch_size).cache()

 print('Self atomic energies: ', energy_shifter.self_energies)

 ###############################################################################
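A quick way to verify what the new snippet writes to disk is to read dataset.pkl back in a standalone script. The sketch below assumes only what the diff shows: the file holds a dict with 'training', 'validation', and 'self_energies' keys, and the two splits are torchani data objects, so torchani must be installed for pickle to reconstruct them. The path is the one the example uses; adjust it if the example was run from another directory.

import pickle

# Same path the example uses; adjust if it was run from another directory.
pickled_dataset_path = 'dataset.pkl'

# torchani must be installed: pickle imports it behind the scenes to
# reconstruct the training/validation split objects stored in the file.
with open(pickled_dataset_path, 'rb') as f:
    dataset = pickle.load(f)

# The dict layout matches the pickle.dump call in the diff above.
print('keys:', sorted(dataset.keys()))
print('self energies:', dataset['self_energies'])

Note that the example stores energy_shifter.self_energies with .cpu(), so the tensor loads on any machine; on unpickling, the example moves it back to the training device with .to(device).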