# -*- coding: utf-8 -*-
"""
.. _neurochem-training:

Train Neural Network Potential From NeuroChem Input File
========================================================

This example shows how to use TorchANI's NeuroChem trainer to read and run
NeuroChem's training config file to train a neural network potential.
"""

###############################################################################
# To begin with, let's first import the modules we will use:
import torchani
import torch
import os
import sys
import tqdm

###############################################################################
# Now let's set up the paths for the dataset and the NeuroChem input file.
# Note that these paths assume the user runs this script under the
# ``examples`` directory of TorchANI's repository. If you download this
# script, you should manually set the paths of these files on your system
# before this script can run successfully. Also note that here, for demo
# purposes, we set both the training set and the validation set to the
# ``ani_gdb_s01.h5`` in TorchANI's repository. This allows this program to
# finish very quickly, because that dataset is very small. But this is wrong
# and should be avoided for any serious training.

# Locate the directory that holds this script. When ``__file__`` is not
# defined (the ``NameError`` case), fall back to the current working
# directory so the relative paths below are still anchored somewhere.
try:
    path = os.path.dirname(os.path.realpath(__file__))
except NameError:
    path = os.getcwd()
cfg_path = os.path.join(path, '../tests/test_data/inputtrain.ipt')
# Training and validation deliberately point at the same small dataset here.
training_path = os.path.join(path, '../dataset/ani1-up_to_gdb4/ani_gdb_s01.h5')  # noqa: E501
validation_path = os.path.join(path, '../dataset/ani1-up_to_gdb4/ani_gdb_s01.h5')  # noqa: E501

###############################################################################
# We also need to set the device to run the training:
# Prefer CUDA when torch reports it as available; otherwise use the CPU.
if torch.cuda.is_available():
    device_str = 'cuda'
else:
    device_str = 'cpu'
device = torch.device(device_str)


# Build the trainer from the NeuroChem config file, then hand it the training
# and validation dataset paths.
# NOTE(review): the positional ``True`` and ``'runs'`` presumably correspond
# to the ``--tqdm`` and ``--tensorboard runs`` options of the command-line
# form used later in this script -- confirm against ``Trainer``'s signature.
trainer = torchani.neurochem.Trainer(
    cfg_path,
    device,
    True,
    'runs',
)
trainer.load_data(training_path, validation_path)


###############################################################################
# Once everything is set up, running NeuroChem is very easy. We simply need to
# call ``trainer.run()``. But here, in order for sphinx-gallery to be able to
# capture the output of tqdm, let's do some hacking first to make tqdm print
# its progress bar to stdout.
def my_tqdm(*args, **kwargs):
    """Create a ``tqdm.tqdm`` progress bar that writes to ``sys.stdout``.

    All positional and keyword arguments are forwarded to ``tqdm.tqdm``
    unchanged; only the ``file`` argument is fixed to ``sys.stdout``.
    """
    progress_bar = tqdm.tqdm(*args, file=sys.stdout, **kwargs)
    return progress_bar


# Swap the trainer's ``tqdm`` attribute for the stdout-writing wrapper.
trainer.tqdm = my_tqdm

###############################################################################
# Now, let's go!
# Run the trainer that was configured above.
trainer.run()


###############################################################################
# Alternatively, you can run the NeuroChem trainer directly from the command
# line. There is no need for programming. Just run
# ``python -m torchani.neurochem.trainer -h`` for usage help. For this demo,
# the equivalent command is:
# Argument vector for the command-line trainer, one logical option per line.
cmd = [
    'python', '-m', 'torchani.neurochem.trainer',
    '-d', device_str,
    '--tqdm',
    '--tensorboard', 'runs',
    cfg_path,
    training_path,
    validation_path,
]
# Show the command as a single space-separated line.
command_line = ' '.join(cmd)
print(command_line)

###############################################################################
# Now let's invoke this command to see what we get. Again, we capture the
# command's stderr and print it to stdout simply so that sphinx-gallery is
# able to capture it when generating this document:
from subprocess import Popen, PIPE  # noqa: E402
# Run the command with its stderr piped back to this process. Using Popen as
# a context manager closes the pipe and waits for the child to exit once its
# output has been read, instead of leaving the process unreaped.
with Popen(cmd, stderr=PIPE) as process:
    print(process.stderr.read().decode('utf-8'))