# training-benchmark-nsys-profile.py
# Runs TorchANI training batches with NVTX ranges so they can be profiled with nsys.
import torch
import torchani
import argparse
import pkbar
from torchani.units import hartree2kcalmol


# Number of batches processed before the timers and the CUDA profiler are switched on.
WARM_UP_BATCHES = 50
# Batch budget for the profiled phase; used in the training loop's stop check.
PROFILE_BATCHES = 10


def atomic():
    """Build one fully connected network: 384 -> 128 -> 128 -> 64 -> 1.

    Every linear layer except the last is followed by ``CELU(0.1)``.

    Returns:
        torch.nn.Sequential: the freshly initialised stack of layers.
    """
    widths = (384, 128, 128, 64, 1)
    last_index = len(widths) - 2

    layers = []
    for index, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
        layers.append(torch.nn.Linear(fan_in, fan_out))
        # The output layer stays linear; all hidden layers get an activation.
        if index != last_index:
            layers.append(torch.nn.CELU(0.1))

    return torch.nn.Sequential(*layers)


def time_func(key, func):
    """Return a wrapper that runs ``func`` inside an NVTX range named ``key``.

    Args:
        key: label passed to ``torch.cuda.nvtx.range_push``.
        func: callable to wrap; it is invoked with the wrapper's own
            positional and keyword arguments.

    Returns:
        A callable with the same call signature and return value as ``func``.
        Exceptions raised by ``func`` propagate unchanged.
    """
    # Local import so this block does not depend on a file-level import.
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        torch.cuda.nvtx.range_push(key)
        try:
            return func(*args, **kwargs)
        finally:
            # Pop even when ``func`` raises, otherwise the NVTX range stack
            # stays unbalanced and later ranges nest under this one.
            torch.cuda.nvtx.range_pop()

    return wrapper


def enable_timers(model):
    """Wrap the AEV helper functions and the network forward in NVTX ranges.

    Each listed attribute of ``torchani.aev`` is replaced in place by a
    ``time_func`` wrapper labelled with the attribute's own name, and
    ``model[1].forward`` is wrapped under the label ``'nn forward'``.

    Args:
        model: indexable container whose element ``1`` exposes ``forward``.
    """
    aev_module = torchani.aev
    traced_names = (
        'cutoff_cosine',
        'radial_terms',
        'angular_terms',
        'compute_shifts',
        'neighbor_pairs',
        'triu_index',
        'cumsum_from_zero',
        'triple_by_molecule',
        'compute_aev',
    )
    for attr in traced_names:
        setattr(aev_module, attr, time_func(attr, getattr(aev_module, attr)))

    network = model[1]
    network.forward = time_func('nn forward', network.forward)


if __name__ == "__main__":
    # Script entry point: build the model, load the dataset through the
    # selected API, run warm-up batches, then run profiled batches with NVTX
    # ranges around each batch, chunk, backward pass and optimizer step.

    # parse command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset_path',
                        help='Path of the dataset, can be a hdf5 file '
                             'or a directory containing hdf5 files')
    parser.add_argument('-b', '--batch_size',
                        help='Number of conformations of each batch',
                        default=2560, type=int)
    parser.add_argument('-o', '--original_dataset_api',
                        help='use original dataset api',
                        dest='dataset',
                        action='store_const',
                        const='original')
    parser.add_argument('-s', '--shuffle_dataset_api',
                        help='use shuffle dataset api',
                        dest='dataset',
                        action='store_const',
                        const='shuffle')
    parser.add_argument('-c', '--cache_dataset_api',
                        help='use cache dataset api',
                        dest='dataset',
                        action='store_const',
                        const='cache')
    parser.set_defaults(dataset='shuffle')
    # Keep the parsed namespace separate from the parser object.
    args = parser.parse_args()
    # The script calls the CUDA profiler API, so the device is always CUDA.
    args.device = torch.device('cuda')

    # Parameters handed to the AEV computer.
    Rcr = 5.2000e+00
    Rca = 3.5000e+00
    EtaR = torch.tensor([1.6000000e+01], device=args.device)
    ShfR = torch.tensor([9.0000000e-01, 1.1687500e+00, 1.4375000e+00, 1.7062500e+00, 1.9750000e+00, 2.2437500e+00, 2.5125000e+00, 2.7812500e+00, 3.0500000e+00, 3.3187500e+00, 3.5875000e+00, 3.8562500e+00, 4.1250000e+00, 4.3937500e+00, 4.6625000e+00, 4.9312500e+00], device=args.device)
    Zeta = torch.tensor([3.2000000e+01], device=args.device)
    ShfZ = torch.tensor([1.9634954e-01, 5.8904862e-01, 9.8174770e-01, 1.3744468e+00, 1.7671459e+00, 2.1598449e+00, 2.5525440e+00, 2.9452431e+00], device=args.device)
    EtaA = torch.tensor([8.0000000e+00], device=args.device)
    ShfA = torch.tensor([9.0000000e-01, 1.5500000e+00, 2.2000000e+00, 2.8500000e+00], device=args.device)
    num_species = 4
    aev_computer = torchani.AEVComputer(Rcr, Rca, EtaR, ShfR, EtaA, Zeta, ShfA, ShfZ, num_species)

    # One atomic network per species; model[0] is the AEV computer and
    # model[1] is the network container that enable_timers() wraps.
    nn = torchani.ANIModel([atomic() for _ in range(4)])
    model = torch.nn.Sequential(aev_computer, nn).to(args.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.000001)
    # reduction='none' keeps per-sample errors so they can be scaled below.
    mse = torch.nn.MSELoss(reduction='none')

    if args.dataset == 'shuffle':
        print('using shuffle dataset API')
        print('=> loading dataset...')
        dataset = torchani.data.ShuffledDataset(file_path=args.dataset_path,
                                                species_order=['H', 'C', 'N', 'O'],
                                                subtract_self_energies=True,
                                                batch_size=args.batch_size,
                                                num_workers=2)
        print('=> the first batch is ([chunk1, chunk2, ...], {"energies", "force", ...}) in which chunk1=(species, coordinates)')
        # Python 3 iterators have no .next() method; use the builtin.
        chunks, properties = next(iter(dataset))
    elif args.dataset == 'original':
        print('using original dataset API')
        print('=> loading dataset...')
        energy_shifter = torchani.utils.EnergyShifter(None)
        species_to_tensor = torchani.utils.ChemicalSymbolsToInts('HCNO')
        dataset = torchani.data.load_ani_dataset(args.dataset_path, species_to_tensor,
                                                 args.batch_size, device=args.device,
                                                 transform=[energy_shifter.subtract_from_dataset])
        print('=> the first batch is ([chunk1, chunk2, ...], {"energies", "force", ...}) in which chunk1=(species, coordinates)')
        chunks, properties = dataset[0]
    elif args.dataset == 'cache':
        print('using cache dataset API')
        print('=> loading dataset...')
        dataset = torchani.data.CachedDataset(file_path=args.dataset_path,
                                              species_order=['H', 'C', 'N', 'O'],
                                              subtract_self_energies=True,
                                              batch_size=args.batch_size)
        print('=> caching all dataset into cpu')
        pbar = pkbar.Pbar('loading and processing dataset into cpu memory, total '
                          + 'batches: {}, batch_size: {}'.format(len(dataset), args.batch_size),
                          len(dataset))
        # Iterate once over the whole dataset; only the progress bar is updated here.
        for i, _ in enumerate(dataset):
            pbar.update(i)
        print('=> the first batch is ([chunk1, chunk2, ...], {"energies", "force", ...}) in which chunk1=(species, coordinates)')
        chunks, properties = dataset[0]

    # Report the tensor sizes of the first batch.
    for i, chunk in enumerate(chunks):
        print('chunk{}'.format(i + 1), list(chunk[0].size()), list(chunk[1].size()))
    print('energies', list(properties['energies'].size()))

    print('=> start warming up')
    total_batch_counter = 0
    # Total number of batches to run: warm-up first, then the profiled ones.
    stop_after = WARM_UP_BATCHES + PROFILE_BATCHES
    # A non-empty dataset yields at least one batch per epoch, so this many
    # epochs always suffice to reach ``stop_after`` batches.
    for epoch in range(stop_after):

        print('Epoch: %d/inf' % (epoch + 1,))
        progbar = pkbar.Kbar(target=len(dataset) - 1, width=8)

        for i, (batch_x, batch_y) in enumerate(dataset):

            if total_batch_counter == WARM_UP_BATCHES:
                print('=> warm up finished, start profiling')
                enable_timers(model)
                torch.cuda.cudart().cudaProfilerStart()

            PROFILING_STARTED = (total_batch_counter >= WARM_UP_BATCHES)

            if PROFILING_STARTED:
                torch.cuda.nvtx.range_push("batch{}".format(total_batch_counter))

            # Clear gradients left over from the previous batch so they do
            # not accumulate across optimizer steps.
            optimizer.zero_grad()

            true_energies = batch_y['energies'].to(args.device)
            predicted_energies = []
            num_atoms = []

            for j, (chunk_species, chunk_coordinates) in enumerate(batch_x):
                if PROFILING_STARTED:
                    torch.cuda.nvtx.range_push("chunk{}".format(j))
                chunk_species = chunk_species.to(args.device)
                chunk_coordinates = chunk_coordinates.to(args.device)
                # Count the entries with a non-negative species index per row.
                num_atoms.append((chunk_species >= 0).to(true_energies.dtype).sum(dim=1))
                with torch.autograd.profiler.emit_nvtx(enabled=PROFILING_STARTED, record_shapes=True):
                    _, chunk_energies = model((chunk_species, chunk_coordinates))
                predicted_energies.append(chunk_energies)
                if PROFILING_STARTED:
                    torch.cuda.nvtx.range_pop()

            num_atoms = torch.cat(num_atoms)
            predicted_energies = torch.cat(predicted_energies).to(true_energies.dtype)
            # Per-sample squared error scaled by 1/sqrt(num_atoms), then averaged.
            loss = (mse(predicted_energies, true_energies) / num_atoms.sqrt()).mean()
            # Value shown on the progress bar, converted with hartree2kcalmol.
            rmse = hartree2kcalmol((mse(predicted_energies, true_energies)).mean()).detach().cpu().numpy()

            if PROFILING_STARTED:
                torch.cuda.nvtx.range_push("backward")
            with torch.autograd.profiler.emit_nvtx(enabled=PROFILING_STARTED, record_shapes=True):
                loss.backward()
            if PROFILING_STARTED:
                torch.cuda.nvtx.range_pop()

            if PROFILING_STARTED:
                torch.cuda.nvtx.range_push("optimizer.step()")
            with torch.autograd.profiler.emit_nvtx(enabled=PROFILING_STARTED, record_shapes=True):
                optimizer.step()
            if PROFILING_STARTED:
                torch.cuda.nvtx.range_pop()

            progbar.update(i, values=[("rmse", rmse)])

            if PROFILING_STARTED:
                # Closes the "batch{N}" range opened at the top of the loop.
                torch.cuda.nvtx.range_pop()

            total_batch_counter += 1
            # ``>=`` so that exactly PROFILE_BATCHES batches are profiled.
            if total_batch_counter >= stop_after:
                break

        if total_batch_counter >= stop_after:
            print('=> profiling terminate after {} batches'.format(PROFILE_BATCHES))
            break