# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Benchmarking the library on inference and training """

# If checking the tensors placement
# tf.debugging.set_log_device_placement(True)
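
# Example invocation (all flags are defined in main() below):
#   python benchmarks.py --torch --torch_cuda --models "gpt2 distilbert-base-uncased" \
#       --batch_sizes 1 8 --slice_sizes 64 512 --save_to_csv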

import argparse
import csv
import logging
import timeit
from time import time
from typing import Callable, List

from transformers import (
    AutoConfig,
    AutoTokenizer,
    MemorySummary,
    is_tf_available,
    is_torch_available,
    start_memory_tracing,
    stop_memory_tracing,
)


if is_tf_available():
    import tensorflow as tf
    from transformers import TFAutoModel

if is_torch_available():
    import torch
    from transformers import AutoModel


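# Benchmark input: the opening of Aldous Huxley's "Brave New World", long enough
# to be sliced to every tested sequence length.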
input_text = """Bent over their instruments, three hundred Fertilizers were plunged, as
the Director of Hatcheries and Conditioning entered the room, in the
scarcely breathing silence, the absent-minded, soliloquizing hum or
whistle, of absorbed concentration. A troop of newly arrived students,
very young, pink and callow, followed nervously, rather abjectly, at the
Director's heels. Each of them carried a notebook, in which, whenever
the great man spoke, he desperately scribbled. Straight from the
horse's mouth. It was a rare privilege. The D. H. C. for Central London
always made a point of personally conducting his new students round
the various departments.

"Just to give you a general idea," he would explain to them. For of
course some sort of general idea they must have, if they were to do
their work intelligently-though as little of one, if they were to be good
and happy members of society, as possible. For particulars, as every
one knows, make for virtue and happiness; generalities are intellectu-
ally necessary evils. Not philosophers but fret-sawyers and stamp col-
lectors compose the backbone of society.

"To-morrow," he would add, smiling at them with a slightly menacing
geniality, "you'll be settling down to serious work. You won't have time
for generalities. Meanwhile ..."

Meanwhile, it was a privilege. Straight from the horse's mouth into the
notebook. The boys scribbled like mad.

Tall and rather thin but upright, the Director advanced into the room.
He had a long chin and big rather prominent teeth, just covered, when
he was not talking, by his full, floridly curved lips. Old, young? Thirty?
Fifty? Fifty-five? It was hard to say. And anyhow the question didn't
arise; in this year of stability, A. F. 632, it didn't occur to you to ask it.

"I shall begin at the beginning," said the D.H.C. and the more zealous
students recorded his intention in their notebooks: Begin at the begin-
ning. "These," he waved his hand, "are the incubators." And opening
an insulated door he showed them racks upon racks of numbered test-
tubes. "The week's supply of ova. Kept," he explained, "at blood heat;
whereas the male gametes," and here he opened another door, "they
have to be kept at thirty-five instead of thirty-seven. Full blood heat
sterilizes." Rams wrapped in theremogene beget no lambs.

Still leaning against the incubators he gave them, while the pencils
scurried illegibly across the pages, a brief description of the modern



fertilizing process; spoke first, of course, of its surgical introduc-
tion-"the operation undergone voluntarily for the good of Society, not
to mention the fact that it carries a bonus amounting to six months'
salary"; continued with some account of the technique for preserving
the excised ovary alive and actively developing; passed on to a consid-
eration of optimum temperature, salinity, viscosity; referred to the liq-
uor in which the detached and ripened eggs were kept; and, leading
his charges to the work tables, actually showed them how this liquor
was drawn off from the test-tubes; how it was let out drop by drop
onto the specially warmed slides of the microscopes; how the eggs
which it contained were inspected for abnormalities, counted and
transferred to a porous receptacle; how (and he now took them to
watch the operation) this receptacle was immersed in a warm bouillon
containing free-swimming spermatozoa-at a minimum concentration
of one hundred thousand per cubic centimetre, he insisted; and how,
after ten minutes, the container was lifted out of the liquor and its
contents re-examined; how, if any of the eggs remained unfertilized, it
was again immersed, and, if necessary, yet again; how the fertilized
ova went back to the incubators; where the Alphas and Betas re-
mained until definitely bottled; while the Gammas, Deltas and Epsilons
were brought out again, after only thirty-six hours, to undergo Bo-
kanovsky's Process.

"Bokanovsky's Process," repeated the Director, and the students un-
derlined the words in their little notebooks.

One egg, one embryo, one adult-normality. But a bokanovskified egg
will bud, will proliferate, will divide. From eight to ninety-six buds, and
every bud will grow into a perfectly formed embryo, and every embryo
into a full-sized adult. Making ninety-six human beings grow where
only one grew before. Progress.

"Essentially," the D.H.C. concluded, "bokanovskification consists of a
series of arrests of development. We check the normal growth and,
paradoxically enough, the egg responds by budding."

Responds by budding. The pencils were busy.

He pointed. On a very slowly moving band a rack-full of test-tubes was
entering a large metal box, another, rack-full was emerging. Machinery
faintly purred. It took eight minutes for the tubes to go through, he



told them. Eight minutes of hard X-rays being about as much as an
egg can stand. A few died; of the rest, the least susceptible divided
into two; most put out four buds; some eight; all were returned to the
incubators, where the buds began to develop; then, after two days,
were suddenly chilled, chilled and checked. Two, four, eight, the buds
in their turn budded; and having budded were dosed almost to death
with alcohol; consequently burgeoned again and having budded-bud
out of bud out of bud-were thereafter-further arrest being generally
fatal-left to develop in peace. By which time the original egg was in a
fair way to becoming anything from eight to ninety-six embryos- a
prodigious improvement, you will agree, on nature. Identical twins-but
not in piddling twos and threes as in the old viviparous days, when an
egg would sometimes accidentally divide; actually by dozens, by
scores at a time.

"Scores," the Director repeated and flung out his arms, as though he
were distributing largesse. "Scores."

But one of the students was fool enough to ask where the advantage
lay.

"My good boy!" The Director wheeled sharply round on him. "Can't you
see? Can't you see?" He raised a hand; his expression was solemn.
"Bokanovsky's Process is one of the major instruments of social stabil-
ity!"

Major instruments of social stability.

Standard men and women; in uniform batches. The whole of a small
factory staffed with the products of a single bokanovskified egg.

"Ninety-six identical twins working ninety-six identical machines!" The
voice was almost tremulous with enthusiasm. "You really know where
you are. For the first time in history." He quoted the planetary motto.
"Community, Identity, Stability." Grand words. "If we could bo-
kanovskify indefinitely the whole problem would be solved."

Solved by standard Gammas, unvarying Deltas, uniform Epsilons. Mil-
lions of identical twins. The principle of mass production at last applied
to biology.



"But, alas," the Director shook his head, "we can't bokanovskify indefi-
nitely."

Ninety-six seemed to be the limit; seventy-two a good average. From
the same ovary and with gametes of the same male to manufacture as
many batches of identical twins as possible-that was the best (sadly a
second best) that they could do. And even that was difficult.

"For in nature it takes thirty years for two hundred eggs to reach ma-
turity. But our business is to stabilize the population at this moment,
here and now. Dribbling out twins over a quarter of a century-what
would be the use of that?"

Obviously, no use at all. But Podsnap's Technique had immensely ac-
celerated the process of ripening. They could make sure of at least a
hundred and fifty mature eggs within two years. Fertilize and bo-
kanovskify-in other words, multiply by seventy-two-and you get an
average of nearly eleven thousand brothers and sisters in a hundred
and fifty batches of identical twins, all within two years of the same
age.

"And in exceptional cases we can make one ovary yield us over fifteen
thousand adult individuals."

Beckoning to a fair-haired, ruddy young man who happened to be
passing at the moment. "Mr. Foster," he called. The ruddy young man
approached. "Can you tell us the record for a single ovary, Mr. Foster?"

"Sixteen thousand and twelve in this Centre," Mr. Foster replied with-
out hesitation. He spoke very quickly, had a vivacious blue eye, and
took an evident pleasure in quoting figures. "Sixteen thousand and
twelve; in one hundred and eighty-nine batches of identicals. But of
course they've done much better," he rattled on, "in some of the tropi-
cal Centres. Singapore has often produced over sixteen thousand five
hundred; and Mombasa has actually touched the seventeen thousand
mark. But then they have unfair advantages. You should see the way a
negro ovary responds to pituitary! It's quite astonishing, when you're
used to working with European material. Still," he added, with a laugh
(but the light of combat was in his eyes and the lift of his chin was
challenging), "still, we mean to beat them if we can. I'm working on a
wonderful Delta-Minus ovary at this moment. Only just eighteen



months old. Over twelve thousand seven hundred children already, ei-
ther decanted or in embryo. And still going strong. We'll beat them
yet."

"That's the spirit I like!" cried the Director, and clapped Mr. Foster on
the shoulder. "Come along with us, and give these boys the benefit of
your expert knowledge."

Mr. Foster smiled modestly. "With pleasure." They went.
In the Bottling Room all was harmonious bustle and ordered activity.
Flaps of fresh sow's peritoneum ready cut to the proper size came
shooting up in little lifts from the Organ Store in the sub-basement.
Whizz and then, click! the lift-hatches hew open; the bottle-liner had
only to reach out a hand, take the flap, insert, smooth-down, and be-
fore the lined bottle had had time to travel out of reach along the end-
less band, whizz, click! another flap of peritoneum had shot up from
the depths, ready to be slipped into yet another bottle, the next of that
slow interminable procession on the band.

Next to the Liners stood the Matriculators. The procession advanced;
one by one the eggs were transferred from their test-tubes to the
larger containers; deftly the peritoneal lining was slit, the morula
dropped into place, the saline solution poured in ... and already the
bottle had passed, and it was the turn of the labellers. Heredity, date
of fertilization, membership of Bokanovsky Group-details were trans-
ferred from test-tube to bottle. No longer anonymous, but named,
identified, the procession marched slowly on; on through an opening in
the wall, slowly on into the Social Predestination Room.
"Eighty-eight cubic metres of card-index," said Mr. Foster with relish,
as they entered."""


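# Shared entry point for both backends: fills a nested results dictionary keyed
# by model name, batch size and slice (sequence) length, prints a summary, and
# optionally writes the time and memory measurements to CSV.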
def create_setup_and_compute(
    model_names: List[str],
    batch_sizes: List[int],
    slice_sizes: List[int],
    gpu: bool = True,
    tensorflow: bool = False,
    average_over: int = 3,
    no_speed: bool = False,
    no_memory: bool = False,
    verbose: bool = False,
    torchscript: bool = False,
    xla: bool = False,
    amp: bool = False,
    fp16: bool = False,
    save_to_csv: bool = False,
    csv_time_filename: str = f"time_{round(time())}.csv",
    csv_memory_filename: str = f"memory_{round(time())}.csv",
    print_fn: Callable[[str], None] = print,
):
    if xla:
        tf.config.optimizer.set_jit(True)
    if amp:
        tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})

    if tensorflow:
        dictionary = {model_name: {} for model_name in model_names}
        results = _compute_tensorflow(
            model_names,
            batch_sizes,
            slice_sizes,
            dictionary,
            average_over,
            amp,
            no_speed,
            no_memory,
            verbose,
            print_fn,
        )
    else:
        device = "cuda" if (gpu and torch.cuda.is_available()) else "cpu"
        dictionary = {model_name: {} for model_name in model_names}
        results = _compute_pytorch(
            model_names,
            batch_sizes,
            slice_sizes,
            dictionary,
            average_over,
            device,
            torchscript,
            fp16,
            no_speed,
            no_memory,
            verbose,
            print_fn,
        )

    print_fn("=========== RESULTS ===========")
    for model_name in model_names:
        print_fn(f"\t======= MODEL CHECKPOINT: {model_name} =======")
        for batch_size in results[model_name]["bs"]:
            print_fn(f"\t\t===== BATCH SIZE: {batch_size} =====")
            for slice_size in results[model_name]["ss"]:
                # Named time_taken to avoid shadowing the imported `time` function
                time_taken = results[model_name]["time"][batch_size][slice_size]
                memory = results[model_name]["memory"][batch_size][slice_size]
                if isinstance(time_taken, str):
                    print_fn(f"\t\t{model_name}/{batch_size}/{slice_size}: {time_taken} {memory}")
                else:
                    print_fn(f"\t\t{model_name}/{batch_size}/{slice_size}: {round(1000 * time_taken) / 1000}s {memory}")

    if save_to_csv:
        with open(csv_time_filename, mode="w") as csv_time_file, open(
            csv_memory_filename, mode="w"
        ) as csv_memory_file:

            assert len(model_names) > 0, "At least 1 model should be defined, but got {}".format(model_names)

            fieldnames = ["model", "batch_size", "sequence_length"]
            time_writer = csv.DictWriter(csv_time_file, fieldnames=fieldnames + ["time_in_s"])
            time_writer.writeheader()
            memory_writer = csv.DictWriter(csv_memory_file, fieldnames=fieldnames + ["memory"])
            memory_writer.writeheader()

            for model_name in model_names:
                time_dict = results[model_name]["time"]
                memory_dict = results[model_name]["memory"]
                for bs in time_dict:
                    for ss in time_dict[bs]:
                        # Skipped or OOM configurations hold the string "N/A" instead of a float
                        time_taken = time_dict[bs][ss]
                        time_writer.writerow(
                            {
                                "model": model_name,
                                "batch_size": bs,
                                "sequence_length": ss,
                                "time_in_s": time_taken if isinstance(time_taken, str) else "{:.4f}".format(time_taken),
                            }
                        )

                for bs in memory_dict:
                    for ss in memory_dict[bs]:
                        memory_writer.writerow(
                            {
                                "model": model_name,
                                "batch_size": bs,
                                "sequence_length": ss,
                                "memory": memory_dict[bs][ss],
                            }
                        )


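# Pretty-print a MemorySummary produced by stop_memory_tracing: the full
# line-by-line trace, the six most and least memory-hungry lines, and the total
# memory increase.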
def print_summary_statistics(summary: MemorySummary, print_fn: Callable[[str], None]):
    print_fn(
        "\nLine by line memory consumption:\n"
        + "\n".join(
            f"{state.frame.filename}:{state.frame.line_number}: mem {state.cpu_gpu}: {state.frame.line_text}"
            for state in summary.sequential
        )
    )
    print_fn(
        "\nLines with top memory consumption:\n"
        + "\n".join(
            f"=> {state.frame.filename}:{state.frame.line_number}: mem {state.cpu_gpu}: {state.frame.line_text}"
            for state in summary.cumulative[:6]
        )
    )
    print_fn(
        "\nLines with lowest memory consumption:\n"
        + "\n".join(
            f"=> {state.frame.filename}:{state.frame.line_number}: mem {state.cpu_gpu}: {state.frame.line_text}"
            for state in summary.cumulative[-6:]
        )
    )
    print_fn(f"\nTotal memory increase: {summary.total}")


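# Build the print function used throughout the run; with --log_print, every
# message is also appended to the log file through the logging module.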
def get_print_function(save_print_log, log_filename):
    if save_print_log:
        logging.basicConfig(
            level=logging.DEBUG,
            filename=log_filename,
            filemode="a+",
            format="%(asctime)-15s %(levelname)-8s %(message)s",
        )

        def print_with_print_log(*args):
            logging.info(*args)
            print(*args)

        return print_with_print_log
    else:
        return print


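# PyTorch benchmark loop: for every (model, batch size, slice size) combination,
# optionally trace the model with TorchScript, trace memory line by line with
# start/stop_memory_tracing, and time the forward pass with timeit.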
def _compute_pytorch(
    model_names,
    batch_sizes,
    slice_sizes,
    dictionary,
    average_over,
    device,
    torchscript,
    fp16,
    no_speed,
    no_memory,
    verbose,
    print_fn,
):
    for c, model_name in enumerate(model_names):
        print_fn(f"{c + 1} / {len(model_names)}")
        config = AutoConfig.from_pretrained(model_name, torchscript=torchscript)
        model = AutoModel.from_pretrained(model_name, config=config)
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        tokenized_sequence = tokenizer.encode(input_text, add_special_tokens=False)

        # .get() returns None for checkpoints without a registered maximum input size
        max_input_size = tokenizer.max_model_input_sizes.get(model_name)

        dictionary[model_name] = {"bs": batch_sizes, "ss": slice_sizes, "time": {}, "memory": {}}
        dictionary[model_name]["time"] = {i: {} for i in batch_sizes}
        dictionary[model_name]["memory"] = {i: {} for i in batch_sizes}

        print_fn("Using model {}".format(model))
        print_fn("Number of all parameters {}".format(model.num_parameters()))

        for batch_size in batch_sizes:
            if fp16:
                model.half()
            model.to(device)
            model.eval()

            for slice_size in slice_sizes:
                if max_input_size is not None and slice_size > max_input_size:
                    dictionary[model_name]["time"][batch_size][slice_size] = "N/A"
                    dictionary[model_name]["memory"][batch_size][slice_size] = "N/A"
                else:
                    sequence = torch.tensor(tokenized_sequence[:slice_size], device=device).repeat(batch_size, 1)
                    try:
                        if torchscript:
                            print_fn("Tracing model with sequence size {}".format(sequence.shape))
                            inference = torch.jit.trace(model, sequence)
                            inference(sequence)
                        else:
                            inference = model
                            inference(sequence)

                        if not no_memory:
                            # model.add_memory_hooks()  # Forward method tracing (only for PyTorch models)

                            # Line by line memory tracing (all code in the module `transformers`) works for all models/arbitrary code
                            trace = start_memory_tracing("transformers")
                            inference(sequence)
                            summary = stop_memory_tracing(trace)

                            if verbose:
                                print_summary_statistics(summary, print_fn)

                            dictionary[model_name]["memory"][batch_size][slice_size] = str(summary.total)
                        else:
                            dictionary[model_name]["memory"][batch_size][slice_size] = "N/A"

                        if not no_speed:
                            print_fn("Going through model with sequence of shape {}".format(sequence.shape))
                            runtimes = timeit.repeat(lambda: inference(sequence), repeat=average_over, number=3)
                            average_time = sum(runtimes) / float(len(runtimes)) / 3.0
                            dictionary[model_name]["time"][batch_size][slice_size] = average_time
                        else:
                            dictionary[model_name]["time"][batch_size][slice_size] = "N/A"

                    except RuntimeError as e:
                        print_fn("Doesn't fit on GPU. {}".format(e))
                        torch.cuda.empty_cache()
                        dictionary[model_name]["time"][batch_size][slice_size] = "N/A"
                        dictionary[model_name]["memory"][batch_size][slice_size] = "N/A"

    return dictionary


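# TensorFlow counterpart of _compute_pytorch: the forward pass is wrapped in a
# tf.function and traced once before memory and speed are measured the same way.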
def _compute_tensorflow(
    model_names, batch_sizes, slice_sizes, dictionary, average_over, amp, no_speed, no_memory, verbose, print_fn
):
    for c, model_name in enumerate(model_names):
        print_fn(f"{c + 1} / {len(model_names)}")
        config = AutoConfig.from_pretrained(model_name)
        model = TFAutoModel.from_pretrained(model_name, config=config)
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        tokenized_sequence = tokenizer.encode(input_text, add_special_tokens=False)

        # .get() returns None for checkpoints without a registered maximum input size
        max_input_size = tokenizer.max_model_input_sizes.get(model_name)

        dictionary[model_name] = {"bs": batch_sizes, "ss": slice_sizes, "time": {}, "memory": {}}
        dictionary[model_name]["time"] = {i: {} for i in batch_sizes}
        dictionary[model_name]["memory"] = {i: {} for i in batch_sizes}

        print_fn("Using model {}".format(model))
        print_fn("Number of all parameters {}".format(model.num_parameters()))

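        # tf.function compiles inference() into a TensorFlow graph; each new input
        # shape triggers a trace, which is why a warm-up call is made below before
        # anything is measured.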
        @tf.function
        def inference(inputs):
            return model(inputs)

        for batch_size in batch_sizes:
            for slice_size in slice_sizes:
                if max_input_size is not None and slice_size > max_input_size:
                    dictionary[model_name]["time"][batch_size][slice_size] = "N/A"
                    dictionary[model_name]["memory"][batch_size][slice_size] = "N/A"
                else:
                    sequence = tf.stack(
                        [tf.squeeze(tf.constant(tokenized_sequence[:slice_size])[None, :])] * batch_size
                    )

                    try:
                        print_fn("Going through model with sequence of shape {}".format(sequence.shape))
                        # To make sure that the model is traced + that the tensors are on the appropriate device
                        inference(sequence)

                        if not no_memory:
                            # Line by line memory tracing (all code in the module `transformers`) works for all models/arbitrary code
                            trace = start_memory_tracing("transformers")
                            inference(sequence)
                            summary = stop_memory_tracing(trace)

                            if verbose:
                                print_summary_statistics(summary, print_fn)

                            dictionary[model_name]["memory"][batch_size][slice_size] = str(summary.total)
                        else:
                            dictionary[model_name]["memory"][batch_size][slice_size] = "N/A"

                        if not no_speed:
                            runtimes = timeit.repeat(lambda: inference(sequence), repeat=average_over, number=3)
                            average_time = sum(runtimes) / float(len(runtimes)) / 3.0
                            dictionary[model_name]["time"][batch_size][slice_size] = average_time
                        else:
                            dictionary[model_name]["time"][batch_size][slice_size] = "N/A"

                    except tf.errors.ResourceExhaustedError as e:
                        print_fn("Doesn't fit on GPU. {}".format(e))
                        dictionary[model_name]["time"][batch_size][slice_size] = "N/A"
                        dictionary[model_name]["memory"][batch_size][slice_size] = "N/A"

    return dictionary


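# Command-line entry point: parses the benchmark flags and dispatches to the
# PyTorch and/or TensorFlow compute functions.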
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--models",
        required=False,
        type=str,
        default="all",
        help="Model checkpoints to be provided to the AutoModel classes. Leave "
        "blank to benchmark the base version of all available model architectures.",
    )
    parser.add_argument("--verbose", required=False, action="store_true", help="Verbose memory tracing")
    parser.add_argument("--no_speed", required=False, action="store_true", help="Don't perform speed measurements")
    parser.add_argument("--no_memory", required=False, action="store_true", help="Don't perform memory measurements")
    parser.add_argument(
        "--torch", required=False, action="store_true", help="Benchmark the PyTorch version of the models"
    )
    parser.add_argument(
        "--torch_cuda", required=False, action="store_true", help="PyTorch only: run on available CUDA devices"
    )
    parser.add_argument(
        "--torchscript",
        required=False,
        action="store_true",
        help="PyTorch only: trace the models using TorchScript",
    )
    parser.add_argument(
        "--tensorflow",
        required=False,
        action="store_true",
        help="Benchmark the TensorFlow version of the models. Will run on GPU if the correct dependencies are installed",
    )
    parser.add_argument("--xla", required=False, action="store_true", help="TensorFlow only: use XLA acceleration.")
    parser.add_argument(
        "--amp",
        required=False,
        action="store_true",
        help="TensorFlow only: use automatic mixed precision acceleration.",
    )
    parser.add_argument(
        "--fp16", required=False, action="store_true", help="PyTorch only: use FP16 to accelerate inference."
    )
    parser.add_argument(
        "--keras_predict",
        required=False,
        action="store_true",
        help="Whether to use model.predict " "instead of model() to do a " "forward pass.",
    )
    parser.add_argument("--save_to_csv", required=False, action="store_true", help="Save to a CSV file.")
    parser.add_argument(
        "--log_print", required=False, action="store_true", help="Save all print statements in log file."
    )
    parser.add_argument(
        "--csv_time_filename",
        required=False,
        default=f"time_{round(time())}.csv",
        help="CSV filename used if saving time results to csv.",
    )
    parser.add_argument(
        "--csv_memory_filename",
        required=False,
        default=f"memory_{round(time())}.csv",
        help="CSV filename used if saving memory results to csv.",
    )
    parser.add_argument(
        "--log_filename",
        required=False,
        default=f"log_{round(time())}.txt",
        help="Log filename used if print statements are saved in log.",
    )
    parser.add_argument(
        "--average_over", required=False, default=30, type=int, help="Times an experiment will be run."
    )
    parser.add_argument("--batch_sizes", nargs="+", type=int, default=[1, 2, 4, 8])
    parser.add_argument("--slice_sizes", nargs="+", type=int, default=[8, 64, 128, 256, 512, 1024])

    args = parser.parse_args()
    if args.models == "all":
        args.models = [
            "gpt2",
            "bert-base-cased",
            "xlnet-base-cased",
            "xlm-mlm-en-2048",
            "transfo-xl-wt103",
            "openai-gpt",
            "distilbert-base-uncased",
            "distilgpt2",
            "roberta-base",
            "ctrl",
            "t5-base",
            "bart-large",
        ]
    else:
        args.models = args.models.split()

    print_fn = get_print_function(args.log_print, args.log_filename)
    print_fn("Running with arguments: {}".format(args))

    if args.torch:
        if is_torch_available():
            create_setup_and_compute(
                model_names=args.models,
                batch_sizes=args.batch_sizes,
                slice_sizes=args.slice_sizes,
                tensorflow=False,
                gpu=args.torch_cuda,
                torchscript=args.torchscript,
                fp16=args.fp16,
                save_to_csv=args.save_to_csv,
                csv_time_filename=args.csv_time_filename,
                csv_memory_filename=args.csv_memory_filename,
                average_over=args.average_over,
                no_speed=args.no_speed,
                no_memory=args.no_memory,
                verbose=args.verbose,
                print_fn=print_fn,
            )
        else:
            raise ImportError("Trying to run a PyTorch benchmark but PyTorch was not found in the environment.")

    if args.tensorflow:
        if is_tf_available():
            create_setup_and_compute(
                model_names=args.models,
                batch_sizes=args.batch_sizes,
                slice_sizes=args.slice_sizes,
                tensorflow=True,
                xla=args.xla,
                amp=args.amp,
                save_to_csv=args.save_to_csv,
                csv_time_filename=args.csv_time_filename,
                csv_memory_filename=args.csv_memory_filename,
                average_over=args.average_over,
                no_speed=args.no_speed,
                no_memory=args.no_memory,
                verbose=args.verbose,
                print_fn=print_fn,
            )
        else:
            raise ImportError("Trying to run a TensorFlow benchmark but TensorFlow was not found in the environment.")


if __name__ == "__main__":
    main()