Unverified commit 80fa0f78 authored by Patrick von Platen, committed by GitHub

[Examples, Benchmark] Improve benchmark utils (#3674)

* improve and add features to benchmark utils

* update benchmark style

* remove output files
parent 05deb52d
@@ -20,9 +20,10 @@
 import argparse
 import csv
+import logging
 import timeit
 from time import time
-from typing import List
+from typing import Callable, List

 from transformers import (
     AutoConfig,
@@ -46,10 +47,8 @@ if is_torch_available():
 input_text = """Bent over their instruments, three hundred Fertilizers were plunged, as
 the Director of Hatcheries and Conditioning entered the room, in the
 scarcely breathing silence, the absent-minded, soliloquizing hum or
 whistle, of absorbed concentration. A troop of newly arrived students,
 very young, pink and callow, followed nervously, rather abjectly, at the
 Director's heels. Each of them carried a notebook, in which, whenever
@@ -271,8 +270,9 @@ def create_setup_and_compute(
     amp: bool = False,
     fp16: bool = False,
     save_to_csv: bool = False,
-    csv_filename: str = f"results_{round(time())}.csv",
+    csv_time_filename: str = f"time_{round(time())}.csv",
     csv_memory_filename: str = f"memory_{round(time())}.csv",
+    print_fn: Callable[[str], None] = print,
 ):
     if xla:
         tf.config.optimizer.set_jit(True)
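The new print_fn parameter accepts any callable that takes the formatted message, which makes it easy to redirect all benchmark output. A minimal sketch of a custom sink matching the Callable[[str], None] contract; the function and variable names here are made up for illustration and are not part of the diff:

    from typing import Callable, List

    messages: List[str] = []

    def collect(msg: str) -> None:
        """Hypothetical sink: keep a copy of every message while still printing it."""
        messages.append(msg)
        print(msg)

    sink: Callable[[str], None] = collect  # could then be passed as print_fn=collect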
@@ -282,7 +282,16 @@ def create_setup_and_compute(
     if tensorflow:
         dictionary = {model_name: {} for model_name in model_names}
         results = _compute_tensorflow(
-            model_names, batch_sizes, slice_sizes, dictionary, average_over, amp, no_speed, no_memory, verbose
+            model_names,
+            batch_sizes,
+            slice_sizes,
+            dictionary,
+            average_over,
+            amp,
+            no_speed,
+            no_memory,
+            verbose,
+            print_fn,
         )
     else:
         device = "cuda" if (gpu and torch.cuda.is_available()) else "cpu"
@@ -299,100 +308,107 @@ def create_setup_and_compute(
             no_speed,
             no_memory,
             verbose,
+            print_fn,
         )

-    print("=========== RESULTS ===========")
+    print_fn("=========== RESULTS ===========")
     for model_name in model_names:
-        print("\t" + f"======= MODEL CHECKPOINT: {model_name} =======")
+        print_fn("\t" + f"======= MODEL CHECKPOINT: {model_name} =======")
         for batch_size in results[model_name]["bs"]:
-            print("\t\t" + f"===== BATCH SIZE: {batch_size} =====")
+            print_fn("\t\t" + f"===== BATCH SIZE: {batch_size} =====")
             for slice_size in results[model_name]["ss"]:
-                result = results[model_name]["results"][batch_size][slice_size]
+                time = results[model_name]["time"][batch_size][slice_size]
                 memory = results[model_name]["memory"][batch_size][slice_size]
-                if isinstance(result, str):
-                    print(f"\t\t{model_name}/{batch_size}/{slice_size}: " f"{result} " f"{memory}")
+                if isinstance(time, str):
+                    print_fn(f"\t\t{model_name}/{batch_size}/{slice_size}: " f"{time} " f"{memory}")
                 else:
-                    print(
+                    print_fn(
                         f"\t\t{model_name}/{batch_size}/{slice_size}: "
-                        f"{(round(1000 * result) / 1000)}"
+                        f"{(round(1000 * time) / 1000)}"
                         f"s "
                         f"{memory}"
                     )

     if save_to_csv:
-        with open(csv_filename, mode="w") as csv_file, open(csv_memory_filename, mode="w") as csv_memory_file:
-            fieldnames = [
-                "model",
-                "1x8",
-                "1x64",
-                "1x128",
-                "1x256",
-                "1x512",
-                "1x1024",
-                "2x8",
-                "2x64",
-                "2x128",
-                "2x256",
-                "2x512",
-                "2x1024",
-                "4x8",
-                "4x64",
-                "4x128",
-                "4x256",
-                "4x512",
-                "4x1024",
-                "8x8",
-                "8x64",
-                "8x128",
-                "8x256",
-                "8x512",
-                "8x1024",
-            ]
-            writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
-            writer.writeheader()
-            memory_writer = csv.DictWriter(csv_memory_file, fieldnames=fieldnames)
+        with open(csv_time_filename, mode="w") as csv_time_file, open(
+            csv_memory_filename, mode="w"
+        ) as csv_memory_file:
+
+            assert len(model_names) > 0, "At least 1 model should be defined, but got {}".format(model_names)
+
+            fieldnames = ["model", "batch_size", "sequence_length"]
+            time_writer = csv.DictWriter(csv_time_file, fieldnames=fieldnames + ["time_in_s"])
+            time_writer.writeheader()
+            memory_writer = csv.DictWriter(csv_memory_file, fieldnames=fieldnames + ["memory"])
             memory_writer.writeheader()

             for model_name in model_names:
-                model_results = {
-                    f"{bs}x{ss}": results[model_name]["results"][bs][ss]
-                    for bs in results[model_name]["results"]
-                    for ss in results[model_name]["results"][bs]
-                }
-                writer.writerow({"model": model_name, **model_results})
-
-                model_memory_results = {
-                    f"{bs}x{ss}": results[model_name]["memory"][bs][ss]
-                    for bs in results[model_name]["memory"]
-                    for ss in results[model_name]["memory"][bs]
-                }
-                memory_writer.writerow({"model": model_name, **model_memory_results})
+                time_dict = results[model_name]["time"]
+                memory_dict = results[model_name]["memory"]
+
+                for bs in time_dict:
+                    for ss in time_dict[bs]:
+                        time_writer.writerow(
+                            {
+                                "model": model_name,
+                                "batch_size": bs,
+                                "sequence_length": ss,
+                                "time_in_s": "{:.4f}".format(time_dict[bs][ss]),
+                            }
+                        )
+
+                for bs in memory_dict:
+                    for ss in time_dict[bs]:
+                        memory_writer.writerow(
+                            {
+                                "model": model_name,
+                                "batch_size": bs,
+                                "sequence_length": ss,
+                                "memory": memory_dict[bs][ss],
+                            }
+                        )

-def print_summary_statistics(summary: MemorySummary):
-    print(
+def print_summary_statistics(summary: MemorySummary, print_fn: Callable[[str], None]):
+    print_fn(
         "\nLines by line memory consumption:\n"
         + "\n".join(
             f"{state.frame.filename}:{state.frame.line_number}: mem {state.cpu_gpu}: {state.frame.line_text}"
             for state in summary.sequential
         )
     )
-    print(
+    print_fn(
         "\nLines with top memory consumption:\n"
         + "\n".join(
             f"=> {state.frame.filename}:{state.frame.line_number}: mem {state.cpu_gpu}: {state.frame.line_text}"
             for state in summary.cumulative[:6]
         )
     )
-    print(
+    print_fn(
         "\nLines with lowest memory consumption:\n"
         + "\n".join(
             f"=> {state.frame.filename}:{state.frame.line_number}: mem {state.cpu_gpu}: {state.frame.line_text}"
             for state in summary.cumulative[-6:]
         )
     )
-    print(f"\nTotal memory increase: {summary.total}")
+    print_fn(f"\nTotal memory increase: {summary.total}")

+def get_print_function(save_print_log, log_filename):
+    if save_print_log:
+        logging.basicConfig(
+            level=logging.DEBUG,
+            filename=log_filename,
+            filemode="a+",
+            format="%(asctime)-15s %(levelname)-8s %(message)s",
+        )
+
+        def print_with_print_log(*args):
+            logging.info(*args)
+            print(*args)
+
+        return print_with_print_log
+    else:
+        return print

 def _compute_pytorch(
@@ -407,9 +423,10 @@ def _compute_pytorch(
     no_speed,
     no_memory,
     verbose,
+    print_fn,
 ):
     for c, model_name in enumerate(model_names):
-        print(f"{c + 1} / {len(model_names)}")
+        print_fn(f"{c + 1} / {len(model_names)}")
         config = AutoConfig.from_pretrained(model_name, torchscript=torchscript)
         model = AutoModel.from_pretrained(model_name, config=config)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -418,10 +435,13 @@ def _compute_pytorch(
         max_input_size = tokenizer.max_model_input_sizes[model_name]

-        dictionary[model_name] = {"bs": batch_sizes, "ss": slice_sizes, "results": {}, "memory": {}}
-        dictionary[model_name]["results"] = {i: {} for i in batch_sizes}
+        dictionary[model_name] = {"bs": batch_sizes, "ss": slice_sizes, "time": {}, "memory": {}}
+        dictionary[model_name]["time"] = {i: {} for i in batch_sizes}
         dictionary[model_name]["memory"] = {i: {} for i in batch_sizes}

+        print_fn("Using model {}".format(model))
+        print_fn("Number of all parameters {}".format(model.num_parameters()))
+
         for batch_size in batch_sizes:
             if fp16:
                 model.half()
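This hunk renames the per-model "results" bucket to "time", so timing and memory now live in clearly separated sub-dictionaries keyed by batch size and then sequence length. A hedged sketch of the layout after a run; the model name, sizes, and values are illustrative, and the memory strings only suggest what str(summary.total) might look like:

    dictionary["gpt2"] = {
        "bs": [1, 2],   # batch sizes benchmarked
        "ss": [8, 64],  # sequence (slice) lengths benchmarked
        "time": {1: {8: 0.0123, 64: 0.0456}, 2: {8: 0.0210, 64: "N/A"}},       # seconds, or "N/A"
        "memory": {1: {8: "1.2MB", 64: "4.1MB"}, 2: {8: "2.3MB", 64: "N/A"}},  # str(summary.total), or "N/A"
    }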
@@ -430,12 +450,12 @@ def _compute_pytorch(
             for slice_size in slice_sizes:
                 if max_input_size is not None and slice_size > max_input_size:
-                    dictionary[model_name]["results"][batch_size][slice_size] = "N/A"
+                    dictionary[model_name]["time"][batch_size][slice_size] = "N/A"
                 else:
                     sequence = torch.tensor(tokenized_sequence[:slice_size], device=device).repeat(batch_size, 1)

                     try:
                         if torchscript:
-                            print("Tracing model with sequence size", sequence.shape)
+                            print_fn("Tracing model with sequence size {}".format(sequence.shape))
                             inference = torch.jit.trace(model, sequence)
                             inference(sequence)
                         else:
@@ -451,33 +471,33 @@ def _compute_pytorch(
                             summary = stop_memory_tracing(trace)

                             if verbose:
-                                print_summary_statistics(summary)
+                                print_summary_statistics(summary, print_fn)

                             dictionary[model_name]["memory"][batch_size][slice_size] = str(summary.total)
                         else:
                             dictionary[model_name]["memory"][batch_size][slice_size] = "N/A"

                         if not no_speed:
-                            print("Going through model with sequence of shape", sequence.shape)
+                            print_fn("Going through model with sequence of shape".format(sequence.shape))
                             runtimes = timeit.repeat(lambda: inference(sequence), repeat=average_over, number=3)
                             average_time = sum(runtimes) / float(len(runtimes)) / 3.0
-                            dictionary[model_name]["results"][batch_size][slice_size] = average_time
+                            dictionary[model_name]["time"][batch_size][slice_size] = average_time
                         else:
-                            dictionary[model_name]["results"][batch_size][slice_size] = "N/A"
+                            dictionary[model_name]["time"][batch_size][slice_size] = "N/A"

                     except RuntimeError as e:
-                        print("Doesn't fit on GPU.", e)
+                        print_fn("Doesn't fit on GPU. {}".format(e))
                         torch.cuda.empty_cache()
-                        dictionary[model_name]["results"][batch_size][slice_size] = "N/A"
+                        dictionary[model_name]["time"][batch_size][slice_size] = "N/A"
                         dictionary[model_name]["memory"][batch_size][slice_size] = "N/A"

     return dictionary


 def _compute_tensorflow(
-    model_names, batch_sizes, slice_sizes, dictionary, average_over, amp, no_speed, no_memory, verbose
+    model_names, batch_sizes, slice_sizes, dictionary, average_over, amp, no_speed, no_memory, verbose, print_fn
 ):
     for c, model_name in enumerate(model_names):
-        print(f"{c + 1} / {len(model_names)}")
+        print_fn(f"{c + 1} / {len(model_names)}")
         config = AutoConfig.from_pretrained(model_name)
         model = TFAutoModel.from_pretrained(model_name, config=config)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -486,11 +506,12 @@ def _compute_tensorflow(
         max_input_size = tokenizer.max_model_input_sizes[model_name]

-        dictionary[model_name] = {"bs": batch_sizes, "ss": slice_sizes, "results": {}, "memory": {}}
-        dictionary[model_name]["results"] = {i: {} for i in batch_sizes}
+        dictionary[model_name] = {"bs": batch_sizes, "ss": slice_sizes, "time": {}, "memory": {}}
+        dictionary[model_name]["time"] = {i: {} for i in batch_sizes}
         dictionary[model_name]["memory"] = {i: {} for i in batch_sizes}

-        print("Using model", model)
+        print_fn("Using model {}".format(model))
+        print_fn("Number of all parameters {}".format(model.num_parameters()))

         @tf.function
         def inference(inputs):
@@ -499,14 +520,14 @@ def _compute_tensorflow(
         for batch_size in batch_sizes:
             for slice_size in slice_sizes:
                 if max_input_size is not None and slice_size > max_input_size:
-                    dictionary[model_name]["results"][batch_size][slice_size] = "N/A"
+                    dictionary[model_name]["time"][batch_size][slice_size] = "N/A"
                 else:
                     sequence = tf.stack(
                         [tf.squeeze(tf.constant(tokenized_sequence[:slice_size])[None, :])] * batch_size
                     )

                     try:
-                        print("Going through model with sequence of shape", sequence.shape)
+                        print_fn("Going through model with sequence of shape {}".format(sequence.shape))
                         # To make sure that the model is traced + that the tensors are on the appropriate device
                         inference(sequence)
@@ -517,7 +538,7 @@ def _compute_tensorflow(
                             summary = stop_memory_tracing(trace)

                             if verbose:
-                                print_summary_statistics(summary)
+                                print_summary_statistics(summary, print_fn)

                             dictionary[model_name]["memory"][batch_size][slice_size] = str(summary.total)
                         else:
@@ -526,13 +547,13 @@ def _compute_tensorflow(
                         if not no_speed:
                             runtimes = timeit.repeat(lambda: inference(sequence), repeat=average_over, number=3)
                             average_time = sum(runtimes) / float(len(runtimes)) / 3.0
-                            dictionary[model_name]["results"][batch_size][slice_size] = average_time
+                            dictionary[model_name]["time"][batch_size][slice_size] = average_time
                         else:
-                            dictionary[model_name]["results"][batch_size][slice_size] = "N/A"
+                            dictionary[model_name]["time"][batch_size][slice_size] = "N/A"

                     except tf.errors.ResourceExhaustedError as e:
-                        print("Doesn't fit on GPU.", e)
-                        dictionary[model_name]["results"][batch_size][slice_size] = "N/A"
+                        print_fn("Doesn't fit on GPU. {}".format(e))
+                        dictionary[model_name]["time"][batch_size][slice_size] = "N/A"
                         dictionary[model_name]["memory"][batch_size][slice_size] = "N/A"

     return dictionary
@@ -593,7 +614,25 @@ def main():
     )
     parser.add_argument("--save_to_csv", required=False, action="store_true", help="Save to a CSV file.")
     parser.add_argument(
-        "--csv_filename", required=False, default=None, help="CSV filename used if saving results to csv."
+        "--log_print", required=False, action="store_true", help="Save all print statements in log file."
+    )
+    parser.add_argument(
+        "--csv_time_filename",
+        required=False,
+        default=f"time_{round(time())}.csv",
+        help="CSV filename used if saving time results to csv.",
+    )
+    parser.add_argument(
+        "--csv_memory_filename",
+        required=False,
+        default=f"memory_{round(time())}.csv",
+        help="CSV filename used if saving memory results to csv.",
+    )
+    parser.add_argument(
+        "--log_filename",
+        required=False,
+        default=f"log_{round(time())}.txt",
+        help="Log filename used if print statements are saved in log.",
     )
     parser.add_argument(
         "--average_over", required=False, default=30, type=int, help="Times an experiment will be run."
@@ -614,11 +653,14 @@ def main():
             "distilgpt2",
             "roberta-base",
             "ctrl",
+            "t5-base",
+            "bart-large",
         ]
     else:
         args.models = args.models.split()

-    print("Running with arguments", args)
+    print_fn = get_print_function(args.log_print, args.log_filename)
+    print_fn("Running with arguments: {}".format(args))

     if args.torch:
         if is_torch_available():
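Taken together, the new arguments let a single run write separate time and memory CSVs (columns: model, batch_size, sequence_length, plus time_in_s or memory) and mirror every print statement into a log file. Assuming the script lives at examples/benchmarks.py (the path is not shown in this diff) and using illustrative filenames, an invocation might look like:

    python examples/benchmarks.py --torch --models gpt2 --save_to_csv --log_print --csv_time_filename time.csv --csv_memory_filename memory.csv --log_filename benchmark.log --average_over 10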
@@ -631,11 +673,13 @@ def main():
                 torchscript=args.torchscript,
                 fp16=args.fp16,
                 save_to_csv=args.save_to_csv,
-                csv_filename=args.csv_filename,
+                csv_time_filename=args.csv_time_filename,
+                csv_memory_filename=args.csv_memory_filename,
                 average_over=args.average_over,
                 no_speed=args.no_speed,
                 no_memory=args.no_memory,
                 verbose=args.verbose,
+                print_fn=print_fn,
             )
         else:
             raise ImportError("Trying to run a PyTorch benchmark but PyTorch was not found in the environment.")
@@ -650,11 +694,13 @@ def main():
                 xla=args.xla,
                 amp=args.amp,
                 save_to_csv=args.save_to_csv,
-                csv_filename=args.csv_filename,
+                csv_time_filename=args.csv_time_filename,
+                csv_memory_filename=args.csv_memory_filename,
                 average_over=args.average_over,
                 no_speed=args.no_speed,
                 no_memory=args.no_memory,
                 verbose=args.verbose,
+                print_fn=print_fn,
             )
         else:
             raise ImportError("Trying to run a TensorFlow benchmark but TensorFlow was not found in the environment.")
...