# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TFM common benchmark training driver."""

import os
import time
from typing import Any, Mapping, Optional

from absl import logging
import orbit
import tensorflow as tf

from official.common import distribute_utils
from official.core import config_definitions
from official.core import task_factory
from official.core import train_utils
from official.modeling import performance


def run_benchmark(
    execution_mode: str,
    params: config_definitions.ExperimentConfig,
    model_dir: str,
    distribution_strategy: Optional[tf.distribute.Strategy] = None
) -> Mapping[str, Any]:
  """Runs a benchmark for a specific experiment.

  Args:
    execution_mode: A `str` specifying the mode. Can be 'accuracy' or
      'performance'.
    params: ExperimentConfig instance.
    model_dir: A `str`, a path to store model checkpoints and summaries.
    distribution_strategy: A tf.distribute.Strategy to use. If specified, it
      will be used instead of inferring the strategy from `params`.

  Returns:
    benchmark_data: benchmark results in dict format.
  """
  # For GPU runs, optionally set the thread mode.
  if params.runtime.gpu_thread_mode:
    os.environ['TF_GPU_THREAD_MODE'] = params.runtime.gpu_thread_mode
    logging.info('TF_GPU_THREAD_MODE: %s', os.environ['TF_GPU_THREAD_MODE'])

  # Sets the mixed-precision policy. Using 'mixed_float16' or 'mixed_bfloat16'
  # can significantly speed up models by using float16 on GPUs and bfloat16 on
  # TPUs. `loss_scale` takes effect only when the dtype is float16.
  if params.runtime.mixed_precision_dtype:
    performance.set_mixed_precision_policy(
        params.runtime.mixed_precision_dtype)

  strategy = (
      distribution_strategy or distribute_utils.get_distribution_strategy(
          distribution_strategy=params.runtime.distribution_strategy,
          all_reduce_alg=params.runtime.all_reduce_alg,
          num_gpus=params.runtime.num_gpus,
          tpu_address=params.runtime.tpu))

  with strategy.scope():
    task = task_factory.get_task(params.task, logging_dir=model_dir)
    trainer = train_utils.create_trainer(
        params,
        task,
        train=True,
        evaluate=(execution_mode == 'accuracy'))

    # Initialize the model if possible, e.g., from a pre-trained checkpoint.
    trainer.initialize()

  steps_per_loop = (
      params.trainer.steps_per_loop if execution_mode == 'accuracy' else 100)
  controller = orbit.Controller(
      strategy=strategy,
      trainer=trainer,
      evaluator=trainer if execution_mode == 'accuracy' else None,
      global_step=trainer.global_step,
      steps_per_loop=steps_per_loop)

  logging.info('Starting to execute mode: %s', execution_mode)
  with strategy.scope():
    # Training for one loop; the first loop time includes warmup time.
    first_loop_start_time = time.time()
    controller.train(steps=steps_per_loop)
    first_loop_time = time.time() - first_loop_start_time
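    # Note: orbit.Controller.train(steps=N) trains until the global step
    # reaches N, not for N additional steps. The first call above ran the
    # model to step `steps_per_loop`; the call below continues to step
    # `2 * steps_per_loop`, so each loop covers `steps_per_loop` steps.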
    # Training for the second loop; this is purely training time.
    second_loop_start_time = time.time()
    controller.train(steps=2 * steps_per_loop)
    second_loop_time = time.time() - second_loop_start_time

    if execution_mode == 'accuracy':
      controller.train(steps=params.trainer.train_steps)
      eval_logs = trainer.evaluate(
          tf.convert_to_tensor(params.trainer.validation_steps))
      benchmark_data = {'metrics': eval_logs}
    elif execution_mode == 'performance':
      benchmark_data = {}
    else:
      raise NotImplementedError(
          'The benchmark execution mode is not implemented: %s' %
          execution_mode)

  # The first training loop time includes startup time plus training time,
  # while the second training loop time is purely training time. Startup time
  # can be recovered by subtracting the second training loop time from the
  # first training loop time.
  startup_time = first_loop_time - second_loop_time
  wall_time = time.time() - first_loop_start_time
  examples_per_second = (
      steps_per_loop * params.task.train_data.global_batch_size /
      second_loop_time)
  benchmark_data.update(
      dict(
          examples_per_second=examples_per_second,
          wall_time=wall_time,
          startup_time=startup_time))
  return benchmark_data
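

if __name__ == '__main__':
  # A minimal usage sketch, not an authoritative entry point: it assumes an
  # experiment named 'resnet_imagenet' has been registered with the factory
  # (e.g., by importing the relevant config module beforehand) and that
  # /tmp/benchmark_model_dir is writable. Both names are illustrative
  # assumptions, not values this module defines.
  from official.core import exp_factory

  example_params = exp_factory.get_exp_config('resnet_imagenet')
  example_benchmark_data = run_benchmark(
      execution_mode='performance',
      params=example_params,
      model_dir='/tmp/benchmark_model_dir')
  logging.info('examples/sec: %.2f',
               example_benchmark_data['examples_per_second'])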