Commit ec3a4616 authored by Hongkun Yu, committed by A. Unique TensorFlower

Remove the profiler callback, which calls a TensorFlow-internal library.

The profiler has been integrated with the TensorBoard callback and released in TF 2.2.

PiperOrigin-RevId: 311475711
parent 40773bb2
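With TF 2.2 or newer, the step-range profiling that --profile_steps used to provide is available through the Keras TensorBoard callback's profile_batch argument. A minimal sketch of the replacement usage, assuming TF 2.2+; the log directory and the 2,4 range are illustrative and mirror the example in the removed flag's help text:

import tensorflow as tf

# Assumes TF 2.2+, where the TensorBoard callback drives the profiler.
# '2,4' profiles training batches 2 through 4, matching the old
# "--profile_steps=2,4" example; /tmp/model_dir is an illustrative path.
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir='/tmp/model_dir',
    profile_batch='2,4')

# model.fit(..., callbacks=[tensorboard_callback])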
@@ -213,7 +213,7 @@ def run(flags_obj):
   train_epochs = flags_obj.train_epochs

-  callbacks = common.get_callbacks(steps_per_epoch)
+  callbacks = common.get_callbacks()
   if not flags_obj.use_tensor_lr:
     lr_callback = LearningRateBatchScheduler(
...
@@ -218,7 +218,6 @@ def run(flags_obj):
   train_epochs = flags_obj.train_epochs

   callbacks = common.get_callbacks(
-      steps_per_epoch=steps_per_epoch,
       pruning_method=flags_obj.pruning_method,
       enable_checkpoint_and_export=flags_obj.enable_checkpoint_and_export,
       model_dir=flags_obj.model_dir)
...
@@ -113,14 +113,6 @@ def define_transformer_flags():
       name='enable_mlir_bridge',
       default=False,
       help='Whether to enable the TF to XLA bridge.')
-  flags.DEFINE_string(
-      name='profile_steps', default=None,
-      help='Save profiling data to model dir at given range of steps. The '
-      'value must be a comma separated pair of positive integers, specifying '
-      'the first and last step to profile. For example, "--profile_steps=2,4" '
-      'triggers the profiler to process 3 steps, starting from the 2nd step. '
-      'Note that profiler has a non-trivial performance overhead, and the '
-      'output file can be gigantic if profiling many steps.')
   # Set flags from the flags_core module as 'key flags' so they're listed when
   # the '-h' flag is used. Without this line, the flags defined above are
   # only shown in the full `--helpful` help text.
@@ -239,7 +231,7 @@ def define_transformer_flags():
   # pylint: enable=unused-variable


-def get_callbacks(steps_per_epoch):
+def get_callbacks():
   """Returns common callbacks."""
   callbacks = []
   if FLAGS.enable_time_history:
@@ -254,14 +246,6 @@ def get_callbacks(steps_per_epoch):
         log_dir=FLAGS.model_dir)
     callbacks.append(tensorboard_callback)
-  if FLAGS.profile_steps:
-    profiler_callback = keras_utils.get_profiler_callback(
-        FLAGS.model_dir,
-        FLAGS.profile_steps,
-        FLAGS.enable_tensorboard,
-        steps_per_epoch)
-    callbacks.append(profiler_callback)
   return callbacks
...
@@ -416,7 +416,7 @@ class TransformerTask(object):
         params["hidden_size"],
         params["learning_rate_warmup_steps"])
     scheduler_callback = optimizer.LearningRateScheduler(sfunc, init_steps)
-    callbacks = misc.get_callbacks(params["steps_between_evals"])
+    callbacks = misc.get_callbacks()
     callbacks.append(scheduler_callback)
     if params["enable_checkpointing"]:
       ckpt_full_path = os.path.join(cur_log_dir, "cp-{epoch:04d}.ckpt")
...
@@ -23,9 +23,8 @@ import os
 import time

 from absl import logging
-import tensorflow.compat.v2 as tf
+import tensorflow as tf
 from tensorflow.python import tf2
-from tensorflow.python.profiler import profiler_v2 as profiler


 class BatchTimestamp(object):
@@ -139,31 +138,6 @@ class TimeHistory(tf.keras.callbacks.Callback):
       self.steps_in_epoch = 0


-def get_profiler_callback(model_dir, profile_steps, enable_tensorboard,
-                          steps_per_epoch):
-  """Validate profile_steps flag value and return profiler callback."""
-  profile_steps_error_message = (
-      'profile_steps must be a comma separated pair of positive integers, '
-      'specifying the first and last steps to be profiled.'
-  )
-  try:
-    profile_steps = [int(i) for i in profile_steps.split(',')]
-  except ValueError:
-    raise ValueError(profile_steps_error_message)
-  if len(profile_steps) != 2:
-    raise ValueError(profile_steps_error_message)
-  start_step, stop_step = profile_steps
-  if start_step < 0 or start_step > stop_step:
-    raise ValueError(profile_steps_error_message)
-  if enable_tensorboard:
-    logging.warning(
-        'Both TensorBoard and profiler callbacks are used. Note that the '
-        'TensorBoard callback profiles the 2nd step (unless otherwise '
-        'specified). Please make sure the steps profiled by the two callbacks '
-        'do not overlap.')
-  return ProfilerCallback(model_dir, start_step, stop_step, steps_per_epoch)
-
-
 class SimpleCheckpoint(tf.keras.callbacks.Callback):
   """Keras callback to save tf.train.Checkpoints."""
@@ -176,41 +150,6 @@ class SimpleCheckpoint(tf.keras.callbacks.Callback):
     self.checkpoint_manager.save(checkpoint_number=step_counter)


-class ProfilerCallback(tf.keras.callbacks.Callback):
-  """Save profiles in specified step range to log directory."""
-
-  def __init__(self, log_dir, start_step, stop_step, steps_per_epoch):
-    super(ProfilerCallback, self).__init__()
-    self.log_dir = log_dir
-    self.start_step = start_step
-    self.stop_step = stop_step
-    self.start_epoch = start_step // steps_per_epoch
-    self.stop_epoch = stop_step // steps_per_epoch
-    self.start_step_in_epoch = start_step % steps_per_epoch
-    self.stop_step_in_epoch = stop_step % steps_per_epoch
-    self.should_start = False
-    self.should_stop = False
-
-  def on_epoch_begin(self, epoch, logs=None):
-    if epoch == self.start_epoch:
-      self.should_start = True
-    if epoch == self.stop_epoch:
-      self.should_stop = True
-
-  def on_batch_begin(self, batch, logs=None):
-    if batch == self.start_step_in_epoch and self.should_start:
-      self.should_start = False
-      profiler.start(self.log_dir)
-      logging.info('Profiler started at Step %s', self.start_step)
-
-  def on_batch_end(self, batch, logs=None):
-    if batch == self.stop_step_in_epoch and self.should_stop:
-      self.should_stop = False
-      profiler.stop()
-      logging.info('Profiler saved profiles for steps between %s and %s to %s',
-                   self.start_step, self.stop_step, self.log_dir)
-
-
 def set_session_config(enable_eager=False,
                        enable_xla=False):
   """Sets the session config."""
...
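For reference, equivalent step-range profiling can still be written without TensorFlow-internal imports: the public tf.profiler.experimental API (available since TF 2.2) exposes the same start/stop calls the removed ProfilerCallback used. The sketch below is illustrative only and not part of this change; StepRangeProfiler is a hypothetical name, and tracking the global step directly avoids the steps_per_epoch bookkeeping the removed callback needed.

import tensorflow as tf


class StepRangeProfiler(tf.keras.callbacks.Callback):
  """Sketch: profile global steps [start_step, stop_step] to log_dir.

  Uses only the public tf.profiler.experimental API (TF 2.2+); not part of
  this commit, whose supported path is the TensorBoard callback.
  """

  def __init__(self, log_dir, start_step, stop_step):
    super(StepRangeProfiler, self).__init__()
    self.log_dir = log_dir
    self.start_step = start_step
    self.stop_step = stop_step
    self._global_step = 0

  def on_train_batch_begin(self, batch, logs=None):
    # Start the profiler once the configured global step is reached.
    if self._global_step == self.start_step:
      tf.profiler.experimental.start(self.log_dir)

  def on_train_batch_end(self, batch, logs=None):
    # Stop after the last requested step and advance the global step count.
    if self._global_step == self.stop_step:
      tf.profiler.experimental.stop()
    self._global_step += 1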
@@ -105,7 +105,6 @@ def get_optimizer(learning_rate=0.1):


 def get_callbacks(
-    steps_per_epoch,
     pruning_method=None,
     enable_checkpoint_and_export=False,
     model_dir=None):
@@ -121,14 +120,6 @@ def get_callbacks(
         log_dir=FLAGS.model_dir)
     callbacks.append(tensorboard_callback)
-  if FLAGS.profile_steps:
-    profiler_callback = keras_utils.get_profiler_callback(
-        FLAGS.model_dir,
-        FLAGS.profile_steps,
-        FLAGS.enable_tensorboard,
-        steps_per_epoch)
-    callbacks.append(profiler_callback)
   is_pruning_enabled = pruning_method is not None
   if is_pruning_enabled:
     callbacks.append(tfmot.sparsity.keras.UpdatePruningStep())
@@ -242,14 +233,6 @@ def define_keras_flags(
       help='The number of steps to run for training. If it is larger than '
       '# batches per epoch, then use # batches per epoch. This flag will be '
       'ignored if train_epochs is set to be larger than 1. ')
-  flags.DEFINE_string(
-      name='profile_steps', default=None,
-      help='Save profiling data to model dir at given range of global steps. The '
-      'value must be a comma separated pair of positive integers, specifying '
-      'the first and last step to profile. For example, "--profile_steps=2,4" '
-      'triggers the profiler to process 3 steps, starting from the 2nd step. '
-      'Note that profiler has a non-trivial performance overhead, and the '
-      'output file can be gigantic if profiling many steps.')
   flags.DEFINE_boolean(
       name='batchnorm_spatial_persistent', default=True,
       help='Enable the spacial persistent mode for CuDNN batch norm kernel.')
...