Commit ec3a4616 authored by Hongkun Yu, committed by A. Unique TensorFlower

Remove the profiler callback, which calls a TensorFlow-internal library.

The profiler has been integrated with the TensorBoard callback and released in TF 2.2.

PiperOrigin-RevId: 311475711
parent 40773bb2
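With TF 2.2 or newer, the step-range profiling that --profile_steps used to provide is available through the Keras TensorBoard callback's profile_batch argument. A minimal sketch of the replacement usage, assuming TF 2.2+; the log directory and the 2,4 range are illustrative and mirror the example in the removed flag's help text:

import tensorflow as tf

# Assumes TF 2.2+, where the TensorBoard callback drives the profiler.
# '2,4' profiles training batches 2 through 4, matching the old
# "--profile_steps=2,4" example; /tmp/model_dir is an illustrative path.
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir='/tmp/model_dir',
    profile_batch='2,4')

# model.fit(..., callbacks=[tensorboard_callback])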
@@ -213,7 +213,7 @@ def run(flags_obj):
   train_epochs = flags_obj.train_epochs

-  callbacks = common.get_callbacks(steps_per_epoch)
+  callbacks = common.get_callbacks()
   if not flags_obj.use_tensor_lr:
     lr_callback = LearningRateBatchScheduler(
...
@@ -218,7 +218,6 @@ def run(flags_obj):
   train_epochs = flags_obj.train_epochs

   callbacks = common.get_callbacks(
-      steps_per_epoch=steps_per_epoch,
       pruning_method=flags_obj.pruning_method,
       enable_checkpoint_and_export=flags_obj.enable_checkpoint_and_export,
       model_dir=flags_obj.model_dir)
...
@@ -113,14 +113,6 @@ def define_transformer_flags():
       name='enable_mlir_bridge',
       default=False,
       help='Whether to enable the TF to XLA bridge.')
-  flags.DEFINE_string(
-      name='profile_steps', default=None,
-      help='Save profiling data to model dir at given range of steps. The '
-      'value must be a comma separated pair of positive integers, specifying '
-      'the first and last step to profile. For example, "--profile_steps=2,4" '
-      'triggers the profiler to process 3 steps, starting from the 2nd step. '
-      'Note that profiler has a non-trivial performance overhead, and the '
-      'output file can be gigantic if profiling many steps.')
   # Set flags from the flags_core module as 'key flags' so they're listed when
   # the '-h' flag is used. Without this line, the flags defined above are
   # only shown in the full `--helpful` help text.
@@ -239,7 +231,7 @@ def define_transformer_flags():
   # pylint: enable=unused-variable


-def get_callbacks(steps_per_epoch):
+def get_callbacks():
   """Returns common callbacks."""
   callbacks = []
   if FLAGS.enable_time_history:
@@ -254,14 +246,6 @@ def get_callbacks(steps_per_epoch):
         log_dir=FLAGS.model_dir)
     callbacks.append(tensorboard_callback)
-  if FLAGS.profile_steps:
-    profiler_callback = keras_utils.get_profiler_callback(
-        FLAGS.model_dir,
-        FLAGS.profile_steps,
-        FLAGS.enable_tensorboard,
-        steps_per_epoch)
-    callbacks.append(profiler_callback)
   return callbacks
...
@@ -416,7 +416,7 @@ class TransformerTask(object):
         params["hidden_size"],
         params["learning_rate_warmup_steps"])
     scheduler_callback = optimizer.LearningRateScheduler(sfunc, init_steps)
-    callbacks = misc.get_callbacks(params["steps_between_evals"])
+    callbacks = misc.get_callbacks()
     callbacks.append(scheduler_callback)
     if params["enable_checkpointing"]:
       ckpt_full_path = os.path.join(cur_log_dir, "cp-{epoch:04d}.ckpt")
...
@@ -23,9 +23,8 @@ import os
 import time

 from absl import logging
-import tensorflow.compat.v2 as tf
+import tensorflow as tf
 from tensorflow.python import tf2
-from tensorflow.python.profiler import profiler_v2 as profiler


 class BatchTimestamp(object):
@@ -139,31 +138,6 @@ class TimeHistory(tf.keras.callbacks.Callback):
       self.steps_in_epoch = 0


-def get_profiler_callback(model_dir, profile_steps, enable_tensorboard,
-                          steps_per_epoch):
-  """Validate profile_steps flag value and return profiler callback."""
-  profile_steps_error_message = (
-      'profile_steps must be a comma separated pair of positive integers, '
-      'specifying the first and last steps to be profiled.'
-  )
-  try:
-    profile_steps = [int(i) for i in profile_steps.split(',')]
-  except ValueError:
-    raise ValueError(profile_steps_error_message)
-  if len(profile_steps) != 2:
-    raise ValueError(profile_steps_error_message)
-  start_step, stop_step = profile_steps
-  if start_step < 0 or start_step > stop_step:
-    raise ValueError(profile_steps_error_message)
-  if enable_tensorboard:
-    logging.warning(
-        'Both TensorBoard and profiler callbacks are used. Note that the '
-        'TensorBoard callback profiles the 2nd step (unless otherwise '
-        'specified). Please make sure the steps profiled by the two callbacks '
-        'do not overlap.')
-  return ProfilerCallback(model_dir, start_step, stop_step, steps_per_epoch)
-
-
 class SimpleCheckpoint(tf.keras.callbacks.Callback):
   """Keras callback to save tf.train.Checkpoints."""
@@ -176,41 +150,6 @@ class SimpleCheckpoint(tf.keras.callbacks.Callback):
     self.checkpoint_manager.save(checkpoint_number=step_counter)


-class ProfilerCallback(tf.keras.callbacks.Callback):
-  """Save profiles in specified step range to log directory."""
-
-  def __init__(self, log_dir, start_step, stop_step, steps_per_epoch):
-    super(ProfilerCallback, self).__init__()
-    self.log_dir = log_dir
-    self.start_step = start_step
-    self.stop_step = stop_step
-    self.start_epoch = start_step // steps_per_epoch
-    self.stop_epoch = stop_step // steps_per_epoch
-    self.start_step_in_epoch = start_step % steps_per_epoch
-    self.stop_step_in_epoch = stop_step % steps_per_epoch
-    self.should_start = False
-    self.should_stop = False
-
-  def on_epoch_begin(self, epoch, logs=None):
-    if epoch == self.start_epoch:
-      self.should_start = True
-    if epoch == self.stop_epoch:
-      self.should_stop = True
-
-  def on_batch_begin(self, batch, logs=None):
-    if batch == self.start_step_in_epoch and self.should_start:
-      self.should_start = False
-      profiler.start(self.log_dir)
-      logging.info('Profiler started at Step %s', self.start_step)
-
-  def on_batch_end(self, batch, logs=None):
-    if batch == self.stop_step_in_epoch and self.should_stop:
-      self.should_stop = False
-      profiler.stop()
-      logging.info('Profiler saved profiles for steps between %s and %s to %s',
-                   self.start_step, self.stop_step, self.log_dir)
-
-
 def set_session_config(enable_eager=False,
                        enable_xla=False):
   """Sets the session config."""
...
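For reference, equivalent step-range profiling can still be written without TensorFlow-internal imports: the public tf.profiler.experimental API (available since TF 2.2) exposes the same start/stop calls the removed ProfilerCallback used. The sketch below is illustrative only and not part of this change; StepRangeProfiler is a hypothetical name, and tracking the global step directly avoids the steps_per_epoch bookkeeping the removed callback needed.

import tensorflow as tf


class StepRangeProfiler(tf.keras.callbacks.Callback):
  """Sketch: profile global steps [start_step, stop_step] to log_dir.

  Uses only the public tf.profiler.experimental API (TF 2.2+); not part of
  this commit, whose supported path is the TensorBoard callback.
  """

  def __init__(self, log_dir, start_step, stop_step):
    super(StepRangeProfiler, self).__init__()
    self.log_dir = log_dir
    self.start_step = start_step
    self.stop_step = stop_step
    self._global_step = 0

  def on_train_batch_begin(self, batch, logs=None):
    # Start the profiler once the configured global step is reached.
    if self._global_step == self.start_step:
      tf.profiler.experimental.start(self.log_dir)

  def on_train_batch_end(self, batch, logs=None):
    # Stop after the last requested step and advance the global step count.
    if self._global_step == self.stop_step:
      tf.profiler.experimental.stop()
    self._global_step += 1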
@@ -105,7 +105,6 @@ def get_optimizer(learning_rate=0.1):


 def get_callbacks(
-    steps_per_epoch,
     pruning_method=None,
     enable_checkpoint_and_export=False,
     model_dir=None):
@@ -121,14 +120,6 @@ def get_callbacks(
         log_dir=FLAGS.model_dir)
     callbacks.append(tensorboard_callback)
-  if FLAGS.profile_steps:
-    profiler_callback = keras_utils.get_profiler_callback(
-        FLAGS.model_dir,
-        FLAGS.profile_steps,
-        FLAGS.enable_tensorboard,
-        steps_per_epoch)
-    callbacks.append(profiler_callback)
   is_pruning_enabled = pruning_method is not None
   if is_pruning_enabled:
     callbacks.append(tfmot.sparsity.keras.UpdatePruningStep())
@@ -242,14 +233,6 @@ def define_keras_flags(
       help='The number of steps to run for training. If it is larger than '
       '# batches per epoch, then use # batches per epoch. This flag will be '
       'ignored if train_epochs is set to be larger than 1. ')
-  flags.DEFINE_string(
-      name='profile_steps', default=None,
-      help='Save profiling data to model dir at given range of global steps. The '
-      'value must be a comma separated pair of positive integers, specifying '
-      'the first and last step to profile. For example, "--profile_steps=2,4" '
-      'triggers the profiler to process 3 steps, starting from the 2nd step. '
-      'Note that profiler has a non-trivial performance overhead, and the '
-      'output file can be gigantic if profiling many steps.')
   flags.DEFINE_boolean(
       name='batchnorm_spatial_persistent', default=True,
       help='Enable the spacial persistent mode for CuDNN batch norm kernel.')
...