Commit d3d7f15f authored by Hongkun Yu, committed by A. Unique TensorFlower

Remove force_v2_in_keras_compile. experimental_run_tf_function is a no-op now.

PiperOrigin-RevId: 299160422
parent 12271d7c
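
For context, `experimental_run_tf_function` is the `tf.keras.Model.compile` keyword that the removed flag fed into; once that keyword became a no-op, the conditional compile calls in the diff collapse into a single unconditional one. Below is a minimal sketch of the before/after pattern, not code from this repo: `force_v2` is a hypothetical stand-in for the removed `--force_v2_in_keras_compile` flag, and the branch that passes the keyword is guarded so the snippet still runs on later TF 2.x builds where the argument was dropped from compile() entirely.

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(10)])

# Old pattern (deleted in this commit): only pass
# experimental_run_tf_function when the flag was explicitly set,
# since None meant "use the Keras default".
force_v2 = None  # hypothetical stand-in for the removed flag
if force_v2 is not None:
  model.compile(optimizer='adam', loss='mse',
                experimental_run_tf_function=force_v2)
else:
  model.compile(optimizer='adam', loss='mse')

# New pattern: the keyword is a no-op, so compile unconditionally.
model.compile(optimizer='adam', loss='mse')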
@@ -163,21 +163,6 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
     FLAGS.dtype = 'fp32'
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
-    """No dist strat forced v1 execution path."""
-    self._setup()
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.num_gpus = 1
-    FLAGS.data_dir = self.data_dir
-    FLAGS.batch_size = 128
-    FLAGS.train_epochs = 182
-    FLAGS.model_dir = self._get_model_dir(
-        'benchmark_1_gpu_no_dist_strat_force_v1_path')
-    FLAGS.dtype = 'fp32'
-    FLAGS.enable_eager = True
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_2_gpu(self):
     """Test keras based model with eager and distribution strategies."""
     self._setup()
@@ -261,17 +246,6 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 128
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_force_v1_path(self):
-    """Test 1 gpu using forced v1 execution path."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.enable_eager = True
-    FLAGS.distribution_strategy = 'one_device'
-    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_force_v1_path')
-    FLAGS.batch_size = 128
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_graph_1_gpu(self):
     """Test 1 gpu graph."""
     self._setup()
@@ -316,33 +290,6 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.distribution_strategy = 'off'
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
-    """No dist strat but forced v1 execution path."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.batch_size = 128
-    FLAGS.model_dir = self._get_model_dir(
-        'benchmark_1_gpu_no_dist_strat_force_v1_path')
-    FLAGS.dtype = 'fp32'
-    FLAGS.enable_eager = True
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
-  def benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly(self):
-    """Forced v1 execution path and forced eager."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.batch_size = 128
-    FLAGS.model_dir = self._get_model_dir(
-        'benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly')
-    FLAGS.dtype = 'fp32'
-    FLAGS.enable_eager = True
-    FLAGS.run_eagerly = True
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_2_gpu(self):
     """Test 2 gpu."""
     self._setup()
@@ -409,19 +356,6 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.data_format = 'channels_last'
     self._run_and_report_benchmark()
 
-  def benchmark_cpu_no_dist_strat_force_v1_path(self):
-    """Test cpu without dist strat and force v1 in model.compile."""
-    self._setup()
-    FLAGS.num_gpus = 0
-    FLAGS.enable_eager = True
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.model_dir = self._get_model_dir(
-        'benchmark_cpu_no_dist_strat_force_v1_path')
-    FLAGS.batch_size = 128
-    FLAGS.data_format = 'channels_last'
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_graph_cpu_no_dist_strat(self):
     """Test cpu graph mode without distribution strategies."""
     self._setup()
@@ -205,24 +205,12 @@ def run(flags_obj):
   with strategy_scope:
     optimizer = common.get_optimizer(lr_schedule)
     model = resnet_cifar_model.resnet56(classes=cifar_preprocessing.NUM_CLASSES)
-    # TODO(b/138957587): Remove when force_v2_in_keras_compile is no longer
-    # a valid arg for this model. Also remove as a valid flag.
-    if flags_obj.force_v2_in_keras_compile is not None:
-      model.compile(
-          loss='sparse_categorical_crossentropy',
-          optimizer=optimizer,
-          metrics=(['sparse_categorical_accuracy']
-                   if flags_obj.report_accuracy_metrics else None),
-          run_eagerly=flags_obj.run_eagerly,
-          experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
-    else:
-      model.compile(
-          loss='sparse_categorical_crossentropy',
-          optimizer=optimizer,
-          metrics=(['sparse_categorical_accuracy']
-                   if flags_obj.report_accuracy_metrics else None),
-          run_eagerly=flags_obj.run_eagerly)
+    model.compile(
+        loss='sparse_categorical_crossentropy',
+        optimizer=optimizer,
+        metrics=(['sparse_categorical_accuracy']
+                 if flags_obj.report_accuracy_metrics else None),
+        run_eagerly=flags_obj.run_eagerly)
 
   train_epochs = flags_obj.train_epochs
@@ -142,25 +142,12 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.early_stopping = True
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_force_v1_path_early_stop(self):
-    self._setup()
-    FLAGS.early_stopping = True
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_1_gpu_no_dist_strat_early_stop(self):
     self._setup()
     FLAGS.distribution_strategy = 'off'
     FLAGS.early_stopping = True
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_dist_strat_force_v1_path_early_stop(self):
-    self._setup()
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.early_stopping = True
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_1_gpu_no_dist_strat_run_eagerly_early_stop(self):
     self._setup()
     FLAGS.distribution_strategy = 'off'
@@ -174,13 +161,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.enable_xla = True
     self._run_and_report_benchmark()
 
-  def benchmark_xla_1_gpu_force_v1_path_early_stop(self):
-    self._setup()
-    FLAGS.early_stopping = True
-    FLAGS.enable_xla = True
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_1_gpu_ctl_early_stop(self):
     self._setup()
     FLAGS.keras_use_ctl = True
@@ -233,14 +213,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.train_epochs = 7
     self._run_and_report_benchmark_mlperf_like()
 
-  def benchmark_1_gpu_no_dist_strat_force_v1_path_mlperf_like(self):
-    """1 GPU using compile/fit without dist_strat."""
-    self._setup()
-    FLAGS.train_epochs = 7
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_1_gpu_no_dist_strat_mlperf_like(self):
     """1 GPU using compile/fit without dist_strat."""
     self._setup()
@@ -353,20 +325,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.epsilon = 1e-8
     self._run_and_report_benchmark_mlperf_like()
 
-  def benchmark_8_gpu_force_v1_path_mlperf_like(self):
-    """8 GPU using keras fit/compile v1 codepath."""
-    self._setup()
-    FLAGS.num_gpus = 8
-    FLAGS.train_epochs = 17
-    FLAGS.batch_size = 1048576
-    FLAGS.eval_batch_size = 160000
-    FLAGS.learning_rate = 0.0045
-    FLAGS.beta1 = 0.25
-    FLAGS.beta2 = 0.5
-    FLAGS.epsilon = 1e-8
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark_mlperf_like()
-
   def benchmark_8_gpu_ctl_mlperf_like(self):
     """8 GPU using CTL."""
     self._setup()
@@ -75,7 +75,6 @@ def define_transformer_flags():
       tf_gpu_thread_mode=True,
       datasets_num_private_threads=True,
       enable_xla=True,
-      force_v2_in_keras_compile=True,
       fp16_implementation=True
   )
@@ -157,7 +157,6 @@ def define_ncf_flags():
       loss_scale=True,
       dynamic_loss_scale=True,
       enable_xla=True,
-      force_v2_in_keras_compile=True
   )
   flags_core.define_device(tpu=True)
   flags_core.define_benchmark()
@@ -300,15 +300,7 @@ def run_ncf(_):
         num_eval_steps,
         generate_input_online=generate_input_online)
   else:
-    # TODO(b/138957587): Remove when force_v2_in_keras_compile is no longer
-    # a valid arg for this model. Also remove as a valid flag.
-    if FLAGS.force_v2_in_keras_compile is not None:
-      keras_model.compile(
-          optimizer=optimizer,
-          run_eagerly=FLAGS.run_eagerly,
-          experimental_run_tf_function=FLAGS.force_v2_in_keras_compile)
-    else:
-      keras_model.compile(optimizer=optimizer, run_eagerly=FLAGS.run_eagerly)
+    keras_model.compile(optimizer=optimizer, run_eagerly=FLAGS.run_eagerly)
 
   if not FLAGS.ml_perf:
     # Create Tensorboard summary and checkpoint callbacks.
@@ -176,19 +176,6 @@ class ShakespeareAccuracy(ShakespeareBenchmarkBase):
     FLAGS.distribution_strategy = 'off'
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_ds_force_v2(self):
-    """Benchmark 1 gpu no ds with force_v2 in keras.compile."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.training_data = self.train_data
-    FLAGS.batch_size = 64
-    FLAGS.train_epochs = 43
-    FLAGS.model_dir = ''
-    FLAGS.force_v2_in_keras_compile = True
-    FLAGS.distribution_strategy = 'off'
-    self._run_and_report_benchmark()
-
   def benchmark_xla_1_gpu(self):
     """Benchmark 1 gpu w/xla."""
     self._setup()
@@ -297,15 +284,6 @@ class ShakespeareKerasBenchmarkReal(ShakespeareBenchmarkBase):
     FLAGS.distribution_strategy = 'off'
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_ds_force_v2(self):
-    """Benchmark 1 gpu no ds, and force v2."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.batch_size = 64
-    FLAGS.force_v2_in_keras_compile = True
-    FLAGS.distribution_strategy = 'off'
-    self._run_and_report_benchmark()
-
   def benchmark_1_gpu_no_ds_run_eagerly(self):
     """Benchmark 1 gpu."""
     self._setup()
@@ -59,8 +59,7 @@ def define_flags():
       max_train_steps=False,
       dtype=True,
       loss_scale=True,
-      enable_xla=True,
-      force_v2_in_keras_compile=True)
+      enable_xla=True)
 
   flags_core.set_defaults(train_epochs=43,
                           batch_size=64)
@@ -193,8 +192,7 @@ def train_model(flags_obj, dataset, vocab_size, strategy, checkpoint_dir=None):
       loss=tf.keras.losses.CategoricalCrossentropy(),
       metrics=[tf.keras.metrics.Recall(top_k=1, name='RecallAt1'),
               tf.keras.metrics.Recall(top_k=5, name='RecallAt5')],
-      run_eagerly=flags_obj.run_eagerly,
-      experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
+      run_eagerly=flags_obj.run_eagerly)
 
   callbacks = []
   if checkpoint_dir:
@@ -64,7 +64,6 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
                        dynamic_loss_scale=False, fp16_implementation=False,
                        loss_scale=False,
                        tf_data_experimental_slack=False, enable_xla=False,
-                       force_v2_in_keras_compile=False,
                        training_dataset_cache=False):
   """Register flags for specifying performance tuning arguments.
@@ -91,9 +90,6 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
     tf_data_experimental_slack: Determines whether to enable tf.data's
       `experimental_slack` option.
     enable_xla: Determines if XLA (auto clustering) is turned on.
-    force_v2_in_keras_compile: Forces the use of the run_distributed path even
-      if not using a `strategy`. This is not the same as
-      `tf.distribute.OneDeviceStrategy`.
     training_dataset_cache: Whether to cache the training dataset on workers.
       Typically used to improve training performance when training data is in
       remote storage and can fit into worker memory.
@@ -290,11 +286,4 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
         name="enable_xla", default=False,
         help="Whether to enable XLA auto jit compilation")
 
-  if force_v2_in_keras_compile:
-    flags.DEFINE_boolean(
-        name="force_v2_in_keras_compile", default=None,
-        help="Forces the use of the run_distributed path even if not "
-             "using a `strategy`. This is not the same as "
-             "`tf.distribute.OneDeviceStrategy`.")
-
   return key_flags
@@ -213,7 +213,6 @@ def define_keras_flags(
       fp16_implementation=True,
       tf_data_experimental_slack=True,
       enable_xla=True,
-      force_v2_in_keras_compile=True,
       training_dataset_cache=True)
   flags_core.define_image()
   flags_core.define_benchmark()
@@ -215,23 +215,13 @@ def run(flags_obj):
     elif flags_obj.pruning_method:
       raise NotImplementedError(
           'Only polynomial_decay is currently supported.')
-    # TODO(b/138957587): Remove when force_v2_in_keras_compile is no longer
-    # a valid arg for this model. Also remove as a valid flag.
-    if flags_obj.force_v2_in_keras_compile is not None:
-      model.compile(
-          loss='sparse_categorical_crossentropy',
-          optimizer=optimizer,
-          metrics=(['sparse_categorical_accuracy']
-                   if flags_obj.report_accuracy_metrics else None),
-          run_eagerly=flags_obj.run_eagerly,
-          experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
-    else:
-      model.compile(
-          loss='sparse_categorical_crossentropy',
-          optimizer=optimizer,
-          metrics=(['sparse_categorical_accuracy']
-                   if flags_obj.report_accuracy_metrics else None),
-          run_eagerly=flags_obj.run_eagerly)
+    model.compile(
+        loss='sparse_categorical_crossentropy',
+        optimizer=optimizer,
+        metrics=(['sparse_categorical_accuracy']
+                 if flags_obj.report_accuracy_metrics else None),
+        run_eagerly=flags_obj.run_eagerly)
 
   train_epochs = flags_obj.train_epochs