Commit d3d7f15f authored by Hongkun Yu, committed by A. Unique TensorFlower

Remove force_v2_in_keras_compile. experimental_run_tf_function is a no-op now.

PiperOrigin-RevId: 299160422
parent 12271d7c
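
For context, the cleanup this commit applies at every call site can be sketched as follows. This is an illustrative toy example, not code from the commit (the Sequential model and its hyperparameters are made up); it assumes TF 2.x, where tf.keras treats experimental_run_tf_function as a no-op, so the flag-driven branch around model.compile() collapses to a single call:

    # Illustrative sketch only (assumes TF 2.x); the toy model below is not
    # from this commit. Before the cleanup, call sites branched on the
    # force_v2_in_keras_compile flag and forwarded it to compile() as
    # experimental_run_tf_function; since tf.keras now ignores that argument,
    # the plain call is equivalent.
    import tensorflow as tf

    model = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(8,))])
    model.compile(
        loss='mse',
        optimizer='sgd',
        run_eagerly=False)  # experimental_run_tf_function no longer passed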
@@ -163,21 +163,6 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
     FLAGS.dtype = 'fp32'
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
-    """No dist strat forced v1 execution path."""
-    self._setup()
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.num_gpus = 1
-    FLAGS.data_dir = self.data_dir
-    FLAGS.batch_size = 128
-    FLAGS.train_epochs = 182
-    FLAGS.model_dir = self._get_model_dir(
-        'benchmark_1_gpu_no_dist_strat_force_v1_path')
-    FLAGS.dtype = 'fp32'
-    FLAGS.enable_eager = True
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_2_gpu(self):
     """Test keras based model with eager and distribution strategies."""
     self._setup()
@@ -261,17 +246,6 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 128
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_force_v1_path(self):
-    """Test 1 gpu using forced v1 execution path."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.enable_eager = True
-    FLAGS.distribution_strategy = 'one_device'
-    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_force_v1_path')
-    FLAGS.batch_size = 128
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_graph_1_gpu(self):
     """Test 1 gpu graph."""
     self._setup()
@@ -316,33 +290,6 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.distribution_strategy = 'off'
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
-    """No dist strat but forced v1 execution path."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.batch_size = 128
-    FLAGS.model_dir = self._get_model_dir(
-        'benchmark_1_gpu_no_dist_strat_force_v1_path')
-    FLAGS.dtype = 'fp32'
-    FLAGS.enable_eager = True
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
-  def benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly(self):
-    """Forced v1 execution path and forced eager."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.batch_size = 128
-    FLAGS.model_dir = self._get_model_dir(
-        'benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly')
-    FLAGS.dtype = 'fp32'
-    FLAGS.enable_eager = True
-    FLAGS.run_eagerly = True
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_2_gpu(self):
     """Test 2 gpu."""
     self._setup()
@@ -409,19 +356,6 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.data_format = 'channels_last'
     self._run_and_report_benchmark()
 
-  def benchmark_cpu_no_dist_strat_force_v1_path(self):
-    """Test cpu without dist strat and force v1 in model.compile."""
-    self._setup()
-    FLAGS.num_gpus = 0
-    FLAGS.enable_eager = True
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.model_dir = self._get_model_dir(
-        'benchmark_cpu_no_dist_strat_force_v1_path')
-    FLAGS.batch_size = 128
-    FLAGS.data_format = 'channels_last'
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_graph_cpu_no_dist_strat(self):
     """Test cpu graph mode without distribution strategies."""
     self._setup()
...
@@ -205,18 +205,6 @@ def run(flags_obj):
   with strategy_scope:
     optimizer = common.get_optimizer(lr_schedule)
     model = resnet_cifar_model.resnet56(classes=cifar_preprocessing.NUM_CLASSES)
-    # TODO(b/138957587): Remove when force_v2_in_keras_compile is on longer
-    # a valid arg for this model. Also remove as a valid flag.
-    if flags_obj.force_v2_in_keras_compile is not None:
-      model.compile(
-          loss='sparse_categorical_crossentropy',
-          optimizer=optimizer,
-          metrics=(['sparse_categorical_accuracy']
-                   if flags_obj.report_accuracy_metrics else None),
-          run_eagerly=flags_obj.run_eagerly,
-          experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
-    else:
-      model.compile(
-          loss='sparse_categorical_crossentropy',
-          optimizer=optimizer,
+    model.compile(
+        loss='sparse_categorical_crossentropy',
+        optimizer=optimizer,
...
@@ -142,25 +142,12 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.early_stopping = True
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_force_v1_path_early_stop(self):
-    self._setup()
-    FLAGS.early_stopping = True
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_1_gpu_no_dist_strat_early_stop(self):
     self._setup()
     FLAGS.distribution_strategy = 'off'
     FLAGS.early_stopping = True
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_dist_strat_force_v1_path_early_stop(self):
-    self._setup()
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.early_stopping = True
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_1_gpu_no_dist_strat_run_eagerly_early_stop(self):
     self._setup()
     FLAGS.distribution_strategy = 'off'
@@ -174,13 +161,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.enable_xla = True
     self._run_and_report_benchmark()
 
-  def benchmark_xla_1_gpu_force_v1_path_early_stop(self):
-    self._setup()
-    FLAGS.early_stopping = True
-    FLAGS.enable_xla = True
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_1_gpu_ctl_early_stop(self):
     self._setup()
     FLAGS.keras_use_ctl = True
@@ -233,14 +213,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.train_epochs = 7
     self._run_and_report_benchmark_mlperf_like()
 
-  def benchmark_1_gpu_no_dist_strat_force_v1_path_mlperf_like(self):
-    """1 GPU using compile/fit without dist_strat."""
-    self._setup()
-    FLAGS.train_epochs = 7
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_1_gpu_no_dist_strat_mlperf_like(self):
     """1 GPU using compile/fit without dist_strat."""
     self._setup()
@@ -353,20 +325,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.epsilon = 1e-8
     self._run_and_report_benchmark_mlperf_like()
 
-  def benchmark_8_gpu_force_v1_path_mlperf_like(self):
-    """8 GPU using keras fit/compile v1 codepath."""
-    self._setup()
-    FLAGS.num_gpus = 8
-    FLAGS.train_epochs = 17
-    FLAGS.batch_size = 1048576
-    FLAGS.eval_batch_size = 160000
-    FLAGS.learning_rate = 0.0045
-    FLAGS.beta1 = 0.25
-    FLAGS.beta2 = 0.5
-    FLAGS.epsilon = 1e-8
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark_mlperf_like()
-
   def benchmark_8_gpu_ctl_mlperf_like(self):
     """8 GPU using CTL."""
     self._setup()
...
@@ -75,7 +75,6 @@ def define_transformer_flags():
       tf_gpu_thread_mode=True,
       datasets_num_private_threads=True,
       enable_xla=True,
-      force_v2_in_keras_compile=True,
       fp16_implementation=True
   )
...
@@ -157,7 +157,6 @@ def define_ncf_flags():
       loss_scale=True,
       dynamic_loss_scale=True,
       enable_xla=True,
-      force_v2_in_keras_compile=True
   )
   flags_core.define_device(tpu=True)
   flags_core.define_benchmark()
...
@@ -299,14 +299,6 @@ def run_ncf(_):
         num_train_steps,
         num_eval_steps,
         generate_input_online=generate_input_online)
-  else:
-    # TODO(b/138957587): Remove when force_v2_in_keras_compile is on longer
-    # a valid arg for this model. Also remove as a valid flag.
-    if FLAGS.force_v2_in_keras_compile is not None:
-      keras_model.compile(
-          optimizer=optimizer,
-          run_eagerly=FLAGS.run_eagerly,
-          experimental_run_tf_function=FLAGS.force_v2_in_keras_compile)
-    else:
-      keras_model.compile(optimizer=optimizer, run_eagerly=FLAGS.run_eagerly)
+  else:
+    keras_model.compile(optimizer=optimizer, run_eagerly=FLAGS.run_eagerly)
...
@@ -176,19 +176,6 @@ class ShakespeareAccuracy(ShakespeareBenchmarkBase):
     FLAGS.distribution_strategy = 'off'
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_ds_force_v2(self):
-    """Benchmark 1 gpu no ds with force_v2 in keras.compile."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.training_data = self.train_data
-    FLAGS.batch_size = 64
-    FLAGS.train_epochs = 43
-    FLAGS.model_dir = ''
-    FLAGS.force_v2_in_keras_compile = True
-    FLAGS.distribution_strategy = 'off'
-    self._run_and_report_benchmark()
-
   def benchmark_xla_1_gpu(self):
     """Benchmark 1 gpu w/xla."""
     self._setup()
@@ -297,15 +284,6 @@ class ShakespeareKerasBenchmarkReal(ShakespeareBenchmarkBase):
     FLAGS.distribution_strategy = 'off'
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_ds_force_v2(self):
-    """Benchmark 1 gpu no ds, and force v2."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.batch_size = 64
-    FLAGS.force_v2_in_keras_compile = True
-    FLAGS.distribution_strategy = 'off'
-    self._run_and_report_benchmark()
-
   def benchmark_1_gpu_no_ds_run_eagerly(self):
     """Benchmark 1 gpu."""
     self._setup()
...
@@ -59,8 +59,7 @@ def define_flags():
       max_train_steps=False,
       dtype=True,
       loss_scale=True,
-      enable_xla=True,
-      force_v2_in_keras_compile=True)
+      enable_xla=True)
   flags_core.set_defaults(train_epochs=43,
                           batch_size=64)
@@ -193,8 +192,7 @@ def train_model(flags_obj, dataset, vocab_size, strategy, checkpoint_dir=None):
       loss=tf.keras.losses.CategoricalCrossentropy(),
       metrics=[tf.keras.metrics.Recall(top_k=1, name='RecallAt1'),
                tf.keras.metrics.Recall(top_k=5, name='RecallAt5')],
-      run_eagerly=flags_obj.run_eagerly,
-      experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
+      run_eagerly=flags_obj.run_eagerly)
   callbacks = []
   if checkpoint_dir:
...
@@ -64,7 +64,6 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
                        dynamic_loss_scale=False, fp16_implementation=False,
                        loss_scale=False,
                        tf_data_experimental_slack=False, enable_xla=False,
-                       force_v2_in_keras_compile=False,
                        training_dataset_cache=False):
   """Register flags for specifying performance tuning arguments.
@@ -91,9 +90,6 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
     tf_data_experimental_slack: Determines whether to enable tf.data's
       `experimental_slack` option.
     enable_xla: Determines if XLA (auto clustering) is turned on.
-    force_v2_in_keras_compile: Forces the use of run_distribued path even if not
-      using a `strategy`. This is not the same as
-      `tf.distribute.OneDeviceStrategy`
     training_dataset_cache: Whether to cache the training dataset on workers.
       Typically used to improve training performance when training data is in
       remote storage and can fit into worker memory.
@@ -290,11 +286,4 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
         name="enable_xla", default=False,
         help="Whether to enable XLA auto jit compilation")
 
-  if force_v2_in_keras_compile:
-    flags.DEFINE_boolean(
-        name="force_v2_in_keras_compile", default=None,
-        help="Forces the use of run_distribued path even if not"
-             "using a `strategy`. This is not the same as"
-             "`tf.distribute.OneDeviceStrategy`")
-
   return key_flags
@@ -213,7 +213,6 @@ def define_keras_flags(
       fp16_implementation=True,
      tf_data_experimental_slack=True,
       enable_xla=True,
-      force_v2_in_keras_compile=True,
       training_dataset_cache=True)
   flags_core.define_image()
   flags_core.define_benchmark()
...
@@ -215,17 +215,7 @@ def run(flags_obj):
   elif flags_obj.pruning_method:
     raise NotImplementedError(
         'Only polynomial_decay is currently supported.')
-  # TODO(b/138957587): Remove when force_v2_in_keras_compile is on longer
-  # a valid arg for this model. Also remove as a valid flag.
-  if flags_obj.force_v2_in_keras_compile is not None:
-    model.compile(
-        loss='sparse_categorical_crossentropy',
-        optimizer=optimizer,
-        metrics=(['sparse_categorical_accuracy']
-                 if flags_obj.report_accuracy_metrics else None),
-        run_eagerly=flags_obj.run_eagerly,
-        experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
-  else:
-    model.compile(
-        loss='sparse_categorical_crossentropy',
-        optimizer=optimizer,
+  model.compile(
+      loss='sparse_categorical_crossentropy',
+      optimizer=optimizer,
...