Unverified Commit 51e60bab authored by Ayushman Kumar, committed by GitHub

Merge pull request #3 from tensorflow/master

Updated
parents 7653185e 7d86c317
@@ -75,7 +75,6 @@ def define_transformer_flags():
tf_gpu_thread_mode=True,
datasets_num_private_threads=True,
enable_xla=True,
force_v2_in_keras_compile=True,
fp16_implementation=True
)
@@ -157,7 +157,6 @@ def define_ncf_flags():
loss_scale=True,
dynamic_loss_scale=True,
enable_xla=True,
force_v2_in_keras_compile=True
)
flags_core.define_device(tpu=True)
flags_core.define_benchmark()
@@ -300,15 +300,7 @@ def run_ncf(_):
num_eval_steps,
generate_input_online=generate_input_online)
else:
# TODO(b/138957587): Remove when force_v2_in_keras_compile is no longer
# a valid arg for this model. Also remove as a valid flag.
if FLAGS.force_v2_in_keras_compile is not None:
keras_model.compile(
optimizer=optimizer,
run_eagerly=FLAGS.run_eagerly,
experimental_run_tf_function=FLAGS.force_v2_in_keras_compile)
else:
keras_model.compile(optimizer=optimizer, run_eagerly=FLAGS.run_eagerly)
keras_model.compile(optimizer=optimizer, run_eagerly=FLAGS.run_eagerly)
if not FLAGS.ml_perf:
# Create Tensorboard summary and checkpoint callbacks.
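For readers following the change, a self-contained sketch of the surviving compile path with a stand-in model (the real `keras_model` and `optimizer` come from the NCF code; everything below, including the loss, is illustrative):

```python
import tensorflow as tf

# Stand-in two-layer model; the real NCF model is built elsewhere.
keras_model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])
optimizer = tf.keras.optimizers.Adam()

# The single remaining compile call: run_eagerly=True disables
# tf.function graph tracing, which aids debugging at a speed cost.
keras_model.compile(optimizer=optimizer,
                    loss="binary_crossentropy",
                    run_eagerly=False)
```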
@@ -176,19 +176,6 @@ class ShakespeareAccuracy(ShakespeareBenchmarkBase):
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_1_gpu_no_ds_force_v2(self):
"""Benchmark 1 gpu no ds with force_v2 in keras.compile."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.training_data = self.train_data
FLAGS.batch_size = 64
FLAGS.train_epochs = 43
FLAGS.model_dir = ''
FLAGS.force_v2_in_keras_compile = True
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_xla_1_gpu(self):
"""Benchmark 1 gpu w/xla."""
self._setup()
@@ -297,15 +284,6 @@ class ShakespeareKerasBenchmarkReal(ShakespeareBenchmarkBase):
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_1_gpu_no_ds_force_v2(self):
"""Benchmark 1 gpu no ds, and force v2."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 64
FLAGS.force_v2_in_keras_compile = True
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_1_gpu_no_ds_run_eagerly(self):
"""Benchmark 1 gpu."""
self._setup()
@@ -59,8 +59,7 @@ def define_flags():
max_train_steps=False,
dtype=True,
loss_scale=True,
enable_xla=True,
force_v2_in_keras_compile=True)
enable_xla=True)
flags_core.set_defaults(train_epochs=43,
batch_size=64)
@@ -193,8 +192,7 @@ def train_model(flags_obj, dataset, vocab_size, strategy, checkpoint_dir=None):
loss=tf.keras.losses.CategoricalCrossentropy(),
metrics=[tf.keras.metrics.Recall(top_k=1, name='RecallAt1'),
tf.keras.metrics.Recall(top_k=5, name='RecallAt5')],
run_eagerly=flags_obj.run_eagerly,
experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
run_eagerly=flags_obj.run_eagerly)
callbacks = []
if checkpoint_dir:
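As a side note, a minimal sketch of the top-k recall metrics used in this compile call (the one-layer classifier is a stand-in; the real model is the Shakespeare LSTM):

```python
import tensorflow as tf

# Stand-in classifier for illustration only.
model = tf.keras.Sequential(
    [tf.keras.layers.Dense(10, activation="softmax")])

# With top_k set, Recall counts a sample as a true positive when the
# true class is among the k highest-scoring predictions.
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=[tf.keras.metrics.Recall(top_k=1, name="RecallAt1"),
             tf.keras.metrics.Recall(top_k=5, name="RecallAt5")])
```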
@@ -104,7 +104,8 @@ def minimize_using_explicit_allreduce(tape,
and model variables pairs as input, manipulate them, and returns a new
gradients and model variables pairs. The callback functions will be
invoked in the list order and before gradients are allreduced.
Default is no callbacks.
With mixed precision training, the pre_allreduce_callbacks will be
applied on scaled_gradients. Default is no callbacks.
post_allreduce_callbacks: A list of callback functions that takes
gradients and model variables pairs as input, manipulate them, and
returns a new gradients and model variables pairs. The callback
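To make the callback contract concrete, a minimal sketch of a pre-allreduce callback with the signature described above; the clip value is illustrative, and under mixed precision the incoming gradients are still loss-scaled:

```python
import tensorflow as tf

def clip_by_norm_callback(grads_and_vars):
  # Receives a list of (gradient, variable) pairs and must return a
  # new list of pairs; invoked before gradients are allreduced.
  return [(tf.clip_by_norm(grad, 1.0), var)
          for grad, var in grads_and_vars]
```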
@@ -64,7 +64,6 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
dynamic_loss_scale=False, fp16_implementation=False,
loss_scale=False,
tf_data_experimental_slack=False, enable_xla=False,
force_v2_in_keras_compile=False,
training_dataset_cache=False):
"""Register flags for specifying performance tuning arguments.
@@ -91,9 +90,6 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
tf_data_experimental_slack: Determines whether to enable tf.data's
`experimental_slack` option.
enable_xla: Determines if XLA (auto clustering) is turned on.
force_v2_in_keras_compile: Forces the use of the run_distributed path even
if not using a `strategy`. This is not the same as
`tf.distribute.OneDeviceStrategy`.
training_dataset_cache: Whether to cache the training dataset on workers.
Typically used to improve training performance when training data is in
remote storage and can fit into worker memory.
@@ -290,11 +286,4 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
name="enable_xla", default=False,
help="Whether to enable XLA auto jit compilation")
if force_v2_in_keras_compile:
flags.DEFINE_boolean(
name="force_v2_in_keras_compile", default=None,
help="Forces the use of run_distribued path even if not"
"using a `strategy`. This is not the same as"
"`tf.distribute.OneDeviceStrategy`")
return key_flags
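For readers unfamiliar with the flag machinery, a self-contained sketch of the absl.flags pattern this function builds on (the flag mirrors `enable_xla` above; the `main` body is illustrative):

```python
from absl import app
from absl import flags

FLAGS = flags.FLAGS

# Each boolean switch passed to define_performance gates a DEFINE_*
# call like this one; callers read the parsed value from FLAGS.
flags.DEFINE_boolean(
    name="enable_xla", default=False,
    help="Whether to enable XLA auto jit compilation")

def main(_):
  if FLAGS.enable_xla:
    print("XLA auto jit compilation requested")

if __name__ == "__main__":
  app.run(main)
```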
@@ -115,7 +115,6 @@ def get_filenames(is_training, data_dir):
def input_fn(is_training,
data_dir,
batch_size,
num_epochs=1,
dtype=tf.float32,
datasets_num_private_threads=None,
parse_record_fn=parse_record,
@@ -127,7 +126,6 @@ def input_fn(is_training,
is_training: A boolean denoting whether the input is for training.
data_dir: The directory containing the input data.
batch_size: The number of samples per batch.
num_epochs: The number of epochs to repeat the dataset.
dtype: Data type to use for images/features
datasets_num_private_threads: Number of private threads for tf.data.
parse_record_fn: Function to use for parsing the records.
@@ -155,7 +153,6 @@ def input_fn(is_training,
batch_size=batch_size,
shuffle_buffer=NUM_IMAGES['train'],
parse_record_fn=parse_record_fn,
num_epochs=num_epochs,
dtype=dtype,
datasets_num_private_threads=datasets_num_private_threads,
drop_remainder=drop_remainder
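One way to read the `num_epochs` removal: the input pipeline now yields each example once per pass, and epoch repetition is left to the training loop. A toy sketch of that division of labor, assuming Keras `model.fit` as the driver (all names and data below are stand-ins):

```python
import tensorflow as tf

# Toy dataset; note there is no .repeat() in the pipeline.
features = tf.random.uniform([32, 4])
labels = tf.random.uniform([32], maxval=2, dtype=tf.int32)
dataset = (tf.data.Dataset.from_tensor_slices((features, labels))
           .shuffle(32)
           .batch(8))

model = tf.keras.Sequential(
    [tf.keras.layers.Dense(2, activation="softmax")])
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")

# fit() re-iterates a finite dataset once per epoch, so the pipeline
# no longer needs to know the epoch count.
model.fit(dataset, epochs=3)
```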
@@ -213,7 +213,6 @@ def define_keras_flags(
fp16_implementation=True,
tf_data_experimental_slack=True,
enable_xla=True,
force_v2_in_keras_compile=True,
training_dataset_cache=True)
flags_core.define_image()
flags_core.define_benchmark()
@@ -67,7 +67,6 @@ def process_record_dataset(dataset,
batch_size,
shuffle_buffer,
parse_record_fn,
num_epochs=1,
dtype=tf.float32,
datasets_num_private_threads=None,
drop_remainder=False,
@@ -83,7 +82,6 @@ def process_record_dataset(dataset,
time and use less memory.
parse_record_fn: A function that takes a raw record and returns the
corresponding (image, label) pair.
num_epochs: The number of epochs to repeat the dataset.
dtype: Data type to use for images/features.
datasets_num_private_threads: Number of threads for a private
threadpool created for all datasets computation.
@@ -276,7 +274,6 @@ def get_parse_record_fn(use_keras_image_data_format=False):
def input_fn(is_training,
data_dir,
batch_size,
num_epochs=1,
dtype=tf.float32,
datasets_num_private_threads=None,
parse_record_fn=parse_record,
@@ -291,7 +288,6 @@ def input_fn(is_training,
is_training: A boolean denoting whether the input is for training.
data_dir: The directory containing the input data.
batch_size: The number of samples per batch.
num_epochs: The number of epochs to repeat the dataset.
dtype: Data type to use for images/features
datasets_num_private_threads: Number of private threads for tf.data.
parse_record_fn: Function to use for parsing the records.
@@ -344,7 +340,6 @@ def input_fn(is_training,
batch_size=batch_size,
shuffle_buffer=_SHUFFLE_BUFFER,
parse_record_fn=parse_record_fn,
num_epochs=num_epochs,
dtype=dtype,
datasets_num_private_threads=datasets_num_private_threads,
drop_remainder=drop_remainder,
@@ -215,23 +215,13 @@ def run(flags_obj):
elif flags_obj.pruning_method:
raise NotImplementedError(
'Only polynomial_decay is currently supported.')
# TODO(b/138957587): Remove when force_v2_in_keras_compile is no longer
# a valid arg for this model. Also remove as a valid flag.
if flags_obj.force_v2_in_keras_compile is not None:
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=optimizer,
metrics=(['sparse_categorical_accuracy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly,
experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
else:
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=optimizer,
metrics=(['sparse_categorical_accuracy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly)
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=optimizer,
metrics=(['sparse_categorical_accuracy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly)
train_epochs = flags_obj.train_epochs