"doc/git@developer.sourcefind.cn:wqshmzh/ktransformers.git" did not exist on "4f4ed36442894fa8a0b2f228c8ec89453c740c8d"
Unverified commit a35e09d2, authored by Vinh Nguyen and committed by GitHub

Merge branch 'master' into amp_resnet50

parents d5722dcd 1f5a5e9d
@@ -21,6 +21,7 @@ import os
import time
from absl import flags
+import tensorflow as tf
from official.transformer.v2 import misc
from official.transformer.v2 import transformer_main as transformer_main
@@ -30,6 +31,7 @@ from official.utils.testing.perfzero_benchmark import PerfZeroBenchmark
TRANSFORMER_EN2DE_DATA_DIR_NAME = 'wmt32k-en2de-official'
EN2DE_2014_BLEU_DATA_DIR_NAME = 'newstest2014'
FLAGS = flags.FLAGS
+TMP_DIR = os.getenv('TMPDIR')
class TransformerBenchmark(PerfZeroBenchmark):
@@ -56,6 +58,11 @@ class TransformerBenchmark(PerfZeroBenchmark):
                                      EN2DE_2014_BLEU_DATA_DIR_NAME,
                                      'newstest2014.de')
+    if default_flags is None:
+      default_flags = {}
+    default_flags['data_dir'] = self.train_data_dir
+    default_flags['vocab_file'] = self.vocab_file
    super(TransformerBenchmark, self).__init__(
        output_dir=output_dir,
        default_flags=default_flags,
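The five added lines above fill benchmark-wide defaults into whatever `default_flags` dictionary a subclass passes up. A minimal, hypothetical stand-in for that pattern (plain Python, not the real PerfZeroBenchmark subclass; the attribute names only mirror the diff):

```python
# Hypothetical, simplified stand-in for the default_flags merge added above.
class BenchmarkBase:
  def __init__(self, train_data_dir, vocab_file, default_flags=None):
    if default_flags is None:          # callers may pass nothing at all
      default_flags = {}
    # Fill benchmark-wide defaults before handing the dict to the base class.
    default_flags['data_dir'] = train_data_dir
    default_flags['vocab_file'] = vocab_file
    self.default_flags = default_flags

bench = BenchmarkBase('/data/wmt32k-en2de-official',
                      '/data/wmt32k-en2de-official/vocab.ende.32768',
                      default_flags={'param_set': 'big'})
print(bench.default_flags)  # caller's flags plus data_dir and vocab_file
```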
@@ -280,8 +287,8 @@ class TransformerBigKerasAccuracy(TransformerBenchmark):
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps,
-                                   bleu_min=28,
-                                   bleu_max=29)
+                                   bleu_min=27.9,
+                                   bleu_max=29.2)
  def benchmark_8_gpu_static_batch(self):
    """Benchmark 8 gpu.
@@ -305,12 +312,19 @@ class TransformerBigKerasAccuracy(TransformerBenchmark):
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps,
                                   bleu_min=28,
-                                   bleu_max=29)
+                                   bleu_max=29.2)
  def benchmark_8_gpu_fp16(self):
    """Benchmark 8 gpu with dynamic batch and fp16.
-    Should converge to 28.4 BLEU (uncased). This has not be verified yet.
+    Over 6 runs with eval every 20K steps the average highest value was 28.247
+    (bleu uncased). 28.424 was the highest and 28.09 the lowest. The values are
+    the highest value seen during a run and occurred at a median of iteration
+    11. While this could be interpreted as worse than FP32, if looking at the
+    first iteration at which 28 is passed FP16 performs equal and possibly
+    better. Although not part of the initial test runs, the highest value
+    recorded with the arguments below was 28.9 at iteration 12. Iterations are
+    not epochs, an iteration is a number of steps between evals.
    """
    self._setup()
    FLAGS.num_gpus = 8
@@ -328,7 +342,7 @@ class TransformerBigKerasAccuracy(TransformerBenchmark):
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps,
                                   bleu_min=28,
-                                   bleu_max=29)
+                                   bleu_max=29.2)
  def benchmark_8_gpu_fp16_amp(self):
    """Benchmark 8 gpu with dynamic batch and fp16 with automatic mixed precision.
@@ -377,7 +391,7 @@ class TransformerBigKerasAccuracy(TransformerBenchmark):
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps,
                                   bleu_min=28,
-                                   bleu_max=29)
+                                   bleu_max=29.2)
  def benchmark_xla_8_gpu_static_batch_fp16(self):
    """Benchmark 8 gpu with static batch, XLA, and FP16.
@@ -404,7 +418,7 @@ class TransformerBigKerasAccuracy(TransformerBenchmark):
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps,
                                   bleu_min=28,
-                                   bleu_max=29)
+                                   bleu_max=29.2)
class TransformerKerasBenchmark(TransformerBenchmark):
@@ -635,17 +649,9 @@ class TransformerKerasBenchmark(TransformerBenchmark):
class TransformerBaseKerasBenchmarkReal(TransformerKerasBenchmark):
  """Transformer based version real data benchmark tests."""
-  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
-    train_data_dir = os.path.join(root_data_dir,
-                                  TRANSFORMER_EN2DE_DATA_DIR_NAME)
-    vocab_file = os.path.join(root_data_dir,
-                              TRANSFORMER_EN2DE_DATA_DIR_NAME,
-                              'vocab.ende.32768')
+  def __init__(self, output_dir=TMP_DIR, root_data_dir=None, **kwargs):
    def_flags = {}
    def_flags['param_set'] = 'base'
-    def_flags['vocab_file'] = vocab_file
-    def_flags['data_dir'] = train_data_dir
    def_flags['train_steps'] = 200
    def_flags['log_steps'] = 10
@@ -657,20 +663,16 @@ class TransformerBaseKerasBenchmarkReal(TransformerKerasBenchmark):
class TransformerBigKerasBenchmarkReal(TransformerKerasBenchmark):
  """Transformer based version real data benchmark tests."""
-  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
-    train_data_dir = os.path.join(root_data_dir,
-                                  TRANSFORMER_EN2DE_DATA_DIR_NAME)
-    vocab_file = os.path.join(root_data_dir,
-                              TRANSFORMER_EN2DE_DATA_DIR_NAME,
-                              'vocab.ende.32768')
+  def __init__(self, output_dir=TMP_DIR, root_data_dir=None, **kwargs):
    def_flags = {}
    def_flags['param_set'] = 'big'
-    def_flags['vocab_file'] = vocab_file
-    def_flags['data_dir'] = train_data_dir
    def_flags['train_steps'] = 200
    def_flags['log_steps'] = 10
    super(TransformerBigKerasBenchmarkReal, self).__init__(
        output_dir=output_dir, default_flags=def_flags,
        root_data_dir=root_data_dir, batch_per_gpu=3072)
+if __name__ == '__main__':
+  tf.test.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for layers in Transformer.""" """Tests for layers in Transformer."""
from __future__ import absolute_import from __future__ import absolute_import
...@@ -79,4 +93,5 @@ class TransformerLayersTest(tf.test.TestCase): ...@@ -79,4 +93,5 @@ class TransformerLayersTest(tf.test.TestCase):
if __name__ == "__main__": if __name__ == "__main__":
tf.compat.v1.enable_v2_behavior()
tf.test.main() tf.test.main()
@@ -52,18 +52,40 @@ BLEU_DIR = "bleu"
_SINGLE_SAMPLE = 1
-def translate_and_compute_bleu(model, subtokenizer, bleu_source, bleu_ref):
-  """Translate file and report the cased and uncased bleu scores."""
+def translate_and_compute_bleu(model,
+                               params,
+                               subtokenizer,
+                               bleu_source,
+                               bleu_ref,
+                               distribution_strategy=None):
+  """Translate file and report the cased and uncased bleu scores.
+  Args:
+    model: A Keras model, used to generate the translations.
+    params: A dictionary, containing the translation related parameters.
+    subtokenizer: A subtokenizer object, used for encoding and decoding source
+      and translated lines.
+    bleu_source: A file containing source sentences for translation.
+    bleu_ref: A file containing the reference for the translated sentences.
+    distribution_strategy: A platform distribution strategy, used for TPU based
+      translation.
+  Returns:
+    uncased_score: A float, the case insensitive BLEU score.
+    cased_score: A float, the case sensitive BLEU score.
+  """
  # Create temporary file to store translation.
  tmp = tempfile.NamedTemporaryFile(delete=False)
  tmp_filename = tmp.name
  translate.translate_file(
      model,
+      params,
      subtokenizer,
      bleu_source,
      output_file=tmp_filename,
-      print_all_translations=False)
+      print_all_translations=False,
+      distribution_strategy=distribution_strategy)
  # Compute uncased and cased bleu scores.
  uncased_score = compute_bleu.bleu_wrapper(bleu_ref, tmp_filename, False)
@@ -72,12 +94,31 @@ def translate_and_compute_bleu(model, subtokenizer, bleu_source, bleu_ref):
  return uncased_score, cased_score
-def evaluate_and_log_bleu(model, bleu_source, bleu_ref, vocab_file):
-  """Calculate and record the BLEU score."""
+def evaluate_and_log_bleu(model,
+                          params,
+                          bleu_source,
+                          bleu_ref,
+                          vocab_file,
+                          distribution_strategy=None):
+  """Calculate and record the BLEU score.
+  Args:
+    model: A Keras model, used to generate the translations.
+    params: A dictionary, containing the translation related parameters.
+    bleu_source: A file containing source sentences for translation.
+    bleu_ref: A file containing the reference for the translated sentences.
+    vocab_file: A file containing the vocabulary for translation.
+    distribution_strategy: A platform distribution strategy, used for TPU based
+      translation.
+  Returns:
+    uncased_score: A float, the case insensitive BLEU score.
+    cased_score: A float, the case sensitive BLEU score.
+  """
  subtokenizer = tokenizer.Subtokenizer(vocab_file)
  uncased_score, cased_score = translate_and_compute_bleu(
-      model, subtokenizer, bleu_source, bleu_ref)
+      model, params, subtokenizer, bleu_source, bleu_ref, distribution_strategy)
  logging.info("Bleu score (uncased): %s", uncased_score)
  logging.info("Bleu score (cased): %s", cased_score)
@@ -110,6 +151,9 @@ class TransformerTask(object):
    params["model_dir"] = flags_obj.model_dir
    params["static_batch"] = flags_obj.static_batch
    params["max_length"] = flags_obj.max_length
+    params["decode_batch_size"] = flags_obj.decode_batch_size
+    params["decode_max_length"] = flags_obj.decode_max_length
+    params["padded_decode"] = flags_obj.padded_decode
    params["num_parallel_calls"] = (
        flags_obj.num_parallel_calls or tf.data.experimental.AUTOTUNE)
@@ -124,8 +168,10 @@ class TransformerTask(object):
      # like this. What if multiple instances of TransformerTask are created?
      # We should have a better way in the tf.keras.mixed_precision API of doing
      # this.
+      loss_scale = flags_core.get_loss_scale(flags_obj,
+                                             default_for_fp16="dynamic")
      policy = tf.keras.mixed_precision.experimental.Policy(
-          "infer_float32_vars")
+          "mixed_float16", loss_scale=loss_scale)
      tf.keras.mixed_precision.experimental.set_policy(policy)
    self.distribution_strategy = distribution_utils.get_distribution_strategy(
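The hunk above replaces the old "infer_float32_vars" policy with an explicit mixed_float16 policy that carries a loss scale. A minimal sketch of the same setup in isolation, assuming the experimental mixed-precision API of the TensorFlow version this code targets; the flag plumbing is replaced by a literal value:

```python
import tensorflow as tf

# Sketch of the mixed-precision setup this hunk switches to ("dynamic" is the
# fp16 default used above; real code reads it from flags).
loss_scale = "dynamic"
policy = tf.keras.mixed_precision.experimental.Policy(
    "mixed_float16", loss_scale=loss_scale)
tf.keras.mixed_precision.experimental.set_policy(policy)

# Layers built after this point compute in float16 but keep float32 variables;
# the dynamic loss scale guards the fp16 gradients against underflow.
print(tf.keras.mixed_precision.experimental.global_policy())
```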
@@ -133,6 +179,7 @@ class TransformerTask(object):
        num_gpus=num_gpus,
        tpu_address=flags_obj.tpu or "")
    if self.use_tpu:
+      params["num_replicas"] = self.distribution_strategy.num_replicas_in_sync
      if not params["static_batch"]:
        raise ValueError("TPU requires static batch for input data.")
    else:
@@ -306,10 +353,10 @@ class TransformerTask(object):
        self.predict_model,
        tf.train.latest_checkpoint(self.flags_obj.model_dir))
    self.predict_model.summary()
-    return evaluate_and_log_bleu(self.predict_model,
-                                 self.flags_obj.bleu_source,
-                                 self.flags_obj.bleu_ref,
-                                 self.flags_obj.vocab_file)
+    return evaluate_and_log_bleu(
+        self.predict_model, self.params, self.flags_obj.bleu_source,
+        self.flags_obj.bleu_ref, self.flags_obj.vocab_file,
+        self.distribution_strategy if self.use_tpu else None)
  def predict(self):
    """Predicts result from the model."""
@@ -372,6 +419,7 @@ class TransformerTask(object):
        params["optimizer_adam_beta1"],
        params["optimizer_adam_beta2"],
        epsilon=params["optimizer_adam_epsilon"])
    if params["dtype"] == tf.float16:
      opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
          opt, loss_scale=flags_core.get_loss_scale(self.flags_obj,
......
@@ -20,6 +20,7 @@ from __future__ import print_function
import os
import re
+import sys
import unittest
from absl import flags
@@ -178,10 +179,13 @@ class TransformerTaskTest(tf.test.TestCase):
  def test_eval(self):
    if context.num_gpus() >= 2:
      self.skipTest('No need to test 2+ GPUs without a distribution strategy.')
+    if 'test_xla' in sys.argv[0]:
+      self.skipTest('TODO(xla): Make this test faster under XLA.')
    self._prepare_files_and_flags()
    t = tm.TransformerTask(FLAGS)
    t.eval()
if __name__ == '__main__':
+  tf.compat.v1.enable_v2_behavior()
  tf.test.main()
@@ -65,4 +65,5 @@ class TransformerV2Test(tf.test.TestCase):
if __name__ == "__main__":
+  tf.compat.v1.enable_v2_behavior()
  tf.test.main()
@@ -18,11 +18,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import numpy as np
import tensorflow as tf
+from tensorflow.python.distribute import values
from official.transformer.utils import tokenizer
-_DECODE_BATCH_SIZE = 32
_EXTRA_DECODE_LENGTH = 100
_BEAM_SIZE = 4
_ALPHA = 0.6
@@ -68,23 +69,31 @@ def _trim_and_decode(ids, subtokenizer):
  return subtokenizer.decode(ids)
-def translate_file(
-    model, subtokenizer, input_file, output_file=None,
-    print_all_translations=True):
+def translate_file(model,
+                   params,
+                   subtokenizer,
+                   input_file,
+                   output_file=None,
+                   print_all_translations=True,
+                   distribution_strategy=None):
  """Translate lines in file, and save to output file if specified.
  Args:
-    model: Keras model used to generate the translations.
-    subtokenizer: Subtokenizer object for encoding and decoding source and
-      translated lines.
-    input_file: file containing lines to translate
-    output_file: file that stores the generated translations.
-    print_all_translations: If true, all translations are printed to stdout.
+    model: A Keras model, used to generate the translations.
+    params: A dictionary, containing the translation related parameters.
+    subtokenizer: A subtokenizer object, used for encoding and decoding source
+      and translated lines.
+    input_file: A file containing lines to translate.
+    output_file: A file that stores the generated translations.
+    print_all_translations: A bool. If true, all translations are printed to
+      stdout.
+    distribution_strategy: A distribution strategy, used to perform inference
+      directly with tf.function instead of Keras model.predict().
  Raises:
    ValueError: if output file is invalid.
  """
-  batch_size = _DECODE_BATCH_SIZE
+  batch_size = params["decode_batch_size"]
  # Read and sort inputs by length. Keep dictionary (original index-->new index
  # in sorted list) to write translations in the original order.
@@ -101,24 +110,59 @@ def translate_file(
          if j + i * batch_size < total_samples
      ]
      lines = [_encode_and_add_eos(l, subtokenizer) for l in lines]
+      if distribution_strategy:
+        for j in range(batch_size - len(lines)):
+          lines.append([tokenizer.EOS_ID])
      batch = tf.keras.preprocessing.sequence.pad_sequences(
-          lines, dtype="int64", padding="post")
+          lines,
+          maxlen=params["decode_max_length"],
+          dtype="int32",
+          padding="post")
      tf.compat.v1.logging.info("Decoding batch %d out of %d.", i,
                                num_decode_batches)
      yield batch
+  @tf.function
+  def predict_step(inputs):
+    """Decoding step function for TPU runs."""
+    def _step_fn(inputs):
+      """Per replica step function."""
+      val_outputs, _ = model([inputs], training=False)
+      return val_outputs
+    return distribution_strategy.experimental_run_v2(_step_fn, args=(inputs,))
  translations = []
+  if distribution_strategy:
+    num_replicas = distribution_strategy.num_replicas_in_sync
+    local_batch_size = params["decode_batch_size"] // num_replicas
  for i, text in enumerate(input_generator()):
-    val_outputs, _ = model.predict(text)
+    if distribution_strategy:
+      text = np.reshape(text, [num_replicas, local_batch_size, -1])
+      text = [
+          tf.convert_to_tensor(per_replica_text) for per_replica_text in text
+      ]
+      # pylint: disable=protected-access
+      text = values.PerReplica(distribution_strategy.extended._device_map, text)
+      # pylint: enable=protected-access
+      val_outputs = distribution_strategy.experimental_local_results(
+          predict_step(text))
+      val_outputs = np.reshape(
+          [val_output.numpy() for val_output in val_outputs],
+          [params["decode_batch_size"], -1])
+    else:
+      val_outputs, _ = model.predict(text)
    length = len(val_outputs)
    for j in range(length):
-      translation = _trim_and_decode(val_outputs[j], subtokenizer)
-      translations.append(translation)
-      if print_all_translations:
-        tf.compat.v1.logging.info(
-            "Translating:\n\tInput: %s\n\tOutput: %s" %
-            (sorted_inputs[j + i * batch_size], translation))
+      if j + i * batch_size < total_samples:
+        translation = _trim_and_decode(val_outputs[j], subtokenizer)
+        translations.append(translation)
+        if print_all_translations:
+          tf.compat.v1.logging.info(
+              "Translating:\n\tInput: %s\n\tOutput: %s" %
+              (sorted_inputs[j + i * batch_size], translation))
  # Write translations in the order they appeared in the original file.
  if output_file is not None:
......
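The distribution-strategy branch added to translate_file above pads the final batch up to the full `decode_batch_size` and then splits it evenly across replicas before calling the tf.function decode step. A small, self-contained sketch of just that reshape arithmetic (pure NumPy, with made-up sizes; the real code feeds the per-replica slices into the strategy's run call):

```python
import numpy as np

# Toy illustration of the per-replica split used in translate_file above.
# Sizes are placeholders: 8 decoded sequences, padded to length 6, 4 replicas.
decode_batch_size, decode_max_length, num_replicas = 8, 6, 4
local_batch_size = decode_batch_size // num_replicas  # 2 sequences per replica

batch = np.zeros([decode_batch_size, decode_max_length], dtype=np.int32)
per_replica = np.reshape(batch, [num_replicas, local_batch_size, -1])
print(per_replica.shape)  # (4, 2, 6): one [local_batch, length] slice per replica

# After decoding, the per-replica outputs are flattened back into one batch:
merged = np.reshape(per_replica, [decode_batch_size, -1])
print(merged.shape)  # (8, 6)
```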
@@ -53,9 +53,9 @@ def get_loss_scale(flags_obj, default_for_fp16):
  return default_for_fp16
-def define_performance(num_parallel_calls=True, inter_op=True, intra_op=True,
-                       synthetic_data=True, max_train_steps=False, dtype=True,
-                       all_reduce_alg=True, num_packs=True,
+def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
+                       synthetic_data=False, max_train_steps=False, dtype=False,
+                       all_reduce_alg=False, num_packs=False,
                       tf_gpu_thread_mode=False,
                       datasets_num_private_threads=False,
                       datasets_num_parallel_batches=False,
......
@@ -23,7 +23,10 @@ from official.utils.flags import core as flags_core  # pylint: disable=g-bad-imp
def define_flags():
  flags_core.define_base(num_gpu=False)
-  flags_core.define_performance(dynamic_loss_scale=True, loss_scale=True)
+  flags_core.define_performance(
+      num_parallel_calls=True, inter_op=True, intra_op=True,
+      dynamic_loss_scale=True, loss_scale=True, synthetic_data=True,
+      dtype=True)
  flags_core.define_image()
  flags_core.define_benchmark()
......
[MESSAGES CONTROL]
-disable=R,W,bad-option-value,trailing-newlines
+disable=R,W,bad-option-value,trailing-newlines,no-name-in-module
[REPORTS]
# Tells whether to display a full report or only the messages
......
@@ -18,20 +18,20 @@ official.resnet`.
Download and extract the CIFAR-10 data. You can use the following script:
```bash
-python cifar10_download_and_extract.py
+python ../../r1/resnet/cifar10_download_and_extract.py
```
After you download the data, you can run the program by:
```bash
-python keras_cifar_main.py
+python resnet_cifar_main.py
```
If you did not use the default directory to download the data, specify the
location with the `--data_dir` flag, like:
```bash
-python keras_cifar_main.py --data_dir=/path/to/cifar
+python resnet_cifar_main.py --data_dir=/path/to/cifar
```
## ImageNet
@@ -44,14 +44,14 @@ provide a few options.
Once your dataset is ready, you can begin training the model as follows:
```bash
-python keras_imagenet_main.py
+python resnet_imagenet_main.py
```
Again, if you did not download the data to the default directory, specify the
location with the `--data_dir` flag:
```bash
-python keras_imagenet_main.py --data_dir=/path/to/imagenet
+python resnet_imagenet_main.py --data_dir=/path/to/imagenet
```
There are more flag options you can specify. Here are some examples:
@@ -70,16 +70,16 @@ For example, this is a typical command line to run with ImageNet data with
batch size 128 per GPU:
```bash
-python -m keras_imagenet_main \
+python -m resnet_imagenet_main \
    --model_dir=/tmp/model_dir/something \
    --num_gpus=2 \
    --batch_size=128 \
    --train_epochs=90 \
    --train_steps=10 \
    --use_synthetic_data=false
```
-See [`keras_common.py`](keras_common.py) for full list of options.
+See [`common.py`](common.py) for full list of options.
## Using multiple GPUs
You can train these models on multiple GPUs using `tf.distribute.Strategy` API.
......
@@ -249,6 +249,10 @@ def define_keras_flags(dynamic_loss_scale=True):
  """Define flags for Keras models."""
  flags_core.define_base(run_eagerly=True)
  flags_core.define_performance(num_parallel_calls=False,
+                                synthetic_data=True,
+                                dtype=True,
+                                all_reduce_alg=True,
+                                num_packs=True,
                                tf_gpu_thread_mode=True,
                                datasets_num_private_threads=True,
                                dynamic_loss_scale=dynamic_loss_scale,
......
@@ -31,7 +31,7 @@ from official.utils.misc import model_helpers
from official.vision.image_classification import common
from official.vision.image_classification import imagenet_preprocessing
from official.vision.image_classification import resnet_model
-from official.vision.image_classification import trivial_model
+from official.benchmark.models import trivial_model
LR_SCHEDULE = [  # (multiplier, epoch to start) tuples
    (1.0, 5), (0.1, 30), (0.01, 60), (0.001, 80)
@@ -184,6 +184,7 @@ def run(flags_obj):
    optimizer = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
        optimizer, loss_scale=flags_core.get_loss_scale(flags_obj,
                                                        default_for_fp16=128))
  if flags_obj.fp16_implementation == "graph_rewrite":
    # Note: when flags_obj.fp16_implementation == "graph_rewrite",
    # dtype as determined by flags_core.get_tf_dtype(flags_obj) would be 'float32'
@@ -191,6 +192,7 @@ def run(flags_obj):
    # do not double up.
    optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer)
+  # TODO(hongkuny): Remove trivial model usage and move it to benchmark.
  if flags_obj.use_trivial_model:
    model = trivial_model.trivial_model(
        imagenet_preprocessing.NUM_CLASSES, dtype)
......
@@ -28,7 +28,7 @@ from __future__ import division
from __future__ import print_function
from tensorflow.python.keras import backend
from tensorflow.python.keras import initializers
from tensorflow.python.keras import layers
from tensorflow.python.keras import models
from tensorflow.python.keras import regularizers
@@ -39,7 +39,16 @@ BATCH_NORM_DECAY = 0.9
BATCH_NORM_EPSILON = 1e-5
-def identity_block(input_tensor, kernel_size, filters, stage, block):
+def _gen_l2_regularizer(use_l2_regularizer=True):
+  return regularizers.l2(L2_WEIGHT_DECAY) if use_l2_regularizer else None
+def identity_block(input_tensor,
+                   kernel_size,
+                   filters,
+                   stage,
+                   block,
+                   use_l2_regularizer=True):
  """The identity block is the block that has no conv layer at shortcut.
  Args:
@@ -48,6 +57,7 @@ def identity_block(input_tensor, kernel_size, filters, stage, block):
    filters: list of integers, the filters of 3 conv layer at main path
    stage: integer, current stage label, used for generating layer names
    block: 'a','b'..., current block label, used for generating layer names
+    use_l2_regularizer: whether to use L2 regularizer on Conv layer.
  Returns:
    Output tensor for the block.
@@ -60,35 +70,51 @@ def identity_block(input_tensor, kernel_size, filters, stage, block):
  conv_name_base = 'res' + str(stage) + block + '_branch'
  bn_name_base = 'bn' + str(stage) + block + '_branch'
-  x = layers.Conv2D(filters1, (1, 1), use_bias=False,
-                    kernel_initializer='he_normal',
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                    name=conv_name_base + '2a')(input_tensor)
-  x = layers.BatchNormalization(axis=bn_axis,
-                                momentum=BATCH_NORM_DECAY,
-                                epsilon=BATCH_NORM_EPSILON,
-                                name=bn_name_base + '2a')(x)
+  x = layers.Conv2D(
+      filters1, (1, 1),
+      use_bias=False,
+      kernel_initializer='he_normal',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name=conv_name_base + '2a')(
+          input_tensor)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=BATCH_NORM_DECAY,
+      epsilon=BATCH_NORM_EPSILON,
+      name=bn_name_base + '2a')(
+          x)
  x = layers.Activation('relu')(x)
-  x = layers.Conv2D(filters2, kernel_size,
-                    padding='same', use_bias=False,
-                    kernel_initializer='he_normal',
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                    name=conv_name_base + '2b')(x)
-  x = layers.BatchNormalization(axis=bn_axis,
-                                momentum=BATCH_NORM_DECAY,
-                                epsilon=BATCH_NORM_EPSILON,
-                                name=bn_name_base + '2b')(x)
+  x = layers.Conv2D(
+      filters2,
+      kernel_size,
+      padding='same',
+      use_bias=False,
+      kernel_initializer='he_normal',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name=conv_name_base + '2b')(
+          x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=BATCH_NORM_DECAY,
+      epsilon=BATCH_NORM_EPSILON,
+      name=bn_name_base + '2b')(
+          x)
  x = layers.Activation('relu')(x)
-  x = layers.Conv2D(filters3, (1, 1), use_bias=False,
-                    kernel_initializer='he_normal',
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                    name=conv_name_base + '2c')(x)
-  x = layers.BatchNormalization(axis=bn_axis,
-                                momentum=BATCH_NORM_DECAY,
-                                epsilon=BATCH_NORM_EPSILON,
-                                name=bn_name_base + '2c')(x)
+  x = layers.Conv2D(
+      filters3, (1, 1),
+      use_bias=False,
+      kernel_initializer='he_normal',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name=conv_name_base + '2c')(
+          x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=BATCH_NORM_DECAY,
+      epsilon=BATCH_NORM_EPSILON,
+      name=bn_name_base + '2c')(
+          x)
  x = layers.add([x, input_tensor])
  x = layers.Activation('relu')(x)
@@ -100,7 +126,8 @@ def conv_block(input_tensor,
               filters,
               stage,
               block,
-               strides=(2, 2)):
+               strides=(2, 2),
+               use_l2_regularizer=True):
  """A block that has a conv layer at shortcut.
  Note that from stage 3,
@@ -114,6 +141,7 @@ def conv_block(input_tensor,
    stage: integer, current stage label, used for generating layer names
    block: 'a','b'..., current block label, used for generating layer names
    strides: Strides for the second conv layer in the block.
+    use_l2_regularizer: whether to use L2 regularizer on Conv layer.
  Returns:
    Output tensor for the block.
@@ -126,114 +154,231 @@ def conv_block(input_tensor,
  conv_name_base = 'res' + str(stage) + block + '_branch'
  bn_name_base = 'bn' + str(stage) + block + '_branch'
-  x = layers.Conv2D(filters1, (1, 1), use_bias=False,
-                    kernel_initializer='he_normal',
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                    name=conv_name_base + '2a')(input_tensor)
-  x = layers.BatchNormalization(axis=bn_axis,
-                                momentum=BATCH_NORM_DECAY,
-                                epsilon=BATCH_NORM_EPSILON,
-                                name=bn_name_base + '2a')(x)
+  x = layers.Conv2D(
+      filters1, (1, 1),
+      use_bias=False,
+      kernel_initializer='he_normal',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name=conv_name_base + '2a')(
+          input_tensor)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=BATCH_NORM_DECAY,
+      epsilon=BATCH_NORM_EPSILON,
+      name=bn_name_base + '2a')(
+          x)
  x = layers.Activation('relu')(x)
-  x = layers.Conv2D(filters2, kernel_size, strides=strides, padding='same',
-                    use_bias=False, kernel_initializer='he_normal',
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                    name=conv_name_base + '2b')(x)
-  x = layers.BatchNormalization(axis=bn_axis,
-                                momentum=BATCH_NORM_DECAY,
-                                epsilon=BATCH_NORM_EPSILON,
-                                name=bn_name_base + '2b')(x)
+  x = layers.Conv2D(
+      filters2,
+      kernel_size,
+      strides=strides,
+      padding='same',
+      use_bias=False,
+      kernel_initializer='he_normal',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name=conv_name_base + '2b')(
+          x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=BATCH_NORM_DECAY,
+      epsilon=BATCH_NORM_EPSILON,
+      name=bn_name_base + '2b')(
+          x)
  x = layers.Activation('relu')(x)
-  x = layers.Conv2D(filters3, (1, 1), use_bias=False,
-                    kernel_initializer='he_normal',
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                    name=conv_name_base + '2c')(x)
-  x = layers.BatchNormalization(axis=bn_axis,
-                                momentum=BATCH_NORM_DECAY,
-                                epsilon=BATCH_NORM_EPSILON,
-                                name=bn_name_base + '2c')(x)
-  shortcut = layers.Conv2D(filters3, (1, 1), strides=strides, use_bias=False,
-                           kernel_initializer='he_normal',
-                           kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                           name=conv_name_base + '1')(input_tensor)
-  shortcut = layers.BatchNormalization(axis=bn_axis,
-                                       momentum=BATCH_NORM_DECAY,
-                                       epsilon=BATCH_NORM_EPSILON,
-                                       name=bn_name_base + '1')(shortcut)
+  x = layers.Conv2D(
+      filters3, (1, 1),
+      use_bias=False,
+      kernel_initializer='he_normal',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name=conv_name_base + '2c')(
+          x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=BATCH_NORM_DECAY,
+      epsilon=BATCH_NORM_EPSILON,
+      name=bn_name_base + '2c')(
+          x)
+  shortcut = layers.Conv2D(
+      filters3, (1, 1),
+      strides=strides,
+      use_bias=False,
+      kernel_initializer='he_normal',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name=conv_name_base + '1')(
+          input_tensor)
+  shortcut = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=BATCH_NORM_DECAY,
+      epsilon=BATCH_NORM_EPSILON,
+      name=bn_name_base + '1')(
+          shortcut)
  x = layers.add([x, shortcut])
  x = layers.Activation('relu')(x)
  return x
-def resnet50(num_classes, dtype='float32', batch_size=None):
+def resnet50(num_classes,
+             dtype='float32',
+             batch_size=None,
+             use_l2_regularizer=True):
  """Instantiates the ResNet50 architecture.
  Args:
    num_classes: `int` number of classes for image classification.
    dtype: dtype to use float32 or float16 are most common.
    batch_size: Size of the batches for each step.
+    use_l2_regularizer: whether to use L2 regularizer on Conv/Dense layer.
  Returns:
    A Keras model instance.
  """
  input_shape = (224, 224, 3)
-  img_input = layers.Input(shape=input_shape, dtype=dtype,
-                           batch_size=batch_size)
+  img_input = layers.Input(
+      shape=input_shape, dtype=dtype, batch_size=batch_size)
  if backend.image_data_format() == 'channels_first':
-    x = layers.Lambda(lambda x: backend.permute_dimensions(x, (0, 3, 1, 2)),
-                      name='transpose')(img_input)
+    x = layers.Lambda(
+        lambda x: backend.permute_dimensions(x, (0, 3, 1, 2)),
+        name='transpose')(
+            img_input)
    bn_axis = 1
  else:  # channels_last
    x = img_input
    bn_axis = 3
  x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(x)
-  x = layers.Conv2D(64, (7, 7),
-                    strides=(2, 2),
-                    padding='valid', use_bias=False,
-                    kernel_initializer='he_normal',
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                    name='conv1')(x)
-  x = layers.BatchNormalization(axis=bn_axis,
-                                momentum=BATCH_NORM_DECAY,
-                                epsilon=BATCH_NORM_EPSILON,
-                                name='bn_conv1')(x)
+  x = layers.Conv2D(
+      64, (7, 7),
+      strides=(2, 2),
+      padding='valid',
+      use_bias=False,
+      kernel_initializer='he_normal',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name='conv1')(
+          x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=BATCH_NORM_DECAY,
+      epsilon=BATCH_NORM_EPSILON,
+      name='bn_conv1')(
+          x)
  x = layers.Activation('relu')(x)
  x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
-  x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
-  x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
-  x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
-  x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
-  x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
-  x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
-  x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
-  x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
-  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
-  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
-  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
-  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
-  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
-  x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
-  x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
-  x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
+  x = conv_block(
+      x,
+      3, [64, 64, 256],
+      stage=2,
+      block='a',
+      strides=(1, 1),
+      use_l2_regularizer=use_l2_regularizer)
+  x = identity_block(
+      x,
+      3, [64, 64, 256],
+      stage=2,
+      block='b',
+      use_l2_regularizer=use_l2_regularizer)
+  x = identity_block(
+      x,
+      3, [64, 64, 256],
+      stage=2,
+      block='c',
+      use_l2_regularizer=use_l2_regularizer)
+  x = conv_block(
+      x,
+      3, [128, 128, 512],
+      stage=3,
+      block='a',
+      use_l2_regularizer=use_l2_regularizer)
+  x = identity_block(
+      x,
+      3, [128, 128, 512],
+      stage=3,
+      block='b',
+      use_l2_regularizer=use_l2_regularizer)
+  x = identity_block(
+      x,
+      3, [128, 128, 512],
+      stage=3,
+      block='c',
+      use_l2_regularizer=use_l2_regularizer)
+  x = identity_block(
+      x,
+      3, [128, 128, 512],
+      stage=3,
+      block='d',
+      use_l2_regularizer=use_l2_regularizer)
+  x = conv_block(
+      x,
+      3, [256, 256, 1024],
+      stage=4,
+      block='a',
+      use_l2_regularizer=use_l2_regularizer)
+  x = identity_block(
+      x,
+      3, [256, 256, 1024],
+      stage=4,
+      block='b',
+      use_l2_regularizer=use_l2_regularizer)
+  x = identity_block(
+      x,
+      3, [256, 256, 1024],
+      stage=4,
+      block='c',
+      use_l2_regularizer=use_l2_regularizer)
+  x = identity_block(
+      x,
+      3, [256, 256, 1024],
+      stage=4,
+      block='d',
+      use_l2_regularizer=use_l2_regularizer)
+  x = identity_block(
+      x,
+      3, [256, 256, 1024],
+      stage=4,
+      block='e',
+      use_l2_regularizer=use_l2_regularizer)
+  x = identity_block(
+      x,
+      3, [256, 256, 1024],
+      stage=4,
+      block='f',
+      use_l2_regularizer=use_l2_regularizer)
+  x = conv_block(
+      x,
+      3, [512, 512, 2048],
+      stage=5,
+      block='a',
+      use_l2_regularizer=use_l2_regularizer)
+  x = identity_block(
+      x,
+      3, [512, 512, 2048],
+      stage=5,
+      block='b',
+      use_l2_regularizer=use_l2_regularizer)
+  x = identity_block(
+      x,
+      3, [512, 512, 2048],
+      stage=5,
+      block='c',
+      use_l2_regularizer=use_l2_regularizer)
  rm_axes = [1, 2] if backend.image_data_format() == 'channels_last' else [2, 3]
  x = layers.Lambda(lambda x: backend.mean(x, rm_axes), name='reduce_mean')(x)
  x = layers.Dense(
      num_classes,
      kernel_initializer=initializers.RandomNormal(stddev=0.01),
-      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-      bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-      name='fc1000')(x)
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      bias_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name='fc1000')(
+          x)
  # TODO(reedwm): Remove manual casts once mixed precision can be enabled with a
  # single line of code.
......
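The resnet_model.py change above threads a single switch, `use_l2_regularizer`, through every layer so that L2 weight decay can be turned off wholesale via the `_gen_l2_regularizer` helper. A small, self-contained sketch of that toggle pattern (generic Keras Dense layer and a placeholder weight-decay constant; only the helper's shape mirrors the diff):

```python
import tensorflow as tf
from tensorflow.keras import layers, regularizers

L2_WEIGHT_DECAY = 1e-4  # placeholder; the real constant lives in resnet_model.py

def _gen_l2_regularizer(use_l2_regularizer=True):
  # Return an L2 regularizer, or None to disable weight decay entirely.
  return regularizers.l2(L2_WEIGHT_DECAY) if use_l2_regularizer else None

def make_head(num_classes, use_l2_regularizer=True):
  # Every layer consults the same helper, so one flag flips regularization
  # for the whole model.
  return layers.Dense(
      num_classes,
      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
      bias_regularizer=_gen_l2_regularizer(use_l2_regularizer))

print(make_head(10, use_l2_regularizer=False).kernel_regularizer)  # None
```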
@@ -90,6 +90,7 @@ http_archive(
    sha256 = "79d102c61e2a479a0b7e5fc167bcfaa4832a0c6aad4a75fa7da0480564931bcc",
)
+# Needed by TensorFlow
http_archive(
    name = "io_bazel_rules_closure",
......