Unverified Commit 965cc3ee authored by Ayushman Kumar, committed by GitHub

Merge pull request #7 from tensorflow/master

updated
parents 1f3247f4 1f685c54
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for nlp.nhnet.multi_channel_attention."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from official.nlp.nhnet import multi_channel_attention
class MultiChannelAttentionTest(tf.test.TestCase):
def test_doc_attention(self):
num_heads = 2
doc_attention = multi_channel_attention.DocAttention(num_heads, head_size=8)
num_docs = 3
inputs = np.zeros((2, num_docs, 10, 16), dtype=np.float32)
doc_mask = np.zeros((2, num_docs), dtype=np.float32)
outputs = doc_attention(inputs, doc_mask)
self.assertEqual(outputs.shape, (2, num_docs))
def test_multi_channel_attention(self):
num_heads = 2
num_docs = 5
attention_layer = multi_channel_attention.MultiChannelAttention(
num_heads, head_size=2)
from_data = 10 * np.random.random_sample((3, 4, 8))
to_data = 10 * np.random.random_sample((3, num_docs, 2, 8))
mask_data = np.random.randint(2, size=(3, num_docs, 4, 2))
doc_probs = np.random.randint(
2, size=(3, num_heads, 4, num_docs)).astype(float)
outputs = attention_layer([from_data, to_data, mask_data, doc_probs])
self.assertEqual(outputs.shape, (3, 4, num_heads, 2))
if __name__ == "__main__":
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Optimizer and learning rate scheduler."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
import tensorflow as tf
from official.modeling.hyperparams import params_dict
class LearningRateSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
"""Learning rate schedule."""
def __init__(self, initial_learning_rate, hidden_size, warmup_steps):
"""Initialize configuration of the learning rate schedule.
Args:
initial_learning_rate: A float, the initial learning rate.
hidden_size: An integer, the model dimension in the hidden layers.
warmup_steps: An integer, the number of steps required for linear warmup.
"""
super(LearningRateSchedule, self).__init__()
self.initial_learning_rate = initial_learning_rate
self.hidden_size = hidden_size
self.warmup_steps = tf.cast(warmup_steps, tf.float32)
def __call__(self, global_step):
"""Calculate learning rate with linear warmup and rsqrt decay.
Args:
global_step: An integer, the current global step used for learning rate
calculation.
Returns:
A float, the learning rate to be used for the current global step.
"""
with tf.name_scope('learning_rate_schedule'):
global_step = tf.cast(global_step, tf.float32)
learning_rate = self.initial_learning_rate
learning_rate *= (self.hidden_size**-0.5)
# Apply linear warmup
learning_rate *= tf.minimum(1.0, global_step / self.warmup_steps)
# Apply rsqrt decay
learning_rate /= tf.sqrt(tf.maximum(global_step, self.warmup_steps))
return learning_rate
def get_config(self):
"""Get the configuration of the learning rate schedule."""
return {
'initial_learning_rate': self.initial_learning_rate,
'hidden_size': self.hidden_size,
'warmup_steps': self.warmup_steps,
}
def create_optimizer(params: params_dict.ParamsDict):
"""Creates optimizer."""
lr_schedule = LearningRateSchedule(
params.learning_rate,
params.hidden_size,
params.learning_rate_warmup_steps)
return tf.keras.optimizers.Adam(
learning_rate=lr_schedule,
beta_1=params.adam_beta1,
beta_2=params.adam_beta2,
epsilon=params.adam_epsilon)
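# A minimal usage sketch (not part of the library; the parameter values are
# illustrative assumptions, not NHNet defaults). It shows how the schedule
# combines linear warmup with rsqrt decay: the rate ramps up linearly until
# warmup_steps and then decays proportionally to 1/sqrt(step).
def _example_learning_rate_schedule():
  """Returns the scheduled learning rate at a few illustrative steps."""
  schedule = LearningRateSchedule(
      initial_learning_rate=2.0, hidden_size=768, warmup_steps=10000)
  return [schedule(step).numpy() for step in (100, 10000, 40000)]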
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Processes crawled content from news URLs by generating tfrecords."""
import os
from absl import app
from absl import flags
from official.nlp.nhnet import raw_data_processor
FLAGS = flags.FLAGS
flags.DEFINE_string("crawled_articles", "/tmp/nhnet/",
"Folder path to the crawled articles using news-please.")
flags.DEFINE_string("vocab", None, "Filepath of the BERT vocabulary.")
flags.DEFINE_bool("do_lower_case", True,
"Whether the vocabulary is uncased or not.")
flags.DEFINE_integer("len_title", 15,
"Maximum number of tokens in story headline.")
flags.DEFINE_integer("len_passage", 200,
"Maximum number of tokens in article passage.")
flags.DEFINE_integer("max_num_articles", 5,
"Maximum number of articles in a story.")
flags.DEFINE_bool("include_article_title_in_passage", False,
"Whether to include article title in article passage.")
flags.DEFINE_string("data_folder", None,
"Folder path to the downloaded data folder (output).")
flags.DEFINE_integer("num_tfrecords_shards", 20,
"Number of shards for train/valid/test.")
def transform_as_tfrecords(data_processor, filename):
"""Transforms story from json to tfrecord (sharded).
Args:
data_processor: Instance of RawDataProcessor.
filename: 'train', 'valid', or 'test'.
"""
print("Transforming json to tfrecord for %s..." % filename)
story_filepath = os.path.join(FLAGS.data_folder, filename + ".json")
output_folder = os.path.join(FLAGS.data_folder, "processed")
os.makedirs(output_folder, exist_ok=True)
output_filepaths = []
for i in range(FLAGS.num_tfrecords_shards):
output_filepaths.append(
os.path.join(
output_folder, "%s.tfrecord-%.5d-of-%.5d" %
(filename, i, FLAGS.num_tfrecords_shards)))
(total_num_examples,
generated_num_examples) = data_processor.generate_examples(
story_filepath, output_filepaths)
print("For %s, %d examples have been generated from %d stories in json." %
(filename, generated_num_examples, total_num_examples))
def main(_):
if not FLAGS.data_folder:
raise ValueError("data_folder must be set as the downloaded folder path.")
if not FLAGS.vocab:
raise ValueError("vocab must be set as the filepath of BERT vocabulary.")
data_processor = raw_data_processor.RawDataProcessor(
vocab=FLAGS.vocab,
do_lower_case=FLAGS.do_lower_case,
len_title=FLAGS.len_title,
len_passage=FLAGS.len_passage,
max_num_articles=FLAGS.max_num_articles,
include_article_title_in_passage=FLAGS.include_article_title_in_passage,
include_text_snippet_in_example=True)
print("Loading crawled articles...")
num_articles = data_processor.read_crawled_articles(FLAGS.crawled_articles)
print("Total number of articles loaded: %d" % num_articles)
print()
transform_as_tfrecords(data_processor, "train")
transform_as_tfrecords(data_processor, "valid")
transform_as_tfrecords(data_processor, "test")
if __name__ == "__main__":
app.run(main)
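# Example invocation (a sketch; the script name and all paths below are
# placeholders, not files shipped with this change):
#
#   python process_data.py \
#     --crawled_articles=/tmp/nhnet/crawled \
#     --vocab=/tmp/bert/vocab.txt \
#     --data_folder=/tmp/nhnet/data
#
# With the default --num_tfrecords_shards=20, each of train/valid/test is
# written under <data_folder>/processed/ as shards such as
# train.tfrecord-00000-of-00020.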
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Library for processing crawled content and generating tfrecords."""
import collections
import json
import multiprocessing
import os
import urllib.parse
import tensorflow as tf
from official.nlp.bert import tokenization
from official.nlp.data import classifier_data_lib
class RawDataProcessor(object):
"""Data converter for story examples."""
def __init__(self,
vocab: str,
do_lower_case: bool,
len_title: int = 15,
len_passage: int = 200,
max_num_articles: int = 5,
include_article_title_in_passage: bool = False,
include_text_snippet_in_example: bool = False):
"""Constructs a RawDataProcessor.
Args:
vocab: Filepath of the BERT vocabulary.
do_lower_case: Whether the vocabulary is uncased or not.
len_title: Maximum number of tokens in story headline.
len_passage: Maximum number of tokens in article passage.
max_num_articles: Maximum number of articles in a story.
include_article_title_in_passage: Whether to include article title in
article passage.
include_text_snippet_in_example: Whether to include text snippet
(headline and article content) in generated tensorflow Examples, for
debug usage. If include_article_title_in_passage=True, title and body
will be separated by [SEP].
"""
self.articles = dict()
self.tokenizer = tokenization.FullTokenizer(
vocab, do_lower_case=do_lower_case, split_on_punc=False)
self.len_title = len_title
self.len_passage = len_passage
self.max_num_articles = max_num_articles
self.include_article_title_in_passage = include_article_title_in_passage
self.include_text_snippet_in_example = include_text_snippet_in_example
# ex_index=5 deactivates printing inside convert_single_example.
self.ex_index = 5
# Parameters used in InputExample, not used in NHNet.
self.label = 0
self.guid = 0
self.num_generated_examples = 0
def read_crawled_articles(self, folder_path):
"""Reads crawled articles under folder_path."""
for path, _, files in os.walk(folder_path):
for name in files:
if not name.endswith(".json"):
continue
url, article = self._get_article_content_from_json(
os.path.join(path, name))
if not article.text_a:
continue
self.articles[RawDataProcessor.normalize_url(url)] = article
if len(self.articles) % 5000 == 0:
print("Number of articles loaded: %d\r" % len(self.articles), end="")
print()
return len(self.articles)
def generate_examples(self, input_file, output_files):
"""Loads story from input json file and exports examples in output_files."""
writers = []
story_partition = []
for output_file in output_files:
writers.append(tf.io.TFRecordWriter(output_file))
story_partition.append(list())
with tf.io.gfile.GFile(input_file, "r") as story_json_file:
stories = json.load(story_json_file)
writer_index = 0
for story in stories:
articles = []
for url in story["urls"]:
normalized_url = RawDataProcessor.normalize_url(url)
if normalized_url in self.articles:
articles.append(self.articles[normalized_url])
if not articles:
continue
story_partition[writer_index].append((story["label"], articles))
writer_index = (writer_index + 1) % len(writers)
lock = multiprocessing.Lock()
pool = multiprocessing.pool.ThreadPool(len(writers))
data = [(story_partition[i], writers[i], lock) for i in range(len(writers))]
pool.map(self._write_story_partition, data)
return len(stories), self.num_generated_examples
@classmethod
def normalize_url(cls, url):
"""Normalize url for better matching."""
url = urllib.parse.unquote(
urllib.parse.urlsplit(url)._replace(query=None).geturl())
output, part = [], None
for part in url.split("//"):
if part == "http:" or part == "https:":
continue
else:
output.append(part)
return "//".join(output)
def _get_article_content_from_json(self, file_path):
"""Returns (url, InputExample) keeping content extracted from file_path."""
with tf.io.gfile.GFile(file_path, "r") as article_json_file:
article = json.load(article_json_file)
if self.include_article_title_in_passage:
return article["url"], classifier_data_lib.InputExample(
guid=self.guid,
text_a=article["title"],
text_b=article["maintext"],
label=self.label)
else:
return article["url"], classifier_data_lib.InputExample(
guid=self.guid, text_a=article["maintext"], label=self.label)
def _write_story_partition(self, data):
"""Writes stories in a partition into file."""
for (story_headline, articles) in data[0]:
story_example = tf.train.Example(
features=tf.train.Features(
feature=self._get_single_story_features(story_headline,
articles)))
data[1].write(story_example.SerializeToString())
data[2].acquire()
try:
self.num_generated_examples += 1
if self.num_generated_examples % 1000 == 0:
print(
"Number of stories written: %d\r" % self.num_generated_examples,
end="")
finally:
data[2].release()
def _get_single_story_features(self, story_headline, articles):
"""Converts a list of articles to a tensorflow Example."""
def get_text_snippet(article):
if article.text_b:
return " [SEP] ".join([article.text_a, article.text_b])
else:
return article.text_a
story_features = collections.OrderedDict()
story_headline_feature = classifier_data_lib.convert_single_example(
ex_index=self.ex_index,
example=classifier_data_lib.InputExample(
guid=self.guid, text_a=story_headline, label=self.label),
label_list=[self.label],
max_seq_length=self.len_title,
tokenizer=self.tokenizer)
if self.include_text_snippet_in_example:
story_headline_feature.label_id = story_headline
self._add_feature_with_suffix(
feature=story_headline_feature,
suffix="a",
story_features=story_features)
for (article_index, article) in enumerate(articles):
if article_index == self.max_num_articles:
break
article_feature = classifier_data_lib.convert_single_example(
ex_index=self.ex_index,
example=article,
label_list=[self.label],
max_seq_length=self.len_passage,
tokenizer=self.tokenizer)
if self.include_text_snippet_in_example:
article_feature.label_id = get_text_snippet(article)
suffix = chr(ord("b") + article_index)
self._add_feature_with_suffix(
feature=article_feature, suffix=suffix, story_features=story_features)
# Adds empty features as placeholder.
for article_index in range(len(articles), self.max_num_articles):
suffix = chr(ord("b") + article_index)
empty_article = classifier_data_lib.InputExample(
guid=self.guid, text_a="", label=self.label)
empty_feature = classifier_data_lib.convert_single_example(
ex_index=self.ex_index,
example=empty_article,
label_list=[self.label],
max_seq_length=self.len_passage,
tokenizer=self.tokenizer)
if self.include_text_snippet_in_example:
empty_feature.label_id = ""
self._add_feature_with_suffix(
feature=empty_feature, suffix=suffix, story_features=story_features)
return story_features
def _add_feature_with_suffix(self, feature, suffix, story_features):
"""Appends suffix to feature names and fills in the corresponding values."""
def _create_int_feature(values):
return tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
def _create_string_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
story_features["input_ids_%c" % suffix] = _create_int_feature(
feature.input_ids)
story_features["input_mask_%c" % suffix] = _create_int_feature(
feature.input_mask)
story_features["segment_ids_%c" % suffix] = _create_int_feature(
feature.segment_ids)
if self.include_text_snippet_in_example:
story_features["text_snippet_%c" % suffix] = _create_string_feature(
bytes(feature.label_id.encode()))
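# For a story with the default max_num_articles=5, the resulting
# tf.train.Example carries feature keys "input_ids_a" (the headline) and
# "input_ids_b" through "input_ids_f" (the articles, padded with empty
# passages if fewer than five match), plus the corresponding "input_mask_*"
# and "segment_ids_*" features, and optional "text_snippet_*" features when
# include_text_snippet_in_example is set.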
<!DOCTYPE html>
<meta charset="utf-8">
<title>Page Title 0</title>
{
"title": "title for 0",
"maintext": "text snippet for 0",
"url": "http://url_000.html"
}
<!DOCTYPE html>
<meta charset="utf-8">
<title>Page Title 1</title>
{
"title": "title for 1",
"maintext": "text snippet for 1",
"url": "url_001.html"
}
[
{
"urls": [
"http://url_000.html",
"http://url_001.html"
],
"label": "headline 0"
},
{
"urls": [
"http://url_000.html",
"http://url_001.html"
],
"label": "headline 1"
},
{
"urls": [
"http://url_002.html",
"http://url_001.html"
],
"label": "headline 2"
},
{
"urls": [
"http://url_003.html"
],
"label": "headline 3"
}
]
[UNK]
[CLS]
[SEP]
[MASK]
0
1
this
is
a
title
snippet
for
url
main
text
http
www
html
:
//
.
_
headline
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Run NHNet model training and eval."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from absl import app
from absl import flags
from absl import logging
from six.moves import zip
import tensorflow as tf
from official.modeling.hyperparams import params_dict
from official.nlp.nhnet import evaluation
from official.nlp.nhnet import input_pipeline
from official.nlp.nhnet import models
from official.nlp.nhnet import optimizer
from official.nlp.transformer import metrics as transformer_metrics
from official.utils.misc import distribution_utils
from official.utils.misc import keras_utils
FLAGS = flags.FLAGS
def define_flags():
"""Defines command line flags used by NHNet trainer."""
## Required parameters
flags.DEFINE_enum("mode", "train", ["train", "eval", "train_and_eval"],
"Execution mode.")
flags.DEFINE_string("train_file_pattern", "", "Train file pattern.")
flags.DEFINE_string("eval_file_pattern", "", "Eval file pattern.")
flags.DEFINE_string(
"model_dir", None,
"The output directory where the model checkpoints will be written.")
# Model training specific flags.
flags.DEFINE_enum(
"distribution_strategy", "mirrored", ["tpu", "mirrored"],
"Distribution Strategy type to use for training. `tpu` uses TPUStrategy "
"for running on TPUs, `mirrored` uses GPUs with single host.")
flags.DEFINE_string("tpu", "", "TPU address to connect to.")
flags.DEFINE_string(
"init_checkpoint", None,
"Initial checkpoint (usually from a pre-trained BERT model).")
flags.DEFINE_integer("train_steps", 100000, "Max train steps")
flags.DEFINE_integer("eval_steps", 32, "Number of eval steps per run.")
flags.DEFINE_integer("train_batch_size", 32, "Total batch size for training.")
flags.DEFINE_integer("eval_batch_size", 4, "Total batch size for evaluation.")
flags.DEFINE_integer(
"steps_per_loop", 1000,
"Number of steps per graph-mode loop. Only training step "
"happens inside the loop.")
flags.DEFINE_integer("checkpoint_interval", 2000, "Checkpointing interval.")
flags.DEFINE_integer("len_title", 15, "Title length.")
flags.DEFINE_integer("len_passage", 200, "Passage length.")
flags.DEFINE_integer("num_encoder_layers", 12,
"Number of hidden layers of encoder.")
flags.DEFINE_integer("num_decoder_layers", 12,
"Number of hidden layers of decoder.")
flags.DEFINE_string("model_type", "nhnet",
"Model type to choose a model configuration.")
flags.DEFINE_integer(
"num_nhnet_articles", 5,
"Maximum number of articles in NHNet, only used when model_type=nhnet")
flags.DEFINE_string(
"params_override",
default=None,
help=("a YAML/JSON string or a YAML file which specifies additional "
"overrides over the default parameters"))
# pylint: disable=protected-access
class Trainer(tf.keras.Model):
"""A training only model."""
def __init__(self, model, params):
super(Trainer, self).__init__()
self.model = model
self.params = params
self._num_replicas_in_sync = tf.distribute.get_strategy(
).num_replicas_in_sync
def call(self, inputs, mode="train"):
return self.model(inputs, mode)
def train_step(self, inputs):
"""The logic for one training step."""
with tf.GradientTape() as tape:
logits, _, _ = self(inputs, mode="train", training=True)
targets = models.remove_sos_from_seq(inputs["target_ids"],
self.params.pad_token_id)
loss = transformer_metrics.transformer_loss(logits, targets,
self.params.label_smoothing,
self.params.vocab_size)
# Scales the loss, which results in using the average loss across all
# of the replicas for backprop.
scaled_loss = loss / self._num_replicas_in_sync
tvars = self.trainable_variables
grads = tape.gradient(scaled_loss, tvars)
self.optimizer.apply_gradients(list(zip(grads, tvars)))
return {
"training_loss": loss,
"learning_rate": self.optimizer._decayed_lr(var_dtype=tf.float32)
}
def train(params, strategy, dataset=None):
"""Runs training."""
if not dataset:
dataset = input_pipeline.get_input_dataset(
FLAGS.train_file_pattern,
FLAGS.train_batch_size,
params,
is_training=True,
strategy=strategy)
with strategy.scope():
model = models.create_model(
FLAGS.model_type, params, init_checkpoint=FLAGS.init_checkpoint)
opt = optimizer.create_optimizer(params)
trainer = Trainer(model, params)
model.global_step = opt.iterations
trainer.compile(
optimizer=opt,
experimental_steps_per_execution=FLAGS.steps_per_loop)
summary_dir = os.path.join(FLAGS.model_dir, "summaries")
summary_callback = tf.keras.callbacks.TensorBoard(
summary_dir, update_freq=max(100, FLAGS.steps_per_loop))
checkpoint = tf.train.Checkpoint(model=model, optimizer=opt)
checkpoint_manager = tf.train.CheckpointManager(
checkpoint,
directory=FLAGS.model_dir,
max_to_keep=10,
step_counter=model.global_step,
checkpoint_interval=FLAGS.checkpoint_interval)
if checkpoint_manager.restore_or_initialize():
logging.info("Training restored from the checkpoints in: %s",
FLAGS.model_dir)
checkpoint_callback = keras_utils.SimpleCheckpoint(checkpoint_manager)
# Trains the model.
steps_per_epoch = min(FLAGS.train_steps, FLAGS.checkpoint_interval)
epochs = FLAGS.train_steps // steps_per_epoch
trainer.fit(
x=dataset,
steps_per_epoch=steps_per_epoch,
epochs=epochs,
callbacks=[summary_callback, checkpoint_callback],
verbose=2)
def run():
"""Runs NHNet using Keras APIs."""
strategy = distribution_utils.get_distribution_strategy(
distribution_strategy=FLAGS.distribution_strategy, tpu_address=FLAGS.tpu)
if strategy:
logging.info("***** Number of cores used : %d",
strategy.num_replicas_in_sync)
params = models.get_model_params(FLAGS.model_type)
params = params_dict.override_params_dict(
params, FLAGS.params_override, is_strict=True)
params.override(
{
"len_title":
FLAGS.len_title,
"len_passage":
FLAGS.len_passage,
"num_hidden_layers":
FLAGS.num_encoder_layers,
"num_decoder_layers":
FLAGS.num_decoder_layers,
"passage_list":
[chr(ord("b") + i) for i in range(FLAGS.num_nhnet_articles)],
},
is_strict=False)
stats = {}
if "train" in FLAGS.mode:
train(params, strategy)
if "eval" in FLAGS.mode:
timeout = 0 if FLAGS.mode == "train_and_eval" else 3000
# Uses padded decoding for TPU. Always uses cache.
padded_decode = isinstance(strategy, tf.distribute.experimental.TPUStrategy)
params.override({
"padded_decode": padded_decode,
}, is_strict=False)
stats = evaluation.continuous_eval(
strategy,
params,
model_type=FLAGS.model_type,
eval_file_pattern=FLAGS.eval_file_pattern,
batch_size=FLAGS.eval_batch_size,
eval_steps=FLAGS.eval_steps,
model_dir=FLAGS.model_dir,
timeout=timeout)
return stats
def main(_):
stats = run()
if stats:
logging.info("Stats:\n%s", stats)
if __name__ == "__main__":
define_flags()
app.run(main)
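# Example training invocation (a sketch; the script name, checkpoint path and
# file patterns below are placeholders):
#
#   python trainer.py \
#     --mode=train \
#     --model_type=nhnet \
#     --train_file_pattern="/tmp/nhnet/data/processed/train.tfrecord*" \
#     --model_dir=/tmp/nhnet/model \
#     --init_checkpoint=/tmp/bert/bert_model.ckpt \
#     --distribution_strategy=mirrored \
#     --train_steps=100000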
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for official.nlp.nhnet.trainer."""
import os
from absl import flags
from absl.testing import parameterized
import tensorflow as tf
# pylint: disable=g-direct-tensorflow-import
from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
# pylint: enable=g-direct-tensorflow-import
from official.nlp.nhnet import trainer
from official.nlp.nhnet import utils
FLAGS = flags.FLAGS
trainer.define_flags()
def all_strategy_combinations():
return combinations.combine(
distribution=[
strategy_combinations.one_device_strategy,
strategy_combinations.one_device_strategy_gpu,
strategy_combinations.tpu_strategy,
],
mode="eager",
)
def get_trivial_data(config) -> tf.data.Dataset:
"""Gets trivial data in the ImageNet size."""
batch_size, num_docs = 2, len(config.passage_list),
len_passage = config.len_passage
len_title = config.len_title
def generate_data(_) -> tf.data.Dataset:
fake_ids = tf.zeros((num_docs, len_passage), dtype=tf.int32)
title = tf.zeros((len_title), dtype=tf.int32)
return dict(
input_ids=fake_ids,
input_mask=fake_ids,
segment_ids=fake_ids,
target_ids=title)
dataset = tf.data.Dataset.range(1)
dataset = dataset.repeat()
dataset = dataset.map(generate_data,
num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.prefetch(buffer_size=1).batch(batch_size)
return dataset
class TrainerTest(tf.test.TestCase, parameterized.TestCase):
def setUp(self):
super(TrainerTest, self).setUp()
self._config = utils.get_test_params()
self._config.override(
{
"vocab_size": 49911,
"max_position_embeddings": 200,
"len_title": 15,
"len_passage": 20,
"beam_size": 5,
"alpha": 0.6,
"learning_rate": 0.0,
"learning_rate_warmup_steps": 0,
"multi_channel_cross_attention": True,
"passage_list": ["a", "b"],
},
is_strict=False)
@combinations.generate(all_strategy_combinations())
def test_train(self, distribution):
FLAGS.train_steps = 10
FLAGS.checkpoint_interval = 5
FLAGS.model_dir = self.get_temp_dir()
FLAGS.model_type = "nhnet"
trainer.train(self._config, distribution, get_trivial_data(self._config))
self.assertLen(
tf.io.gfile.glob(os.path.join(FLAGS.model_dir, "ckpt*.index")), 2)
if __name__ == "__main__":
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility helpers for Bert2Bert."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
from absl import logging
import tensorflow as tf
from typing import Optional, Text
from official.modeling.hyperparams import params_dict
from official.nlp.bert import configs
from official.nlp.nhnet import configs as nhnet_configs
def get_bert_config_from_params(
params: params_dict.ParamsDict) -> configs.BertConfig:
"""Converts a BertConfig to ParamsDict."""
return configs.BertConfig.from_dict(params.as_dict())
def get_test_params(cls=nhnet_configs.BERT2BERTConfig):
return cls.from_args(**nhnet_configs.UNITTEST_CONFIG)
# pylint: disable=protected-access
def encoder_common_layers(transformer_block):
return [
transformer_block._attention_layer,
transformer_block._attention_output_dense,
transformer_block._attention_layer_norm,
transformer_block._intermediate_dense, transformer_block._output_dense,
transformer_block._output_layer_norm
]
# pylint: enable=protected-access
def initialize_bert2bert_from_pretrained_bert(
bert_encoder: tf.keras.layers.Layer,
bert_decoder: tf.keras.layers.Layer,
init_checkpoint: Optional[Text] = None) -> None:
"""Helper function to initialze Bert2Bert from Bert pretrained checkpoint."""
ckpt = tf.train.Checkpoint(model=bert_encoder)
logging.info(
"Checkpoint file %s found and restoring from "
"initial checkpoint for core model.", init_checkpoint)
status = ckpt.restore(init_checkpoint)
# Expects the BERT model to be a subset of the checkpoint, as the pooling
# layer is not used.
status.assert_existing_objects_matched()
logging.info("Loading from checkpoint file completed.")
# Saves a checkpoint with transformer layers.
encoder_layers = []
for transformer_block in bert_encoder.transformer_layers:
encoder_layers.extend(encoder_common_layers(transformer_block))
# Restores from the checkpoint with encoder layers.
decoder_layers_to_initialize = []
for decoder_block in bert_decoder.decoder.layers:
decoder_layers_to_initialize.extend(
decoder_block.common_layers_with_encoder())
if len(decoder_layers_to_initialize) != len(encoder_layers):
raise ValueError(
"Source encoder layers with %d objects does not match destination "
"decoder layers with %d objects." %
(len(decoder_layers_to_initialize), len(encoder_layers)))
for dest_layer, source_layer in zip(decoder_layers_to_initialize,
encoder_layers):
try:
dest_layer.set_weights(source_layer.get_weights())
except ValueError as e:
logging.error(
"dest_layer: %s failed to set weights from "
"source_layer: %s as %s", dest_layer.name, source_layer.name, str(e))
@@ -20,19 +20,20 @@ from __future__ import print_function
import re
from absl import logging
import tensorflow as tf
import tensorflow_addons.optimizers as tfa_optimizers
class WarmUp(tf.keras.optimizers.schedules.LearningRateSchedule):
"""Applies a warmup schedule on a given learning rate decay schedule."""
def __init__(self,
initial_learning_rate,
decay_schedule_fn,
warmup_steps,
power=1.0,
name=None):
super(WarmUp, self).__init__()
self.initial_learning_rate = initial_learning_rate
self.warmup_steps = warmup_steps
@@ -50,10 +51,11 @@ class WarmUp(tf.keras.optimizers.schedules.LearningRateSchedule):
warmup_learning_rate = (
self.initial_learning_rate *
tf.math.pow(warmup_percent_done, self.power))
return tf.cond(
global_step_float < warmup_steps_float,
lambda: warmup_learning_rate,
lambda: self.decay_schedule_fn(step),
name=name)
def get_config(self):
return {
@@ -65,24 +67,44 @@ class WarmUp(tf.keras.optimizers.schedules.LearningRateSchedule):
}
def create_optimizer(init_lr,
num_train_steps,
num_warmup_steps,
end_lr=0.0,
optimizer_type='adamw'):
"""Creates an optimizer with learning rate schedule."""
# Implements linear decay of the learning rate.
lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
initial_learning_rate=init_lr,
decay_steps=num_train_steps,
end_learning_rate=end_lr)
if num_warmup_steps:
lr_schedule = WarmUp(
initial_learning_rate=init_lr,
decay_schedule_fn=lr_schedule,
warmup_steps=num_warmup_steps)
if optimizer_type == 'adamw':
logging.info('using Adamw optimizer')
optimizer = AdamWeightDecay(
learning_rate=lr_schedule,
weight_decay_rate=0.01,
beta_1=0.9,
beta_2=0.999,
epsilon=1e-6,
exclude_from_weight_decay=['layer_norm', 'bias'])
elif optimizer_type == 'lamb':
logging.info('using Lamb optimizer')
optimizer = tfa_optimizers.LAMB(
learning_rate=lr_schedule,
weight_decay_rate=0.01,
beta_1=0.9,
beta_2=0.999,
epsilon=1e-6,
exclude_from_weight_decay=['layer_norm', 'bias'])
else:
raise ValueError('Unsupported optimizer type: ', optimizer_type)
return optimizer
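# A minimal usage sketch of the extended factory (illustrative values only):
#
#   opt = create_optimizer(init_lr=2e-5, num_train_steps=10000,
#                          num_warmup_steps=1000, optimizer_type='lamb')
#
# The new end_lr and optimizer_type arguments default to the previous
# behaviour (decay to 0.0 and AdamWeightDecay), so existing callers are
# unchanged; 'lamb' selects the tensorflow_addons LAMB optimizer instead.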
@@ -109,8 +131,8 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
exclude_from_weight_decay=None,
name='AdamWeightDecay',
**kwargs):
super(AdamWeightDecay, self).__init__(learning_rate, beta_1, beta_2,
epsilon, amsgrad, name, **kwargs)
self.weight_decay_rate = weight_decay_rate
self._include_in_weight_decay = include_in_weight_decay
self._exclude_from_weight_decay = exclude_from_weight_decay
@@ -171,15 +193,15 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state)
decay = self._decay_weights_op(var, lr_t, apply_state)
with tf.control_dependencies([decay]):
return super(AdamWeightDecay,
self)._resource_apply_dense(grad, var, **kwargs)
def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state)
decay = self._decay_weights_op(var, lr_t, apply_state)
with tf.control_dependencies([decay]):
return super(AdamWeightDecay,
self)._resource_apply_sparse(grad, var, indices, **kwargs)
def get_config(self):
config = super(AdamWeightDecay, self).get_config()
@@ -1019,7 +1019,8 @@ class Summarization(tf.keras.layers.Layer):
summary = inputs[0]
else:
raise ValueError('Invalid summary type provided: %s' % self.summary_type)
if self.use_proj:
summary = self.proj_layer(summary)
summary = self.dropout_layer(summary)
return summary
@@ -78,6 +78,7 @@ setup(
'official.r1*',
'official.pip_package*',
'official.benchmark*',
'official.colab*',
]),
exclude_package_data={
'': ['*_test.py',],
# Legacy Models Collection
![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg)
![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen)
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
The R1 folder contains legacy model implementations and models that will not
be updated to TensorFlow 2.x. They do not have solid performance tracking.
**Note: We will remove this r1 folder from the master branch in June, 2020.**
After removal, you will still be able to access legacy models in the previous
releases (e.g., [v2.1.0](https://github.com/tensorflow/models/releases/tag/v2.1.0)).
## Legacy model implementation
The Transformer and MNIST implementations use the pure TF 1.x TF-Estimator API.
Users should follow the corresponding TF 2.x implementations inside the
official model garden.
## Models that will not be updated to TensorFlow 2.x
| Model | Description | Reference |
| ----- | ----------- | --------- |
| [Gradient Boosted Trees](boosted_trees) | A gradient boosted trees model to classify Higgs boson processes from the HIGGS dataset | [Link](https://en.wikipedia.org/wiki/Gradient_boosting) |
| [MNIST](mnist) | A basic model to classify digits from the MNIST dataset | [Link](http://yann.lecun.com/exdb/mnist/) |
| [NCF](ncf) | NCF Estimator implementation | [arXiv:1708.05031](https://arxiv.org/abs/1708.05031) |
| [ResNet](resnet) | A deep residual network for image recognition | [arXiv:1512.03385](https://arxiv.org/abs/1512.03385) |
| [Transformer](transformer) | A transformer model to translate the WMT English to German dataset | [arXiv:1706.03762](https://arxiv.org/abs/1706.03762) |
| [Wide & Deep Learning](wide_deep) | A model that combines a wide linear model and deep neural network for recommender systems | [arXiv:1606.07792](https://arxiv.org/abs/1606.07792) |
![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg)
![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen)
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
# Classifying Higgs boson processes in the HIGGS Data Set
## Overview
The [HIGGS Data Set](https://archive.ics.uci.edu/ml/datasets/HIGGS) contains 11 million samples with 28 features, and is used for the classification problem of distinguishing between a signal process that produces Higgs bosons and a background process that does not.
![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg)
![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen)
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
# MNIST in TensorFlow
This directory builds a convolutional neural net to classify the [MNIST dataset](http://yann.lecun.com/exdb/mnist/).
![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg)
![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen)
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
# NCF Estimator implementation
An NCF framework to train and evaluate the NeuMF model.