Commit 356c98bd authored by Kaushik Shivakumar's avatar Kaushik Shivakumar
Browse files

Merge remote-tracking branch 'upstream/master' into detr-push-3

parents d31aba8a b9785623
......@@ -98,17 +98,24 @@ pip3 install tf-nightly
#### Method 1: Install the TensorFlow Model Garden pip package
**tf-models-nightly** is the nightly Model Garden package
created daily automatically. pip will install all models
and dependencies automatically.
**tf-models-official** is the stable Model Garden package.
pip will install all models and dependencies automatically.
```shell
pip install tf-models-nightly
pip install tf-models-official
```
Please check out our [example](colab/fine_tuning_bert.ipynb)
to learn how to use a PIP package.
Note that **tf-models-official** may not include the latest changes in this
github repo. To include latest changes, you may install **tf-models-nightly**,
which is the nightly Model Garden package created daily automatically.
```shell
pip install tf-models-nightly
```
#### Method 2: Clone the source
1. Clone the GitHub repository:
......
......@@ -98,7 +98,8 @@
"source": [
"### Install the TensorFlow Model Garden pip package\n",
"\n",
"* `tf-models-nightly` is the nightly Model Garden package created daily automatically.\n",
"* `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes in the `tensorflow_models` github repo. To include latest changes, you may install `tf-models-nightly`,\n",
"which is the nightly Model Garden package created daily automatically.\n",
"* pip will install all models and dependencies automatically."
]
},
......@@ -112,8 +113,7 @@
},
"outputs": [],
"source": [
"!pip install -q tf-nightly\n",
"!pip install -q tf-models-nightly"
"!pip install -q tf-models-official==2.3.0"
]
},
{
......
......@@ -100,7 +100,8 @@
"source": [
"### Install the TensorFlow Model Garden pip package\n",
"\n",
"* `tf-models-nightly` is the nightly Model Garden package created daily automatically.\n",
"* `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes in the `tensorflow_models` github repo. To include latest changes, you may install `tf-models-nightly`,\n",
"which is the nightly Model Garden package created daily automatically.\n",
"* `pip` will install all models and dependencies automatically."
]
},
......@@ -114,8 +115,7 @@
},
"outputs": [],
"source": [
"!pip install -q tf-nightly\n",
"!pip install -q tf-models-nightly"
"!pip install -q tf-models-official==2.3.0"
]
},
{
......
......@@ -98,7 +98,8 @@
"source": [
"### Install the TensorFlow Model Garden pip package\n",
"\n",
"* `tf-models-nightly` is the nightly Model Garden package created daily automatically.\n",
"* `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes in the `tensorflow_models` github repo. To include latest changes, you may install `tf-models-nightly`,\n",
"which is the nightly Model Garden package created daily automatically.\n",
"* `pip` will install all models and dependencies automatically."
]
},
......@@ -112,8 +113,7 @@
},
"outputs": [],
"source": [
"!pip install -q tf-nightly\n",
"!pip install -q tf-models-nightly"
"!pip install -q tf-models-official==2.3.0"
]
},
{
......@@ -478,7 +478,7 @@
"source": [
"### Build a BertClassifier model wrapping TransformerEncoder\n",
"\n",
"[BertClassifier](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/models/bert_classifier.py) implements a simple token classification model containing a single classification head using the `TokenClassification` network."
"[BertClassifier](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/models/bert_classifier.py) implements a [CLS] token classification model containing a single classification head."
]
},
{
......
......@@ -18,11 +18,11 @@ import abc
import functools
from typing import Any, Callable, Optional
from absl import logging
import six
import tensorflow as tf
from official.modeling.hyperparams import config_definitions as cfg
from official.utils import registry
@six.add_metaclass(abc.ABCMeta)
......@@ -67,7 +67,19 @@ class Task(tf.Module):
Args:
model: The keras.Model built or used by this task.
"""
pass
ckpt_dir_or_file = self.task_config.init_checkpoint
logging.info("Trying to load pretrained checkpoint from %s",
ckpt_dir_or_file)
if tf.io.gfile.isdir(ckpt_dir_or_file):
ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file)
if not ckpt_dir_or_file:
return
ckpt = tf.train.Checkpoint(**model.checkpoint_items)
status = ckpt.read(ckpt_dir_or_file)
status.expect_partial().assert_existing_objects_matched()
logging.info("Finished loading pretrained checkpoint from %s",
ckpt_dir_or_file)
@abc.abstractmethod
def build_model(self) -> tf.keras.Model:
......@@ -282,49 +294,3 @@ class Task(tf.Module):
"""Optional reduce of aggregated logs over validation steps."""
return {}
_REGISTERED_TASK_CLS = {}
# TODO(b/158268740): Move these outside the base class file.
# TODO(b/158741360): Add type annotations once pytype checks across modules.
def register_task_cls(task_config_cls):
  """Decorates a Task factory so it can be looked up by its TaskConfig type.

  Typical registration looks like:

  ```
  @dataclasses.dataclass
  class MyTaskConfig(TaskConfig):
    # Add fields here.
    pass

  @register_task_cls(MyTaskConfig)
  class MyTask(Task):
    # Inherits def __init__(self, task_config).
    pass

  my_task_config = MyTaskConfig()
  my_task = get_task(my_task_config)  # Returns MyTask(my_task_config).
  ```

  Besides a class itself, any other callable that builds a Task from a
  TaskConfig may be decorated, as long as each config class is registered
  at most once.

  Args:
    task_config_cls: a subclass of TaskConfig (*not* an instance of
      TaskConfig). Each task_config_cls can only be used for a single
      registration.

  Returns:
    A callable for use as class decorator that registers the decorated class
    for creation from an instance of task_config_cls.
  """
  return registry.register(_REGISTERED_TASK_CLS, task_config_cls)
# The user-visible get_task() is defined after classes have been registered.
# TODO(b/158741360): Add type annotations once pytype checks across modules.
def get_task_cls(task_config_cls):
  """Returns the Task class registered for `task_config_cls`."""
  return registry.lookup(_REGISTERED_TASK_CLS, task_config_cls)
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow_models.core.base_task."""
import functools
from absl.testing import parameterized
import tensorflow as tf
from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.utils.testing import mock_task
def all_strategy_combinations():
  """Returns the strategy/mode grid the tests are parameterized over."""
  strategies = [
      strategy_combinations.default_strategy,
      strategy_combinations.tpu_strategy,
      strategy_combinations.one_device_strategy_gpu,
  ]
  return combinations.combine(distribution=strategies, mode='eager')
class TaskKerasTest(tf.test.TestCase, parameterized.TestCase):
  """Tests driving a mock Task through the Keras compile/fit workflow."""

  @combinations.generate(all_strategy_combinations())
  def test_task_with_step_override(self, distribution):
    """Fits a model whose train/validation steps are supplied by the task."""
    with distribution.scope():
      task = mock_task.MockTask()
      model = task.build_model()
      model = task.compile_model(
          model,
          optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
          metrics=task.build_metrics(),
          train_step=task.train_step,
          validation_step=task.validation_step)
    dataset = task.build_inputs(params=None)
    logs = model.fit(dataset, epochs=1, steps_per_epoch=2)
    self.assertIn('loss', logs.history)
    self.assertIn('acc', logs.history)

    # Without specifying metrics through compile.
    # Metrics are instead bound into the step functions via functools.partial.
    with distribution.scope():
      train_metrics = task.build_metrics(training=True)
      val_metrics = task.build_metrics(training=False)
      model = task.build_model()
      model = task.compile_model(
          model,
          optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
          train_step=functools.partial(task.train_step, metrics=train_metrics),
          validation_step=functools.partial(
              task.validation_step, metrics=val_metrics))
    logs = model.fit(dataset, epochs=1, steps_per_epoch=2)
    self.assertIn('loss', logs.history)
    self.assertIn('acc', logs.history)

  def test_task_with_fit(self):
    """Compiles with an explicit Keras loss and runs fit/evaluate."""
    task = mock_task.MockTask()
    model = task.build_model()
    model = task.compile_model(
        model,
        optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=task.build_metrics())
    dataset = task.build_inputs(params=None)
    logs = model.fit(dataset, epochs=1, steps_per_epoch=2)
    self.assertIn('loss', logs.history)
    self.assertIn('acc', logs.history)
    # evaluate() should report exactly the loss plus the single metric.
    self.assertLen(model.evaluate(dataset, steps=1), 2)

  def test_task_invalid_compile(self):
    """Passing both a Keras loss and a custom train_step must raise."""
    task = mock_task.MockTask()
    model = task.build_model()
    with self.assertRaises(ValueError):
      _ = task.compile_model(
          model,
          optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
          loss=tf.keras.losses.CategoricalCrossentropy(),
          metrics=task.build_metrics(),
          train_step=task.train_step)
if __name__ == '__main__':
tf.test.main()
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Standard Trainer implementation.
The base trainer implements the Orbit `StandardTrainable` and
`StandardEvaluable` interfaces. Trainers inside this project should be
interchangeable and independent of model architectures and tasks.
"""
import gin
import orbit
import tensorflow as tf
from official.core import base_task
from official.modeling import optimization
from official.modeling import performance
from official.modeling.hyperparams import config_definitions
ExperimentConfig = config_definitions.ExperimentConfig
@gin.configurable
class Trainer(orbit.StandardTrainer, orbit.StandardEvaluator):
  """Implements the common trainer shared for TensorFlow models."""

  def __init__(self,
               config: ExperimentConfig,
               task: base_task.Task,
               train: bool = True,
               evaluate: bool = True,
               model=None,
               optimizer=None):
    """Initialize common trainer for TensorFlow models.

    Args:
      config: An `ExperimentConfig` instance specifying experiment config.
      task: A base_task.Task instance.
      train: bool, whether or not this trainer will be used for training.
        default to True.
      evaluate: bool, whether or not this trainer will be used for evaluation.
        default to True.
      model: tf.keras.Model instance. If provided, it will be used instead
        of building model using task.build_model(). Default to None.
      optimizer: tf.keras.optimizers.Optimizer instance. If provided, it will
        be used instead of the optimizer from config. Default to None.
    """
    # Gets the current distribution strategy. If not inside any strategy scope,
    # it gets a single-replica no-op strategy.
    self._strategy = tf.distribute.get_strategy()
    self._config = config
    self._task = task
    self._model = model or task.build_model()
    # Build the optimizer (and its learning-rate schedule) from the trainer
    # config unless the caller injected one.
    if optimizer is None:
      opt_factory = optimization.OptimizerFactory(
          config.trainer.optimizer_config)
      self._optimizer = opt_factory.build_optimizer(
          opt_factory.build_learning_rate())
    else:
      self._optimizer = optimizer
    # Configuring optimizer when loss_scale is set in runtime config. This helps
    # avoiding overflow/underflow for float16 computations.
    if config.runtime.loss_scale:
      self._optimizer = performance.configure_optimizer(
          self._optimizer,
          use_float16=config.runtime.mixed_precision_dtype == 'float16',
          loss_scale=config.runtime.loss_scale)
    # global_step increases by 1 after each training iteration.
    # We should have global_step.numpy() == self.optimizer.iterations.numpy()
    # when there is only 1 optimizer.
    self._global_step = orbit.utils.create_global_step()
    # Models may expose extra sub-objects to checkpoint (e.g. an encoder)
    # through a `checkpoint_items` mapping.
    if hasattr(self.model, 'checkpoint_items'):
      checkpoint_items = self.model.checkpoint_items
    else:
      checkpoint_items = {}
    self._checkpoint = tf.train.Checkpoint(
        global_step=self.global_step, model=self.model,
        optimizer=self.optimizer, **checkpoint_items)
    self._train_loss = tf.keras.metrics.Mean('training_loss', dtype=tf.float32)
    self._validation_loss = tf.keras.metrics.Mean(
        'validation_loss', dtype=tf.float32)
    # Task-defined metrics are combined with any metrics the Keras model
    # itself tracks.
    self._train_metrics = self.task.build_metrics(
        training=True) + self.model.metrics
    self._validation_metrics = self.task.build_metrics(
        training=False) + self.model.metrics
    # Initialize the Orbit trainer/evaluator bases with distributed datasets
    # built from the task's input function.
    if train:
      train_dataset = orbit.utils.make_distributed_dataset(
          self.strategy, self.task.build_inputs, self.config.task.train_data)
      orbit.StandardTrainer.__init__(
          self,
          train_dataset,
          options=orbit.StandardTrainerOptions(
              use_tf_while_loop=config.trainer.train_tf_while_loop,
              use_tf_function=config.trainer.train_tf_function,
              use_tpu_summary_optimization=config.trainer.allow_tpu_summary))
    if evaluate:
      eval_dataset = orbit.utils.make_distributed_dataset(
          self.strategy, self.task.build_inputs,
          self.config.task.validation_data)
      orbit.StandardEvaluator.__init__(
          self,
          eval_dataset,
          options=orbit.StandardEvaluatorOptions(
              use_tf_function=config.trainer.eval_tf_function))

  @property
  def strategy(self):
    """The `tf.distribute` strategy captured at construction time."""
    return self._strategy

  @property
  def config(self):
    """The `ExperimentConfig` driving this trainer."""
    return self._config

  @property
  def task(self):
    """The `base_task.Task` being trained/evaluated."""
    return self._task

  @property
  def model(self):
    """The `tf.keras.Model` built by or passed to this trainer."""
    return self._model

  @property
  def optimizer(self):
    """The optimizer instance (possibly wrapped for loss scaling)."""
    return self._optimizer

  @property
  def global_step(self):
    """The global step variable, incremented once per train step."""
    return self._global_step

  @property
  def train_loss(self):
    """Accesses the training loss metric object."""
    return self._train_loss

  @property
  def validation_loss(self):
    """Accesses the validation loss metric object."""
    return self._validation_loss

  @property
  def train_metrics(self):
    """Accesses all training metric objects."""
    return self._train_metrics

  @property
  def validation_metrics(self):
    """Accesses all validation metric objects."""
    return self._validation_metrics

  def initialize(self):
    """A callback function.

    This function will be called when no checkpoint found for the model.
    If there is a checkpoint, the checkpoint will be loaded and this function
    will not be called. Tasks may use this callback function to load a
    pretrained checkpoint, saved under a directory other than the model_dir.
    """
    self.task.initialize(self.model)

  @property
  def checkpoint(self):
    """Accesses the training checkpoint."""
    return self._checkpoint

  def train_loop_end(self):
    """See base class."""
    logs = {}
    # Snapshot then reset every training metric so the next loop starts fresh.
    for metric in self.train_metrics + [self.train_loss]:
      logs[metric.name] = metric.result()
      metric.reset_states()
    # learning_rate may be a schedule (callable on the step) or a plain value.
    if callable(self.optimizer.learning_rate):
      logs['learning_rate'] = self.optimizer.learning_rate(self.global_step)
    else:
      logs['learning_rate'] = self.optimizer.learning_rate
    return logs

  def train_step(self, iterator):
    """See base class."""

    def step_fn(inputs):
      # Delegates the per-replica step to the task; the task reports its loss
      # under the key `self.task.loss`.
      logs = self.task.train_step(
          inputs,
          model=self.model,
          optimizer=self.optimizer,
          metrics=self.train_metrics)
      self._train_loss.update_state(logs[self.task.loss])
      self.global_step.assign_add(1)

    self.strategy.run(step_fn, args=(next(iterator),))

  def eval_begin(self):
    """Sets up metrics."""
    for metric in self.validation_metrics + [self.validation_loss]:
      metric.reset_states()

  def eval_step(self, iterator):
    """See base class."""

    def step_fn(inputs):
      logs = self.task.validation_step(
          inputs, model=self.model, metrics=self.validation_metrics)
      self._validation_loss.update_state(logs[self.task.loss])
      return logs

    distributed_outputs = self.strategy.run(step_fn, args=(next(iterator),))
    # Unwrap per-replica values so callers see plain (local) results.
    return tf.nest.map_structure(self.strategy.experimental_local_results,
                                 distributed_outputs)

  def eval_end(self, aggregated_logs=None):
    """Processes evaluation results."""
    logs = {}
    for metric in self.validation_metrics + [self.validation_loss]:
      logs[metric.name] = metric.result()
    # Let the task reduce any logs accumulated via eval_reduce().
    if aggregated_logs:
      metrics = self.task.reduce_aggregated_logs(aggregated_logs)
      logs.update(metrics)
    return logs

  def eval_reduce(self, state=None, step_outputs=None):
    return self.task.aggregate_logs(state, step_outputs)
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow_models.core.trainers.trainer."""
# pylint: disable=g-direct-tensorflow-import
from absl.testing import parameterized
import tensorflow as tf
from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.core import base_trainer as trainer_lib
from official.modeling.hyperparams import config_definitions as cfg
from official.utils.testing import mock_task
def all_strategy_combinations():
  """Returns the strategy/mode grid the trainer tests run under."""
  strategies = [
      strategy_combinations.default_strategy,
      strategy_combinations.tpu_strategy,
      strategy_combinations.one_device_strategy_gpu,
  ]
  return combinations.combine(distribution=strategies, mode='eager')
class TrainerTest(tf.test.TestCase, parameterized.TestCase):
  """Tests for the common Trainer using a mock task."""

  def setUp(self):
    super().setUp()
    # Minimal experiment config: plain SGD with a constant learning rate.
    self._config = cfg.ExperimentConfig(
        trainer=cfg.TrainerConfig(
            optimizer_config=cfg.OptimizationConfig(
                {'optimizer': {
                    'type': 'sgd'
                },
                 'learning_rate': {
                     'type': 'constant'
                 }})))

  def create_test_trainer(self):
    # Builds a Trainer wired to the mock task and the shared test config.
    task = mock_task.MockTask()
    trainer = trainer_lib.Trainer(self._config, task)
    return trainer

  @combinations.generate(all_strategy_combinations())
  def test_trainer_train(self, distribution):
    """Runs a few train steps and checks the returned logs."""
    with distribution.scope():
      trainer = self.create_test_trainer()
      logs = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
      self.assertIn('training_loss', logs)
      self.assertIn('learning_rate', logs)

  @combinations.generate(all_strategy_combinations())
  def test_trainer_validate(self, distribution):
    """Runs evaluation and checks the loss/metric logs."""
    with distribution.scope():
      trainer = self.create_test_trainer()
      logs = trainer.evaluate(tf.convert_to_tensor(5, dtype=tf.int32))
      self.assertIn('validation_loss', logs)
      # Each replica contributes to the accumulated metric.
      self.assertEqual(logs['acc'], 5. * distribution.num_replicas_in_sync)

  @combinations.generate(
      combinations.combine(
          mixed_precision_dtype=['float32', 'bfloat16', 'float16'],
          loss_scale=[None, 'dynamic', 128, 256],
      ))
  def test_configure_optimizer(self, mixed_precision_dtype, loss_scale):
    """Checks loss-scale wrapping only happens for float16 with a loss scale."""
    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(
            mixed_precision_dtype=mixed_precision_dtype, loss_scale=loss_scale),
        trainer=cfg.TrainerConfig(
            optimizer_config=cfg.OptimizationConfig(
                {'optimizer': {
                    'type': 'sgd'
                },
                 'learning_rate': {
                     'type': 'constant'
                 }})))
    task = mock_task.MockTask()
    trainer = trainer_lib.Trainer(config, task)
    if mixed_precision_dtype != 'float16':
      self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD)
    elif mixed_precision_dtype == 'float16' and loss_scale is None:
      self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD)
    else:
      # float16 + a configured loss scale wraps the optimizer.
      self.assertIsInstance(
          trainer.optimizer,
          tf.keras.mixed_precision.experimental.LossScaleOptimizer)
    metrics = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
    self.assertIn('training_loss', metrics)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -12,24 +13,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Experiment factory methods."""
"""Activation and weight binarizer implementations."""
from official.modeling.hyperparams import config_definitions as cfg
from official.utils import registry
import math
import numpy as np
import tensorflow as tf
_REGISTERED_CONFIGS = {}
def ConvertSignCodeToZeroOneCode(x):
  """Maps sign codes {-1, +1} onto binary codes {0, 1}."""
  return (x + 1.0) * 0.5
def register_config_factory(name):
  """Registers an ExperimentConfig factory method under `name`."""
  return registry.register(_REGISTERED_CONFIGS, name)
def ConvertZeroOneCodeToSignCode(x):
  """Maps binary codes {0, 1} onto sign codes {-1, +1}."""
  return x * 2.0 - 1.0
def get_exp_config_creater(exp_name: str):
  """Looks up the ExperimentConfig factory method registered for `exp_name`."""
  return registry.lookup(_REGISTERED_CONFIGS, exp_name)
def CheckZeroOneCode(x):
  """Returns a scalar bool tensor: True iff every entry of `x` is 0 or 1."""
  # x * (x - 1) vanishes exactly when x == 0 or x == 1.
  residual = x * (x - 1.0)
  return tf.reduce_all(tf.equal(residual, 0))
def get_exp_config(exp_name: str) -> cfg.ExperimentConfig:
  """Instantiates the ExperimentConfig registered under `exp_name`."""
  config_factory = get_exp_config_creater(exp_name)
  return config_factory()
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A global factory to register and access all registered tasks."""
from official.utils import registry
_REGISTERED_TASK_CLS = {}
# TODO(b/158741360): Add type annotations once pytype checks across modules.
def register_task_cls(task_config_cls):
  """Decorates a Task factory so it can be looked up by its TaskConfig type.

  Typical registration looks like:

  ```
  @dataclasses.dataclass
  class MyTaskConfig(TaskConfig):
    # Add fields here.
    pass

  @register_task_cls(MyTaskConfig)
  class MyTask(Task):
    # Inherits def __init__(self, task_config).
    pass

  my_task_config = MyTaskConfig()
  my_task = get_task(my_task_config)  # Returns MyTask(my_task_config).
  ```

  Besides a class itself, any other callable that builds a Task from a
  TaskConfig may be decorated, as long as each config class is registered
  at most once.

  Args:
    task_config_cls: a subclass of TaskConfig (*not* an instance of
      TaskConfig). Each task_config_cls can only be used for a single
      registration.

  Returns:
    A callable for use as class decorator that registers the decorated class
    for creation from an instance of task_config_cls.
  """
  return registry.register(_REGISTERED_TASK_CLS, task_config_cls)
def get_task(task_config, **kwargs):
  """Creates a Task (of the registered subclass type) from `task_config`."""
  task_cls = get_task_cls(task_config.__class__)
  return task_cls(task_config, **kwargs)
# The user-visible get_task() is defined after classes have been registered.
# TODO(b/158741360): Add type annotations once pytype checks across modules.
def get_task_cls(task_config_cls):
  """Returns the Task class registered for `task_config_cls`."""
  return registry.lookup(_REGISTERED_TASK_CLS, task_config_cls)
......@@ -14,12 +14,6 @@
# ==============================================================================
"""Gaussian error linear unit."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import tensorflow as tf
......@@ -35,6 +29,4 @@ def gelu(x):
Returns:
`x` with the GELU activation applied.
"""
cdf = 0.5 * (1.0 + tf.tanh(
(math.sqrt(2 / math.pi) * (x + 0.044715 * tf.pow(x, 3)))))
return x * cdf
return tf.keras.activations.gelu(x, approximate=True)
......@@ -21,7 +21,6 @@ import dataclasses
from official.modeling.hyperparams import base_config
from official.modeling.optimization.configs import optimization_config
from official.utils import registry
OptimizationConfig = optimization_config.OptimizationConfig
......@@ -179,6 +178,7 @@ class TrainerConfig(base_config.Config):
max_to_keep: max checkpoints to keep.
continuous_eval_timeout: maximum number of seconds to wait between
checkpoints, if set to None, continuous eval will wait indefinitely.
This is only used in the continuous_train_and_eval and continuous_eval modes.
train_steps: number of train steps.
validation_steps: number of eval steps. If `None`, the entire eval dataset
is used.
......@@ -205,6 +205,7 @@ class TrainerConfig(base_config.Config):
@dataclasses.dataclass
class TaskConfig(base_config.Config):
init_checkpoint: str = ""
model: base_config.Config = None
train_data: DataConfig = DataConfig()
validation_data: DataConfig = DataConfig()
......@@ -217,16 +218,3 @@ class ExperimentConfig(base_config.Config):
trainer: TrainerConfig = TrainerConfig()
runtime: RuntimeConfig = RuntimeConfig()
_REGISTERED_CONFIGS = {}
def register_config_factory(name):
"""Register ExperimentConfig factory method."""
return registry.register(_REGISTERED_CONFIGS, name)
def get_exp_config_creater(exp_name: str):
"""Looks up ExperimentConfig factory methods."""
exp_creater = registry.lookup(_REGISTERED_CONFIGS, exp_name)
return exp_creater
......@@ -106,6 +106,7 @@ class AdamWeightDecayConfig(base_config.Config):
weight_decay_rate: float = 0.0
include_in_weight_decay: Optional[List[str]] = None
exclude_from_weight_decay: Optional[List[str]] = None
gradient_clip_norm: float = 1.0
@dataclasses.dataclass
......
......@@ -63,8 +63,8 @@ def metrics_as_dict(metric):
"""Puts input metric(s) into a list.
Args:
metric: metric(s) to be put into the list. `metric` could be a object, a
list or a dict of tf.keras.metrics.Metric or has the `required_method`.
metric: metric(s) to be put into the list. `metric` could be an object, a
list, or a dict of tf.keras.metrics.Metric or has the `required_method`.
Returns:
A dictionary of valid metrics.
......@@ -351,7 +351,8 @@ class DistributedExecutor(object):
train_input_fn: (params: dict) -> tf.data.Dataset training data input
function.
eval_input_fn: (Optional) same type as train_input_fn. If not None, will
trigger evaluting metric on eval data. If None, will not run eval step.
trigger evaluating metric on eval data. If None, will not run the eval
step.
model_dir: the folder path for model checkpoints.
total_steps: total training steps.
iterations_per_loop: train steps per loop. After each loop, this job will
......@@ -672,7 +673,7 @@ class DistributedExecutor(object):
raise ValueError('if `eval_metric_fn` is specified, '
'eval_metric_fn must be a callable.')
old_phrase = tf.keras.backend.learning_phase()
old_phase = tf.keras.backend.learning_phase()
tf.keras.backend.set_learning_phase(0)
params = self._params
strategy = self._strategy
......@@ -698,7 +699,8 @@ class DistributedExecutor(object):
logging.info(
'Checkpoint file %s found and restoring from '
'checkpoint', checkpoint_path)
checkpoint.restore(checkpoint_path)
status = checkpoint.restore(checkpoint_path)
status.expect_partial().assert_existing_objects_matched()
self.global_train_step = model.optimizer.iterations
eval_iterator = self._get_input_iterator(eval_input_fn, strategy)
......@@ -709,7 +711,7 @@ class DistributedExecutor(object):
summary_writer(metrics=eval_metric_result, step=current_step)
reset_states(eval_metric)
tf.keras.backend.set_learning_phase(old_phrase)
tf.keras.backend.set_learning_phase(old_phase)
return eval_metric_result, current_step
def predict(self):
......@@ -759,7 +761,7 @@ class ExecutorBuilder(object):
Args:
strategy_type: string. One of 'tpu', 'mirrored', 'multi_worker_mirrored'.
If None. User is responsible to set the strategy before calling
If None, the user is responsible to set the strategy before calling
build_executor(...).
strategy_config: necessary config for constructing the proper Strategy.
Check strategy_flags_dict() for examples of the structure.
......
......@@ -86,7 +86,7 @@ def _create_albert_model(cfg):
activation=activations.gelu,
dropout_rate=cfg.hidden_dropout_prob,
attention_dropout_rate=cfg.attention_probs_dropout_prob,
sequence_length=cfg.max_position_embeddings,
max_sequence_length=cfg.max_position_embeddings,
type_vocab_size=cfg.type_vocab_size,
initializer=tf.keras.initializers.TruncatedNormal(
stddev=cfg.initializer_range))
......
......@@ -46,6 +46,8 @@ The new checkpoints are:**
12-layer, 768-hidden, 12-heads , 110M parameters
* **[`BERT-Large, Cased`](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/keras_bert/cased_L-24_H-1024_A-16.tar.gz)**:
24-layer, 1024-hidden, 16-heads, 340M parameters
* **[`BERT-Base, Multilingual Cased`](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/keras_bert/multi_cased_L-12_H-768_A-12.tar.gz)**:
104 languages, 12-layer, 768-hidden, 12-heads, 110M parameters
We recommend hosting checkpoints on Google Cloud Storage buckets when you use
Cloud GPU/TPU.
......
......@@ -104,14 +104,14 @@ class BertPretrainLossAndMetricLayer(tf.keras.layers.Layer):
@gin.configurable
def get_transformer_encoder(bert_config,
sequence_length,
sequence_length=None,
transformer_encoder_cls=None,
output_range=None):
"""Gets a 'TransformerEncoder' object.
Args:
bert_config: A 'modeling.BertConfig' or 'modeling.AlbertConfig' object.
sequence_length: Maximum sequence length of the training data.
sequence_length: [Deprecated].
transformer_encoder_cls: A EncoderScaffold class. If it is None, uses the
default BERT encoder implementation.
output_range: the sequence output range, [0, output_range). Default setting
......@@ -120,13 +120,13 @@ def get_transformer_encoder(bert_config,
Returns:
A networks.TransformerEncoder object.
"""
del sequence_length
if transformer_encoder_cls is not None:
# TODO(hongkuny): evaluate if it is better to put cfg definition in gin.
embedding_cfg = dict(
vocab_size=bert_config.vocab_size,
type_vocab_size=bert_config.type_vocab_size,
hidden_size=bert_config.hidden_size,
seq_length=sequence_length,
max_seq_length=bert_config.max_position_embeddings,
initializer=tf.keras.initializers.TruncatedNormal(
stddev=bert_config.initializer_range),
......@@ -161,7 +161,6 @@ def get_transformer_encoder(bert_config,
activation=tf_utils.get_activation(bert_config.hidden_act),
dropout_rate=bert_config.hidden_dropout_prob,
attention_dropout_rate=bert_config.attention_probs_dropout_prob,
sequence_length=sequence_length,
max_sequence_length=bert_config.max_position_embeddings,
type_vocab_size=bert_config.type_vocab_size,
embedding_width=bert_config.embedding_size,
......
......@@ -56,8 +56,6 @@ class BertModelsTest(tf.test.TestCase):
# Expect two output from encoder: sequence and classification output.
self.assertIsInstance(encoder.output, list)
self.assertLen(encoder.output, 2)
# shape should be [batch size, seq_length, hidden_size]
self.assertEqual(encoder.output[0].shape.as_list(), [None, 5, 16])
# shape should be [batch size, hidden_size]
self.assertEqual(encoder.output[1].shape.as_list(), [None, 16])
......@@ -74,16 +72,12 @@ class BertModelsTest(tf.test.TestCase):
# Expect two output from model: start positions and end positions
self.assertIsInstance(model.output, list)
self.assertLen(model.output, 2)
# shape should be [batch size, seq_length]
self.assertEqual(model.output[0].shape.as_list(), [None, 5])
# shape should be [batch size, seq_length]
self.assertEqual(model.output[1].shape.as_list(), [None, 5])
# Expect two output from core_model: sequence and classification output.
self.assertIsInstance(core_model.output, list)
self.assertLen(core_model.output, 2)
# shape should be [batch size, seq_length, hidden_size]
self.assertEqual(core_model.output[0].shape.as_list(), [None, 5, 16])
# shape should be [batch size, None, hidden_size]
self.assertEqual(core_model.output[0].shape.as_list(), [None, None, 16])
# shape should be [batch size, hidden_size]
self.assertEqual(core_model.output[1].shape.as_list(), [None, 16])
......@@ -104,8 +98,8 @@ class BertModelsTest(tf.test.TestCase):
# Expect two output from core_model: sequence and classification output.
self.assertIsInstance(core_model.output, list)
self.assertLen(core_model.output, 2)
# shape should be [batch size, 1, hidden_size]
self.assertEqual(core_model.output[0].shape.as_list(), [None, 1, 16])
# shape should be [batch size, None, hidden_size]
self.assertEqual(core_model.output[0].shape.as_list(), [None, None, 16])
# shape should be [batch size, hidden_size]
self.assertEqual(core_model.output[1].shape.as_list(), [None, 16])
......
......@@ -61,7 +61,7 @@ def _create_bert_model(cfg):
activation=activations.gelu,
dropout_rate=cfg.hidden_dropout_prob,
attention_dropout_rate=cfg.attention_probs_dropout_prob,
sequence_length=cfg.max_position_embeddings,
max_sequence_length=cfg.max_position_embeddings,
type_vocab_size=cfg.type_vocab_size,
initializer=tf.keras.initializers.TruncatedNormal(
stddev=cfg.initializer_range),
......@@ -73,6 +73,7 @@ def _create_bert_model(cfg):
def convert_checkpoint(bert_config, output_path, v1_checkpoint):
"""Converts a V1 checkpoint into an OO V2 checkpoint."""
output_dir, _ = os.path.split(output_path)
tf.io.gfile.makedirs(output_dir)
# Create a temporary V1 name-converted checkpoint in the output directory.
temporary_checkpoint_dir = os.path.join(output_dir, "temp_v1")
......
......@@ -20,13 +20,9 @@ Includes configurations and instantiation methods.
from typing import List, Optional, Text
import dataclasses
import tensorflow as tf
from official.modeling import tf_utils
from official.modeling.hyperparams import base_config
from official.nlp.configs import encoders
from official.nlp.modeling import layers
from official.nlp.modeling.models import bert_pretrainer
@dataclasses.dataclass
......@@ -40,32 +36,9 @@ class ClsHeadConfig(base_config.Config):
@dataclasses.dataclass
class BertPretrainerConfig(base_config.Config):
"""BERT encoder configuration."""
encoder: encoders.TransformerEncoderConfig = (
encoders.TransformerEncoderConfig())
class PretrainerConfig(base_config.Config):
"""Pretrainer configuration."""
encoder: encoders.EncoderConfig = encoders.EncoderConfig()
cls_heads: List[ClsHeadConfig] = dataclasses.field(default_factory=list)
def instantiate_classification_heads_from_cfgs(
cls_head_configs: List[ClsHeadConfig]) -> List[layers.ClassificationHead]:
return [
layers.ClassificationHead(**cfg.as_dict()) for cfg in cls_head_configs
] if cls_head_configs else []
def instantiate_pretrainer_from_cfg(
config: BertPretrainerConfig,
encoder_network: Optional[tf.keras.Model] = None
) -> bert_pretrainer.BertPretrainerV2:
"""Instantiates a BertPretrainer from the config."""
encoder_cfg = config.encoder
if encoder_network is None:
encoder_network = encoders.instantiate_encoder_from_cfg(encoder_cfg)
return bert_pretrainer.BertPretrainerV2(
mlm_activation=tf_utils.get_activation(encoder_cfg.hidden_activation),
mlm_initializer=tf.keras.initializers.TruncatedNormal(
stddev=encoder_cfg.initializer_range),
encoder_network=encoder_network,
classification_heads=instantiate_classification_heads_from_cfgs(
config.cls_heads))
mlm_activation: str = "gelu"
mlm_initializer_range: float = 0.02
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment