"app/package.json" did not exist on "ad4ffdf754e4e9bc73d3a92c2192a02704d9cbf3"
Commit 657dcda5 authored by Kaushik Shivakumar's avatar Kaushik Shivakumar
Browse files

pull latest

parents 26e24e21 e6017471
@@ -26,7 +26,6 @@ from official.core import base_task
from official.modeling.hyperparams import config_definitions as cfg
from official.nlp.configs import bert
from official.nlp.data import sentence_prediction_dataloader
-from official.nlp.modeling import losses as loss_lib
from official.nlp.tasks import utils
@@ -36,6 +35,7 @@ class SentencePredictionConfig(cfg.TaskConfig):
  # At most one of `init_checkpoint` and `hub_module_url` can
  # be specified.
  init_checkpoint: str = ''
+  init_cls_pooler: bool = False
  hub_module_url: str = ''
  metric_type: str = 'accuracy'
  model: bert.BertPretrainerConfig = bert.BertPretrainerConfig(
@@ -55,11 +55,11 @@ class SentencePredictionConfig(cfg.TaskConfig):
class SentencePredictionTask(base_task.Task):
  """Task object for sentence_prediction."""

-  def __init__(self, params=cfg.TaskConfig):
-    super(SentencePredictionTask, self).__init__(params)
+  def __init__(self, params=cfg.TaskConfig, logging_dir=None):
+    super(SentencePredictionTask, self).__init__(params, logging_dir)
    if params.hub_module_url and params.init_checkpoint:
      raise ValueError('At most one of `hub_module_url` and '
-                       '`pretrain_checkpoint_dir` can be specified.')
+                       '`init_checkpoint` can be specified.')
    if params.hub_module_url:
      self._hub_module = hub.load(params.hub_module_url)
    else:
@@ -75,10 +75,10 @@ class SentencePredictionTask(base_task.Task):
    return bert.instantiate_bertpretrainer_from_cfg(self.task_config.model)

  def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor:
-    loss = loss_lib.weighted_sparse_categorical_crossentropy_loss(
-        labels=labels,
-        predictions=tf.nn.log_softmax(
-            tf.cast(model_outputs['sentence_prediction'], tf.float32), axis=-1))
+    loss = tf.keras.losses.sparse_categorical_crossentropy(
+        labels,
+        tf.cast(model_outputs['sentence_prediction'], tf.float32),
+        from_logits=True)
    if aux_losses:
      loss += tf.add_n(aux_losses)
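The replacement computes sparse cross-entropy straight from logits. As a quick illustrative check (toy logits and labels, not part of this commit), passing `from_logits=True` matches applying an explicit `log_softmax` and gathering the true-class log-probability:

```python
import tensorflow as tf

# Toy batch: 2 examples, 3 classes. Values are illustrative only.
logits = tf.constant([[2.0, 0.5, -1.0], [0.1, 0.2, 3.0]])
labels = tf.constant([0, 2])

# Form used by the commit: per-example sparse CE computed directly from logits.
ce = tf.keras.losses.sparse_categorical_crossentropy(
    labels, logits, from_logits=True)

# Equivalent computation via an explicit log-softmax.
log_probs = tf.nn.log_softmax(logits, axis=-1)
manual = -tf.gather(log_probs, labels, batch_dims=1)

tf.debugging.assert_near(ce, manual)  # both are per-example losses
```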
@@ -94,7 +94,7 @@ class SentencePredictionTask(base_task.Task):
          input_word_ids=dummy_ids,
          input_mask=dummy_ids,
          input_type_ids=dummy_ids)
-      y = tf.ones((1, 1), dtype=tf.int32)
+      y = tf.zeros((1, 1), dtype=tf.int32)
      return (x, y)

    dataset = tf.data.Dataset.range(1)
@@ -126,25 +126,26 @@ class SentencePredictionTask(base_task.Task):
    outputs = self.inference_step(features, model)
    loss = self.build_losses(
        labels=labels, model_outputs=outputs, aux_losses=model.losses)
+    logs = {self.loss: loss}
    if self.metric_type == 'matthews_corrcoef':
-      return {
-          self.loss:
-              loss,
+      logs.update({
          'sentence_prediction':
              tf.expand_dims(
                  tf.math.argmax(outputs['sentence_prediction'], axis=1),
                  axis=0),
          'labels':
              labels,
-      }
+      })
    if self.metric_type == 'pearson_spearman_corr':
-      return {
-          self.loss: loss,
+      logs.update({
          'sentence_prediction': outputs['sentence_prediction'],
          'labels': labels,
-      }
+      })
+    return logs

  def aggregate_logs(self, state=None, step_outputs=None):
+    if self.metric_type == 'accuracy':
+      return None
    if state is None:
      state = {'sentence_prediction': [], 'labels': []}
    state['sentence_prediction'].append(
@@ -178,13 +179,16 @@ class SentencePredictionTask(base_task.Task):
      return
    pretrain2finetune_mapping = {
-        'encoder':
-            model.checkpoint_items['encoder'],
-        'next_sentence.pooler_dense':
-            model.checkpoint_items['sentence_prediction.pooler_dense'],
+        'encoder': model.checkpoint_items['encoder'],
    }
+    # TODO(b/160251903): Investigate why no pooler dense improves finetuning
+    # accuracies.
+    if self.task_config.init_cls_pooler:
+      pretrain2finetune_mapping[
+          'next_sentence.pooler_dense'] = model.checkpoint_items[
+              'sentence_prediction.pooler_dense']
    ckpt = tf.train.Checkpoint(**pretrain2finetune_mapping)
-    status = ckpt.restore(ckpt_dir_or_file)
+    status = ckpt.read(ckpt_dir_or_file)
    status.expect_partial().assert_existing_objects_matched()
    logging.info('finished loading pretrained checkpoint from %s',
                 ckpt_dir_or_file)
@@ -15,7 +15,12 @@
# ==============================================================================
"""Tagging (e.g., NER/POS) task."""
import logging
+from typing import List, Optional

import dataclasses
+from seqeval import metrics as seqeval_metrics
import tensorflow as tf
import tensorflow_hub as hub
@@ -36,12 +41,12 @@ class TaggingConfig(cfg.TaskConfig):
  model: encoders.TransformerEncoderConfig = (
      encoders.TransformerEncoderConfig())
-  # The number of real labels. Note that a word may be tokenized into
-  # multiple word_pieces tokens, and we assume the real label id (non-negative)
-  # is assigned to the first token of the word, and a negative label id is
-  # assigned to the remaining tokens. The negative label id will not contribute
-  # to loss and metrics.
-  num_classes: int = 0
+  # The real class names, the order of which should match real label id.
+  # Note that a word may be tokenized into multiple word_pieces tokens, and
+  # we assume the real label id (non-negative) is assigned to the first token
+  # of the word, and a negative label id is assigned to the remaining tokens.
+  # The negative label id will not contribute to loss and metrics.
+  class_names: Optional[List[str]] = None
  train_data: cfg.DataConfig = cfg.DataConfig()
  validation_data: cfg.DataConfig = cfg.DataConfig()
@@ -70,13 +75,13 @@ def _masked_labels_and_weights(y_true):
class TaggingTask(base_task.Task):
  """Task object for tagging (e.g., NER or POS)."""

-  def __init__(self, params=cfg.TaskConfig):
-    super(TaggingTask, self).__init__(params)
+  def __init__(self, params=cfg.TaskConfig, logging_dir=None):
+    super(TaggingTask, self).__init__(params, logging_dir)
    if params.hub_module_url and params.init_checkpoint:
      raise ValueError('At most one of `hub_module_url` and '
                       '`init_checkpoint` can be specified.')
-    if params.num_classes == 0:
-      raise ValueError('TaggingConfig.num_classes cannot be 0.')
+    if not params.class_names:
+      raise ValueError('TaggingConfig.class_names cannot be empty.')
    if params.hub_module_url:
      self._hub_module = hub.load(params.hub_module_url)
@@ -92,7 +97,7 @@ class TaggingTask(base_task.Task):
    return models.BertTokenClassifier(
        network=encoder_network,
-        num_classes=self.task_config.num_classes,
+        num_classes=len(self.task_config.class_names),
        initializer=tf.keras.initializers.TruncatedNormal(
            stddev=self.task_config.model.initializer_range),
        dropout_rate=self.task_config.model.dropout_rate,
@@ -123,7 +128,7 @@ class TaggingTask(base_task.Task):
      y = tf.random.uniform(
          shape=(1, params.seq_length),
          minval=-1,
-          maxval=self.task_config.num_classes,
+          maxval=len(self.task_config.class_names),
          dtype=tf.dtypes.int32)
      return (x, y)
@@ -136,19 +141,66 @@ class TaggingTask(base_task.Task):
    dataset = tagging_data_loader.TaggingDataLoader(params).load(input_context)
    return dataset

-  def build_metrics(self, training=None):
-    del training
-    # TODO(chendouble): evaluate using seqeval's f1/precision/recall.
-    return [tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')]
-
-  def process_metrics(self, metrics, labels, model_outputs):
-    masked_labels, masked_weights = _masked_labels_and_weights(labels)
-    for metric in metrics:
-      metric.update_state(masked_labels, model_outputs, masked_weights)
-
-  def process_compiled_metrics(self, compiled_metrics, labels, model_outputs):
-    masked_labels, masked_weights = _masked_labels_and_weights(labels)
-    compiled_metrics.update_state(masked_labels, model_outputs, masked_weights)
+  def validation_step(self, inputs, model: tf.keras.Model, metrics=None):
+    """Validation step.
+
+    Args:
+      inputs: a dictionary of input tensors.
+      model: the keras.Model.
+      metrics: a nested structure of metrics objects.
+
+    Returns:
+      A dictionary of logs.
+    """
+    features, labels = inputs
+    outputs = self.inference_step(features, model)
+    loss = self.build_losses(labels=labels, model_outputs=outputs)
+
+    # Negative label ids are padding labels which should be ignored.
+    real_label_index = tf.where(tf.greater_equal(labels, 0))
+    predict_ids = tf.math.argmax(outputs, axis=-1)
+    predict_ids = tf.gather_nd(predict_ids, real_label_index)
+    label_ids = tf.gather_nd(labels, real_label_index)
+    return {
+        self.loss: loss,
+        'predict_ids': predict_ids,
+        'label_ids': label_ids,
+    }
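For readers following the masking above, a small illustrative sketch (toy tensors, not from this commit) of how `tf.where` plus `tf.gather_nd` drops the padded word-piece positions:

```python
import tensorflow as tf

# Toy batch: 2 sentences, 4 word-piece positions, 3 tag classes.
# Negative ids mark word pieces that carry no real label.
labels = tf.constant([[1, -1, 0, -1],
                      [2, 0, -1, -1]])
logits = tf.random.uniform((2, 4, 3))

real_label_index = tf.where(tf.greater_equal(labels, 0))   # shape [N, 2]
predict_ids = tf.math.argmax(logits, axis=-1)              # shape [2, 4]

predict_ids = tf.gather_nd(predict_ids, real_label_index)  # shape [N]
label_ids = tf.gather_nd(labels, real_label_index)         # -> [1, 0, 2, 0]
```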
+  def aggregate_logs(self, state=None, step_outputs=None):
+    """Aggregates over logs returned from a validation step."""
+    if state is None:
+      state = {'predict_class': [], 'label_class': []}
+
+    def id_to_class_name(batched_ids):
+      class_names = []
+      for per_example_ids in batched_ids:
+        class_names.append([])
+        for per_token_id in per_example_ids.numpy().tolist():
+          class_names[-1].append(self.task_config.class_names[per_token_id])
+      return class_names
+
+    # Convert id to class names, because `seqeval_metrics` relies on the class
+    # name to decide IOB tags.
+    state['predict_class'].extend(id_to_class_name(step_outputs['predict_ids']))
+    state['label_class'].extend(id_to_class_name(step_outputs['label_ids']))
+    return state
+
+  def reduce_aggregated_logs(self, aggregated_logs):
+    """Reduces aggregated logs over validation steps."""
+    label_class = aggregated_logs['label_class']
+    predict_class = aggregated_logs['predict_class']
+    return {
+        'f1':
+            seqeval_metrics.f1_score(label_class, predict_class),
+        'precision':
+            seqeval_metrics.precision_score(label_class, predict_class),
+        'recall':
+            seqeval_metrics.recall_score(label_class, predict_class),
+        'accuracy':
+            seqeval_metrics.accuracy_score(label_class, predict_class),
+    }
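The id-to-name conversion matters because seqeval parses IOB-style tag strings rather than integer ids; a minimal illustrative call with toy tag sequences (not from this commit):

```python
from seqeval import metrics as seqeval_metrics

# Two toy sentences with gold and predicted IOB tags.
label_class = [['B-PER', 'I-PER', 'O'], ['O', 'B-PER', 'O']]
predict_class = [['B-PER', 'O', 'O'], ['O', 'B-PER', 'O']]

print(seqeval_metrics.f1_score(label_class, predict_class))
print(seqeval_metrics.precision_score(label_class, predict_class))
print(seqeval_metrics.recall_score(label_class, predict_class))
print(seqeval_metrics.accuracy_score(label_class, predict_class))
```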

  def initialize(self, model):
    """Load a pretrained checkpoint (if exists) and then train from iter 0."""
......
@@ -58,7 +58,7 @@ class TaggingTest(tf.test.TestCase):
        init_checkpoint=saved_path,
        model=self._encoder_config,
        train_data=self._train_data_config,
-        num_classes=3)
+        class_names=["O", "B-PER", "I-PER"])
    task = tagging.TaggingTask(config)
    model = task.build_model()
    metrics = task.build_metrics()
@@ -74,7 +74,7 @@ class TaggingTest(tf.test.TestCase):
    config = tagging.TaggingConfig(
        model=self._encoder_config,
        train_data=self._train_data_config,
-        num_classes=3)
+        class_names=["O", "B-PER", "I-PER"])
    task = tagging.TaggingTask(config)
    model = task.build_model()
@@ -116,10 +116,31 @@ class TaggingTest(tf.test.TestCase):
    config = tagging.TaggingConfig(
        hub_module_url=hub_module_url,
        model=self._encoder_config,
-        num_classes=4,
+        class_names=["O", "B-PER", "I-PER"],
        train_data=self._train_data_config)
    self._run_task(config)
+
+  def test_seqeval_metrics(self):
+    config = tagging.TaggingConfig(
+        model=self._encoder_config,
+        train_data=self._train_data_config,
+        class_names=["O", "B-PER", "I-PER"])
+    task = tagging.TaggingTask(config)
+    model = task.build_model()
+    dataset = task.build_inputs(config.train_data)
+
+    iterator = iter(dataset)
+    strategy = tf.distribute.get_strategy()
+    distributed_outputs = strategy.run(
+        functools.partial(task.validation_step, model=model),
+        args=(next(iterator),))
+    outputs = tf.nest.map_structure(strategy.experimental_local_results,
+                                    distributed_outputs)
+    aggregated = task.aggregate_logs(step_outputs=outputs)
+    aggregated = task.aggregate_logs(state=aggregated, step_outputs=outputs)
+    self.assertCountEqual({"f1", "precision", "recall", "accuracy"},
+                          task.reduce_aggregated_logs(aggregated).keys())

if __name__ == "__main__":
  tf.test.main()
@@ -18,9 +18,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

-import numpy as np
import tensorflow as tf

-K = tf.keras.backend

class LearningRateSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
@@ -66,72 +64,3 @@ class LearningRateSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
        'hidden_size': self.hidden_size,
        'warmup_steps': self.warmup_steps,
    }
-
-
-class LearningRateFn(object):
-  """Creates learning rate function."""
-
-  def __init__(self, learning_rate, hidden_size, warmup_steps):
-    self.learning_rate = learning_rate
-    self.hidden_size = hidden_size
-    self.warmup_steps = float(warmup_steps)
-
-  def __call__(self, global_step):
-    """Calculate learning rate with linear warmup and rsqrt decay."""
-    step = float(global_step)
-    learning_rate = self.learning_rate
-    learning_rate *= (self.hidden_size ** -0.5)
-    # Apply linear warmup
-    learning_rate *= np.minimum(1.0, step / self.warmup_steps)
-    # Apply rsqrt decay
-    learning_rate /= np.sqrt(np.maximum(step, self.warmup_steps))
-    return learning_rate
-
-
-class LearningRateScheduler(tf.keras.callbacks.Callback):
-  """Keras callback to schedule learning rate.
-
-  TODO(tianlin): Refactor this scheduler and LearningRateBatchScheduler in
-  official/resnet/keras/keras_common.py.
-  """
-
-  def __init__(self, schedule, init_steps=None, verbose=False):
-    super(LearningRateScheduler, self).__init__()
-    self.schedule = schedule
-    self.verbose = verbose
-    if init_steps is None:
-      init_steps = 0.0
-    self.steps = float(init_steps)  # Total steps during training.
-
-  def on_epoch_begin(self, epoch, logs=None):
-    if not hasattr(self.model.optimizer, 'lr'):
-      raise ValueError('Optimizer must have a "lr" attribute.')
-    if not hasattr(self.model.optimizer, 'iterations'):
-      raise ValueError('Optimizer must have a "iterations" attribute.')
-
-  def on_train_batch_begin(self, batch, logs=None):
-    """Adjusts learning rate for each train batch."""
-    if self.verbose > 0:
-      iterations = K.get_value(self.model.optimizer.iterations)
-      print('Original iteration %d' % iterations)
-
-    self.steps += 1.0
-    try:  # new API
-      lr = float(K.get_value(self.model.optimizer.lr))
-      lr = self.schedule(self.steps, lr)
-    except TypeError:  # Support for old API for backward compatibility
-      lr = self.schedule(self.steps)
-    if not isinstance(lr, (float, np.float32, np.float64)):
-      raise ValueError('The output of the "schedule" function '
-                       'should be float.')
-    K.set_value(self.model.optimizer.lr, lr)
-    K.set_value(self.model.optimizer.iterations, self.steps)
-    if self.verbose > 0:
-      print('Batch %05d Step %05d: LearningRateScheduler setting learning '
-            'rate to %s.' % (batch + 1, self.steps, lr))
-
-  def on_epoch_end(self, epoch, logs=None):
-    logs = logs or {}
-    logs['lr'] = K.get_value(self.model.optimizer.lr)
-    logs['steps'] = self.steps
@@ -241,7 +241,7 @@ class TransformerTask(object):
    if params["use_ctl"]:
      train_ds_iterator = iter(train_ds)

-    callbacks = self._create_callbacks(flags_obj.model_dir, 0, params)
+    callbacks = self._create_callbacks(flags_obj.model_dir, params)

    # Only TimeHistory callback is supported for CTL
    if params["use_ctl"]:
@@ -408,14 +408,9 @@ class TransformerTask(object):
    for i in range(length):
      translate.translate_from_input(val_outputs[i], subtokenizer)

-  def _create_callbacks(self, cur_log_dir, init_steps, params):
+  def _create_callbacks(self, cur_log_dir, params):
    """Creates a list of callbacks."""
-    sfunc = optimizer.LearningRateFn(params["learning_rate"],
-                                     params["hidden_size"],
-                                     params["learning_rate_warmup_steps"])
-    scheduler_callback = optimizer.LearningRateScheduler(sfunc, init_steps)
    callbacks = misc.get_callbacks()
-    callbacks.append(scheduler_callback)
    if params["enable_checkpointing"]:
      ckpt_full_path = os.path.join(cur_log_dir, "cp-{epoch:04d}.ckpt")
      callbacks.append(
@@ -445,7 +440,7 @@ class TransformerTask(object):
        params["learning_rate"], params["hidden_size"],
        params["learning_rate_warmup_steps"])
    opt = tf.keras.optimizers.Adam(
-        lr_schedule if self.use_tpu else params["learning_rate"],
+        lr_schedule,
        params["optimizer_adam_beta1"],
        params["optimizer_adam_beta2"],
        epsilon=params["optimizer_adam_epsilon"])
......
@@ -181,7 +181,7 @@ def translate_file(model,
    raise ValueError("File output is a directory, will not save outputs to "
                     "file.")
  logging.info("Writing to file %s", output_file)
-  with tf.compat.v1.gfile.Open(output_file, "w") as f:
+  with tf.io.gfile.GFile(output_file, "w") as f:
    for i in sorted_keys:
      f.write("%s\n" % translations[i])
......
@@ -45,6 +45,9 @@ def _get_requirements():
      os.path.join(os.path.dirname(__file__), '../requirements.txt'), 'r') as f:
    for line in f:
      package_name = line.strip()
+      # Skip empty line or comments starting with "#".
+      if not package_name or package_name[0] == '#':
+        continue
      if package_name.startswith('-e '):
        dependency_links_tmp.append(package_name[3:].strip())
      else:
......
@@ -16,10 +16,13 @@ dataclasses
gin-config
tf_slim>=1.1.0
typing
-sentencepiece
Cython
matplotlib
-opencv-python-headless
pyyaml
+# CV related dependencies
+opencv-python-headless
Pillow
-e git+https://github.com/cocodataset/cocoapi#egg=pycocotools&subdirectory=PythonAPI
+# NLP related dependencies
+seqeval
+sentencepiece
@@ -48,6 +48,22 @@ so the checkpoints are not compatible.
We will unify the implementation soon.

### Train a SpineNet-49 based RetinaNet.

```bash
TPU_NAME="<your GCP TPU name>"
MODEL_DIR="<path to the directory to store model files>"
TRAIN_FILE_PATTERN="<path to the TFRecord training data>"
EVAL_FILE_PATTERN="<path to the TFRecord validation data>"
VAL_JSON_FILE="<path to the validation annotation JSON file>"
python3 ~/models/official/vision/detection/main.py \
  --strategy_type=tpu \
  --tpu="${TPU_NAME?}" \
  --model_dir="${MODEL_DIR?}" \
  --mode=train \
  --params_override="{ type: retinanet, architecture: {backbone: spinenet, multilevel_features: identity}, spinenet: {model_id: 49}, train: { train_file_pattern: ${TRAIN_FILE_PATTERN?} }, eval: { val_json_file: ${VAL_JSON_FILE?}, eval_file_pattern: ${EVAL_FILE_PATTERN?} } }"
```

### Train a custom RetinaNet using the config file.
@@ -163,6 +179,24 @@ so the checkpoints are not compatible.
We will unify the implementation soon.

### Train a SpineNet-49 based Mask R-CNN.

```bash
TPU_NAME="<your GCP TPU name>"
MODEL_DIR="<path to the directory to store model files>"
TRAIN_FILE_PATTERN="<path to the TFRecord training data>"
EVAL_FILE_PATTERN="<path to the TFRecord validation data>"
VAL_JSON_FILE="<path to the validation annotation JSON file>"
python3 ~/models/official/vision/detection/main.py \
  --strategy_type=tpu \
  --tpu="${TPU_NAME?}" \
  --model_dir="${MODEL_DIR?}" \
  --mode=train \
  --model=mask_rcnn \
  --params_override="{architecture: {backbone: spinenet, multilevel_features: identity}, spinenet: {model_id: 49}, train: { train_file_pattern: ${TRAIN_FILE_PATTERN?} }, eval: { val_json_file: ${VAL_JSON_FILE?}, eval_file_pattern: ${EVAL_FILE_PATTERN?} } }"
```

### Train a custom Mask R-CNN using the config file.

First, create a YAML config file, e.g. *my_maskrcnn.yaml*.
......
@@ -17,10 +17,12 @@
BACKBONES = [
    'resnet',
+    'spinenet',
]

MULTILEVEL_FEATURES = [
    'fpn',
+    'identity',
]

# pylint: disable=line-too-long
@@ -118,6 +120,9 @@ BASE_CFG = {
    'resnet': {
        'resnet_depth': 50,
    },
+    'spinenet': {
+        'model_id': '49',
+    },
    'fpn': {
        'fpn_feat_dims': 256,
        'use_separable_conv': False,
......
@@ -23,6 +23,7 @@ from official.vision.detection.modeling.architecture import heads
from official.vision.detection.modeling.architecture import identity
from official.vision.detection.modeling.architecture import nn_ops
from official.vision.detection.modeling.architecture import resnet
+from official.vision.detection.modeling.architecture import spinenet


def norm_activation_generator(params):
@@ -42,6 +43,9 @@ def backbone_generator(params):
        activation=params.norm_activation.activation,
        norm_activation=norm_activation_generator(
            params.norm_activation))
+  elif params.architecture.backbone == 'spinenet':
+    spinenet_params = params.spinenet
+    backbone_fn = spinenet.SpineNetBuilder(model_id=spinenet_params.model_id)
  else:
    raise ValueError('Backbone model `{}` is not supported.'
                     .format(params.architecture.backbone))
......
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains common building blocks for neural networks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from official.modeling import tf_utils
@tf.keras.utils.register_keras_serializable(package='Vision')
class ResidualBlock(tf.keras.layers.Layer):
"""A residual block."""
def __init__(self,
filters,
strides,
use_projection=False,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
**kwargs):
"""A residual block with BN after convolutions.
Args:
filters: `int` number of filters for the two convolutions in this block.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
Default to None.
activation: `str` name of the activation function.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization momentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
**kwargs: keyword arguments to be passed.
"""
super(ResidualBlock, self).__init__(**kwargs)
self._filters = filters
self._strides = strides
self._use_projection = use_projection
self._use_sync_bn = use_sync_bn
self._activation = activation
self._kernel_initializer = kernel_initializer
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
if use_sync_bn:
self._norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
self._norm = tf.keras.layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._activation_fn = tf_utils.get_activation(activation)
def build(self, input_shape):
if self._use_projection:
self._shortcut = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=1,
strides=self._strides,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm0 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv1 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=3,
strides=self._strides,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm1 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv2 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=3,
strides=1,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm2 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
super(ResidualBlock, self).build(input_shape)
def get_config(self):
config = {
'filters': self._filters,
'strides': self._strides,
'use_projection': self._use_projection,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
}
base_config = super(ResidualBlock, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(self, inputs):
shortcut = inputs
if self._use_projection:
shortcut = self._shortcut(shortcut)
shortcut = self._norm0(shortcut)
x = self._conv1(inputs)
x = self._norm1(x)
x = self._activation_fn(x)
x = self._conv2(x)
x = self._norm2(x)
return self._activation_fn(x + shortcut)
@tf.keras.utils.register_keras_serializable(package='Vision')
class BottleneckBlock(tf.keras.layers.Layer):
"""A standard bottleneck block."""
def __init__(self,
filters,
strides,
use_projection=False,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
**kwargs):
"""A standard bottleneck block with BN after convolutions.
Args:
filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
Default to None.
activation: `str` name of the activation function.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization momentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
**kwargs: keyword arguments to be passed.
"""
super(BottleneckBlock, self).__init__(**kwargs)
self._filters = filters
self._strides = strides
self._use_projection = use_projection
self._use_sync_bn = use_sync_bn
self._activation = activation
self._kernel_initializer = kernel_initializer
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
if use_sync_bn:
self._norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
self._norm = tf.keras.layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._activation_fn = tf_utils.get_activation(activation)
def build(self, input_shape):
if self._use_projection:
self._shortcut = tf.keras.layers.Conv2D(
filters=self._filters * 4,
kernel_size=1,
strides=self._strides,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm0 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv1 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm1 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv2 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=3,
strides=self._strides,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm2 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv3 = tf.keras.layers.Conv2D(
filters=self._filters * 4,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm3 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
super(BottleneckBlock, self).build(input_shape)
def get_config(self):
config = {
'filters': self._filters,
'strides': self._strides,
'use_projection': self._use_projection,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
}
base_config = super(BottleneckBlock, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(self, inputs):
shortcut = inputs
if self._use_projection:
shortcut = self._shortcut(shortcut)
shortcut = self._norm0(shortcut)
x = self._conv1(inputs)
x = self._norm1(x)
x = self._activation_fn(x)
x = self._conv2(x)
x = self._norm2(x)
x = self._activation_fn(x)
x = self._conv3(x)
x = self._norm3(x)
return self._activation_fn(x + shortcut)
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of SpineNet model.
X. Du, T-Y. Lin, P. Jin, G. Ghiasi, M. Tan, Y. Cui, Q. V. Le, X. Song
SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization
https://arxiv.org/abs/1912.05027
"""
import math
from absl import logging
import tensorflow as tf
from tensorflow.python.keras import backend
from official.modeling import tf_utils
from official.vision.detection.modeling.architecture import nn_blocks
layers = tf.keras.layers
FILTER_SIZE_MAP = {
1: 32,
2: 64,
3: 128,
4: 256,
5: 256,
6: 256,
7: 256,
}
# The fixed SpineNet architecture discovered by NAS.
# Each element represents a specification of a building block:
# (block_level, block_fn, (input_offset0, input_offset1), is_output).
SPINENET_BLOCK_SPECS = [
(2, 'bottleneck', (0, 1), False),
(4, 'residual', (0, 1), False),
(3, 'bottleneck', (2, 3), False),
(4, 'bottleneck', (2, 4), False),
(6, 'residual', (3, 5), False),
(4, 'bottleneck', (3, 5), False),
(5, 'residual', (6, 7), False),
(7, 'residual', (6, 8), False),
(5, 'bottleneck', (8, 9), False),
(5, 'bottleneck', (8, 10), False),
(4, 'bottleneck', (5, 10), True),
(3, 'bottleneck', (4, 10), True),
(5, 'bottleneck', (7, 12), True),
(7, 'bottleneck', (5, 14), True),
(6, 'bottleneck', (12, 14), True),
]
SCALING_MAP = {
'49S': {
'endpoints_num_filters': 128,
'filter_size_scale': 0.65,
'resample_alpha': 0.5,
'block_repeats': 1,
},
'49': {
'endpoints_num_filters': 256,
'filter_size_scale': 1.0,
'resample_alpha': 0.5,
'block_repeats': 1,
},
'96': {
'endpoints_num_filters': 256,
'filter_size_scale': 1.0,
'resample_alpha': 0.5,
'block_repeats': 2,
},
'143': {
'endpoints_num_filters': 256,
'filter_size_scale': 1.0,
'resample_alpha': 1.0,
'block_repeats': 3,
},
'190': {
'endpoints_num_filters': 512,
'filter_size_scale': 1.3,
'resample_alpha': 1.0,
'block_repeats': 4,
},
}
class BlockSpec(object):
"""A container class that specifies the block configuration for SpineNet."""
def __init__(self, level, block_fn, input_offsets, is_output):
self.level = level
self.block_fn = block_fn
self.input_offsets = input_offsets
self.is_output = is_output
def build_block_specs(block_specs=None):
"""Builds the list of BlockSpec objects for SpineNet."""
if not block_specs:
block_specs = SPINENET_BLOCK_SPECS
logging.info('Building SpineNet block specs: %s', block_specs)
return [BlockSpec(*b) for b in block_specs]
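For illustration (assuming the definitions above in this module), the first tuple in SPINENET_BLOCK_SPECS maps onto a BlockSpec like so:

```python
# Illustrative only: the first entry, (2, 'bottleneck', (0, 1), False), is a
# level-2 bottleneck block whose two inputs are the stem blocks at offsets 0
# and 1, and it is not an output block.
specs = build_block_specs()
first = specs[0]
print(first.level, first.block_fn, first.input_offsets, first.is_output)
# -> 2 bottleneck (0, 1) False
```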
@tf.keras.utils.register_keras_serializable(package='Vision')
class SpineNet(tf.keras.Model):
"""Class to build SpineNet models."""
def __init__(self,
input_specs=tf.keras.layers.InputSpec(shape=[None, 640, 640, 3]),
min_level=3,
max_level=7,
block_specs=build_block_specs(),
endpoints_num_filters=256,
resample_alpha=0.5,
block_repeats=1,
filter_size_scale=1.0,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
**kwargs):
"""SpineNet model."""
self._min_level = min_level
self._max_level = max_level
self._block_specs = block_specs
self._endpoints_num_filters = endpoints_num_filters
self._resample_alpha = resample_alpha
self._block_repeats = block_repeats
self._filter_size_scale = filter_size_scale
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
self._use_sync_bn = use_sync_bn
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
if activation == 'relu':
self._activation = tf.nn.relu
elif activation == 'swish':
self._activation = tf.nn.swish
else:
raise ValueError('Activation {} not implemented.'.format(activation))
self._init_block_fn = 'bottleneck'
self._num_init_blocks = 2
if use_sync_bn:
self._norm = layers.experimental.SyncBatchNormalization
else:
self._norm = layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
# Build SpineNet.
inputs = tf.keras.Input(shape=input_specs.shape[1:])
net = self._build_stem(inputs=inputs)
net = self._build_scale_permuted_network(
net=net, input_width=input_specs.shape[1])
net = self._build_endpoints(net=net)
super(SpineNet, self).__init__(inputs=inputs, outputs=net)
def _block_group(self,
inputs,
filters,
strides,
block_fn_cand,
block_repeats=1,
name='block_group'):
"""Creates one group of blocks for the SpineNet model."""
block_fn_candidates = {
'bottleneck': nn_blocks.BottleneckBlock,
'residual': nn_blocks.ResidualBlock,
}
block_fn = block_fn_candidates[block_fn_cand]
_, _, _, num_filters = inputs.get_shape().as_list()
if block_fn_cand == 'bottleneck':
use_projection = not (num_filters == (filters * 4) and strides == 1)
else:
use_projection = not (num_filters == filters and strides == 1)
x = block_fn(
filters=filters,
strides=strides,
use_projection=use_projection,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=self._activation,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)(
inputs)
for _ in range(1, block_repeats):
x = block_fn(
filters=filters,
strides=1,
use_projection=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=self._activation,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)(
x)
return tf.identity(x, name=name)
def _build_stem(self, inputs):
"""Build SpineNet stem."""
x = layers.Conv2D(
filters=64,
kernel_size=7,
strides=2,
use_bias=False,
padding='same',
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
inputs)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)
net = []
# Build the initial level 2 blocks.
for i in range(self._num_init_blocks):
x = self._block_group(
inputs=x,
filters=int(FILTER_SIZE_MAP[2] * self._filter_size_scale),
strides=1,
block_fn_cand=self._init_block_fn,
block_repeats=self._block_repeats,
name='stem_block_{}'.format(i + 1))
net.append(x)
return net
def _build_scale_permuted_network(self,
net,
input_width,
weighted_fusion=False):
"""Build scale-permuted network."""
net_sizes = [int(math.ceil(input_width / 2**2))] * len(net)
net_block_fns = [self._init_block_fn] * len(net)
num_outgoing_connections = [0] * len(net)
endpoints = {}
for i, block_spec in enumerate(self._block_specs):
# Find out specs for the target block.
target_width = int(math.ceil(input_width / 2**block_spec.level))
target_num_filters = int(FILTER_SIZE_MAP[block_spec.level] *
self._filter_size_scale)
target_block_fn = block_spec.block_fn
# Resample then merge input0 and input1.
parents = []
input0 = block_spec.input_offsets[0]
input1 = block_spec.input_offsets[1]
x0 = self._resample_with_alpha(
inputs=net[input0],
input_width=net_sizes[input0],
input_block_fn=net_block_fns[input0],
target_width=target_width,
target_num_filters=target_num_filters,
target_block_fn=target_block_fn,
alpha=self._resample_alpha)
parents.append(x0)
num_outgoing_connections[input0] += 1
x1 = self._resample_with_alpha(
inputs=net[input1],
input_width=net_sizes[input1],
input_block_fn=net_block_fns[input1],
target_width=target_width,
target_num_filters=target_num_filters,
target_block_fn=target_block_fn,
alpha=self._resample_alpha)
parents.append(x1)
num_outgoing_connections[input1] += 1
# Merge 0 outdegree blocks to the output block.
if block_spec.is_output:
for j, (j_feat,
j_connections) in enumerate(zip(net, num_outgoing_connections)):
if j_connections == 0 and (j_feat.shape[2] == target_width and
j_feat.shape[3] == x0.shape[3]):
parents.append(j_feat)
num_outgoing_connections[j] += 1
# pylint: disable=g-direct-tensorflow-import
if weighted_fusion:
dtype = parents[0].dtype
parent_weights = [
tf.nn.relu(tf.cast(tf.Variable(1.0, name='block{}_fusion{}'.format(
i, j)), dtype=dtype)) for j in range(len(parents))]
weights_sum = tf.add_n(parent_weights)
parents = [
parents[i] * parent_weights[i] / (weights_sum + 0.0001)
for i in range(len(parents))
]
# Fuse all parent nodes then build a new block.
x = tf_utils.get_activation(self._activation)(tf.add_n(parents))
x = self._block_group(
inputs=x,
filters=target_num_filters,
strides=1,
block_fn_cand=target_block_fn,
block_repeats=self._block_repeats,
name='scale_permuted_block_{}'.format(i + 1))
net.append(x)
net_sizes.append(target_width)
net_block_fns.append(target_block_fn)
num_outgoing_connections.append(0)
# Save output feats.
if block_spec.is_output:
if block_spec.level in endpoints:
raise ValueError('Duplicate feats found for output level {}.'.format(
block_spec.level))
if (block_spec.level < self._min_level or
block_spec.level > self._max_level):
raise ValueError('Output level is out of range [{}, {}]'.format(
self._min_level, self._max_level))
endpoints[block_spec.level] = x
return endpoints
def _build_endpoints(self, net):
"""Match filter size for endpoints before sharing conv layers."""
endpoints = {}
for level in range(self._min_level, self._max_level + 1):
x = layers.Conv2D(
filters=self._endpoints_num_filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
net[level])
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
endpoints[level] = x
return endpoints
def _resample_with_alpha(self,
inputs,
input_width,
input_block_fn,
target_width,
target_num_filters,
target_block_fn,
alpha=0.5):
"""Match resolution and feature dimension."""
_, _, _, input_num_filters = inputs.get_shape().as_list()
if input_block_fn == 'bottleneck':
input_num_filters /= 4
new_num_filters = int(input_num_filters * alpha)
x = layers.Conv2D(
filters=new_num_filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
inputs)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
# Spatial resampling.
if input_width > target_width:
x = layers.Conv2D(
filters=new_num_filters,
kernel_size=3,
strides=2,
padding='SAME',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
x)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
input_width /= 2
while input_width > target_width:
x = layers.MaxPool2D(pool_size=3, strides=2, padding='SAME')(x)
input_width /= 2
elif input_width < target_width:
scale = target_width // input_width
x = layers.UpSampling2D(size=(scale, scale))(x)
# Last 1x1 conv to match filter size.
if target_block_fn == 'bottleneck':
target_num_filters *= 4
x = layers.Conv2D(
filters=target_num_filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
x)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
return x
class SpineNetBuilder(object):
"""SpineNet builder."""
def __init__(self,
model_id,
input_specs=tf.keras.layers.InputSpec(shape=[None, 640, 640, 3]),
min_level=3,
max_level=7,
block_specs=build_block_specs(),
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001):
if model_id not in SCALING_MAP:
raise ValueError(
'SpineNet {} is not a valid architecture.'.format(model_id))
scaling_params = SCALING_MAP[model_id]
self._input_specs = input_specs
self._min_level = min_level
self._max_level = max_level
self._block_specs = block_specs
self._endpoints_num_filters = scaling_params['endpoints_num_filters']
self._resample_alpha = scaling_params['resample_alpha']
self._block_repeats = scaling_params['block_repeats']
self._filter_size_scale = scaling_params['filter_size_scale']
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
self._activation = activation
self._use_sync_bn = use_sync_bn
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
def __call__(self, inputs, is_training=None):
with backend.get_graph().as_default():
model = SpineNet(
input_specs=self._input_specs,
min_level=self._min_level,
max_level=self._max_level,
block_specs=self._block_specs,
endpoints_num_filters=self._endpoints_num_filters,
resample_alpha=self._resample_alpha,
block_repeats=self._block_repeats,
filter_size_scale=self._filter_size_scale,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=self._activation,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)
return model(inputs)
@@ -339,7 +339,8 @@ def train_and_eval(
  optimizer = optimizer_factory.build_optimizer(
      optimizer_name=params.model.optimizer.name,
      base_learning_rate=learning_rate,
-      params=params.model.optimizer.as_dict())
+      params=params.model.optimizer.as_dict(),
+      model=model)

  metrics_map = _get_metrics(one_hot)
  metrics = [metrics_map[metric] for metric in params.train.metrics]
......
@@ -18,11 +18,12 @@ from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function

+from typing import Any, Dict, Text, List

from absl import logging
import tensorflow as tf
import tensorflow_addons as tfa

-from typing import Any, Dict, Text, List
from official.vision.image_classification import learning_rate
from official.vision.image_classification.configs import base_configs
@@ -250,7 +251,8 @@ class MovingAverage(tf.keras.optimizers.Optimizer):
def build_optimizer(
    optimizer_name: Text,
    base_learning_rate: tf.keras.optimizers.schedules.LearningRateSchedule,
-    params: Dict[Text, Any]):
+    params: Dict[Text, Any],
+    model: tf.keras.Model = None):
  """Build the optimizer based on name.

  Args:
@@ -261,6 +263,8 @@ def build_optimizer(
    params: String -> Any dictionary representing the optimizer params.
      This should contain optimizer specific parameters such as
      `base_learning_rate`, `decay`, etc.
+    model: The `tf.keras.Model`. This is used for the shadow copy if using
+      `MovingAverage`.

  Returns:
    A tf.keras.Optimizer.
@@ -322,10 +326,13 @@ def build_optimizer(
  # Moving average should be applied last, as it's applied at test time
  moving_average_decay = params.get('moving_average_decay', 0.)
  if moving_average_decay is not None and moving_average_decay > 0.:
+    if model is None:
+      raise ValueError('`model` must be provided if using `MovingAverage`.')
    logging.info('Including moving average decay.')
    optimizer = MovingAverage(
-        optimizer,
+        optimizer=optimizer,
        average_decay=moving_average_decay)
+    optimizer.shadow_copy(model)
  return optimizer
......
@@ -19,15 +19,21 @@ from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function

-import tensorflow as tf
-
from absl.testing import parameterized
+import tensorflow as tf

from official.vision.image_classification import optimizer_factory
from official.vision.image_classification.configs import base_configs


class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):

+  def build_toy_model(self) -> tf.keras.Model:
+    """Creates a toy `tf.keras.Model`."""
+    model = tf.keras.Sequential()
+    model.add(tf.keras.layers.Dense(1, input_shape=(1,)))
+    return model
+
  @parameterized.named_parameters(
      ('sgd', 'sgd', 0., False),
      ('momentum', 'momentum', 0., False),
@@ -40,6 +46,7 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
      ('rmsprop_ema', 'rmsprop', 0.999, False))
  def test_optimizer(self, optimizer_name, moving_average_decay, lookahead):
    """Smoke test to be sure no syntax errors."""
+    model = self.build_toy_model()
    params = {
        'learning_rate': 0.001,
        'rho': 0.09,
@@ -51,7 +58,8 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
    optimizer = optimizer_factory.build_optimizer(
        optimizer_name=optimizer_name,
        base_learning_rate=params['learning_rate'],
-        params=params)
+        params=params,
+        model=model)
    self.assertTrue(issubclass(type(optimizer), tf.keras.optimizers.Optimizer))

  def test_unknown_optimizer(self):
......
@@ -86,6 +86,9 @@ message DelfConfig {
  // Path to DELF model.
  optional string model_path = 1;  // Required.

+  // Whether model has been exported using TF version 2+.
+  optional bool is_tf2_exported = 10 [default = false];
+
  // Image scales to be used.
  repeated float image_scales = 2;
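A hedged example of setting the new field from Python; it assumes the generated `delf_config_pb2` module is importable as shown (the import path and the model path are not part of this diff):

```python
from google.protobuf import text_format
from delf import delf_config_pb2  # assumed import path; not part of this diff

config = delf_config_pb2.DelfConfig()
text_format.Parse(
    'model_path: "/tmp/delf_model"  # placeholder path\n'
    'is_tf2_exported: true\n'
    'image_scales: 0.5\n'
    'image_scales: 1.0\n', config)
print(config.is_tf2_exported)  # True
```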
......
@@ -131,7 +131,7 @@ def main(argv):
  delf_dataset = tf.data.Dataset.from_tensor_slices((features_placeholder))
  delf_dataset = delf_dataset.shuffle(1000).batch(
      features_for_clustering.shape[0])
-  iterator = delf_dataset.make_initializable_iterator()
+  iterator = tf.compat.v1.data.make_initializable_iterator(delf_dataset)

  def _initializer_fn(sess):
    """Initialize dataset iterator, feed in the data."""
......
@@ -102,7 +102,15 @@ def MakeExtractor(config):
  Returns:
    Function that receives an image and returns features.
+
+  Raises:
+    ValueError: if config is invalid.
  """
+  # Assert the configuration.
+  if config.use_global_features and hasattr(
+      config, 'is_tf2_exported') and config.is_tf2_exported:
+    raise ValueError('use_global_features is incompatible with is_tf2_exported')
+
  # Load model.
  model = tf.saved_model.load(config.model_path)
@@ -178,7 +186,8 @@ def MakeExtractor(config):
  else:
    global_pca_parameters['variances'] = None

-  model = model.prune(feeds=feeds, fetches=fetches)
+  if not hasattr(config, 'is_tf2_exported') or not config.is_tf2_exported:
+    model = model.prune(feeds=feeds, fetches=fetches)

  def ExtractorFn(image, resize_factor=1.0):
    """Receives an image and returns DELF global and/or local features.
@@ -197,7 +206,6 @@ def MakeExtractor(config):
      features (key 'local_features' mapping to a dict with keys 'locations',
      'descriptors', 'scales', 'attention').
    """
-
    resized_image, scale_factors = ResizeImage(
        image, config, resize_factor=resize_factor)
@@ -224,8 +232,20 @@ def MakeExtractor(config):
    output = None
    if config.use_local_features:
-      output = model(image_tensor, image_scales_tensor, score_threshold_tensor,
-                     max_feature_num_tensor)
+      if hasattr(config, 'is_tf2_exported') and config.is_tf2_exported:
+        predict = model.signatures['serving_default']
+        output_dict = predict(
+            input_image=image_tensor,
+            input_scales=image_scales_tensor,
+            input_max_feature_num=max_feature_num_tensor,
+            input_abs_thres=score_threshold_tensor)
+        output = [
+            output_dict['boxes'], output_dict['features'],
+            output_dict['scales'], output_dict['scores']
+        ]
+      else:
+        output = model(image_tensor, image_scales_tensor,
+                       score_threshold_tensor, max_feature_num_tensor)
    else:
      output = model(image_tensor, image_scales_tensor)
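As background for the new branch, a generic sketch (hypothetical helper and placeholder path, not from this diff) of inspecting a TF2 export's `serving_default` signature before calling it by keyword:

```python
import tensorflow as tf

def describe_serving_signature(saved_model_dir):
  """Prints the serving signature of a TF2-exported SavedModel, if present."""
  model = tf.saved_model.load(saved_model_dir)
  if 'serving_default' in model.signatures:
    predict = model.signatures['serving_default']
    # The keyword arguments used above (input_image, input_scales, ...) must
    # match whatever the exporter recorded here.
    print(predict.structured_input_signature)
    print(predict.structured_outputs)
  else:
    # TF1-style exports are instead pruned to explicit feed/fetch tensors,
    # as in the `model.prune(...)` branch of this diff.
    print('No serving_default signature; prune to feeds/fetches instead.')
```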
......
@@ -269,8 +269,7 @@ class ExtractAggregatedRepresentation(object):
              axis=0), [num_assignments, 1]) - tf.gather(
                  codebook, selected_visual_words[ind])
      return ind + 1, tf.tensor_scatter_nd_add(
-          vlad, tf.expand_dims(selected_visual_words[ind], axis=1),
-          tf.cast(diff, dtype=tf.float32))
+          vlad, tf.expand_dims(selected_visual_words[ind], axis=1), diff)

    ind_vlad = tf.constant(0, dtype=tf.int32)
    keep_going = lambda j, vlad: tf.less(j, num_features)
@@ -396,9 +395,7 @@ class ExtractAggregatedRepresentation(object):
    visual_words = tf.reshape(
        tf.where(
-            tf.greater(
-                per_centroid_norms,
-                tf.cast(tf.sqrt(_NORM_SQUARED_TOLERANCE), dtype=tf.float32))),
+            tf.greater(per_centroid_norms, tf.sqrt(_NORM_SQUARED_TOLERANCE))),
        [-1])
    per_centroid_normalized_vector = tf.math.l2_normalize(
......