Commit cf80ed4e authored by anivegesana

Merge branch 'purdue-yolo' of https://github.com/tensorflow/models into detection_generator_pr_2

parents 394cefcc 461b3587
......@@ -43,6 +43,9 @@ S12: KernelSize = (1, 2, 2)
S22: KernelSize = (2, 2, 2)
S21: KernelSize = (2, 1, 1)
# Type for a state container (map)
TensorMap = Mapping[str, tf.Tensor]
@dataclasses.dataclass
class BlockSpec:
......@@ -319,6 +322,7 @@ class Movinet(tf.keras.Model):
bias_regularizer: Optional[str] = None,
stochastic_depth_drop_rate: float = 0.,
use_external_states: bool = False,
output_states: bool = True,
**kwargs):
"""MoViNet initialization function.
......@@ -353,6 +357,10 @@ class Movinet(tf.keras.Model):
stochastic_depth_drop_rate: the base rate for stochastic depth.
use_external_states: if True, expects states to be passed as additional
input.
output_states: if True, output intermediate states that can be used to run
the model in streaming mode. Feeding the output states of the previous
input clip in with the current clip reuses the stream buffer for
streaming video.
**kwargs: keyword arguments to be passed.
"""
block_specs = BLOCK_SPECS[model_id]
......@@ -385,6 +393,7 @@ class Movinet(tf.keras.Model):
self._bias_regularizer = bias_regularizer
self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
self._use_external_states = use_external_states
self._output_states = output_states
if self._use_external_states and not self._causal:
raise ValueError('External states should be used with causal mode.')
......@@ -411,8 +420,7 @@ class Movinet(tf.keras.Model):
self,
input_specs: tf.keras.layers.InputSpec,
state_specs: Optional[Mapping[str, tf.keras.layers.InputSpec]] = None,
) -> Tuple[Mapping[str, tf.keras.Input], Tuple[Mapping[str, tf.Tensor],
Mapping[str, tf.Tensor]]]:
) -> Tuple[TensorMap, Union[TensorMap, Tuple[TensorMap, TensorMap]]]:
"""Builds the model network.
Args:
......@@ -423,7 +431,7 @@ class Movinet(tf.keras.Model):
Returns:
Inputs and outputs as a tuple. Inputs are expected to be a dict with
base input and states. Outputs are expected to be a dict of endpoints
and output states.
and (optional) output states.
"""
state_specs = state_specs if state_specs is not None else {}
......@@ -519,7 +527,7 @@ class Movinet(tf.keras.Model):
else:
raise ValueError('Unknown block type {}'.format(block))
outputs = (endpoints, states)
outputs = (endpoints, states) if self._output_states else endpoints
return inputs, outputs
......@@ -679,6 +687,8 @@ class Movinet(tf.keras.Model):
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
'use_external_states': self._use_external_states,
'output_states': self._output_states,
}
return config_dict
......
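To make the new flag concrete, here is a sketch of streaming inference that feeds each clip's output states into the next call. The import paths and the init_states helper are assumptions based on the movinet project layout, not part of this diff:

import tensorflow as tf

from official.vision.beta.projects.movinet.modeling import movinet
from official.vision.beta.projects.movinet.modeling import movinet_model

backbone = movinet.Movinet(
    model_id='a0',
    causal=True,
    use_external_states=True,  # states arrive as extra inputs
    output_states=True)        # states come back out for the next clip
model = movinet_model.MovinetClassifier(
    backbone=backbone, num_classes=600, output_states=True)

video = tf.ones([1, 8, 172, 172, 3])
states = model.init_states(tf.shape(video))  # assumed helper
for clip in tf.split(video, num_or_size_splits=4, axis=1):
  # Passing the previous states keeps the stream buffer warm, so four
  # short calls approximate one 8-frame forward pass.
  logits, states = model({**states, 'image': clip})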
......@@ -265,7 +265,7 @@ class ConvBlock(tf.keras.layers.Layer):
tf.keras.regularizers.L2(KERNEL_WEIGHT_DECAY),
use_batch_norm: bool = True,
batch_norm_layer: tf.keras.layers.Layer =
tf.keras.layers.experimental.SyncBatchNormalization,
tf.keras.layers.BatchNormalization,
batch_norm_momentum: float = 0.99,
batch_norm_epsilon: float = 1e-3,
activation: Optional[Any] = None,
......@@ -547,8 +547,8 @@ class StreamConvBlock(ConvBlock):
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = tf.keras
.regularizers.L2(KERNEL_WEIGHT_DECAY),
use_batch_norm: bool = True,
batch_norm_layer: tf.keras.layers.Layer = tf.keras.layers.experimental
.SyncBatchNormalization,
batch_norm_layer: tf.keras.layers.Layer =
tf.keras.layers.BatchNormalization,
batch_norm_momentum: float = 0.99,
batch_norm_epsilon: float = 1e-3,
activation: Optional[Any] = None,
......@@ -915,7 +915,7 @@ class SkipBlock(tf.keras.layers.Layer):
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] =
tf.keras.regularizers.L2(KERNEL_WEIGHT_DECAY),
batch_norm_layer: tf.keras.layers.Layer =
tf.keras.layers.experimental.SyncBatchNormalization,
tf.keras.layers.BatchNormalization,
batch_norm_momentum: float = 0.99,
batch_norm_epsilon: float = 1e-3,
**kwargs):
......@@ -1031,8 +1031,8 @@ class MovinetBlock(tf.keras.layers.Layer):
kernel_initializer: tf.keras.initializers.Initializer = 'HeNormal',
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = tf.keras
.regularizers.L2(KERNEL_WEIGHT_DECAY),
batch_norm_layer: tf.keras.layers.Layer = tf.keras.layers.experimental
.SyncBatchNormalization,
batch_norm_layer: tf.keras.layers.Layer =
tf.keras.layers.BatchNormalization,
batch_norm_momentum: float = 0.99,
batch_norm_epsilon: float = 1e-3,
state_prefix: Optional[str] = None,
......@@ -1078,7 +1078,6 @@ class MovinetBlock(tf.keras.layers.Layer):
se_ratio * expand_filters * se_multiplier, divisor=8)
self._out_filters = out_filters
self._expand_filters = expand_filters
self._kernel_size = kernel_size
self._causal = causal
self._activation = activation
self._gating_activation = gating_activation
......@@ -1232,8 +1231,8 @@ class Stem(tf.keras.layers.Layer):
kernel_initializer: tf.keras.initializers.Initializer = 'HeNormal',
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = tf.keras
.regularizers.L2(KERNEL_WEIGHT_DECAY),
batch_norm_layer: tf.keras.layers.Layer = tf.keras.layers.experimental
.SyncBatchNormalization,
batch_norm_layer: tf.keras.layers.Layer =
tf.keras.layers.BatchNormalization,
batch_norm_momentum: float = 0.99,
batch_norm_epsilon: float = 1e-3,
state_prefix: Optional[str] = None,
......@@ -1340,8 +1339,8 @@ class Head(tf.keras.layers.Layer):
kernel_initializer: tf.keras.initializers.Initializer = 'HeNormal',
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = tf.keras
.regularizers.L2(KERNEL_WEIGHT_DECAY),
batch_norm_layer: tf.keras.layers.Layer = tf.keras.layers.experimental
.SyncBatchNormalization,
batch_norm_layer: tf.keras.layers.Layer =
tf.keras.layers.BatchNormalization,
batch_norm_momentum: float = 0.99,
batch_norm_epsilon: float = 1e-3,
state_prefix: Optional[str] = None,
......@@ -1470,6 +1469,7 @@ class ClassifierHead(tf.keras.layers.Layer):
self._num_classes = num_classes
self._dropout_rate = dropout_rate
self._conv_type = conv_type
self._activation = activation
self._output_activation = output_activation
self._max_pool_predictions = max_pool_predictions
self._kernel_initializer = kernel_initializer
......@@ -1509,6 +1509,7 @@ class ClassifierHead(tf.keras.layers.Layer):
'num_classes': self._num_classes,
'dropout_rate': self._dropout_rate,
'conv_type': self._conv_type,
'activation': self._activation,
'output_activation': self._output_activation,
'max_pool_predictions': self._max_pool_predictions,
'kernel_initializer': self._kernel_initializer,
......
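Since the blocks above now default to plain BatchNormalization, here is a sketch of opting back in to the previous cross-replica behavior by passing the sync layer explicitly; the import path and the other ConvBlock arguments are illustrative:

import tensorflow as tf

from official.vision.beta.projects.movinet.modeling import movinet_layers

conv_block = movinet_layers.ConvBlock(
    filters=64,
    kernel_size=(1, 3, 3),
    # Restore cross-replica batch statistics under tf.distribute.
    batch_norm_layer=tf.keras.layers.experimental.SyncBatchNormalization)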
......@@ -36,6 +36,7 @@ class MovinetClassifier(tf.keras.Model):
backbone: tf.keras.Model,
num_classes: int,
input_specs: Optional[Mapping[str, tf.keras.layers.InputSpec]] = None,
activation: str = 'swish',
dropout_rate: float = 0.0,
kernel_initializer: str = 'HeNormal',
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
......@@ -48,6 +49,7 @@ class MovinetClassifier(tf.keras.Model):
backbone: A 3d backbone network.
num_classes: Number of classes in classification task.
input_specs: Specs of the input tensor.
activation: name of the main activation function.
dropout_rate: Rate for dropout regularization.
kernel_initializer: Kernel initializer for the final dense layer.
kernel_regularizer: Kernel regularizer.
......@@ -65,6 +67,7 @@ class MovinetClassifier(tf.keras.Model):
self._num_classes = num_classes
self._input_specs = input_specs
self._activation = activation
self._dropout_rate = dropout_rate
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
......@@ -151,7 +154,8 @@ class MovinetClassifier(tf.keras.Model):
dropout_rate=self._dropout_rate,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
conv_type=backbone.conv_type)(
conv_type=backbone.conv_type,
activation=self._activation)(
x)
outputs = (x, states) if self._output_states else x
......@@ -180,6 +184,7 @@ class MovinetClassifier(tf.keras.Model):
def get_config(self):
config = {
'backbone': self._backbone,
'activation': self._activation,
'num_classes': self._num_classes,
'input_specs': self._input_specs,
'dropout_rate': self._dropout_rate,
......@@ -226,6 +231,7 @@ def build_movinet_model(
num_classes=num_classes,
kernel_regularizer=l2_regularizer,
input_specs=input_specs_dict,
activation=model_config.activation,
dropout_rate=model_config.dropout_rate,
output_states=model_config.output_states)
......
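A sketch of the new activation plumbing end to end; the model_id and activation name are illustrative, and the import paths follow the movinet project layout:

from official.vision.beta.projects.movinet.modeling import movinet
from official.vision.beta.projects.movinet.modeling import movinet_model

backbone = movinet.Movinet(model_id='a0')
model = movinet_model.MovinetClassifier(
    backbone=backbone,
    num_classes=600,
    activation='hard_swish',  # new: forwarded to the classifier head
    dropout_rate=0.2)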
......@@ -15,15 +15,153 @@
"""Panoptic Mask R-CNN configuration definition."""
import dataclasses
import os
from typing import List, Optional
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import optimization
from official.vision.beta.configs import maskrcnn
from official.vision.beta.configs import semantic_segmentation
SEGMENTATION_MODEL = semantic_segmentation.SemanticSegmentationModel
SEGMENTATION_HEAD = semantic_segmentation.SegmentationHead
_COCO_INPUT_PATH_BASE = 'coco'
_COCO_TRAIN_EXAMPLES = 118287
_COCO_VAL_EXAMPLES = 5000
# pytype: disable=wrong-keyword-args
@dataclasses.dataclass
class Parser(maskrcnn.Parser):
"""Panoptic Mask R-CNN parser config."""
# If segmentation_resize_eval_groundtruth is set to False, original image
# sizes are used for eval. In that case,
# segmentation_groundtruth_padded_size has to be specified too to allow for
# batching the variable input sizes of images.
segmentation_resize_eval_groundtruth: bool = True
segmentation_groundtruth_padded_size: List[int] = dataclasses.field(
default_factory=list)
segmentation_ignore_label: int = 255
@dataclasses.dataclass
class DataConfig(maskrcnn.DataConfig):
"""Input config for training."""
parser: Parser = Parser()
@dataclasses.dataclass
class PanopticMaskRCNN(maskrcnn.MaskRCNN):
"""Panoptic Mask R-CNN model config."""
segmentation_model: semantic_segmentation.SemanticSegmentationModel = (
semantic_segmentation.SemanticSegmentationModel(num_classes=2))
SEGMENTATION_MODEL(num_classes=2))
include_mask = True
shared_backbone: bool = True
shared_decoder: bool = True
@dataclasses.dataclass
class Losses(maskrcnn.Losses):
"""Panoptic Mask R-CNN loss config."""
semantic_segmentation_label_smoothing: float = 0.0
semantic_segmentation_ignore_label: int = 255
semantic_segmentation_class_weights: List[float] = dataclasses.field(
default_factory=list)
semantic_segmentation_use_groundtruth_dimension: bool = True
semantic_segmentation_top_k_percent_pixels: float = 1.0
semantic_segmentation_weight: float = 1.0
@dataclasses.dataclass
class PanopticMaskRCNNTask(maskrcnn.MaskRCNNTask):
"""Panoptic Mask R-CNN task config."""
model: PanopticMaskRCNN = PanopticMaskRCNN()
train_data: DataConfig = DataConfig(is_training=True)
validation_data: DataConfig = DataConfig(is_training=False,
drop_remainder=False)
segmentation_evaluation: semantic_segmentation.Evaluation = semantic_segmentation.Evaluation() # pylint: disable=line-too-long
losses: Losses = Losses()
init_checkpoint: Optional[str] = None
segmentation_init_checkpoint: Optional[str] = None
# 'init_checkpoint_modules' controls the modules that need to be initialized
# from checkpoint paths given by 'init_checkpoint' and/or
# 'segmentation_init_checkpoint'. Supports modules:
# 'backbone': Initialize MaskRCNN backbone
# 'segmentation_backbone': Initialize segmentation backbone
# 'segmentation_decoder': Initialize segmentation decoder
# 'all': Initialize all modules
init_checkpoint_modules: Optional[List[str]] = dataclasses.field(
default_factory=list)
@exp_factory.register_config_factory('panoptic_maskrcnn_resnetfpn_coco')
def panoptic_maskrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
"""COCO panoptic segmentation with Panoptic Mask R-CNN."""
train_batch_size = 64
eval_batch_size = 8
steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size
config = cfg.ExperimentConfig(
runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
task=PanopticMaskRCNNTask(
init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080', # pylint: disable=line-too-long
init_checkpoint_modules=['backbone'],
model=PanopticMaskRCNN(
num_classes=91, input_size=[1024, 1024, 3],
segmentation_model=SEGMENTATION_MODEL(
num_classes=91,
head=SEGMENTATION_HEAD(level=3))),
losses=Losses(l2_weight_decay=0.00004),
train_data=DataConfig(
input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
is_training=True,
global_batch_size=train_batch_size,
parser=Parser(
aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
validation_data=DataConfig(
input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
is_training=False,
global_batch_size=eval_batch_size,
drop_remainder=False),
annotation_file=os.path.join(_COCO_INPUT_PATH_BASE,
'instances_val2017.json')),
trainer=cfg.TrainerConfig(
train_steps=22500,
validation_steps=validation_steps,
validation_interval=steps_per_epoch,
steps_per_loop=steps_per_epoch,
summary_interval=steps_per_epoch,
checkpoint_interval=steps_per_epoch,
optimizer_config=optimization.OptimizationConfig({
'optimizer': {
'type': 'sgd',
'sgd': {
'momentum': 0.9
}
},
'learning_rate': {
'type': 'stepwise',
'stepwise': {
'boundaries': [15000, 20000],
'values': [0.12, 0.012, 0.0012],
}
},
'warmup': {
'type': 'linear',
'linear': {
'warmup_steps': 500,
'warmup_learning_rate': 0.0067
}
}
})),
restrictions=[
'task.train_data.is_training != None',
'task.validation_data.is_training != None'
])
return config
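For context, a sketch of pulling the registered experiment and overriding a few fields; the override values below are illustrative only:

from official.core import exp_factory

config = exp_factory.get_exp_config('panoptic_maskrcnn_resnetfpn_coco')
# Illustrative overrides: shrink the batch and scale the LR to match.
config.task.train_data.global_batch_size = 32
config.trainer.optimizer_config.learning_rate.stepwise.values = [
    0.06, 0.006, 0.0006]
config.validate()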
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for panoptic maskrcnn config."""
# pylint: disable=unused-import
from absl.testing import parameterized
import tensorflow as tf
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as exp_cfg
class PanopticMaskRCNNConfigTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(
('panoptic_maskrcnn_resnetfpn_coco',),
)
def test_panoptic_maskrcnn_configs(self, config_name):
config = exp_factory.get_exp_config(config_name)
self.assertIsInstance(config, cfg.ExperimentConfig)
self.assertIsInstance(config.task, exp_cfg.PanopticMaskRCNNTask)
self.assertIsInstance(config.task.model, exp_cfg.PanopticMaskRCNN)
self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
config.validate()
config.task.train_data.is_training = None
with self.assertRaisesRegex(KeyError, 'Found inconsistency between key'):
config.validate()
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data parser and processing for Panoptic Mask R-CNN."""
import tensorflow as tf
from official.vision.beta.dataloaders import maskrcnn_input
from official.vision.beta.dataloaders import tf_example_decoder
from official.vision.beta.ops import preprocess_ops
class TfExampleDecoder(tf_example_decoder.TfExampleDecoder):
"""Tensorflow Example proto decoder."""
def __init__(self, regenerate_source_id, mask_binarize_threshold):
super(TfExampleDecoder, self).__init__(
include_mask=True,
regenerate_source_id=regenerate_source_id,
mask_binarize_threshold=mask_binarize_threshold)
self._segmentation_keys_to_features = {
'image/segmentation/class/encoded':
tf.io.FixedLenFeature((), tf.string, default_value='')
}
def decode(self, serialized_example):
decoded_tensors = super(TfExampleDecoder, self).decode(serialized_example)
segmentation_parsed_tensors = tf.io.parse_single_example(
serialized_example, self._segmentation_keys_to_features)
segmentation_mask = tf.io.decode_image(
segmentation_parsed_tensors['image/segmentation/class/encoded'],
channels=1)
segmentation_mask.set_shape([None, None, 1])
decoded_tensors.update({'groundtruth_segmentation_mask': segmentation_mask})
return decoded_tensors
class Parser(maskrcnn_input.Parser):
"""Parser to parse an image and its annotations into a dictionary of tensors."""
def __init__(self,
output_size,
min_level,
max_level,
num_scales,
aspect_ratios,
anchor_size,
rpn_match_threshold=0.7,
rpn_unmatched_threshold=0.3,
rpn_batch_size_per_im=256,
rpn_fg_fraction=0.5,
aug_rand_hflip=False,
aug_scale_min=1.0,
aug_scale_max=1.0,
skip_crowd_during_training=True,
max_num_instances=100,
mask_crop_size=112,
segmentation_resize_eval_groundtruth=True,
segmentation_groundtruth_padded_size=None,
segmentation_ignore_label=255,
dtype='float32'):
"""Initializes parameters for parsing annotations in the dataset.
Args:
output_size: `Tensor` or `list` for [height, width] of the output image. The
output_size should be divisible by the largest feature stride 2^max_level.
min_level: `int` number of minimum level of the output feature pyramid.
max_level: `int` number of maximum level of the output feature pyramid.
num_scales: `int` number representing intermediate scales added
on each level. For instance, num_scales=2 adds one additional
intermediate anchor scales [2^0, 2^0.5] on each level.
aspect_ratios: `list` of float numbers representing the aspect ratio
anchors added on each level. The number indicates the ratio of width to
height. For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors
on each scale level.
anchor_size: `float` number representing the scale of size of the base
anchor to the feature stride 2^level.
rpn_match_threshold: `float`, match threshold for anchors in RPN.
rpn_unmatched_threshold: `float`, unmatched threshold for anchors in RPN.
rpn_batch_size_per_im: `int` for batch size per image in RPN.
rpn_fg_fraction: `float` for foreground fraction per batch in RPN.
aug_rand_hflip: `bool`, if True, augment training with random
horizontal flip.
aug_scale_min: `float`, the minimum scale applied to `output_size` for
data augmentation during training.
aug_scale_max: `float`, the maximum scale applied to `output_size` for
data augmentation during training.
skip_crowd_during_training: `bool`, if True, skip annotations where
`is_crowd` equals 1.
max_num_instances: `int`, maximum number of instances in an image. The
groundtruth data will be padded to `max_num_instances`.
mask_crop_size: the size to which the groundtruth mask is cropped.
segmentation_resize_eval_groundtruth: `bool`, if True, eval groundtruth
masks are resized to output_size.
segmentation_groundtruth_padded_size: `Tensor` or `list` for [height,
width]. When resize_eval_groundtruth is set to False, the groundtruth
masks are padded to this size.
segmentation_ignore_label: `int`, pixels with the ignore label are not
used for training or evaluation.
dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}.
"""
super(Parser, self).__init__(
output_size=output_size,
min_level=min_level,
max_level=max_level,
num_scales=num_scales,
aspect_ratios=aspect_ratios,
anchor_size=anchor_size,
rpn_match_threshold=rpn_match_threshold,
rpn_unmatched_threshold=rpn_unmatched_threshold,
rpn_batch_size_per_im=rpn_batch_size_per_im,
rpn_fg_fraction=rpn_fg_fraction,
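# Horizontal flipping is disabled in the parent parser on purpose: this
# subclass flips the image, boxes, instance masks and segmentation mask
# together in _parse_train_data.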
aug_rand_hflip=False,
aug_scale_min=aug_scale_min,
aug_scale_max=aug_scale_max,
skip_crowd_during_training=skip_crowd_during_training,
max_num_instances=max_num_instances,
include_mask=True,
mask_crop_size=mask_crop_size,
dtype=dtype)
self.aug_rand_hflip = aug_rand_hflip
self._segmentation_resize_eval_groundtruth = segmentation_resize_eval_groundtruth
if (not segmentation_resize_eval_groundtruth) and (
segmentation_groundtruth_padded_size is None):
raise ValueError(
'segmentation_groundtruth_padded_size ([height, width]) needs to be '
'specified when segmentation_resize_eval_groundtruth is False.')
self._segmentation_groundtruth_padded_size = segmentation_groundtruth_padded_size
self._segmentation_ignore_label = segmentation_ignore_label
def _parse_train_data(self, data):
"""Parses data for training.
Args:
data: the decoded tensor dictionary from TfExampleDecoder.
Returns:
image: image tensor that is preprocessed to have normalized values and
dimension [output_size[0], output_size[1], 3]
labels: a dictionary of tensors used for training. The following describes
{key: value} pairs in the dictionary.
image_info: a 2D `Tensor` that encodes the information of the image and
the applied preprocessing. It is in the format of
[[original_height, original_width], [scaled_height, scaled_width]],
anchor_boxes: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, 4] representing anchor boxes at each level.
rpn_score_targets: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, anchors_per_location]. The height_l and
width_l represent the dimension of class logits at l-th level.
rpn_box_targets: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, anchors_per_location * 4]. The height_l and
width_l represent the dimension of bounding box regression output at
l-th level.
gt_boxes: Groundtruth bounding box annotations. The box is represented
in [y1, x1, y2, x2] format. The coordinates are w.r.t the scaled
image that is fed to the network. The tensor is padded with -1 to
the fixed dimension [self._max_num_instances, 4].
gt_classes: Groundtruth classes annotations. The tensor is padded
with -1 to the fixed dimension [self._max_num_instances].
gt_masks: Groundtruth masks cropped by the bounding box and
resized to a fixed size determined by mask_crop_size.
gt_segmentation_mask: Groundtruth mask for segmentation head, this is
resized to a fixed size determined by output_size.
gt_segmentation_valid_mask: Binary mask that marks the pixels that
are supposed to be used in computing the segmentation loss while
training.
"""
segmentation_mask = data['groundtruth_segmentation_mask']
# Flips image randomly during training.
if self.aug_rand_hflip:
masks = data['groundtruth_instance_masks']
image_mask = tf.concat([data['image'], segmentation_mask], axis=2)
image_mask, boxes, masks = preprocess_ops.random_horizontal_flip(
image_mask, data['groundtruth_boxes'], masks)
segmentation_mask = image_mask[:, :, -1:]
image = image_mask[:, :, :-1]
data['image'] = image
data['groundtruth_boxes'] = boxes
data['groundtruth_instance_masks'] = masks
image, labels = super(Parser, self)._parse_train_data(data)
image_info = labels['image_info']
image_scale = image_info[2, :]
offset = image_info[3, :]
segmentation_mask = tf.reshape(
segmentation_mask, shape=[1, data['height'], data['width']])
segmentation_mask = tf.cast(segmentation_mask, tf.float32)
# Pad the label and make sure the padded region is assigned the ignore
# label. The label is first offset by +1 and then padded with 0.
segmentation_mask += 1
segmentation_mask = tf.expand_dims(segmentation_mask, axis=3)
segmentation_mask = preprocess_ops.resize_and_crop_masks(
segmentation_mask, image_scale, self._output_size, offset)
segmentation_mask -= 1
segmentation_mask = tf.where(
tf.equal(segmentation_mask, -1),
self._segmentation_ignore_label * tf.ones_like(segmentation_mask),
segmentation_mask)
segmentation_mask = tf.squeeze(segmentation_mask, axis=0)
segmentation_valid_mask = tf.not_equal(
segmentation_mask, self._segmentation_ignore_label)
labels.update({
'gt_segmentation_mask': segmentation_mask,
'gt_segmentation_valid_mask': segmentation_valid_mask})
return image, labels
def _parse_eval_data(self, data):
"""Parses data for evaluation.
Args:
data: the decoded tensor dictionary from TfExampleDecoder.
Returns:
A dictionary of {'images': image, 'labels': labels} where
image: image tensor that is preprocessed to have normalized values and
dimension [output_size[0], output_size[1], 3]
labels: a dictionary of tensors used for training. The following
describes {key: value} pairs in the dictionary.
source_ids: Source image id. Default value -1 if the source id is
empty in the groundtruth annotation.
image_info: a 2D `Tensor` that encodes the information of the image
and the applied preprocessing. It is in the format of
[[original_height, original_width], [scaled_height, scaled_width]],
anchor_boxes: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, 4] representing anchor boxes at each
level.
"""
segmentation_mask = tf.cast(
data['groundtruth_segmentation_mask'], tf.float32)
segmentation_mask = tf.reshape(
segmentation_mask, shape=[1, data['height'], data['width'], 1])
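# Offset the labels by +1 so that the zero padding introduced below can
# later be mapped back to the ignore label.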
segmentation_mask += 1
image, labels = super(Parser, self)._parse_eval_data(data)
if self._segmentation_resize_eval_groundtruth:
# Resizes eval masks to match input image sizes. In that case, mean IoU
# is computed on output_size not the original size of the images.
image_info = labels['image_info']
image_scale = image_info[2, :]
offset = image_info[3, :]
segmentation_mask = preprocess_ops.resize_and_crop_masks(
segmentation_mask, image_scale, self._output_size, offset)
else:
segmentation_mask = tf.image.pad_to_bounding_box(
segmentation_mask, 0, 0,
self._segmentation_groundtruth_padded_size[0],
self._segmentation_groundtruth_padded_size[1])
segmentation_mask -= 1
# Assign ignore label to the padded region.
segmentation_mask = tf.where(
tf.equal(segmentation_mask, -1),
self._segmentation_ignore_label * tf.ones_like(segmentation_mask),
segmentation_mask)
segmentation_mask = tf.squeeze(segmentation_mask, axis=0)
segmentation_valid_mask = tf.not_equal(
segmentation_mask, self._segmentation_ignore_label)
labels['groundtruths'].update({
'gt_segmentation_mask': segmentation_mask,
'gt_segmentation_valid_mask': segmentation_valid_mask})
return image, labels
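For reference, a minimal standalone sketch of the +1/-1 padding trick used by both parsers above; shapes and values are illustrative:

import tensorflow as tf

ignore_label = 255
mask = tf.constant([[0., 1.], [2., 0.]])[tf.newaxis, :, :, tf.newaxis]
mask += 1  # Valid labels become 1..3; padding added below stays 0.
padded = tf.image.pad_to_bounding_box(mask, 0, 0, 3, 3)
padded -= 1  # Valid labels return to 0..2; padded pixels become -1.
padded = tf.where(
    tf.equal(padded, -1), ignore_label * tf.ones_like(padded), padded)
# 'padded' now holds the original labels with the pad region set to 255.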
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Panoptic MaskRCNN task definition."""
from typing import Any, List, Mapping, Optional, Tuple, Dict
from absl import logging
import tensorflow as tf
from official.common import dataset_fn
from official.core import task_factory
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.evaluation import coco_evaluator
from official.vision.beta.evaluation import segmentation_metrics
from official.vision.beta.losses import segmentation_losses
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as exp_cfg
from official.vision.beta.projects.panoptic_maskrcnn.dataloaders import panoptic_maskrcnn_input
from official.vision.beta.projects.panoptic_maskrcnn.modeling import factory
from official.vision.beta.tasks import maskrcnn
@task_factory.register_task_cls(exp_cfg.PanopticMaskRCNNTask)
class PanopticMaskRCNNTask(maskrcnn.MaskRCNNTask):
"""A single-replica view of training procedure.
Panoptic Mask R-CNN task provides artifacts for training/evaluation procedures,
including loading/iterating over Datasets, initializing the model, calculating
the loss, post-processing, and customized metrics with reduction.
"""
def build_model(self) -> tf.keras.Model:
"""Build Panoptic Mask R-CNN model."""
input_specs = tf.keras.layers.InputSpec(
shape=[None] + self.task_config.model.input_size)
l2_weight_decay = self.task_config.losses.l2_weight_decay
# Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss.
# (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2)
# (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss)
l2_regularizer = (tf.keras.regularizers.l2(
l2_weight_decay / 2.0) if l2_weight_decay else None)
model = factory.build_panoptic_maskrcnn(
input_specs=input_specs,
model_config=self.task_config.model,
l2_regularizer=l2_regularizer)
return model
def initialize(self, model: tf.keras.Model) -> None:
"""Loading pretrained checkpoint."""
if not self.task_config.init_checkpoint_modules:
return
def _get_checkpoint_path(checkpoint_dir_or_file):
# Resolve a directory to its most recent checkpoint; pass a file path
# through unchanged.
checkpoint_path = checkpoint_dir_or_file
if tf.io.gfile.isdir(checkpoint_dir_or_file):
checkpoint_path = tf.train.latest_checkpoint(
checkpoint_dir_or_file)
return checkpoint_path
for init_module in self.task_config.init_checkpoint_modules:
# Restoring checkpoint.
if init_module == 'all':
checkpoint_path = _get_checkpoint_path(
self.task_config.init_checkpoint)
ckpt = tf.train.Checkpoint(**model.checkpoint_items)
status = ckpt.restore(checkpoint_path)
status.assert_consumed()
elif init_module == 'backbone':
checkpoint_path = _get_checkpoint_path(
self.task_config.init_checkpoint)
ckpt = tf.train.Checkpoint(backbone=model.backbone)
status = ckpt.restore(checkpoint_path)
status.expect_partial().assert_existing_objects_matched()
elif init_module == 'segmentation_backbone':
checkpoint_path = _get_checkpoint_path(
self.task_config.segmentation_init_checkpoint)
ckpt = tf.train.Checkpoint(
segmentation_backbone=model.segmentation_backbone)
status = ckpt.restore(checkpoint_path)
status.expect_partial().assert_existing_objects_matched()
elif init_module == 'segmentation_decoder':
checkpoint_path = _get_checkpoint_path(
self.task_config.segmentation_init_checkpoint)
ckpt = tf.train.Checkpoint(
segmentation_decoder=model.segmentation_decoder)
status = ckpt.restore(checkpoint_path)
status.expect_partial().assert_existing_objects_matched()
else:
raise ValueError(
"Only 'all', 'backbone', 'segmentation_backbone' and/or "
"'segmentation_decoder' can be used to initialize the model, but "
"got {}".format(init_module))
logging.info('Finished loading pretrained checkpoint from %s for %s',
checkpoint_path, init_module)
def build_inputs(
self,
params: exp_cfg.DataConfig,
input_context: Optional[tf.distribute.InputContext] = None
) -> tf.data.Dataset:
"""Build input dataset."""
decoder_cfg = params.decoder.get()
if params.decoder.type == 'simple_decoder':
decoder = panoptic_maskrcnn_input.TfExampleDecoder(
regenerate_source_id=decoder_cfg.regenerate_source_id,
mask_binarize_threshold=decoder_cfg.mask_binarize_threshold)
else:
raise ValueError('Unknown decoder type: {}!'.format(params.decoder.type))
parser = panoptic_maskrcnn_input.Parser(
output_size=self.task_config.model.input_size[:2],
min_level=self.task_config.model.min_level,
max_level=self.task_config.model.max_level,
num_scales=self.task_config.model.anchor.num_scales,
aspect_ratios=self.task_config.model.anchor.aspect_ratios,
anchor_size=self.task_config.model.anchor.anchor_size,
dtype=params.dtype,
rpn_match_threshold=params.parser.rpn_match_threshold,
rpn_unmatched_threshold=params.parser.rpn_unmatched_threshold,
rpn_batch_size_per_im=params.parser.rpn_batch_size_per_im,
rpn_fg_fraction=params.parser.rpn_fg_fraction,
aug_rand_hflip=params.parser.aug_rand_hflip,
aug_scale_min=params.parser.aug_scale_min,
aug_scale_max=params.parser.aug_scale_max,
skip_crowd_during_training=params.parser.skip_crowd_during_training,
max_num_instances=params.parser.max_num_instances,
mask_crop_size=params.parser.mask_crop_size,
segmentation_resize_eval_groundtruth=params.parser
.segmentation_resize_eval_groundtruth,
segmentation_groundtruth_padded_size=params.parser
.segmentation_groundtruth_padded_size,
segmentation_ignore_label=params.parser.segmentation_ignore_label)
reader = input_reader_factory.input_reader_generator(
params,
dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
decoder_fn=decoder.decode,
parser_fn=parser.parse_fn(params.is_training))
dataset = reader.read(input_context=input_context)
return dataset
def build_losses(self,
outputs: Mapping[str, Any],
labels: Mapping[str, Any],
aux_losses: Optional[Any] = None) -> Dict[str, tf.Tensor]:
"""Build Panoptic Mask R-CNN losses."""
params = self.task_config.losses
use_groundtruth_dimension = params.semantic_segmentation_use_groundtruth_dimension
segmentation_loss_fn = segmentation_losses.SegmentationLoss(
label_smoothing=params.semantic_segmentation_label_smoothing,
class_weights=params.semantic_segmentation_class_weights,
ignore_label=params.semantic_segmentation_ignore_label,
use_groundtruth_dimension=use_groundtruth_dimension,
top_k_percent_pixels=params.semantic_segmentation_top_k_percent_pixels)
semantic_segmentation_weight = params.semantic_segmentation_weight
losses = super(PanopticMaskRCNNTask, self).build_losses(
outputs=outputs,
labels=labels,
aux_losses=None)
maskrcnn_loss = losses['model_loss']
segmentation_loss = segmentation_loss_fn(
outputs['segmentation_outputs'],
labels['gt_segmentation_mask'])
model_loss = (
maskrcnn_loss + semantic_segmentation_weight * segmentation_loss)
total_loss = model_loss
if aux_losses:
reg_loss = tf.reduce_sum(aux_losses)
total_loss = model_loss + reg_loss
losses.update({
'total_loss': total_loss,
'maskrcnn_loss': maskrcnn_loss,
'segmentation_loss': segmentation_loss,
'model_loss': model_loss,
})
return losses
def build_metrics(self, training: bool = True) -> List[
tf.keras.metrics.Metric]:
"""Build detection metrics."""
metrics = []
if training:
metric_names = [
'total_loss',
'rpn_score_loss',
'rpn_box_loss',
'frcnn_cls_loss',
'frcnn_box_loss',
'mask_loss',
'maskrcnn_loss',
'segmentation_loss',
'model_loss'
]
for name in metric_names:
metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32))
if self.task_config.segmentation_evaluation.report_train_mean_iou:
self.segmentation_train_mean_iou = segmentation_metrics.MeanIoU(
name='train_mean_iou',
num_classes=self.task_config.model.num_classes,
rescale_predictions=False,
dtype=tf.float32)
else:
self.coco_metric = coco_evaluator.COCOEvaluator(
annotation_file=self.task_config.annotation_file,
include_mask=self.task_config.model.include_mask,
per_category_metrics=self.task_config.per_category_metrics)
rescale_predictions = (not self.task_config.validation_data.parser
.segmentation_resize_eval_groundtruth)
self.segmentation_perclass_iou_metric = segmentation_metrics.PerClassIoU(
name='per_class_iou',
num_classes=self.task_config.model.num_classes,
rescale_predictions=rescale_predictions,
dtype=tf.float32)
return metrics
def train_step(self,
inputs: Tuple[Any, Any],
model: tf.keras.Model,
optimizer: tf.keras.optimizers.Optimizer,
metrics: Optional[List[Any]] = None) -> Dict[str, Any]:
"""Does forward and backward.
Args:
inputs: a dictionary of input tensors.
model: the model, forward pass definition.
optimizer: the optimizer for this training step.
metrics: a nested structure of metrics objects.
Returns:
A dictionary of logs.
"""
images, labels = inputs
num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
with tf.GradientTape() as tape:
outputs = model(
images,
image_shape=labels['image_info'][:, 1, :],
anchor_boxes=labels['anchor_boxes'],
gt_boxes=labels['gt_boxes'],
gt_classes=labels['gt_classes'],
gt_masks=(labels['gt_masks'] if self.task_config.model.include_mask
else None),
training=True)
outputs = tf.nest.map_structure(
lambda x: tf.cast(x, tf.float32), outputs)
# Computes per-replica loss.
losses = self.build_losses(
outputs=outputs, labels=labels, aux_losses=model.losses)
scaled_loss = losses['total_loss'] / num_replicas
# For mixed_precision policy, when LossScaleOptimizer is used, loss is
# scaled for numerical stability.
if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
scaled_loss = optimizer.get_scaled_loss(scaled_loss)
tvars = model.trainable_variables
grads = tape.gradient(scaled_loss, tvars)
# Scales back gradient when LossScaleOptimizer is used.
if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
grads = optimizer.get_unscaled_gradients(grads)
optimizer.apply_gradients(list(zip(grads, tvars)))
logs = {self.loss: losses['total_loss']}
if metrics:
for m in metrics:
m.update_state(losses[m.name])
if self.task_config.segmentation_evaluation.report_train_mean_iou:
segmentation_labels = {
'masks': labels['gt_segmentation_mask'],
'valid_masks': labels['gt_segmentation_valid_mask'],
'image_info': labels['image_info']
}
self.process_metrics(
metrics=[self.segmentation_train_mean_iou],
labels=segmentation_labels,
model_outputs=outputs['segmentation_outputs'])
logs.update({
self.segmentation_train_mean_iou.name:
self.segmentation_train_mean_iou.result()
})
return logs
def validation_step(self,
inputs: Tuple[Any, Any],
model: tf.keras.Model,
metrics: Optional[List[Any]] = None) -> Dict[str, Any]:
"""Validatation step.
Args:
inputs: a dictionary of input tensors.
model: the keras.Model.
metrics: a nested structure of metrics objects.
Returns:
A dictionary of logs.
"""
images, labels = inputs
outputs = model(
images,
anchor_boxes=labels['anchor_boxes'],
image_shape=labels['image_info'][:, 1, :],
training=False)
logs = {self.loss: 0}
coco_model_outputs = {
'detection_masks': outputs['detection_masks'],
'detection_boxes': outputs['detection_boxes'],
'detection_scores': outputs['detection_scores'],
'detection_classes': outputs['detection_classes'],
'num_detections': outputs['num_detections'],
'source_id': labels['groundtruths']['source_id'],
'image_info': labels['image_info']
}
segmentation_labels = {
'masks': labels['groundtruths']['gt_segmentation_mask'],
'valid_masks': labels['groundtruths']['gt_segmentation_valid_mask'],
'image_info': labels['image_info']
}
logs.update({
self.coco_metric.name: (labels['groundtruths'], coco_model_outputs),
self.segmentation_perclass_iou_metric.name: (
segmentation_labels,
outputs['segmentation_outputs'])
})
return logs
def aggregate_logs(self, state=None, step_outputs=None):
if state is None:
self.coco_metric.reset_states()
self.segmentation_perclass_iou_metric.reset_states()
state = [self.coco_metric, self.segmentation_perclass_iou_metric]
self.coco_metric.update_state(
step_outputs[self.coco_metric.name][0],
step_outputs[self.coco_metric.name][1])
self.segmentation_perclass_iou_metric.update_state(
step_outputs[self.segmentation_perclass_iou_metric.name][0],
step_outputs[self.segmentation_perclass_iou_metric.name][1])
return state
def reduce_aggregated_logs(self, aggregated_logs, global_step=None):
result = {}
result[self.coco_metric.name] = super(
PanopticMaskRCNNTask, self).reduce_aggregated_logs(
aggregated_logs=aggregated_logs,
global_step=global_step)
ious = self.segmentation_perclass_iou_metric.result()
if self.task_config.segmentation_evaluation.report_per_class_iou:
for i, value in enumerate(ious.numpy()):
result.update({'segmentation_iou/class_{}'.format(i): value})
# Computes mean IoU
result.update({'segmentation_mean_iou': tf.reduce_mean(ious).numpy()})
return result
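For reference, a sketch of driving the checkpoint logic above from the task config; the checkpoint paths are placeholders:

from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as exp_cfg

task_config = exp_cfg.PanopticMaskRCNNTask(
    init_checkpoint='gs://path/to/maskrcnn_ckpt',
    segmentation_init_checkpoint='gs://path/to/segmentation_ckpt',
    # Restore the detection backbone from the first checkpoint and the
    # segmentation backbone and decoder from the second.
    init_checkpoint_modules=[
        'backbone', 'segmentation_backbone', 'segmentation_decoder'])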
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for panoptic_maskrcnn.py."""
import os
from absl.testing import parameterized
import tensorflow as tf
from official.vision.beta.configs import decoders as decoder_cfg
from official.vision.beta.configs import semantic_segmentation as segmentation_cfg
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as cfg
from official.vision.beta.projects.panoptic_maskrcnn.tasks import panoptic_maskrcnn
class PanopticMaskRCNNTaskTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(
(['all'],),
(['backbone'],),
(['segmentation_backbone'],),
(['segmentation_decoder'],),
(['backbone', 'segmentation_backbone'],),
(['segmentation_backbone', 'segmentation_decoder'],))
def test_model_initializing(self, init_checkpoint_modules):
shared_backbone = ('segmentation_backbone' not in init_checkpoint_modules)
shared_decoder = ('segmentation_decoder' not in init_checkpoint_modules and
shared_backbone)
task_config = cfg.PanopticMaskRCNNTask(
model=cfg.PanopticMaskRCNN(
num_classes=2,
input_size=[640, 640, 3],
segmentation_model=segmentation_cfg.SemanticSegmentationModel(
decoder=decoder_cfg.Decoder(type='fpn')),
shared_backbone=shared_backbone,
shared_decoder=shared_decoder))
task = panoptic_maskrcnn.PanopticMaskRCNNTask(task_config)
model = task.build_model()
ckpt = tf.train.Checkpoint(**model.checkpoint_items)
ckpt_save_dir = self.create_tempdir().full_path
ckpt.save(os.path.join(ckpt_save_dir, 'ckpt'))
if (init_checkpoint_modules == ['all'] or
'backbone' in init_checkpoint_modules):
task._task_config.init_checkpoint = ckpt_save_dir
if ('segmentation_backbone' in init_checkpoint_modules or
'segmentation_decoder' in init_checkpoint_modules):
task._task_config.segmentation_init_checkpoint = ckpt_save_dir
task._task_config.init_checkpoint_modules = init_checkpoint_modules
task.initialize(model)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Multi-task SimCLR configs."""
import dataclasses
from typing import List, Tuple
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling.multitask import configs as multitask_configs
from official.vision.beta.configs import backbones
from official.vision.beta.configs import common
from official.vision.beta.projects.simclr.configs import simclr as simclr_configs
from official.vision.beta.projects.simclr.modeling import simclr_model
@dataclasses.dataclass
class SimCLRMTHeadConfig(hyperparams.Config):
"""Per-task specific configs."""
# Supervised head is required for finetune, but optional for pretrain.
supervised_head: simclr_configs.SupervisedHead = simclr_configs.SupervisedHead(
num_classes=1001)
mode: str = simclr_model.PRETRAIN
@dataclasses.dataclass
class SimCLRMTModelConfig(hyperparams.Config):
"""Model config for multi-task SimCLR model."""
input_size: List[int] = dataclasses.field(default_factory=list)
backbone: backbones.Backbone = backbones.Backbone(
type='resnet', resnet=backbones.ResNet())
backbone_trainable: bool = True
projection_head: simclr_configs.ProjectionHead = simclr_configs.ProjectionHead(
proj_output_dim=128, num_proj_layers=3, ft_proj_idx=1)
norm_activation: common.NormActivation = common.NormActivation(
norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)
heads: Tuple[SimCLRMTHeadConfig, ...] = ()
# L2 weight decay is used in the model, not in task.
# Note that this cannot be used together with the LARS optimizer.
l2_weight_decay: float = 0.0
@exp_factory.register_config_factory('multitask_simclr')
def multitask_simclr() -> multitask_configs.MultiTaskExperimentConfig:
return multitask_configs.MultiTaskExperimentConfig(
task=multitask_configs.MultiTaskConfig(
model=SimCLRMTModelConfig(
heads=(SimCLRMTHeadConfig(mode=simclr_model.PRETRAIN),
SimCLRMTHeadConfig(mode=simclr_model.FINETUNE))),
task_routines=(multitask_configs.TaskRoutine(
task_name=simclr_model.PRETRAIN,
task_config=simclr_configs.SimCLRPretrainTask(),
task_weight=2.0),
multitask_configs.TaskRoutine(
task_name=simclr_model.FINETUNE,
task_config=simclr_configs.SimCLRFinetuneTask(),
task_weight=1.0))),
trainer=multitask_configs.MultiTaskTrainerConfig())
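A sketch of consuming the registered multi-task experiment and rebalancing the two routines; the weight value is illustrative:

from official.core import exp_factory

config = exp_factory.get_exp_config('multitask_simclr')
config.task.task_routines[0].task_weight = 1.0  # weight pretrain equally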
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for multitask_config."""
import tensorflow as tf
from official.core import exp_factory
from official.modeling.multitask import configs as multitask_configs
from official.vision.beta.projects.simclr.configs import multitask_config as simclr_multitask_config
from official.vision.beta.projects.simclr.configs import simclr as exp_cfg
class MultitaskConfigTest(tf.test.TestCase):
def test_simclr_configs(self):
config = exp_factory.get_exp_config('multitask_simclr')
self.assertIsInstance(config, multitask_configs.MultiTaskExperimentConfig)
self.assertIsInstance(config.task.model,
simclr_multitask_config.SimCLRMTModelConfig)
self.assertIsInstance(config.task.task_routines[0].task_config,
exp_cfg.SimCLRPretrainTask)
self.assertIsInstance(config.task.task_routines[1].task_config,
exp_cfg.SimCLRFinetuneTask)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Multi-task image multi-taskSimCLR model definition."""
from typing import Dict, Text
import tensorflow as tf
from official.modeling.multitask import base_model
from official.vision.beta.modeling import backbones
from official.vision.beta.projects.simclr.configs import multitask_config as simclr_multitask_config
from official.vision.beta.projects.simclr.heads import simclr_head
from official.vision.beta.projects.simclr.modeling import simclr_model
PROJECTION_OUTPUT_KEY = 'projection_outputs'
SUPERVISED_OUTPUT_KEY = 'supervised_outputs'
class SimCLRMTModel(base_model.MultiTaskBaseModel):
"""A multi-task SimCLR model that does both pretrain and finetune."""
def __init__(self, config: simclr_multitask_config.SimCLRMTModelConfig,
**kwargs):
self._config = config
# Build shared backbone.
self._input_specs = tf.keras.layers.InputSpec(shape=[None] +
config.input_size)
l2_weight_decay = config.l2_weight_decay
# Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss.
# (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2)
# (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss)
self._l2_regularizer = (
tf.keras.regularizers.l2(l2_weight_decay /
2.0) if l2_weight_decay else None)
self._backbone = backbones.factory.build_backbone(
input_specs=self._input_specs,
backbone_config=config.backbone,
norm_activation_config=config.norm_activation,
l2_regularizer=self._l2_regularizer)
super().__init__(**kwargs)
def _instantiate_sub_tasks(self) -> Dict[Text, tf.keras.Model]:
tasks = {}
# Build the shared projection head
norm_activation_config = self._config.norm_activation
projection_head_config = self._config.projection_head
projection_head = simclr_head.ProjectionHead(
proj_output_dim=projection_head_config.proj_output_dim,
num_proj_layers=projection_head_config.num_proj_layers,
ft_proj_idx=projection_head_config.ft_proj_idx,
kernel_regularizer=self._l2_regularizer,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon)
for model_config in self._config.heads:
# Build supervised head
supervised_head_config = model_config.supervised_head
if supervised_head_config:
if supervised_head_config.zero_init:
s_kernel_initializer = 'zeros'
else:
s_kernel_initializer = 'random_uniform'
supervised_head = simclr_head.ClassificationHead(
num_classes=supervised_head_config.num_classes,
kernel_initializer=s_kernel_initializer,
kernel_regularizer=self._l2_regularizer)
else:
supervised_head = None
tasks[model_config.mode] = simclr_model.SimCLRModel(
input_specs=self._input_specs,
backbone=self._backbone,
projection_head=projection_head,
supervised_head=supervised_head,
mode=model_config.mode,
backbone_trainable=self._config.backbone_trainable)
return tasks
# TODO(huythong): Implement initialize function to load the pretrained
# checkpoint of backbone.
# def initialize(self):
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for multitask_model."""
import os.path
import tensorflow as tf
from official.vision.beta.projects.simclr.configs import multitask_config
from official.vision.beta.projects.simclr.modeling import multitask_model
from official.vision.beta.projects.simclr.modeling import simclr_model
class MultitaskModelTest(tf.test.TestCase):
def test_initialize_model_success(self):
ckpt_dir = self.get_temp_dir()
config = multitask_config.SimCLRMTModelConfig(
input_size=[64, 64, 3],
heads=(multitask_config.SimCLRMTHeadConfig(mode=simclr_model.PRETRAIN),
multitask_config.SimCLRMTHeadConfig(mode=simclr_model.FINETUNE)))
model = multitask_model.SimCLRMTModel(config)
self.assertIn(simclr_model.PRETRAIN, model.sub_tasks)
self.assertIn(simclr_model.FINETUNE, model.sub_tasks)
ckpt = tf.train.Checkpoint(backbone=model._backbone)
ckpt.save(os.path.join(ckpt_dir, 'ckpt'))
model.initialize()
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Trainer binary for multitask simclr."""
from absl import app
from absl import flags
import gin
from official.common import distribute_utils
from official.common import flags as tfm_flags
from official.core import train_utils
from official.modeling import performance
from official.modeling.multitask import multitask
from official.modeling.multitask import train_lib
# pylint: disable=unused-import
from official.vision.beta.projects.simclr.common import registry_imports
from official.vision.beta.projects.simclr.configs import multitask_config
from official.vision.beta.projects.simclr.modeling import multitask_model
# pylint: enable=unused-import
FLAGS = flags.FLAGS
def main(_):
gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params)
params = train_utils.parse_configuration(FLAGS)
model_dir = FLAGS.model_dir
if 'train' in FLAGS.mode:
# Pure eval modes do not output yaml files. Otherwise continuous eval job
# may race against the train job for writing the same file.
train_utils.serialize_config(params, model_dir)
# Sets mixed_precision policy. Using 'mixed_float16' or 'mixed_bfloat16'
# can speed up models significantly by using float16 on GPUs and bfloat16
# on TPUs; loss_scale takes effect only when dtype is float16.
if params.runtime.mixed_precision_dtype:
performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype)
distribution_strategy = distribute_utils.get_distribution_strategy(
distribution_strategy=params.runtime.distribution_strategy,
all_reduce_alg=params.runtime.all_reduce_alg,
num_gpus=params.runtime.num_gpus,
tpu_address=params.runtime.tpu)
with distribution_strategy.scope():
tasks = multitask.MultiTask.from_config(params.task)
model = multitask_model.SimCLRMTModel(params.task.model)
train_lib.run_experiment(
distribution_strategy=distribution_strategy,
task=tasks,
model=model,
mode=FLAGS.mode,
params=params,
model_dir=model_dir)
train_utils.save_gin_config(FLAGS.mode, model_dir)
if __name__ == '__main__':
tfm_flags.define_flags()
app.run(main)
......@@ -59,6 +59,12 @@ VIT_SPECS = {
patch_size=14,
transformer=dict(mlp_dim=5120, num_heads=16, num_layers=32),
),
'vit-g14':
dict(
hidden_size=1664,
patch_size=14,
transformer=dict(mlp_dim=8192, num_heads=16, num_layers=48),
),
}
......
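For scale, a back-of-the-envelope parameter estimate for the added spec; it ignores biases, layer norms and the patch/classifier embeddings:

hidden, mlp, layers = 1664, 8192, 48
per_layer = 4 * hidden ** 2 + 2 * hidden * mlp  # attention (QKVO) + MLP weights
print(f'~{layers * per_layer / 1e9:.2f}B params')  # ~1.84B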
......@@ -13,6 +13,7 @@
# limitations under the License.
"""RetinaNet task definition."""
import os
from typing import Any, Optional, List, Tuple, Mapping
from absl import logging
......@@ -26,6 +27,7 @@ from official.vision.beta.dataloaders import maskrcnn_input
from official.vision.beta.dataloaders import tf_example_decoder
from official.vision.beta.dataloaders import tf_example_label_map_decoder
from official.vision.beta.evaluation import coco_evaluator
from official.vision.beta.evaluation import coco_utils
from official.vision.beta.losses import maskrcnn_losses
from official.vision.beta.modeling import factory
......@@ -259,10 +261,33 @@ class MaskRCNNTask(base_task.Task):
metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32))
else:
self.coco_metric = coco_evaluator.COCOEvaluator(
annotation_file=self._task_config.annotation_file,
include_mask=self._task_config.model.include_mask,
per_category_metrics=self._task_config.per_category_metrics)
if self._task_config.annotation_file:
self.coco_metric = coco_evaluator.COCOEvaluator(
annotation_file=self._task_config.annotation_file,
include_mask=self._task_config.model.include_mask,
per_category_metrics=self._task_config.per_category_metrics)
else:
annotation_path = os.path.join(self._logging_dir, 'annotation.json')
if tf.io.gfile.exists(annotation_path):
logging.info(
'annotation.json file already exists; skipping annotation file '
'creation.')
else:
if self._task_config.validation_data.num_examples <= 0:
logging.info('validation_data.num_examples needs to be > 0')
if not self._task_config.validation_data.input_path:
logging.info('Cannot create annotation file for tfds.')
logging.info(
'Creating coco-style annotation file: %s', annotation_path)
coco_utils.scan_and_generator_annotation_file(
self._task_config.validation_data.input_path,
self._task_config.validation_data.file_type,
self._task_config.validation_data.num_examples,
self.task_config.model.include_mask, annotation_path)
self.coco_metric = coco_evaluator.COCOEvaluator(
annotation_file=annotation_path,
include_mask=self._task_config.model.include_mask,
per_category_metrics=self._task_config.per_category_metrics)
return metrics
......
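A sketch of exercising the new fallback: leave annotation_file unset and give the validation data a concrete input path and example count so the COCO-style file is generated on the fly. The experiment name and paths below are placeholders:

from official.core import exp_factory

config = exp_factory.get_exp_config('maskrcnn_resnetfpn_coco')
config.task.annotation_file = None               # trigger on-the-fly generation
config.task.validation_data.input_path = '/data/coco/val*'
config.task.validation_data.num_examples = 5000  # must be > 0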
......@@ -446,14 +446,13 @@ class Controller:
f"{num_steps}. Old value was {current_step}, expected updated value "
f"to be {expected_step}, but it was {self.global_step.numpy()}.")
logging.warning(message)
return
train_output = train_output or {}
for action in self.train_actions:
action(train_output)
train_output = tf.nest.map_structure(utils.get_value, train_output)
current_step = expected_step
current_step = self.global_step.numpy()
steps_per_second = self.step_timer.steps_per_second()
_log(f"train | step: {current_step: 6d} | "
f"steps/sec: {steps_per_second: 6.1f} | "
......