Commit cf80ed4e authored by anivegesana

Merge branch 'purdue-yolo' of https://github.com/tensorflow/models into detection_generator_pr_2

parents 394cefcc 461b3587
@@ -43,6 +43,9 @@ S12: KernelSize = (1, 2, 2)
S22: KernelSize = (2, 2, 2)
S21: KernelSize = (2, 1, 1)
# Type for a state container (map)
TensorMap = Mapping[str, tf.Tensor]
@dataclasses.dataclass
class BlockSpec:
@@ -319,6 +322,7 @@ class Movinet(tf.keras.Model):
bias_regularizer: Optional[str] = None,
stochastic_depth_drop_rate: float = 0.,
use_external_states: bool = False,
output_states: bool = True,
**kwargs):
"""MoViNet initialization function.
@@ -353,6 +357,10 @@ class Movinet(tf.keras.Model):
stochastic_depth_drop_rate: the base rate for stochastic depth.
use_external_states: if True, expects states to be passed as additional
input.
output_states: if True, output intermediate states that can be used to run
the model in streaming mode. Inputting the output states of the
previous input clip with the current input clip will utilize a stream
buffer for streaming video.
**kwargs: keyword arguments to be passed.
"""
block_specs = BLOCK_SPECS[model_id]
@@ -385,6 +393,7 @@ class Movinet(tf.keras.Model):
self._bias_regularizer = bias_regularizer
self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
self._use_external_states = use_external_states
self._output_states = output_states
if self._use_external_states and not self._causal:
raise ValueError('External states should be used with causal mode.')
@@ -411,8 +420,7 @@ class Movinet(tf.keras.Model):
self,
input_specs: tf.keras.layers.InputSpec,
state_specs: Optional[Mapping[str, tf.keras.layers.InputSpec]] = None,
-) -> Tuple[Mapping[str, tf.keras.Input], Tuple[Mapping[str, tf.Tensor],
-Mapping[str, tf.Tensor]]]:
+) -> Tuple[TensorMap, Union[TensorMap, Tuple[TensorMap, TensorMap]]]:
"""Builds the model network.
Args:
@@ -423,7 +431,7 @@ class Movinet(tf.keras.Model):
Returns:
Inputs and outputs as a tuple. Inputs are expected to be a dict with
base input and states. Outputs are expected to be a dict of endpoints
-and output states.
+and (optional) output states.
"""
state_specs = state_specs if state_specs is not None else {}
@@ -519,7 +527,7 @@ class Movinet(tf.keras.Model):
else:
raise ValueError('Unknown block type {}'.format(block))
-outputs = (endpoints, states)
+outputs = (endpoints, states) if self._output_states else endpoints
return inputs, outputs
@@ -679,6 +687,8 @@ class Movinet(tf.keras.Model):
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
'use_external_states': self._use_external_states,
'output_states': self._output_states,
}
return config_dict
...
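The new output_states flag is what makes clip-by-clip streaming possible. A minimal sketch of that loop, modeled on the MoViNet streaming examples in this package; the model_id, num_classes, frame shape, and the init_states/'image' input signature are assumptions that may differ by version:

import tensorflow as tf

from official.vision.beta.projects.movinet.modeling import movinet
from official.vision.beta.projects.movinet.modeling import movinet_model

# Build a causal backbone that consumes and emits stream-buffer states.
backbone = movinet.Movinet(
    model_id='a0', causal=True, use_external_states=True)
model = movinet_model.MovinetClassifier(
    backbone=backbone, num_classes=600, output_states=True)

frames = tf.random.normal([1, 8, 172, 172, 3])
states = model.init_states(tf.shape(frames))

# Feed the clip frame by frame; the states returned for frame t become
# the input states for frame t+1, emulating a stream buffer.
for frame in tf.split(frames, frames.shape[1], axis=1):
  logits, states = model({**states, 'image': frame})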
@@ -265,7 +265,7 @@ class ConvBlock(tf.keras.layers.Layer):
tf.keras.regularizers.L2(KERNEL_WEIGHT_DECAY),
use_batch_norm: bool = True,
-batch_norm_layer: tf.keras.layers.Layer =
-tf.keras.layers.experimental.SyncBatchNormalization,
+batch_norm_layer: tf.keras.layers.Layer =
+tf.keras.layers.BatchNormalization,
batch_norm_momentum: float = 0.99,
batch_norm_epsilon: float = 1e-3,
activation: Optional[Any] = None,
@@ -547,8 +547,8 @@ class StreamConvBlock(ConvBlock):
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = tf.keras
.regularizers.L2(KERNEL_WEIGHT_DECAY),
use_batch_norm: bool = True,
-batch_norm_layer: tf.keras.layers.Layer = tf.keras.layers.experimental
-.SyncBatchNormalization,
+batch_norm_layer: tf.keras.layers.Layer =
+tf.keras.layers.BatchNormalization,
batch_norm_momentum: float = 0.99,
batch_norm_epsilon: float = 1e-3,
activation: Optional[Any] = None,
@@ -915,7 +915,7 @@ class SkipBlock(tf.keras.layers.Layer):
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] =
tf.keras.regularizers.L2(KERNEL_WEIGHT_DECAY),
-batch_norm_layer: tf.keras.layers.Layer =
-tf.keras.layers.experimental.SyncBatchNormalization,
+batch_norm_layer: tf.keras.layers.Layer =
+tf.keras.layers.BatchNormalization,
batch_norm_momentum: float = 0.99,
batch_norm_epsilon: float = 1e-3,
**kwargs):
@@ -1031,8 +1031,8 @@ class MovinetBlock(tf.keras.layers.Layer):
kernel_initializer: tf.keras.initializers.Initializer = 'HeNormal',
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = tf.keras
.regularizers.L2(KERNEL_WEIGHT_DECAY),
-batch_norm_layer: tf.keras.layers.Layer = tf.keras.layers.experimental
-.SyncBatchNormalization,
+batch_norm_layer: tf.keras.layers.Layer =
+tf.keras.layers.BatchNormalization,
batch_norm_momentum: float = 0.99,
batch_norm_epsilon: float = 1e-3,
state_prefix: Optional[str] = None,
@@ -1078,7 +1078,6 @@ class MovinetBlock(tf.keras.layers.Layer):
se_ratio * expand_filters * se_multiplier, divisor=8)
self._out_filters = out_filters
self._expand_filters = expand_filters
-self._kernel_size = kernel_size
self._causal = causal
self._activation = activation
self._gating_activation = gating_activation
@@ -1232,8 +1231,8 @@ class Stem(tf.keras.layers.Layer):
kernel_initializer: tf.keras.initializers.Initializer = 'HeNormal',
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = tf.keras
.regularizers.L2(KERNEL_WEIGHT_DECAY),
-batch_norm_layer: tf.keras.layers.Layer = tf.keras.layers.experimental
-.SyncBatchNormalization,
+batch_norm_layer: tf.keras.layers.Layer =
+tf.keras.layers.BatchNormalization,
batch_norm_momentum: float = 0.99,
batch_norm_epsilon: float = 1e-3,
state_prefix: Optional[str] = None,
@@ -1340,8 +1339,8 @@ class Head(tf.keras.layers.Layer):
kernel_initializer: tf.keras.initializers.Initializer = 'HeNormal',
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = tf.keras
.regularizers.L2(KERNEL_WEIGHT_DECAY),
-batch_norm_layer: tf.keras.layers.Layer = tf.keras.layers.experimental
-.SyncBatchNormalization,
+batch_norm_layer: tf.keras.layers.Layer =
+tf.keras.layers.BatchNormalization,
batch_norm_momentum: float = 0.99,
batch_norm_epsilon: float = 1e-3,
state_prefix: Optional[str] = None,
@@ -1470,6 +1469,7 @@ class ClassifierHead(tf.keras.layers.Layer):
self._num_classes = num_classes
self._dropout_rate = dropout_rate
self._conv_type = conv_type
self._activation = activation
self._output_activation = output_activation
self._max_pool_predictions = max_pool_predictions
self._kernel_initializer = kernel_initializer
@@ -1509,6 +1509,7 @@ class ClassifierHead(tf.keras.layers.Layer):
'num_classes': self._num_classes,
'dropout_rate': self._dropout_rate,
'conv_type': self._conv_type,
'activation': self._activation,
'output_activation': self._output_activation,
'max_pool_predictions': self._max_pool_predictions,
'kernel_initializer': self._kernel_initializer,
...
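The recurring change in these hunks swaps the default batch_norm_layer from tf.keras.layers.experimental.SyncBatchNormalization to plain BatchNormalization. Callers that still want cross-replica batch statistics can opt back in explicitly; a hedged sketch, where the filters/kernel_size arguments are assumptions about ConvBlock's full signature:

import tensorflow as tf

from official.vision.beta.projects.movinet.modeling import movinet_layers

# Override the new default to restore synchronized batch norm.
block = movinet_layers.ConvBlock(
    filters=64,
    kernel_size=(1, 3, 3),
    batch_norm_layer=tf.keras.layers.experimental.SyncBatchNormalization)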
@@ -36,6 +36,7 @@ class MovinetClassifier(tf.keras.Model):
backbone: tf.keras.Model,
num_classes: int,
input_specs: Optional[Mapping[str, tf.keras.layers.InputSpec]] = None,
activation: str = 'swish',
dropout_rate: float = 0.0,
kernel_initializer: str = 'HeNormal',
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
@@ -48,6 +49,7 @@ class MovinetClassifier(tf.keras.Model):
backbone: A 3d backbone network.
num_classes: Number of classes in classification task.
input_specs: Specs of the input tensor.
activation: name of the main activation function.
dropout_rate: Rate for dropout regularization.
kernel_initializer: Kernel initializer for the final dense layer.
kernel_regularizer: Kernel regularizer.
@@ -65,6 +67,7 @@ class MovinetClassifier(tf.keras.Model):
self._num_classes = num_classes
self._input_specs = input_specs
self._activation = activation
self._dropout_rate = dropout_rate
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
@@ -151,7 +154,8 @@ class MovinetClassifier(tf.keras.Model):
dropout_rate=self._dropout_rate,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
-conv_type=backbone.conv_type)(
+conv_type=backbone.conv_type,
+activation=self._activation)(
x)
outputs = (x, states) if self._output_states else x
@@ -180,6 +184,7 @@ class MovinetClassifier(tf.keras.Model):
def get_config(self):
config = {
'backbone': self._backbone,
'activation': self._activation,
'num_classes': self._num_classes,
'input_specs': self._input_specs,
'dropout_rate': self._dropout_rate,
@@ -226,6 +231,7 @@ def build_movinet_model(
num_classes=num_classes,
kernel_regularizer=l2_regularizer,
input_specs=input_specs_dict,
activation=model_config.activation,
dropout_rate=model_config.dropout_rate,
output_states=model_config.output_states)
...
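For reference, a short sketch of the new activation plumbing end to end: MovinetClassifier now forwards the argument to the ClassifierHead call site shown above. The model_id and num_classes values are illustrative:

from official.vision.beta.projects.movinet.modeling import movinet
from official.vision.beta.projects.movinet.modeling import movinet_model

backbone = movinet.Movinet(model_id='a0')
# 'swish' matches the default; pass e.g. 'relu' to change the head's
# main activation without touching the backbone.
model = movinet_model.MovinetClassifier(
    backbone=backbone, num_classes=600, activation='swish')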
@@ -15,15 +15,153 @@
"""Panoptic Mask R-CNN configuration definition."""
import dataclasses
import os
from typing import List, Optional
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import optimization
from official.vision.beta.configs import maskrcnn
from official.vision.beta.configs import semantic_segmentation
SEGMENTATION_MODEL = semantic_segmentation.SemanticSegmentationModel
SEGMENTATION_HEAD = semantic_segmentation.SegmentationHead
_COCO_INPUT_PATH_BASE = 'coco'
_COCO_TRAIN_EXAMPLES = 118287
_COCO_VAL_EXAMPLES = 5000
# pytype: disable=wrong-keyword-args
@dataclasses.dataclass
class Parser(maskrcnn.Parser):
"""Panoptic Mask R-CNN parser config."""
# If segmentation_resize_eval_groundtruth is set to False, original image
# sizes are used for eval. In that case,
# segmentation_groundtruth_padded_size has to be specified too to allow for
# batching the variable input sizes of images.
segmentation_resize_eval_groundtruth: bool = True
segmentation_groundtruth_padded_size: List[int] = dataclasses.field(
default_factory=list)
segmentation_ignore_label: int = 255
@dataclasses.dataclass
class DataConfig(maskrcnn.DataConfig):
"""Input config for training."""
parser: Parser = Parser()
@dataclasses.dataclass
class PanopticMaskRCNN(maskrcnn.MaskRCNN):
"""Panoptic Mask R-CNN model config."""
segmentation_model: semantic_segmentation.SemanticSegmentationModel = (
-semantic_segmentation.SemanticSegmentationModel(num_classes=2))
+SEGMENTATION_MODEL(num_classes=2))
include_mask = True
shared_backbone: bool = True
shared_decoder: bool = True
@dataclasses.dataclass
class Losses(maskrcnn.Losses):
"""Panoptic Mask R-CNN loss config."""
semantic_segmentation_label_smoothing: float = 0.0
semantic_segmentation_ignore_label: int = 255
semantic_segmentation_class_weights: List[float] = dataclasses.field(
default_factory=list)
semantic_segmentation_use_groundtruth_dimension: bool = True
semantic_segmentation_top_k_percent_pixels: float = 1.0
semantic_segmentation_weight: float = 1.0
@dataclasses.dataclass
class PanopticMaskRCNNTask(maskrcnn.MaskRCNNTask):
"""Panoptic Mask R-CNN task config."""
model: PanopticMaskRCNN = PanopticMaskRCNN()
train_data: DataConfig = DataConfig(is_training=True)
validation_data: DataConfig = DataConfig(is_training=False,
drop_remainder=False)
segmentation_evaluation: semantic_segmentation.Evaluation = semantic_segmentation.Evaluation() # pylint: disable=line-too-long
losses: Losses = Losses()
init_checkpoint: Optional[str] = None
segmentation_init_checkpoint: Optional[str] = None
# 'init_checkpoint_modules' controls the modules that need to be initialized
# from checkpoint paths given by 'init_checkpoint' and/or
# 'segmentation_init_checkpoint'. Supports modules:
# 'backbone': Initialize MaskRCNN backbone
# 'segmentation_backbone': Initialize segmentation backbone
# 'segmentation_decoder': Initialize segmentation decoder
# 'all': Initialize all modules
init_checkpoint_modules: Optional[List[str]] = dataclasses.field(
default_factory=list)
@exp_factory.register_config_factory('panoptic_maskrcnn_resnetfpn_coco')
def panoptic_maskrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
"""COCO panoptic segmentation with Panoptic Mask R-CNN."""
train_batch_size = 64
eval_batch_size = 8
steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size
config = cfg.ExperimentConfig(
runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
task=PanopticMaskRCNNTask(
init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080', # pylint: disable=line-too-long
init_checkpoint_modules=['backbone'],
model=PanopticMaskRCNN(
num_classes=91, input_size=[1024, 1024, 3],
segmentation_model=SEGMENTATION_MODEL(
num_classes=91,
head=SEGMENTATION_HEAD(level=3))),
losses=Losses(l2_weight_decay=0.00004),
train_data=DataConfig(
input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
is_training=True,
global_batch_size=train_batch_size,
parser=Parser(
aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
validation_data=DataConfig(
input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
is_training=False,
global_batch_size=eval_batch_size,
drop_remainder=False),
annotation_file=os.path.join(_COCO_INPUT_PATH_BASE,
'instances_val2017.json')),
trainer=cfg.TrainerConfig(
train_steps=22500,
validation_steps=validation_steps,
validation_interval=steps_per_epoch,
steps_per_loop=steps_per_epoch,
summary_interval=steps_per_epoch,
checkpoint_interval=steps_per_epoch,
optimizer_config=optimization.OptimizationConfig({
'optimizer': {
'type': 'sgd',
'sgd': {
'momentum': 0.9
}
},
'learning_rate': {
'type': 'stepwise',
'stepwise': {
'boundaries': [15000, 20000],
'values': [0.12, 0.012, 0.0012],
}
},
'warmup': {
'type': 'linear',
'linear': {
'warmup_steps': 500,
'warmup_learning_rate': 0.0067
}
}
})),
restrictions=[
'task.train_data.is_training != None',
'task.validation_data.is_training != None'
])
return config
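A minimal usage sketch for the experiment registered above; get_exp_config and validate are the standard official.core entry points, and the dataset paths are placeholders to be replaced with real TFRecord locations:

from official.core import exp_factory

config = exp_factory.get_exp_config('panoptic_maskrcnn_resnetfpn_coco')
# _COCO_INPUT_PATH_BASE above defaults to 'coco'; point the pipelines at
# the actual data before training.
config.task.train_data.input_path = '/data/coco/train*'
config.task.validation_data.input_path = '/data/coco/val*'
config.validate()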
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for panoptic maskrcnn config."""
# pylint: disable=unused-import
from absl.testing import parameterized
import tensorflow as tf
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as exp_cfg
class PanopticMaskRCNNConfigTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(
('panoptic_maskrcnn_resnetfpn_coco',),
)
def test_panoptic_maskrcnn_configs(self, config_name):
config = exp_factory.get_exp_config(config_name)
self.assertIsInstance(config, cfg.ExperimentConfig)
self.assertIsInstance(config.task, exp_cfg.PanopticMaskRCNNTask)
self.assertIsInstance(config.task.model, exp_cfg.PanopticMaskRCNN)
self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
config.validate()
config.task.train_data.is_training = None
with self.assertRaisesRegex(KeyError, 'Found inconsistncy between key'):
config.validate()
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data parser and processing for Panoptic Mask R-CNN."""
import tensorflow as tf
from official.vision.beta.dataloaders import maskrcnn_input
from official.vision.beta.dataloaders import tf_example_decoder
from official.vision.beta.ops import preprocess_ops
class TfExampleDecoder(tf_example_decoder.TfExampleDecoder):
"""Tensorflow Example proto decoder."""
def __init__(self, regenerate_source_id, mask_binarize_threshold):
super(TfExampleDecoder, self).__init__(
include_mask=True,
regenerate_source_id=regenerate_source_id,
mask_binarize_threshold=None)
self._segmentation_keys_to_features = {
'image/segmentation/class/encoded':
tf.io.FixedLenFeature((), tf.string, default_value='')
}
def decode(self, serialized_example):
decoded_tensors = super(TfExampleDecoder, self).decode(serialized_example)
segmentation_parsed_tensors = tf.io.parse_single_example(
serialized_example, self._segmentation_keys_to_features)
segmentation_mask = tf.io.decode_image(
segmentation_parsed_tensors['image/segmentation/class/encoded'],
channels=1)
segmentation_mask.set_shape([None, None, 1])
decoded_tensors.update({'groundtruth_segmentation_mask': segmentation_mask})
return decoded_tensors
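A self-contained sketch of the extra feature this decoder consumes: a tf.Example carrying an image-encoded (here PNG) class mask under 'image/segmentation/class/encoded'. Only the segmentation key is exercised; the detection features required by the parent decoder are omitted:

import numpy as np
import tensorflow as tf

# Encode a tiny single-channel class mask the way the decoder expects.
mask = np.zeros((4, 4, 1), dtype=np.uint8)
mask[:2, :2, 0] = 1  # class 1 in the top-left corner
encoded_mask = tf.io.encode_png(tf.convert_to_tensor(mask)).numpy()

feature = {
    'image/segmentation/class/encoded':
        tf.train.Feature(bytes_list=tf.train.BytesList(value=[encoded_mask])),
}
serialized = tf.train.Example(
    features=tf.train.Features(feature=feature)).SerializeToString()

# Mirrors the decode path above: parse the string feature, then recover
# the [height, width, 1] mask with tf.io.decode_image.
parsed = tf.io.parse_single_example(
    serialized,
    {'image/segmentation/class/encoded':
         tf.io.FixedLenFeature((), tf.string, default_value='')})
decoded = tf.io.decode_image(
    parsed['image/segmentation/class/encoded'], channels=1)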
class Parser(maskrcnn_input.Parser):
"""Parser to parse an image and its annotations into a dictionary of tensors."""
def __init__(self,
output_size,
min_level,
max_level,
num_scales,
aspect_ratios,
anchor_size,
rpn_match_threshold=0.7,
rpn_unmatched_threshold=0.3,
rpn_batch_size_per_im=256,
rpn_fg_fraction=0.5,
aug_rand_hflip=False,
aug_scale_min=1.0,
aug_scale_max=1.0,
skip_crowd_during_training=True,
max_num_instances=100,
mask_crop_size=112,
segmentation_resize_eval_groundtruth=True,
segmentation_groundtruth_padded_size=None,
segmentation_ignore_label=255,
dtype='float32'):
"""Initializes parameters for parsing annotations in the dataset.
Args:
output_size: `Tensor` or `list` for [height, width] of output image. The
output_size should be divisible by the largest feature stride 2^max_level.
min_level: `int` number of minimum level of the output feature pyramid.
max_level: `int` number of maximum level of the output feature pyramid.
num_scales: `int` number representing intermediate scales added
on each level. For instance, num_scales=2 adds one additional
intermediate anchor scale [2^0, 2^0.5] on each level.
aspect_ratios: `list` of float numbers representing the aspect ratio of
anchors added on each level. The number indicates the ratio of width to
height. For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors
on each scale level.
anchor_size: `float` number representing the scale of size of the base
anchor to the feature stride 2^level.
rpn_match_threshold: `float`, match threshold for anchors in RPN.
rpn_unmatched_threshold: `float`, unmatched threshold for anchors in RPN.
rpn_batch_size_per_im: `int` for batch size per image in RPN.
rpn_fg_fraction: `float` for foreground fraction per batch in RPN.
aug_rand_hflip: `bool`, if True, augment training with random
horizontal flip.
aug_scale_min: `float`, the minimum scale applied to `output_size` for
data augmentation during training.
aug_scale_max: `float`, the maximum scale applied to `output_size` for
data augmentation during training.
skip_crowd_during_training: `bool`, if True, skip annotations labeled with
`is_crowd` equal to 1.
max_num_instances: `int`, maximum number of instances in an
image. The groundtruth data will be padded to `max_num_instances`.
mask_crop_size: the size which groundtruth mask is cropped to.
segmentation_resize_eval_groundtruth: `bool`, if True, eval groundtruth
masks are resized to output_size.
segmentation_groundtruth_padded_size: `Tensor` or `list` for [height,
width]. When resize_eval_groundtruth is set to False, the groundtruth
masks are padded to this size.
segmentation_ignore_label: `int`, pixels with this ignore label will not
be used for training and evaluation.
dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}.
"""
super(Parser, self).__init__(
output_size=output_size,
min_level=min_level,
max_level=max_level,
num_scales=num_scales,
aspect_ratios=aspect_ratios,
anchor_size=anchor_size,
rpn_match_threshold=rpn_match_threshold,
rpn_unmatched_threshold=rpn_unmatched_threshold,
rpn_batch_size_per_im=rpn_batch_size_per_im,
rpn_fg_fraction=rpn_fg_fraction,
aug_rand_hflip=False,  # Flipping is applied in this subclass so the segmentation mask is flipped with the image.
aug_scale_min=aug_scale_min,
aug_scale_max=aug_scale_max,
skip_crowd_during_training=skip_crowd_during_training,
max_num_instances=max_num_instances,
include_mask=True,
mask_crop_size=mask_crop_size,
dtype=dtype)
self.aug_rand_hflip = aug_rand_hflip
self._segmentation_resize_eval_groundtruth = segmentation_resize_eval_groundtruth
if (not segmentation_resize_eval_groundtruth) and (
segmentation_groundtruth_padded_size is None):
raise ValueError(
'segmentation_groundtruth_padded_size ([height, width]) needs to be'
'specified when segmentation_resize_eval_groundtruth is False.')
self._segmentation_groundtruth_padded_size = segmentation_groundtruth_padded_size
self._segmentation_ignore_label = segmentation_ignore_label
def _parse_train_data(self, data):
"""Parses data for training.
Args:
data: the decoded tensor dictionary from TfExampleDecoder.
Returns:
image: image tensor that is preprocessed to have normalized value and
dimension [output_size[0], output_size[1], 3]
labels: a dictionary of tensors used for training. The following describes
{key: value} pairs in the dictionary.
image_info: a 2D `Tensor` that encodes the information of the image and
the applied preprocessing. It is in the format of
[[original_height, original_width], [scaled_height, scaled_width]],
anchor_boxes: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, 4] representing anchor boxes at each level.
rpn_score_targets: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, anchors_per_location]. The height_l and
width_l represent the dimension of class logits at l-th level.
rpn_box_targets: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, anchors_per_location * 4]. The height_l and
width_l represent the dimension of bounding box regression output at
l-th level.
gt_boxes: Groundtruth bounding box annotations. The box is represented
in [y1, x1, y2, x2] format. The coordinates are w.r.t. the scaled
image that is fed to the network. The tensor is padded with -1 to
the fixed dimension [self._max_num_instances, 4].
gt_classes: Groundtruth classes annotations. The tensor is padded
with -1 to the fixed dimension [self._max_num_instances].
gt_masks: Groundtruth masks cropped by the bounding box and
resized to a fixed size determined by mask_crop_size.
gt_segmentation_mask: Groundtruth mask for segmentation head, this is
resized to a fixed size determined by output_size.
gt_segmentation_valid_mask: Binary mask that marks the pixels that
are supposed to be used in computing the segmentation loss while
training.
"""
segmentation_mask = data['groundtruth_segmentation_mask']
# Flips image randomly during training.
if self.aug_rand_hflip:
masks = data['groundtruth_instance_masks']
image_mask = tf.concat([data['image'], segmentation_mask], axis=2)
image_mask, boxes, masks = preprocess_ops.random_horizontal_flip(
image_mask, data['groundtruth_boxes'], masks)
segmentation_mask = image_mask[:, :, -1:]
image = image_mask[:, :, :-1]
data['image'] = image
data['boxes'] = boxes
data['masks'] = masks
image, labels = super(Parser, self)._parse_train_data(data)
image_info = labels['image_info']
image_scale = image_info[2, :]
offset = image_info[3, :]
segmentation_mask = tf.reshape(
segmentation_mask, shape=[1, data['height'], data['width']])
segmentation_mask = tf.cast(segmentation_mask, tf.float32)
# Pad the label and make sure the padded region is assigned the ignore
# label. The label is first offset by +1 and then padded with 0.
segmentation_mask += 1
segmentation_mask = tf.expand_dims(segmentation_mask, axis=3)
segmentation_mask = preprocess_ops.resize_and_crop_masks(
segmentation_mask, image_scale, self._output_size, offset)
segmentation_mask -= 1
segmentation_mask = tf.where(
tf.equal(segmentation_mask, -1),
self._segmentation_ignore_label * tf.ones_like(segmentation_mask),
segmentation_mask)
segmentation_mask = tf.squeeze(segmentation_mask, axis=0)
segmentation_valid_mask = tf.not_equal(
segmentation_mask, self._segmentation_ignore_label)
labels.update({
'gt_segmentation_mask': segmentation_mask,
'gt_segmentation_valid_mask': segmentation_valid_mask})
return image, labels
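The +1/pad/-1 sequence above is easy to misread, so here is the same arithmetic in isolation: offsetting labels by one frees the value 0 for padding, and subtracting one afterwards maps exactly the padded pixels to -1, which are then rewritten to the ignore label. A sketch with a hypothetical 2x2 mask padded to 2x4:

import tensorflow as tf

ignore_label = 255
mask = tf.constant([[0., 1.], [2., 0.]])     # real classes 0..2
shifted = mask + 1                           # classes become 1..3
padded = tf.pad(shifted, [[0, 0], [0, 2]])   # padding fills with 0
restored = padded - 1                        # real pixels 0..2, pads -> -1
restored = tf.where(
    tf.equal(restored, -1),
    ignore_label * tf.ones_like(restored),
    restored)
# restored == [[0, 1, 255, 255], [2, 0, 255, 255]]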
def _parse_eval_data(self, data):
"""Parses data for evaluation.
Args:
data: the decoded tensor dictionary from TfExampleDecoder.
Returns:
A dictionary of {'images': image, 'labels': labels} where
image: image tensor that is preprocessed to have normalized value and
dimension [output_size[0], output_size[1], 3]
labels: a dictionary of tensors used for evaluation. The following
describes {key: value} pairs in the dictionary.
source_ids: Source image id. Default value -1 if the source id is
empty in the groundtruth annotation.
image_info: a 2D `Tensor` that encodes the information of the image
and the applied preprocessing. It is in the format of
[[original_height, original_width], [scaled_height, scaled_width]],
anchor_boxes: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, 4] representing anchor boxes at each
level.
"""
segmentation_mask = tf.cast(
data['groundtruth_segmentation_mask'], tf.float32)
segmentation_mask = tf.reshape(
segmentation_mask, shape=[1, data['height'], data['width'], 1])
segmentation_mask += 1
image, labels = super(Parser, self)._parse_eval_data(data)
if self._segmentation_resize_eval_groundtruth:
# Resizes eval masks to match input image sizes. In that case, mean IoU
# is computed on output_size, not the original size of the images.
image_info = labels['image_info']
image_scale = image_info[2, :]
offset = image_info[3, :]
segmentation_mask = preprocess_ops.resize_and_crop_masks(
segmentation_mask, image_scale, self._output_size, offset)
else:
segmentation_mask = tf.image.pad_to_bounding_box(
segmentation_mask, 0, 0,
self._segmentation_groundtruth_padded_size[0],
self._segmentation_groundtruth_padded_size[1])
segmentation_mask -= 1
# Assign ignore label to the padded region.
segmentation_mask = tf.where(
tf.equal(segmentation_mask, -1),
self._segmentation_ignore_label * tf.ones_like(segmentation_mask),
segmentation_mask)
segmentation_mask = tf.squeeze(segmentation_mask, axis=0)
segmentation_valid_mask = tf.not_equal(
segmentation_mask, self._segmentation_ignore_label)
labels['groundtruths'].update({
'gt_segmentation_mask': segmentation_mask,
'gt_segmentation_valid_mask': segmentation_valid_mask})
return image, labels
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Panoptic MaskRCNN task definition."""
from typing import Any, List, Mapping, Optional, Tuple, Dict
from absl import logging
import tensorflow as tf
from official.common import dataset_fn
from official.core import task_factory
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.evaluation import coco_evaluator
from official.vision.beta.evaluation import segmentation_metrics
from official.vision.beta.losses import segmentation_losses
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as exp_cfg
from official.vision.beta.projects.panoptic_maskrcnn.dataloaders import panoptic_maskrcnn_input
from official.vision.beta.projects.panoptic_maskrcnn.modeling import factory
from official.vision.beta.tasks import maskrcnn
@task_factory.register_task_cls(exp_cfg.PanopticMaskRCNNTask)
class PanopticMaskRCNNTask(maskrcnn.MaskRCNNTask):
"""A single-replica view of training procedure.
Panoptic Mask R-CNN task provides artifacts for training/evaluation procedures,
including loading/iterating over Datasets, initializing the model, calculating
the loss, post-processing, and customized metrics with reduction.
"""
def build_model(self) -> tf.keras.Model:
"""Build Panoptic Mask R-CNN model."""
input_specs = tf.keras.layers.InputSpec(
shape=[None] + self.task_config.model.input_size)
l2_weight_decay = self.task_config.losses.l2_weight_decay
# Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss.
# (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2)
# (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss)
l2_regularizer = (tf.keras.regularizers.l2(
l2_weight_decay / 2.0) if l2_weight_decay else None)
model = factory.build_panoptic_maskrcnn(
input_specs=input_specs,
model_config=self.task_config.model,
l2_regularizer=l2_regularizer)
return model
def initialize(self, model: tf.keras.Model) -> None:
"""Loading pretrained checkpoint."""
if not self.task_config.init_checkpoint_modules:
return
def _get_checkpoint_path(checkpoint_dir_or_file):
# Accept either a direct checkpoint path or a directory containing one.
checkpoint_path = checkpoint_dir_or_file
if tf.io.gfile.isdir(checkpoint_dir_or_file):
checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir_or_file)
return checkpoint_path
for init_module in self.task_config.init_checkpoint_modules:
# Restoring checkpoint.
if init_module == 'all':
checkpoint_path = _get_checkpoint_path(
self.task_config.init_checkpoint)
ckpt = tf.train.Checkpoint(**model.checkpoint_items)
status = ckpt.restore(checkpoint_path)
status.assert_consumed()
elif init_module == 'backbone':
checkpoint_path = _get_checkpoint_path(
self.task_config.init_checkpoint)
ckpt = tf.train.Checkpoint(backbone=model.backbone)
status = ckpt.restore(checkpoint_path)
status.expect_partial().assert_existing_objects_matched()
elif init_module == 'segmentation_backbone':
checkpoint_path = _get_checkpoint_path(
self.task_config.segmentation_init_checkpoint)
ckpt = tf.train.Checkpoint(
segmentation_backbone=model.segmentation_backbone)
status = ckpt.restore(checkpoint_path)
status.expect_partial().assert_existing_objects_matched()
elif init_module == 'segmentation_decoder':
checkpoint_path = _get_checkpoint_path(
self.task_config.segmentation_init_checkpoint)
ckpt = tf.train.Checkpoint(
segmentation_decoder=model.segmentation_decoder)
status = ckpt.restore(checkpoint_path)
status.expect_partial().assert_existing_objects_matched()
else:
raise ValueError(
"Only 'all', 'backbone', 'segmentation_backbone' and/or "
"'segmentation_decoder' can be used to initialize the model, but "
"got {}".format(init_module))
logging.info('Finished loading pretrained checkpoint from %s for %s',
checkpoint_path, init_module)
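A sketch of wiring the checkpoint options this method reads; the paths are placeholders, and combining detection and segmentation modules follows the branches above:

from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as exp_cfg

task_config = exp_cfg.PanopticMaskRCNNTask()
# Detection backbone from one checkpoint, segmentation branch from another.
task_config.init_checkpoint = '/path/to/detection/ckpt'
task_config.segmentation_init_checkpoint = '/path/to/segmentation/ckpt'
task_config.init_checkpoint_modules = [
    'backbone', 'segmentation_backbone', 'segmentation_decoder']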
def build_inputs(
self,
params: exp_cfg.DataConfig,
input_context: Optional[tf.distribute.InputContext] = None
) -> tf.data.Dataset:
"""Build input dataset."""
decoder_cfg = params.decoder.get()
if params.decoder.type == 'simple_decoder':
decoder = panoptic_maskrcnn_input.TfExampleDecoder(
regenerate_source_id=decoder_cfg.regenerate_source_id,
mask_binarize_threshold=decoder_cfg.mask_binarize_threshold)
else:
raise ValueError('Unknown decoder type: {}!'.format(params.decoder.type))
parser = panoptic_maskrcnn_input.Parser(
output_size=self.task_config.model.input_size[:2],
min_level=self.task_config.model.min_level,
max_level=self.task_config.model.max_level,
num_scales=self.task_config.model.anchor.num_scales,
aspect_ratios=self.task_config.model.anchor.aspect_ratios,
anchor_size=self.task_config.model.anchor.anchor_size,
dtype=params.dtype,
rpn_match_threshold=params.parser.rpn_match_threshold,
rpn_unmatched_threshold=params.parser.rpn_unmatched_threshold,
rpn_batch_size_per_im=params.parser.rpn_batch_size_per_im,
rpn_fg_fraction=params.parser.rpn_fg_fraction,
aug_rand_hflip=params.parser.aug_rand_hflip,
aug_scale_min=params.parser.aug_scale_min,
aug_scale_max=params.parser.aug_scale_max,
skip_crowd_during_training=params.parser.skip_crowd_during_training,
max_num_instances=params.parser.max_num_instances,
mask_crop_size=params.parser.mask_crop_size,
segmentation_resize_eval_groundtruth=params.parser
.segmentation_resize_eval_groundtruth,
segmentation_groundtruth_padded_size=params.parser
.segmentation_groundtruth_padded_size,
segmentation_ignore_label=params.parser.segmentation_ignore_label)
reader = input_reader_factory.input_reader_generator(
params,
dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
decoder_fn=decoder.decode,
parser_fn=parser.parse_fn(params.is_training))
dataset = reader.read(input_context=input_context)
return dataset
def build_losses(self,
outputs: Mapping[str, Any],
labels: Mapping[str, Any],
aux_losses: Optional[Any] = None) -> Dict[str, tf.Tensor]:
"""Build Panoptic Mask R-CNN losses."""
params = self.task_config.losses
use_groundtruth_dimension = params.semantic_segmentation_use_groundtruth_dimension
segmentation_loss_fn = segmentation_losses.SegmentationLoss(
label_smoothing=params.semantic_segmentation_label_smoothing,
class_weights=params.semantic_segmentation_class_weights,
ignore_label=params.semantic_segmentation_ignore_label,
use_groundtruth_dimension=use_groundtruth_dimension,
top_k_percent_pixels=params.semantic_segmentation_top_k_percent_pixels)
semantic_segmentation_weight = params.semantic_segmentation_weight
# Compute the Mask R-CNN losses without aux_losses here; the
# regularization term is added to the total loss once below.
losses = super(PanopticMaskRCNNTask, self).build_losses(
outputs=outputs,
labels=labels,
aux_losses=None)
maskrcnn_loss = losses['model_loss']
segmentation_loss = segmentation_loss_fn(
outputs['segmentation_outputs'],
labels['gt_segmentation_mask'])
model_loss = (
maskrcnn_loss + semantic_segmentation_weight * segmentation_loss)
total_loss = model_loss
if aux_losses:
reg_loss = tf.reduce_sum(aux_losses)
total_loss = model_loss + reg_loss
losses.update({
'total_loss': total_loss,
'maskrcnn_loss': maskrcnn_loss,
'segmentation_loss': segmentation_loss,
'model_loss': model_loss,
})
return losses
def build_metrics(self, training: bool = True) -> List[
tf.keras.metrics.Metric]:
"""Build detection metrics."""
metrics = []
if training:
metric_names = [
'total_loss',
'rpn_score_loss',
'rpn_box_loss',
'frcnn_cls_loss',
'frcnn_box_loss',
'mask_loss',
'maskrcnn_loss',
'segmentation_loss',
'model_loss'
]
for name in metric_names:
metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32))
if self.task_config.segmentation_evaluation.report_train_mean_iou:
self.segmentation_train_mean_iou = segmentation_metrics.MeanIoU(
name='train_mean_iou',
num_classes=self.task_config.model.num_classes,
rescale_predictions=False,
dtype=tf.float32)
else:
self.coco_metric = coco_evaluator.COCOEvaluator(
annotation_file=self.task_config.annotation_file,
include_mask=self.task_config.model.include_mask,
per_category_metrics=self.task_config.per_category_metrics)
rescale_predictions = (not self.task_config.validation_data.parser
.segmentation_resize_eval_groundtruth)
self.segmentation_perclass_iou_metric = segmentation_metrics.PerClassIoU(
name='per_class_iou',
num_classes=self.task_config.model.num_classes,
rescale_predictions=rescale_predictions,
dtype=tf.float32)
return metrics
def train_step(self,
inputs: Tuple[Any, Any],
model: tf.keras.Model,
optimizer: tf.keras.optimizers.Optimizer,
metrics: Optional[List[Any]] = None) -> Dict[str, Any]:
"""Does forward and backward.
Args:
inputs: a dictionary of input tensors.
model: the model, forward pass definition.
optimizer: the optimizer for this training step.
metrics: a nested structure of metrics objects.
Returns:
A dictionary of logs.
"""
images, labels = inputs
num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
with tf.GradientTape() as tape:
outputs = model(
images,
image_shape=labels['image_info'][:, 1, :],
anchor_boxes=labels['anchor_boxes'],
gt_boxes=labels['gt_boxes'],
gt_classes=labels['gt_classes'],
gt_masks=(labels['gt_masks'] if self.task_config.model.include_mask
else None),
training=True)
outputs = tf.nest.map_structure(
lambda x: tf.cast(x, tf.float32), outputs)
# Computes per-replica loss.
losses = self.build_losses(
outputs=outputs, labels=labels, aux_losses=model.losses)
scaled_loss = losses['total_loss'] / num_replicas
# For mixed_precision policy, when LossScaleOptimizer is used, loss is
# scaled for numerical stability.
if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
scaled_loss = optimizer.get_scaled_loss(scaled_loss)
tvars = model.trainable_variables
grads = tape.gradient(scaled_loss, tvars)
# Scales back gradient when LossScaleOptimizer is used.
if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
grads = optimizer.get_unscaled_gradients(grads)
optimizer.apply_gradients(list(zip(grads, tvars)))
logs = {self.loss: losses['total_loss']}
if metrics:
for m in metrics:
m.update_state(losses[m.name])
if self.task_config.segmentation_evaluation.report_train_mean_iou:
segmentation_labels = {
'masks': labels['gt_segmentation_mask'],
'valid_masks': labels['gt_segmentation_valid_mask'],
'image_info': labels['image_info']
}
self.process_metrics(
metrics=[self.segmentation_train_mean_iou],
labels=segmentation_labels,
model_outputs=outputs['segmentation_outputs'])
logs.update({
self.segmentation_train_mean_iou.name:
self.segmentation_train_mean_iou.result()
})
return logs
def validation_step(self,
inputs: Tuple[Any, Any],
model: tf.keras.Model,
metrics: Optional[List[Any]] = None) -> Dict[str, Any]:
"""Validatation step.
Args:
inputs: a dictionary of input tensors.
model: the keras.Model.
metrics: a nested structure of metrics objects.
Returns:
A dictionary of logs.
"""
images, labels = inputs
outputs = model(
images,
anchor_boxes=labels['anchor_boxes'],
image_shape=labels['image_info'][:, 1, :],
training=False)
logs = {self.loss: 0}
coco_model_outputs = {
'detection_masks': outputs['detection_masks'],
'detection_boxes': outputs['detection_boxes'],
'detection_scores': outputs['detection_scores'],
'detection_classes': outputs['detection_classes'],
'num_detections': outputs['num_detections'],
'source_id': labels['groundtruths']['source_id'],
'image_info': labels['image_info']
}
segmentation_labels = {
'masks': labels['groundtruths']['gt_segmentation_mask'],
'valid_masks': labels['groundtruths']['gt_segmentation_valid_mask'],
'image_info': labels['image_info']
}
logs.update({
self.coco_metric.name: (labels['groundtruths'], coco_model_outputs),
self.segmentation_perclass_iou_metric.name: (
segmentation_labels,
outputs['segmentation_outputs'])
})
return logs
def aggregate_logs(self, state=None, step_outputs=None):
if state is None:
self.coco_metric.reset_states()
self.segmentation_perclass_iou_metric.reset_states()
state = [self.coco_metric, self.segmentation_perclass_iou_metric]
self.coco_metric.update_state(
step_outputs[self.coco_metric.name][0],
step_outputs[self.coco_metric.name][1])
self.segmentation_perclass_iou_metric.update_state(
step_outputs[self.segmentation_perclass_iou_metric.name][0],
step_outputs[self.segmentation_perclass_iou_metric.name][1])
return state
def reduce_aggregated_logs(self, aggregated_logs, global_step=None):
result = {}
result[self.coco_metric.name] = super(
PanopticMaskRCNNTask, self).reduce_aggregated_logs(
aggregated_logs=aggregated_logs,
global_step=global_step)
ious = self.segmentation_perclass_iou_metric.result()
if self.task_config.segmentation_evaluation.report_per_class_iou:
for i, value in enumerate(ious.numpy()):
result.update({'segmentation_iou/class_{}'.format(i): value})
# Computes mean IoU
result.update({'segmentation_mean_iou': tf.reduce_mean(ious).numpy()})
return result
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for panoptic_maskrcnn.py."""
import os
from absl.testing import parameterized
import tensorflow as tf
from official.vision.beta.configs import decoders as decoder_cfg
from official.vision.beta.configs import semantic_segmentation as segmentation_cfg
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as cfg
from official.vision.beta.projects.panoptic_maskrcnn.tasks import panoptic_maskrcnn
class PanopticMaskRCNNTaskTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(
(['all'],),
(['backbone'],),
(['segmentation_backbone'],),
(['segmentation_decoder'],),
(['backbone', 'segmentation_backbone'],),
(['segmentation_backbone', 'segmentation_decoder'],))
def test_model_initializing(self, init_checkpoint_modules):
shared_backbone = ('segmentation_backbone' not in init_checkpoint_modules)
shared_decoder = ('segmentation_decoder' not in init_checkpoint_modules and
shared_backbone)
task_config = cfg.PanopticMaskRCNNTask(
model=cfg.PanopticMaskRCNN(
num_classes=2,
input_size=[640, 640, 3],
segmentation_model=segmentation_cfg.SemanticSegmentationModel(
decoder=decoder_cfg.Decoder(type='fpn')),
shared_backbone=shared_backbone,
shared_decoder=shared_decoder))
task = panoptic_maskrcnn.PanopticMaskRCNNTask(task_config)
model = task.build_model()
ckpt = tf.train.Checkpoint(**model.checkpoint_items)
ckpt_save_dir = self.create_tempdir().full_path
ckpt.save(os.path.join(ckpt_save_dir, 'ckpt'))
if (init_checkpoint_modules == ['all'] or
'backbone' in init_checkpoint_modules):
task._task_config.init_checkpoint = ckpt_save_dir
if ('segmentation_backbone' in init_checkpoint_modules or
'segmentation_decoder' in init_checkpoint_modules):
task._task_config.segmentation_init_checkpoint = ckpt_save_dir
task._task_config.init_checkpoint_modules = init_checkpoint_modules
task.initialize(model)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Multi-task SimCLR configs."""
import dataclasses
from typing import List, Tuple
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling.multitask import configs as multitask_configs
from official.vision.beta.configs import backbones
from official.vision.beta.configs import common
from official.vision.beta.projects.simclr.configs import simclr as simclr_configs
from official.vision.beta.projects.simclr.modeling import simclr_model
@dataclasses.dataclass
class SimCLRMTHeadConfig(hyperparams.Config):
"""Per-task specific configs."""
# Supervised head is required for finetune, but optional for pretrain.
supervised_head: simclr_configs.SupervisedHead = simclr_configs.SupervisedHead(
num_classes=1001)
mode: str = simclr_model.PRETRAIN
@dataclasses.dataclass
class SimCLRMTModelConfig(hyperparams.Config):
"""Model config for multi-task SimCLR model."""
input_size: List[int] = dataclasses.field(default_factory=list)
backbone: backbones.Backbone = backbones.Backbone(
type='resnet', resnet=backbones.ResNet())
backbone_trainable: bool = True
projection_head: simclr_configs.ProjectionHead = simclr_configs.ProjectionHead(
proj_output_dim=128, num_proj_layers=3, ft_proj_idx=1)
norm_activation: common.NormActivation = common.NormActivation(
norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)
heads: Tuple[SimCLRMTHeadConfig, ...] = ()
# L2 weight decay is used in the model, not in the task.
# Note that this cannot be used together with the LARS optimizer.
l2_weight_decay: float = 0.0
@exp_factory.register_config_factory('multitask_simclr')
def multitask_simclr() -> multitask_configs.MultiTaskExperimentConfig:
return multitask_configs.MultiTaskExperimentConfig(
task=multitask_configs.MultiTaskConfig(
model=SimCLRMTModelConfig(
heads=(SimCLRMTHeadConfig(mode=simclr_model.PRETRAIN),
SimCLRMTHeadConfig(mode=simclr_model.FINETUNE))),
task_routines=(multitask_configs.TaskRoutine(
task_name=simclr_model.PRETRAIN,
task_config=simclr_configs.SimCLRPretrainTask(),
task_weight=2.0),
multitask_configs.TaskRoutine(
task_name=simclr_model.FINETUNE,
task_config=simclr_configs.SimCLRFinetuneTask(),
task_weight=1.0))),
trainer=multitask_configs.MultiTaskTrainerConfig())
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for multitask_config."""
import tensorflow as tf
from official.core import exp_factory
from official.modeling.multitask import configs as multitask_configs
from official.vision.beta.projects.simclr.configs import multitask_config as simclr_multitask_config
from official.vision.beta.projects.simclr.configs import simclr as exp_cfg
class MultitaskConfigTest(tf.test.TestCase):
def test_simclr_configs(self):
config = exp_factory.get_exp_config('multitask_simclr')
self.assertIsInstance(config, multitask_configs.MultiTaskExperimentConfig)
self.assertIsInstance(config.task.model,
simclr_multitask_config.SimCLRMTModelConfig)
self.assertIsInstance(config.task.task_routines[0].task_config,
exp_cfg.SimCLRPretrainTask)
self.assertIsInstance(config.task.task_routines[1].task_config,
exp_cfg.SimCLRFinetuneTask)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Multi-task image multi-taskSimCLR model definition."""
from typing import Dict, Text
import tensorflow as tf
from official.modeling.multitask import base_model
from official.vision.beta.modeling import backbones
from official.vision.beta.projects.simclr.configs import multitask_config as simclr_multitask_config
from official.vision.beta.projects.simclr.heads import simclr_head
from official.vision.beta.projects.simclr.modeling import simclr_model
PROJECTION_OUTPUT_KEY = 'projection_outputs'
SUPERVISED_OUTPUT_KEY = 'supervised_outputs'
class SimCLRMTModel(base_model.MultiTaskBaseModel):
"""A multi-task SimCLR model that does both pretrain and finetune."""
def __init__(self, config: simclr_multitask_config.SimCLRMTModelConfig,
**kwargs):
self._config = config
# Build shared backbone.
self._input_specs = tf.keras.layers.InputSpec(shape=[None] +
config.input_size)
l2_weight_decay = config.l2_weight_decay
# Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss.
# (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2)
# (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss)
self._l2_regularizer = (
tf.keras.regularizers.l2(l2_weight_decay /
2.0) if l2_weight_decay else None)
self._backbone = backbones.factory.build_backbone(
input_specs=self._input_specs,
backbone_config=config.backbone,
norm_activation_config=config.norm_activation,
l2_regularizer=self._l2_regularizer)
super().__init__(**kwargs)
def _instantiate_sub_tasks(self) -> Dict[Text, tf.keras.Model]:
tasks = {}
# Build the shared projection head
norm_activation_config = self._config.norm_activation
projection_head_config = self._config.projection_head
projection_head = simclr_head.ProjectionHead(
proj_output_dim=projection_head_config.proj_output_dim,
num_proj_layers=projection_head_config.num_proj_layers,
ft_proj_idx=projection_head_config.ft_proj_idx,
kernel_regularizer=self._l2_regularizer,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon)
for model_config in self._config.heads:
# Build supervised head
supervised_head_config = model_config.supervised_head
if supervised_head_config:
if supervised_head_config.zero_init:
s_kernel_initializer = 'zeros'
else:
s_kernel_initializer = 'random_uniform'
supervised_head = simclr_head.ClassificationHead(
num_classes=supervised_head_config.num_classes,
kernel_initializer=s_kernel_initializer,
kernel_regularizer=self._l2_regularizer)
else:
supervised_head = None
tasks[model_config.mode] = simclr_model.SimCLRModel(
input_specs=self._input_specs,
backbone=self._backbone,
projection_head=projection_head,
supervised_head=supervised_head,
mode=model_config.mode,
backbone_trainable=self._config.backbone_trainable)
return tasks
# TODO(huythong): Implement initialize function to load the pretrained
# checkpoint of backbone.
# def initialize(self):
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for multitask_model."""
import os.path
import tensorflow as tf
from official.vision.beta.projects.simclr.configs import multitask_config
from official.vision.beta.projects.simclr.modeling import multitask_model
from official.vision.beta.projects.simclr.modeling import simclr_model
class MultitaskModelTest(tf.test.TestCase):
def test_initialize_model_success(self):
ckpt_dir = self.get_temp_dir()
config = multitask_config.SimCLRMTModelConfig(
input_size=[64, 64, 3],
heads=(multitask_config.SimCLRMTHeadConfig(mode=simclr_model.PRETRAIN),
multitask_config.SimCLRMTHeadConfig(mode=simclr_model.FINETUNE)))
model = multitask_model.SimCLRMTModel(config)
self.assertIn(simclr_model.PRETRAIN, model.sub_tasks)
self.assertIn(simclr_model.FINETUNE, model.sub_tasks)
ckpt = tf.train.Checkpoint(backbone=model._backbone)
ckpt.save(os.path.join(ckpt_dir, 'ckpt'))
model.initialize()
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Trainer binary for multitask simclr."""
from absl import app
from absl import flags
import gin
from official.common import distribute_utils
from official.common import flags as tfm_flags
from official.core import train_utils
from official.modeling import performance
from official.modeling.multitask import multitask
from official.modeling.multitask import train_lib
# pylint: disable=unused-import
from official.vision.beta.projects.simclr.common import registry_imports
from official.vision.beta.projects.simclr.configs import multitask_config
from official.vision.beta.projects.simclr.modeling import multitask_model
# pylint: enable=unused-import
FLAGS = flags.FLAGS
def main(_):
gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params)
params = train_utils.parse_configuration(FLAGS)
model_dir = FLAGS.model_dir
if 'train' in FLAGS.mode:
    # Pure eval modes do not output yaml files; otherwise a continuous eval
    # job may race against the train job when writing the same file.
train_utils.serialize_config(params, model_dir)
  # Sets the mixed precision policy. Using 'mixed_float16' or 'mixed_bfloat16'
  # can significantly speed up the model by computing in float16 on GPUs and
  # in bfloat16 on TPUs. loss_scale takes effect only when the dtype is
  # float16.
if params.runtime.mixed_precision_dtype:
performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype)
distribution_strategy = distribute_utils.get_distribution_strategy(
distribution_strategy=params.runtime.distribution_strategy,
all_reduce_alg=params.runtime.all_reduce_alg,
num_gpus=params.runtime.num_gpus,
tpu_address=params.runtime.tpu)
with distribution_strategy.scope():
tasks = multitask.MultiTask.from_config(params.task)
model = multitask_model.SimCLRMTModel(params.task.model)
train_lib.run_experiment(
distribution_strategy=distribution_strategy,
task=tasks,
model=model,
mode=FLAGS.mode,
params=params,
model_dir=model_dir)
train_utils.save_gin_config(FLAGS.mode, model_dir)
if __name__ == '__main__':
tfm_flags.define_flags()
app.run(main)
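# Example invocation (a sketch: the experiment name and paths are
# hypothetical; the flags themselves are the standard ones defined by
# tfm_flags.define_flags()):
#
#   python3 train.py \
#     --experiment=<multitask_simclr_experiment> \
#     --mode=train_and_eval \
#     --model_dir=/tmp/simclr_mt \
#     --config_file=/path/to/multitask_config.yaml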
@@ -59,6 +59,12 @@ VIT_SPECS = {
             patch_size=14,
             transformer=dict(mlp_dim=5120, num_heads=16, num_layers=32),
         ),
+    'vit-g14':
+        dict(
+            hidden_size=1664,
+            patch_size=14,
+            transformer=dict(mlp_dim=8192, num_heads=16, num_layers=48),
+        ),
 }
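# A quick sanity check of the new entry (a sketch; assumes VIT_SPECS is the
# plain dict literal shown above):
#
#   spec = VIT_SPECS['vit-g14']
#   assert spec['hidden_size'] == 1664
#   assert spec['transformer'] == dict(mlp_dim=8192, num_heads=16,
#                                      num_layers=48)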
...
@@ -13,6 +13,7 @@
 # limitations under the License.
 """Mask R-CNN task definition."""
+import os
 from typing import Any, Optional, List, Tuple, Mapping
 from absl import logging
@@ -26,6 +27,7 @@ from official.vision.beta.dataloaders import maskrcnn_input
 from official.vision.beta.dataloaders import tf_example_decoder
 from official.vision.beta.dataloaders import tf_example_label_map_decoder
 from official.vision.beta.evaluation import coco_evaluator
+from official.vision.beta.evaluation import coco_utils
 from official.vision.beta.losses import maskrcnn_losses
 from official.vision.beta.modeling import factory
@@ -259,10 +261,33 @@ class MaskRCNNTask(base_task.Task):
         metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32))
     else:
-      self.coco_metric = coco_evaluator.COCOEvaluator(
-          annotation_file=self._task_config.annotation_file,
-          include_mask=self._task_config.model.include_mask,
-          per_category_metrics=self._task_config.per_category_metrics)
+      if self._task_config.annotation_file:
+        self.coco_metric = coco_evaluator.COCOEvaluator(
+            annotation_file=self._task_config.annotation_file,
+            include_mask=self._task_config.model.include_mask,
+            per_category_metrics=self._task_config.per_category_metrics)
+      else:
+        annotation_path = os.path.join(self._logging_dir, 'annotation.json')
+        if tf.io.gfile.exists(annotation_path):
+          logging.info(
+              'annotation.json file exists, skipping creating the annotation'
+              ' file.')
+        else:
+          if self._task_config.validation_data.num_examples <= 0:
+            raise ValueError('validation_data.num_examples needs to be > 0')
+          if not self._task_config.validation_data.input_path:
+            raise ValueError('Cannot create annotation file for tfds.')
+          logging.info(
+              'Creating coco-style annotation file: %s', annotation_path)
+          coco_utils.scan_and_generator_annotation_file(
+              self._task_config.validation_data.input_path,
+              self._task_config.validation_data.file_type,
+              self._task_config.validation_data.num_examples,
+              self._task_config.model.include_mask, annotation_path)
+        self.coco_metric = coco_evaluator.COCOEvaluator(
+            annotation_file=annotation_path,
+            include_mask=self._task_config.model.include_mask,
+            per_category_metrics=self._task_config.per_category_metrics)
     return metrics
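# How the new branch above is selected (illustrative field values only):
#
#   - task.annotation_file set:   COCOEvaluator reads the provided COCO json.
#   - task.annotation_file unset: the task scans validation_data.input_path
#     (file_type and num_examples must be set; tfds inputs are not supported)
#     and writes <logging_dir>/annotation.json for the evaluator to use.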
...
@@ -446,14 +446,13 @@ class Controller:
           f"{num_steps}. Old value was {current_step}, expected updated value "
           f"to be {expected_step}, but it was {self.global_step.numpy()}.")
       logging.warning(message)
-      return
     train_output = train_output or {}
     for action in self.train_actions:
       action(train_output)
     train_output = tf.nest.map_structure(utils.get_value, train_output)
-    current_step = expected_step
+    current_step = self.global_step.numpy()
     steps_per_second = self.step_timer.steps_per_second()
     _log(f"train | step: {current_step: 6d} | "
          f"steps/sec: {steps_per_second: 6.1f} | "
...