Commit 20c78a91 authored by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 301643231
parent b86ffb12
@@ -80,12 +80,11 @@ class DetectionDistributedExecutor(executor.DistributedExecutor):
         all_losses = loss_fn(labels, outputs)
         losses = {}
         for k, v in all_losses.items():
-          v = tf.reduce_mean(v) / strategy.num_replicas_in_sync
-          losses[k] = v
-        loss = losses['total_loss']
+          losses[k] = tf.reduce_mean(v)
+        per_replica_loss = losses['total_loss'] / strategy.num_replicas_in_sync
         _update_state(labels, outputs)
-        grads = tape.gradient(loss, trainable_variables)
+        grads = tape.gradient(per_replica_loss, trainable_variables)
         optimizer.apply_gradients(zip(grads, trainable_variables))
       return losses
...
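Editorial aside on the hunk above: only the loss fed to tape.gradient is divided by num_replicas_in_sync, so the gradients summed across replicas correspond (approximately) to the gradient of the loss averaged over the global batch, while the returned `losses` dict keeps plain per-replica means for reporting. A minimal standalone sketch of that pattern, not code from this commit; `model`, `optimizer`, and `loss_fn` are hypothetical placeholders and the strategy is assumed to come from tf.distribute.get_strategy():

import tensorflow.compat.v2 as tf

strategy = tf.distribute.get_strategy()  # e.g. a MirroredStrategy in real training

def train_step(model, optimizer, loss_fn, features, labels):
  with tf.GradientTape() as tape:
    outputs = model(features, training=True)
    all_losses = loss_fn(labels, outputs)
    losses = {k: tf.reduce_mean(v) for k, v in all_losses.items()}
    # Scale only the loss used for gradients; reported metrics stay unscaled.
    per_replica_loss = losses['total_loss'] / strategy.num_replicas_in_sync
  grads = tape.gradient(per_replica_loss, model.trainable_variables)
  optimizer.apply_gradients(zip(grads, model.trainable_variables))
  return losses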
@@ -21,8 +21,6 @@ from __future__ import print_function
 import abc
 import functools
 import re
-from absl import logging
 import tensorflow.compat.v2 as tf

 from official.vision.detection.modeling import checkpoint_utils
 from official.vision.detection.modeling import learning_rates
@@ -60,11 +58,10 @@ class OptimizerFactory(object):

 def _make_filter_trainable_variables_fn(frozen_variable_prefix):
-  """Creates a function for filtering trainable varialbes.
-  """
+  """Creates a function for filtering trainable varialbes."""

   def _filter_trainable_variables(variables):
-    """Filters trainable varialbes
+    """Filters trainable varialbes.

     Args:
       variables: a list of tf.Variable to be filtered.
@@ -141,8 +138,7 @@ class Model(object):
     return self._optimizer_fn(self._learning_rate)

   def make_filter_trainable_variables_fn(self):
-    """Creates a function for filtering trainable varialbes.
-    """
+    """Creates a function for filtering trainable varialbes."""
     return _make_filter_trainable_variables_fn(self._frozen_variable_prefix)

   def weight_decay_loss(self, trainable_variables):
@@ -151,8 +147,6 @@ class Model(object):
         if self._regularization_var_regex is None
         or re.match(self._regularization_var_regex, v.name)
     ]
-    logging.info('Regularization Variables: %s',
-                 [v.name for v in reg_variables])
     return self._l2_weight_decay * tf.add_n(
         [tf.nn.l2_loss(v) for v in reg_variables])
...
@@ -15,6 +15,7 @@
 """Factory to build detection model."""

+from official.vision.detection.modeling import maskrcnn_model
 from official.vision.detection.modeling import retinanet_model
@@ -22,6 +23,8 @@ def model_generator(params):
   """Model function generator."""
   if params.type == 'retinanet':
     model_fn = retinanet_model.RetinanetModel(params)
+  elif params.type == 'mask_rcnn':
+    model_fn = maskrcnn_model.MaskrcnnModel(params)
   else:
     raise ValueError('Model %s is not supported.'% params.type)
...
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+from absl import logging
 import tensorflow.compat.v2 as tf
@@ -89,6 +90,8 @@ class RpnScoreLoss(object):

   def __init__(self, params):
     self._rpn_batch_size_per_im = params.rpn_batch_size_per_im
+    self._binary_crossentropy = tf.keras.losses.BinaryCrossentropy(
+        reduction=tf.keras.losses.Reduction.SUM, from_logits=True)

   def __call__(self, score_outputs, labels):
     """Computes total RPN detection loss.
@@ -129,16 +132,15 @@ class RpnScoreLoss(object):
     with tf.name_scope('rpn_score_loss'):
       mask = tf.math.logical_or(tf.math.equal(score_targets, 1),
                                 tf.math.equal(score_targets, 0))
-      score_targets = tf.math.maximum(score_targets, tf.zeros_like(score_targets))
-      # RPN score loss is sum over all except ignored samples.
-      # Keep the compat.v1 loss because Keras does not have a
-      # sigmoid_cross_entropy substitution yet.
-      # TODO(b/143720144): replace this loss.
-      score_loss = tf.compat.v1.losses.sigmoid_cross_entropy(
-          score_targets,
-          score_outputs,
-          weights=mask,
-          reduction=tf.compat.v1.losses.Reduction.SUM)
+
+      score_targets = tf.math.maximum(score_targets,
+                                      tf.zeros_like(score_targets))
+
+      score_targets = tf.expand_dims(score_targets, axis=-1)
+      score_outputs = tf.expand_dims(score_outputs, axis=-1)
+      score_loss = self._binary_crossentropy(
+          score_targets, score_outputs, sample_weight=mask)
       score_loss /= normalizer
       return score_loss
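The replacement above builds a tf.keras.losses.BinaryCrossentropy with from_logits=True and Reduction.SUM and passes the old `weights=mask` through `sample_weight`; the expand_dims calls give the tensors a trailing size-1 axis so the weighting stays element-wise. A small standalone check on dummy data (not part of the commit) illustrating the intended equivalence with a hand-rolled summed sigmoid cross entropy:

import tensorflow.compat.v2 as tf

logits = tf.random.normal([2, 8])
targets = tf.cast(tf.random.uniform([2, 8], maxval=2, dtype=tf.int32), tf.float32)
mask = tf.cast(tf.random.uniform([2, 8], maxval=2, dtype=tf.int32), tf.float32)

# Pre-change formulation: summed sigmoid cross entropy, weighted element-wise.
reference = tf.reduce_sum(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=targets, logits=logits) * mask)

# Post-change formulation: trailing size-1 axis makes each element its own
# "sample", so sample_weight applies element-wise like the old weights=mask.
bce = tf.keras.losses.BinaryCrossentropy(
    reduction=tf.keras.losses.Reduction.SUM, from_logits=True)
keras_loss = bce(tf.expand_dims(targets, -1), tf.expand_dims(logits, -1),
                 sample_weight=mask)

tf.debugging.assert_near(reference, keras_loss)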
@@ -147,7 +149,10 @@ class RpnBoxLoss(object):
   """Region Proposal Network box regression loss function."""

   def __init__(self, params):
-    self._delta = params.huber_loss_delta
+    logging.info('RpnBoxLoss huber_loss_delta %s', params.huber_loss_delta)
+    # The delta is typically around the mean value of regression target.
+    # for instances, the regression targets of 512x512 input with 6 anchors on
+    # P2-P6 pyramid is about [0.1, 0.1, 0.2, 0.2].
     self._huber_loss = tf.keras.losses.Huber(
         delta=params.huber_loss_delta, reduction=tf.keras.losses.Reduction.SUM)
@@ -171,35 +176,32 @@ class RpnBoxLoss(object):
     box_losses = []
     for level in levels:
-      box_losses.append(
-          self._rpn_box_loss(
-              box_outputs[level], labels[level], delta=self._delta))
+      box_losses.append(self._rpn_box_loss(box_outputs[level], labels[level]))

     # Sum per level losses to total loss.
     return tf.add_n(box_losses)

-  def _rpn_box_loss(self, box_outputs, box_targets, normalizer=1.0, delta=1./9):
+  def _rpn_box_loss(self, box_outputs, box_targets, normalizer=1.0):
     """Computes box regression loss."""
-    # The delta is typically around the mean value of regression target.
-    # for instances, the regression targets of 512x512 input with 6 anchors on
-    # P2-P6 pyramid is about [0.1, 0.1, 0.2, 0.2].
     with tf.name_scope('rpn_box_loss'):
-      mask = tf.math.not_equal(box_targets, 0.0)
-      # The loss is normalized by the sum of non-zero weights before additional
-      # normalizer provided by the function caller.
-      box_loss = tf.compat.v1.losses.huber_loss(
-          box_targets,
-          box_outputs,
-          weights=mask,
-          delta=delta,
-          reduction=tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
-      box_loss /= normalizer
+      mask = tf.cast(tf.not_equal(box_targets, 0.0), dtype=tf.float32)
+      box_targets = tf.expand_dims(box_targets, axis=-1)
+      box_outputs = tf.expand_dims(box_outputs, axis=-1)
+      box_loss = self._huber_loss(box_targets, box_outputs, sample_weight=mask)
+      # The loss is normalized by the sum of non-zero weights and additional
+      # normalizer provided by the function caller. Using + 0.01 here to avoid
+      # division by zero.
+      box_loss /= normalizer * (tf.reduce_sum(mask) + 0.01)
       return box_loss


 class FastrcnnClassLoss(object):
   """Fast R-CNN classification loss function."""

+  def __init__(self):
+    self._categorical_crossentropy = tf.keras.losses.CategoricalCrossentropy(
+        reduction=tf.keras.losses.Reduction.SUM, from_logits=True)
+
   def __call__(self, class_outputs, class_targets):
     """Computes the class loss (Fast-RCNN branch) of Mask-RCNN.
@@ -218,24 +220,19 @@ class FastrcnnClassLoss(object):
       a scalar tensor representing total class loss.
     """
     with tf.name_scope('fast_rcnn_loss'):
-      _, _, num_classes = class_outputs.get_shape().as_list()
+      batch_size, num_boxes, num_classes = class_outputs.get_shape().as_list()
       class_targets = tf.cast(class_targets, dtype=tf.int32)
       class_targets_one_hot = tf.one_hot(class_targets, num_classes)
-      return self._fast_rcnn_class_loss(class_outputs, class_targets_one_hot)
+      return self._fast_rcnn_class_loss(class_outputs, class_targets_one_hot,
+                                        normalizer=batch_size * num_boxes / 2.0)

   def _fast_rcnn_class_loss(self, class_outputs, class_targets_one_hot,
-                            normalizer=1.0):
+                            normalizer):
     """Computes classification loss."""
     with tf.name_scope('fast_rcnn_class_loss'):
-      # The loss is normalized by the sum of non-zero weights before additional
-      # normalizer provided by the function caller.
-      # Keep the compat.v1 loss because Keras does not have a
-      # softmax_cross_entropy substitution yet.
-      # TODO(b/143720144): replace this loss.
-      class_loss = tf.compat.v1.losses.softmax_cross_entropy(
-          class_targets_one_hot,
-          class_outputs,
-          reduction=tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
+      class_loss = self._categorical_crossentropy(class_targets_one_hot,
+                                                  class_outputs)
       class_loss /= normalizer
       return class_loss
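FastrcnnClassLoss now sums a Keras CategoricalCrossentropy over all boxes and divides by an explicit normalizer computed from the static output shape (batch_size * num_boxes / 2.0) instead of relying on the removed SUM_BY_NONZERO_WEIGHTS reduction. A standalone sketch of that shape of computation on dummy data, not code from the commit; the dimensions are made up for illustration:

import tensorflow.compat.v2 as tf

cce = tf.keras.losses.CategoricalCrossentropy(
    reduction=tf.keras.losses.Reduction.SUM, from_logits=True)

def fast_rcnn_class_loss(class_outputs, class_targets, normalizer):
  num_classes = class_outputs.get_shape().as_list()[-1]
  one_hot = tf.one_hot(tf.cast(class_targets, tf.int32), num_classes)
  # Summed cross entropy over every (batch, box) pair, then an explicit
  # caller-supplied normalizer, mirroring the change above.
  return cce(one_hot, class_outputs) / normalizer

batch_size, num_boxes, num_classes = 2, 8, 5
logits = tf.random.normal([batch_size, num_boxes, num_classes])
targets = tf.random.uniform([batch_size, num_boxes], maxval=num_classes,
                            dtype=tf.int32)
print(fast_rcnn_class_loss(logits, targets, batch_size * num_boxes / 2.0))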
@@ -244,7 +241,12 @@ class FastrcnnBoxLoss(object):
   """Fast R-CNN box regression loss function."""

   def __init__(self, params):
-    self._delta = params.huber_loss_delta
+    logging.info('FastrcnnBoxLoss huber_loss_delta %s', params.huber_loss_delta)
+    # The delta is typically around the mean value of regression target.
+    # for instances, the regression targets of 512x512 input with 6 anchors on
+    # P2-P6 pyramid is about [0.1, 0.1, 0.2, 0.2].
+    self._huber_loss = tf.keras.losses.Huber(
+        delta=params.huber_loss_delta, reduction=tf.keras.losses.Reduction.SUM)

   def __call__(self, box_outputs, class_targets, box_targets):
     """Computes the box loss (Fast-RCNN branch) of Mask-RCNN.
@@ -296,36 +298,32 @@ class FastrcnnBoxLoss(object):
               dtype=box_outputs.dtype), tf.reshape(box_outputs, [-1, 4]))
       box_outputs = tf.reshape(box_outputs, [batch_size, -1, 4])

-      return self._fast_rcnn_box_loss(box_outputs, box_targets, class_targets,
-                                      delta=self._delta)
+      return self._fast_rcnn_box_loss(box_outputs, box_targets, class_targets)

   def _fast_rcnn_box_loss(self, box_outputs, box_targets, class_targets,
-                          normalizer=1.0, delta=1.):
+                          normalizer=1.0):
     """Computes box regression loss."""
-    # The delta is typically around the mean value of regression target.
-    # for instances, the regression targets of 512x512 input with 6 anchors on
-    # P2-P6 pyramid is about [0.1, 0.1, 0.2, 0.2].
     with tf.name_scope('fast_rcnn_box_loss'):
       mask = tf.tile(tf.expand_dims(tf.greater(class_targets, 0), axis=2),
                      [1, 1, 4])
-      # The loss is normalized by the sum of non-zero weights before additional
-      # normalizer provided by the function caller.
-      # Keep the compat.v1 loss because Keras does not have a
-      # Reduction.SUM_BY_NONZERO_WEIGHTS substitution yet.
-      # TODO(b/143720144): replace this loss.
-      box_loss = tf.compat.v1.losses.huber_loss(
-          box_targets,
-          box_outputs,
-          weights=mask,
-          delta=delta,
-          reduction=tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
-      box_loss /= normalizer
+      mask = tf.cast(mask, dtype=tf.float32)
+      box_targets = tf.expand_dims(box_targets, axis=-1)
+      box_outputs = tf.expand_dims(box_outputs, axis=-1)
+      box_loss = self._huber_loss(box_targets, box_outputs, sample_weight=mask)
+      # The loss is normalized by the number of ones in mask,
+      # additianal normalizer provided by the user and using 0.01 here to avoid
+      # division by 0.
+      box_loss /= normalizer * (tf.reduce_sum(mask) + 0.01)
       return box_loss


 class MaskrcnnLoss(object):
   """Mask R-CNN instance segmentation mask loss function."""

+  def __init__(self):
+    self._binary_crossentropy = tf.keras.losses.BinaryCrossentropy(
+        reduction=tf.keras.losses.Reduction.SUM, from_logits=True)
+
   def __call__(self, mask_outputs, mask_targets, select_class_targets):
     """Computes the mask loss of Mask-RCNN.
@@ -358,11 +356,16 @@ class MaskrcnnLoss(object):
           tf.reshape(tf.greater(select_class_targets, 0),
                      [batch_size, num_masks, 1, 1]),
           [1, 1, mask_height, mask_width])
-      return tf.compat.v1.losses.sigmoid_cross_entropy(
-          mask_targets,
-          mask_outputs,
-          weights=weights,
-          reduction=tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
+      weights = tf.cast(weights, dtype=tf.float32)
+
+      mask_targets = tf.expand_dims(mask_targets, axis=-1)
+      mask_outputs = tf.expand_dims(mask_outputs, axis=-1)
+      mask_loss = self._binary_crossentropy(mask_targets, mask_outputs,
+                                            sample_weight=weights)
+      # The loss is normalized by the number of 1's in weights and
+      # + 0.01 is used to avoid division by zero.
+      return mask_loss / (tf.reduce_sum(weights) + 0.01)


 class RetinanetClassLoss(object):
...
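The two box losses and the mask loss above share one pattern: a SUM-reduced Keras loss weighted element-wise by a 0/1 mask, then divided by the mask count plus 0.01 so that an empty mask does not divide by zero, which takes over the role of the removed compat.v1 SUM_BY_NONZERO_WEIGHTS reduction. A minimal standalone sketch of that pattern, not code from the commit; delta=1./9. is just an example value, the commit reads it from params.huber_loss_delta:

import tensorflow.compat.v2 as tf

huber = tf.keras.losses.Huber(delta=1. / 9.,
                              reduction=tf.keras.losses.Reduction.SUM)

def masked_huber(box_outputs, box_targets, normalizer=1.0):
  # Only positions with a non-zero regression target contribute to the loss.
  mask = tf.cast(tf.not_equal(box_targets, 0.0), tf.float32)
  # Trailing size-1 axis so sample_weight applies per element under SUM.
  loss = huber(tf.expand_dims(box_targets, -1),
               tf.expand_dims(box_outputs, -1),
               sample_weight=mask)
  # Normalize by the number of contributing elements (+ 0.01 guards empty masks).
  return loss / (normalizer * (tf.reduce_sum(mask) + 0.01))

print(masked_huber(tf.random.normal([2, 16, 4]), tf.random.normal([2, 16, 4])))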
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Model defination for the Mask R-CNN Model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v2 as tf
from tensorflow.python.keras import backend
from official.vision.detection.dataloader import anchor
from official.vision.detection.dataloader import mode_keys
from official.vision.detection.evaluation import factory as eval_factory
from official.vision.detection.modeling import base_model
from official.vision.detection.modeling import losses
from official.vision.detection.modeling.architecture import factory
from official.vision.detection.ops import postprocess_ops
from official.vision.detection.ops import roi_ops
from official.vision.detection.ops import sampling_ops
from official.vision.detection.ops import spatial_transform_ops
from official.vision.detection.utils import box_utils
class MaskrcnnModel(base_model.Model):
  """Mask R-CNN model function."""

  def __init__(self, params):
    super(MaskrcnnModel, self).__init__(params)

    # For eval metrics.
    self._params = params
    self._keras_model = None
    self._include_mask = params.architecture.include_mask

    # Architecture generators.
    self._backbone_fn = factory.backbone_generator(params)
    self._fpn_fn = factory.multilevel_features_generator(params)
    self._rpn_head_fn = factory.rpn_head_generator(params.rpn_head)
    self._generate_rois_fn = roi_ops.ROIGenerator(params.roi_proposal)
    self._sample_rois_fn = sampling_ops.ROISampler(params.roi_sampling)
    self._sample_masks_fn = sampling_ops.MaskSampler(params.mask_sampling)
    self._frcnn_head_fn = factory.fast_rcnn_head_generator(params.frcnn_head)
    if self._include_mask:
      self._mrcnn_head_fn = factory.mask_rcnn_head_generator(params.mrcnn_head)

    # Loss function.
    self._rpn_score_loss_fn = losses.RpnScoreLoss(params.rpn_score_loss)
    self._rpn_box_loss_fn = losses.RpnBoxLoss(params.rpn_box_loss)
    self._frcnn_class_loss_fn = losses.FastrcnnClassLoss()
    self._frcnn_box_loss_fn = losses.FastrcnnBoxLoss(params.frcnn_box_loss)
    if self._include_mask:
      self._mask_loss_fn = losses.MaskrcnnLoss()

    self._generate_detections_fn = postprocess_ops.GenericDetectionGenerator(
        params.postprocess)

    self._transpose_input = params.train.transpose_input
    assert not self._transpose_input, 'Transpose input is not supportted.'
  def build_outputs(self, inputs, mode):
    is_training = mode == mode_keys.TRAIN
    model_outputs = {}

    image = inputs['image']
    _, image_height, image_width, _ = image.get_shape().as_list()
    backbone_features = self._backbone_fn(image, is_training)
    fpn_features = self._fpn_fn(backbone_features, is_training)

    rpn_score_outputs, rpn_box_outputs = self._rpn_head_fn(
        fpn_features, is_training)
    model_outputs.update({
        'rpn_score_outputs':
            tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                  rpn_score_outputs),
        'rpn_box_outputs':
            tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                  rpn_box_outputs),
    })
    input_anchor = anchor.Anchor(self._params.anchor.min_level,
                                 self._params.anchor.max_level,
                                 self._params.anchor.num_scales,
                                 self._params.anchor.aspect_ratios,
                                 self._params.anchor.anchor_size,
                                 (image_height, image_width))
    rpn_rois, _ = self._generate_rois_fn(rpn_box_outputs, rpn_score_outputs,
                                         input_anchor.multilevel_boxes,
                                         inputs['image_info'][:, 1, :],
                                         is_training)
    if is_training:
      rpn_rois = tf.stop_gradient(rpn_rois)

      # Sample proposals.
      rpn_rois, matched_gt_boxes, matched_gt_classes, matched_gt_indices = (
          self._sample_rois_fn(rpn_rois, inputs['gt_boxes'],
                               inputs['gt_classes']))

      # Create bounding box training targets.
      box_targets = box_utils.encode_boxes(
          matched_gt_boxes, rpn_rois, weights=[10.0, 10.0, 5.0, 5.0])
      # If the target is background, the box target is set to all 0s.
      box_targets = tf.where(
          tf.tile(
              tf.expand_dims(tf.equal(matched_gt_classes, 0), axis=-1),
              [1, 1, 4]),
          tf.zeros_like(box_targets),
          box_targets)
      model_outputs.update({
          'class_targets': matched_gt_classes,
          'box_targets': box_targets,
      })

    roi_features = spatial_transform_ops.multilevel_crop_and_resize(
        fpn_features, rpn_rois, output_size=7)
    class_outputs, box_outputs = self._frcnn_head_fn(roi_features, is_training)
    model_outputs.update({
        'class_outputs':
            tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                  class_outputs),
        'box_outputs':
            tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                  box_outputs),
    })
    # Add this output to train to make the checkpoint loadable in predict mode.
    # If we skip it in train mode, the heads will be out-of-order and checkpoint
    # loading will fail.
    boxes, scores, classes, valid_detections = self._generate_detections_fn(
        box_outputs, class_outputs, rpn_rois, inputs['image_info'][:, 1:2, :])
    model_outputs.update({
        'num_detections': valid_detections,
        'detection_boxes': boxes,
        'detection_classes': classes,
        'detection_scores': scores,
    })

    if not self._include_mask:
      return model_outputs

    if is_training:
      rpn_rois, classes, mask_targets = self._sample_masks_fn(
          rpn_rois, matched_gt_boxes, matched_gt_classes, matched_gt_indices,
          inputs['gt_masks'])
      mask_targets = tf.stop_gradient(mask_targets)

      classes = tf.cast(classes, dtype=tf.int32)
      model_outputs.update({
          'mask_targets': mask_targets,
          'sampled_class_targets': classes,
      })
    else:
      rpn_rois = boxes
      classes = tf.cast(classes, dtype=tf.int32)

    mask_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
        fpn_features, rpn_rois, output_size=14)
    mask_outputs = self._mrcnn_head_fn(mask_roi_features, classes, is_training)

    if is_training:
      model_outputs.update({
          'mask_outputs':
              tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                    mask_outputs),
      })
    else:
      model_outputs.update({
          'detection_masks': tf.nn.sigmoid(mask_outputs)
      })

    return model_outputs
  def build_loss_fn(self):
    if self._keras_model is None:
      raise ValueError('build_loss_fn() must be called after build_model().')

    filter_fn = self.make_filter_trainable_variables_fn()
    trainable_variables = filter_fn(self._keras_model.trainable_variables)

    def _total_loss_fn(labels, outputs):
      rpn_score_loss = self._rpn_score_loss_fn(outputs['rpn_score_outputs'],
                                               labels['rpn_score_targets'])
      rpn_box_loss = self._rpn_box_loss_fn(outputs['rpn_box_outputs'],
                                           labels['rpn_box_targets'])

      frcnn_class_loss = self._frcnn_class_loss_fn(outputs['class_outputs'],
                                                   outputs['class_targets'])
      frcnn_box_loss = self._frcnn_box_loss_fn(outputs['box_outputs'],
                                               outputs['class_targets'],
                                               outputs['box_targets'])

      if self._include_mask:
        mask_loss = self._mask_loss_fn(outputs['mask_outputs'],
                                       outputs['mask_targets'],
                                       outputs['sampled_class_targets'])
      else:
        mask_loss = 0.0

      model_loss = (
          rpn_score_loss + rpn_box_loss + frcnn_class_loss + frcnn_box_loss +
          mask_loss)

      l2_regularization_loss = self.weight_decay_loss(trainable_variables)
      total_loss = model_loss + l2_regularization_loss
      return {
          'total_loss': total_loss,
          'loss': total_loss,
          'fast_rcnn_class_loss': frcnn_class_loss,
          'fast_rcnn_box_loss': frcnn_box_loss,
          'mask_loss': mask_loss,
          'model_loss': model_loss,
          'l2_regularization_loss': l2_regularization_loss,
          'rpn_score_loss': rpn_score_loss,
          'rpn_box_loss': rpn_box_loss,
      }

    return _total_loss_fn
  def build_input_layers(self, params, mode):
    is_training = mode == mode_keys.TRAIN
    input_shape = (
        params.maskrcnn_parser.output_size +
        [params.maskrcnn_parser.num_channels])
    if is_training:
      batch_size = params.train.batch_size
      input_layer = {
          'image':
              tf.keras.layers.Input(
                  shape=input_shape,
                  batch_size=batch_size,
                  name='image',
                  dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32),
          'image_info':
              tf.keras.layers.Input(
                  shape=[4, 2],
                  batch_size=batch_size,
                  name='image_info',
              ),
          'gt_boxes':
              tf.keras.layers.Input(
                  shape=[params.maskrcnn_parser.max_num_instances, 4],
                  batch_size=batch_size,
                  name='gt_boxes'),
          'gt_classes':
              tf.keras.layers.Input(
                  shape=[params.maskrcnn_parser.max_num_instances],
                  batch_size=batch_size,
                  name='gt_classes',
                  dtype=tf.int64),
      }
      if self._include_mask:
        input_layer['gt_masks'] = tf.keras.layers.Input(
            shape=[
                params.maskrcnn_parser.max_num_instances,
                params.maskrcnn_parser.mask_crop_size,
                params.maskrcnn_parser.mask_crop_size
            ],
            batch_size=batch_size,
            name='gt_masks')
    else:
      batch_size = params.eval.batch_size
      input_layer = {
          'image':
              tf.keras.layers.Input(
                  shape=input_shape,
                  batch_size=batch_size,
                  name='image',
                  dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32),
          'image_info':
              tf.keras.layers.Input(
                  shape=[4, 2],
                  batch_size=batch_size,
                  name='image_info',
              ),
      }
    return input_layer
  def build_model(self, params, mode):
    if self._keras_model is None:
      input_layers = self.build_input_layers(self._params, mode)
      with backend.get_graph().as_default():
        outputs = self.model_outputs(input_layers, mode)

        model = tf.keras.models.Model(
            inputs=input_layers, outputs=outputs, name='maskrcnn')
        assert model is not None, 'Fail to build tf.keras.Model.'
        model.optimizer = self.build_optimizer()
        self._keras_model = model

    return self._keras_model

  def post_processing(self, labels, outputs):
    required_output_fields = ['class_outputs', 'box_outputs']
    for field in required_output_fields:
      if field not in outputs:
        raise ValueError('"%s" is missing in outputs, requried %s found %s'
                         %(field, required_output_fields, outputs.keys()))
    predictions = {
        'image_info': labels['image_info'],
        'num_detections': outputs['num_detections'],
        'detection_boxes': outputs['detection_boxes'],
        'detection_classes': outputs['detection_classes'],
        'detection_scores': outputs['detection_scores'],
    }
    if self._include_mask:
      predictions.update({
          'detection_masks': outputs['detection_masks'],
      })

    if 'groundtruths' in labels:
      predictions['source_id'] = labels['groundtruths']['source_id']
      predictions['gt_source_id'] = labels['groundtruths']['source_id']
      predictions['gt_height'] = labels['groundtruths']['height']
      predictions['gt_width'] = labels['groundtruths']['width']
      predictions['gt_image_info'] = labels['image_info']
      predictions['gt_num_detections'] = (
          labels['groundtruths']['num_detections'])
      predictions['gt_boxes'] = labels['groundtruths']['boxes']
      predictions['gt_classes'] = labels['groundtruths']['classes']
      predictions['gt_areas'] = labels['groundtruths']['areas']
      predictions['gt_is_crowds'] = labels['groundtruths']['is_crowds']

    return labels, predictions

  def eval_metrics(self):
    return eval_factory.evaluator_generator(self._params.eval)
@@ -21,6 +21,7 @@ The functions do not return a value, instead they modify the image itself.
 """

 import collections
 import functools
+from absl import logging
 # Set headless-friendly backend.
 import matplotlib; matplotlib.use('Agg')  # pylint: disable=multiple-statements
 import matplotlib.pyplot as plt  # pylint: disable=g-import-not-at-top
@@ -97,6 +98,12 @@ def encode_image_array_as_png_str(image):
 def visualize_images_with_bounding_boxes(images, box_outputs, step,
                                          summary_writer):
   """Records subset of evaluation images with bounding boxes."""
+  if not isinstance(images, list):
+    logging.warning('visualize_images_with_bounding_boxes expects list of '
+                    'images but received type: %s and value: %s',
+                    type(images), images)
+    return
+
   image_shape = tf.shape(images[0])
   image_height = tf.cast(image_shape[0], tf.float32)
   image_width = tf.cast(image_shape[1], tf.float32)
...