Commit 4f7d403d authored by Kaushik Shivakumar

remove meta arch

parent 00eac920
import abc
import collections
import functools

import numpy as np
import tensorflow.compat.v1 as tf
import tensorflow.compat.v2 as tf2

from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import keypoint_ops
from object_detection.core import model
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner
from object_detection.matchers import hungarian_matcher
from object_detection.meta_architectures import detr_transformer
from object_detection.models import faster_rcnn_resnet_keras_feature_extractor
# ops is needed by _loss_box_classifier (reduce_sum_trailing_dimensions) and
# was missing from the original imports.
from object_detection.utils import ops
from object_detection.utils import shape_utils


class DETRMetaArch(model.DetectionModel):

  def __init__(self):
    # One foreground class plus background, matching the 2-unit class head
    # below; model.DetectionModel requires num_classes.
    super(DETRMetaArch, self).__init__(num_classes=1)
    self.num_queries = 100
    self.hidden_dimension = 100
    self.feature_extractor = (
        faster_rcnn_resnet_keras_feature_extractor
        .FasterRCNNResnet50KerasFeatureExtractor(is_training=False))
    self.first_stage = (
        self.feature_extractor.get_proposal_feature_extractor_model())
    # Stored under the attribute name _loss_box_classifier expects.
    self._detector_target_assigner = target_assigner.create_target_assigner(
        'DETR', 'detection')
    self.transformer = detr_transformer.Transformer()
    self.ffn = (
        self.feature_extractor.get_box_classifier_feature_extractor_model())
    # Per-query prediction heads: 4 box coordinates and 2 class logits.
    self.bboxes = tf.keras.layers.Dense(4)
    self.cls = tf.keras.layers.Dense(2)
    # Learned object queries, one embedding per predicted box.
    self.queries = tf.Variable(
        tf.random.normal([self.num_queries, self.hidden_dimension]))
  def predict(self, preprocessed_inputs, true_image_shapes, **side_inputs):
    # Backbone feature map: [batch, h, w, channels].
    x = self.first_stage(preprocessed_inputs)
    # Flatten the spatial grid into a sequence: [batch, h * w, channels].
    x = tf.reshape(x, [x.shape[0], x.shape[1] * x.shape[2], x.shape[3]])
    # Decode the learned queries against the encoded sequence.
    x = self.transformer(
        [x, tf.repeat(tf.expand_dims(self.queries, 0), x.shape[0], axis=0)])
    x = self.ffn(x)
    # Per-query box regressions and class logits.
    return self.bboxes(x), self.cls(x)
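
  # Note: unlike the DETR paper, no positional encodings are added to the
  # flattened feature sequence before the transformer; whether
  # detr_transformer.Transformer adds them internally is not visible from
  # this file.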
  def preprocess(self, inputs):
    """Feature-extractor specific preprocessing.

    See base class.

    Images are resized here; the feature extractor is responsible for any
    additional preprocessing (e.g., scaling pixel values to be in [-1, 1]).

    Args:
      inputs: a [batch, height_in, width_in, channels] float tensor
        representing a batch of images with values between 0 and 255.0.

    Returns:
      preprocessed_inputs: a [batch, height_out, width_out, channels] float
        tensor representing a batch of images.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros.

    Raises:
      ValueError: if inputs tensor does not have type tf.float32
    """
    with tf.name_scope('Preprocessor'):
      # Note: self._image_resizer_fn is never set in __init__ and must be
      # supplied before preprocess is called.
      (resized_inputs,
       true_image_shapes) = shape_utils.resize_images_and_return_shapes(
           inputs, self._image_resizer_fn)
      return (self.feature_extractor.preprocess(resized_inputs),
              true_image_shapes)
  def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
    raise NotImplementedError('Model restoration is not implemented yet.')

  def restore_map(self,
                  fine_tune_checkpoint_type='detection',
                  load_all_detection_checkpoint_vars=False):
    raise NotImplementedError('Model restoration is not implemented yet.')
  def loss(self, prediction_dict, true_image_shapes, scope=None):
    """Computes scalar loss tensors given prediction tensors.

    Args:
      prediction_dict: a dictionary holding prediction tensors (see the
        documentation for the predict method), expected to contain
        `refined_box_encodings`, `class_predictions_with_background`,
        `num_proposals`, `proposal_boxes`, and `image_shape` fields.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros.
      scope: Optional scope name.

    Returns:
      a dictionary mapping loss keys
      ('Loss/BoxClassifierLoss/localization_loss',
      'Loss/BoxClassifierLoss/classification_loss') to scalar tensors
      representing corresponding loss values.
    """
    with tf.name_scope(scope, 'Loss', prediction_dict.values()):
      # Note: _format_groundtruth_data and _image_batch_shape_2d follow the
      # Faster R-CNN meta-architecture helpers and are not defined in this
      # class.
      (groundtruth_boxlists, groundtruth_classes_with_background_list,
       groundtruth_masks_list, groundtruth_weights_list
      ) = self._format_groundtruth_data(
          self._image_batch_shape_2d(prediction_dict['image_shape']))
      loss_dict = self._loss_box_classifier(
          prediction_dict['refined_box_encodings'],
          prediction_dict['class_predictions_with_background'],
          prediction_dict['proposal_boxes'],
          prediction_dict['num_proposals'], groundtruth_boxlists,
          groundtruth_classes_with_background_list,
          groundtruth_weights_list, prediction_dict['image_shape'],
          prediction_dict.get('mask_predictions'), groundtruth_masks_list,
          prediction_dict.get(
              fields.DetectionResultFields.detection_boxes),
          prediction_dict.get(
              fields.DetectionResultFields.num_detections))
    return loss_dict
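
  # Note: this loss is adapted from the Faster R-CNN box-classifier loss and
  # does not yet implement DETR's set-based criterion. In the DETR paper the
  # num_queries predictions are first matched one-to-one to groundtruth boxes
  # with the Hungarian algorithm (hence the hungarian_matcher import above),
  # and classification/L1/GIoU losses are computed on the matched pairs;
  # presumably the 'DETR' target assigner created in __init__ performs that
  # bipartite matching. A standalone sketch of the matching step appears
  # after the class definition.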
  def _loss_box_classifier(self,
                           refined_box_encodings,
                           class_predictions_with_background,
                           proposal_boxes,
                           num_proposals,
                           groundtruth_boxlists,
                           groundtruth_classes_with_background_list,
                           groundtruth_weights_list,
                           image_shape,
                           prediction_masks=None,
                           groundtruth_masks_list=None,
                           detection_boxes=None,
                           num_detections=None):
    """Computes scalar box classifier loss tensors.

    Uses self._detector_target_assigner to obtain regression and
    classification targets for the second stage box classifier, optionally
    performs hard mining, and returns losses. All losses are computed
    independently for each image and then averaged across the batch.

    Please note that for boxes and masks with multiple labels, the box
    regression and mask prediction losses are only computed for one label.

    This function assumes that the proposal boxes in the "padded" regions are
    actually zero (and thus should not be matched to).

    Args:
      refined_box_encodings: a 3-D tensor with shape
        [total_num_proposals, num_classes, box_coder.code_size] representing
        predicted (final) refined box encodings. If using a shared box across
        classes this will instead have shape
        [total_num_proposals, 1, box_coder.code_size].
      class_predictions_with_background: a 2-D tensor with shape
        [total_num_proposals, num_classes + 1] containing class
        predictions (logits) for each of the anchors. Note that this tensor
        *includes* background class predictions (at class index 0).
      proposal_boxes: [batch_size, self.max_num_proposals, 4] representing
        decoded proposal bounding boxes.
      num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch]
        representing the number of proposals predicted for each image in
        the batch.
      groundtruth_boxlists: a list of BoxLists containing coordinates of the
        groundtruth boxes.
      groundtruth_classes_with_background_list: a list of 2-D one-hot
        (or k-hot) tensors of shape [num_boxes, num_classes + 1] containing
        the class targets with the 0th index assumed to map to the background
        class.
      groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
        [num_boxes] containing weights for groundtruth boxes.
      image_shape: a 1-D tensor of shape [4] representing the image shape.
      prediction_masks: an optional 4-D tensor with shape
        [total_num_proposals, num_classes, mask_height, mask_width] containing
        the instance masks for each box. Accepted for interface compatibility
        but unused in this implementation.
      groundtruth_masks_list: an optional list of 3-D tensors of shape
        [num_boxes, image_height, image_width] containing the instance masks
        for each of the boxes. Accepted for interface compatibility but
        unused in this implementation.
      detection_boxes: 3-D float tensor of shape [batch,
        max_total_detections, 4] containing post-processed detection boxes in
        normalized co-ordinates.
      num_detections: 1-D int32 tensor of shape [batch] containing number of
        valid detections in `detection_boxes`.

    Returns:
      a dictionary mapping loss keys
      ('Loss/BoxClassifierLoss/localization_loss',
      'Loss/BoxClassifierLoss/classification_loss') to scalar tensors
      representing corresponding loss values.
    """
    with tf.name_scope('BoxClassifierLoss'):
      # Note: several helpers referenced below (the paddings indicator, the
      # second-stage loss objects and weights, the hard example miner, and
      # _get_refined_encodings_for_postitive_class) follow the Faster R-CNN
      # meta-architecture and are not defined in this class.
      paddings_indicator = self._padded_batched_proposals_indicator(
          num_proposals, proposal_boxes.shape[1])
      proposal_boxlists = [
          box_list.BoxList(proposal_boxes_single_image)
          for proposal_boxes_single_image in tf.unstack(proposal_boxes)]
      batch_size = len(proposal_boxlists)

      num_proposals_or_one = tf.cast(tf.expand_dims(
          tf.maximum(num_proposals, tf.ones_like(num_proposals)), 1),
          dtype=tf.float32)
      normalizer = tf.tile(num_proposals_or_one,
                           [1, self.max_num_proposals]) * batch_size

      (batch_cls_targets_with_background, batch_cls_weights, batch_reg_targets,
       batch_reg_weights, _) = target_assigner.batch_assign_targets(
           target_assigner=self._detector_target_assigner,
           anchors_batch=proposal_boxlists,
           gt_box_batch=groundtruth_boxlists,
           gt_class_targets_batch=groundtruth_classes_with_background_list,
           unmatched_class_label=tf.constant(
               [1] + self._num_classes * [0], dtype=tf.float32),
           gt_weights_batch=groundtruth_weights_list)

      class_predictions_with_background = tf.reshape(
          class_predictions_with_background,
          [batch_size, self.max_num_proposals, -1])

      flat_cls_targets_with_background = tf.reshape(
          batch_cls_targets_with_background,
          [batch_size * self.max_num_proposals, -1])
      one_hot_flat_cls_targets_with_background = tf.argmax(
          flat_cls_targets_with_background, axis=1)
      one_hot_flat_cls_targets_with_background = tf.one_hot(
          one_hot_flat_cls_targets_with_background,
          flat_cls_targets_with_background.get_shape()[1])

      # If using a shared box across classes use directly
      if refined_box_encodings.shape[1] == 1:
        reshaped_refined_box_encodings = tf.reshape(
            refined_box_encodings,
            [batch_size, self.max_num_proposals, self._box_coder.code_size])
      # For anchors with multiple labels, picks refined_location_encodings
      # for just one class to avoid over-counting for regression loss and
      # (optionally) mask loss.
      else:
        reshaped_refined_box_encodings = (
            self._get_refined_encodings_for_postitive_class(
                refined_box_encodings,
                one_hot_flat_cls_targets_with_background, batch_size))

      losses_mask = None
      if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
        losses_mask = tf.stack(self.groundtruth_lists(
            fields.InputDataFields.is_annotated))
      second_stage_loc_losses = self._second_stage_localization_loss(
          reshaped_refined_box_encodings,
          batch_reg_targets,
          weights=batch_reg_weights,
          losses_mask=losses_mask) / normalizer
      second_stage_cls_losses = ops.reduce_sum_trailing_dimensions(
          self._second_stage_classification_loss(
              class_predictions_with_background,
              batch_cls_targets_with_background,
              weights=batch_cls_weights,
              losses_mask=losses_mask),
          ndims=2) / normalizer

      second_stage_loc_loss = tf.reduce_sum(
          second_stage_loc_losses * tf.cast(paddings_indicator,
                                            dtype=tf.float32))
      second_stage_cls_loss = tf.reduce_sum(
          second_stage_cls_losses * tf.cast(paddings_indicator,
                                            dtype=tf.float32))

      if self._hard_example_miner:
        (second_stage_loc_loss, second_stage_cls_loss
        ) = self._unpad_proposals_and_apply_hard_mining(
            proposal_boxlists, second_stage_loc_losses,
            second_stage_cls_losses, num_proposals)
      localization_loss = tf.multiply(self._second_stage_loc_loss_weight,
                                      second_stage_loc_loss,
                                      name='localization_loss')
      classification_loss = tf.multiply(self._second_stage_cls_loss_weight,
                                        second_stage_cls_loss,
                                        name='classification_loss')

      loss_dict = {'Loss/BoxClassifierLoss/localization_loss':
                       localization_loss,
                   'Loss/BoxClassifierLoss/classification_loss':
                       classification_loss}
    return loss_dict
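

# Illustrative sketch (not part of the original commit) of the bipartite
# matching at the heart of DETR's set-based loss: each of the num_queries
# predicted boxes is assigned to at most one groundtruth box by minimizing a
# pairwise cost. scipy's linear_sum_assignment stands in here for the repo's
# hungarian_matcher, and a plain L1 box cost stands in for the paper's
# combined class + L1 + generalized-IoU cost.
from scipy.optimize import linear_sum_assignment


def _sketch_bipartite_match(pred_boxes, gt_boxes):
  """Matches predictions to groundtruth; boxes are [N, 4] / [M, 4] arrays."""
  # Pairwise L1 distance between every prediction and every groundtruth box.
  cost = np.abs(pred_boxes[:, None, :] - gt_boxes[None, :, :]).sum(axis=-1)
  # Rectangular assignment: every groundtruth box gets exactly one query.
  pred_indices, gt_indices = linear_sum_assignment(cost)
  return pred_indices, gt_indices


if __name__ == '__main__':
  rng = np.random.default_rng(0)
  preds = rng.random((100, 4))  # num_queries predicted boxes
  gts = rng.random((7, 4))      # 7 groundtruth boxes
  pred_idx, gt_idx = _sketch_bipartite_match(preds, gts)
  # 7 matched pairs; unmatched queries would be trained toward "no object".
  print(list(zip(pred_idx, gt_idx)))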