remove meta arch

4f7d403d · Kaushik Shivakumar · 00eac920 · 00eac920
Commit 4f7d403d authored Jul 16, 2020 by Kaushik Shivakumar
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 0 additions and 293 deletions

research/object_detection/meta_architectures/detr_meta_arch.py +0 -293

No files found.
--- a/research/object_detection/meta_architectures/detr_meta_arch.py
+++ b/research/object_detection/meta_architectures/detr_meta_arch.py
-import abc
-import collections
-import functools
-import numpy as np
-import tensorflow.compat.v1 as tf
-import tensorflow.compat.v2 as tf2
-from object_detection.core import box_list
-from object_detection.core import box_list_ops
-from object_detection.core import keypoint_ops
-from object_detection.core import model
-from object_detection.core import standard_fields as fields
-from object_detection.core import target_assigner
-from object_detection.utils import shape_utils
-from object_detection.models import faster_rcnn_resnet_keras_feature_extractor
-from object_detection.meta_architectures import detr_transformer
-from object_detection.matchers import hungarian_matcher
-class DETRMetaArch(model.DetectionModel):
-    def __init__(self):
-        self.num_queries = 100
-        self.hidden_dimension = 100
-        self.feature_extractor = faster_rcnn_resnet_keras_feature_extractor.FasterRCNNResnet50KerasFeatureExtractor(is_training=False)
-        self.first_stage = self.feature_extractor.get_proposal_feature_extractor_model()
-        self.target_assigner = target_assigner.create_target_assigner('DETR', 'detection')
-        self.transformer = detr_transformer.Transformer()
-        self.ffn = self.feature_extractor.get_box_classifier_feature_extractor_model()
-        self.bboxes = tf.keras.layers.Dense(4)
-        self.cls = tf.keras.layers.Dense(2)
-        self.queries = tf.keras.Variable(tf.random([self.num_queries, self.hidden_dimension]))
-    def predict(self, preprocessed_inputs, true_image_shapes, **side_inputs):
-        x = self.first_stage(preprocessed_inputs)
-        x = tf.reshape(x, [x.shape[0], x.shape[1] * x.shape[2], x.shape[3]])
-        x = self.transformer([x, tf.repeat(tf.expand_dims(self.queries, 0), x.shape[0], axis=0)])
-        x = self.ffn(x)
-        return self.bboxes(x), self.cls(x)
-    def loss(self, prediction_dict, true_image_shapes, scope=None):
-        return 1
-    def preprocess(self, inputs):
-        """Feature-extractor specific preprocessing.
-        See base class.
-        For Faster R-CNN, we perform image resizing in the base class --- each
-        class subclassing FasterRCNNMetaArch is responsible for any additional
-        preprocessing (e.g., scaling pixel values to be in [-1, 1]).
-        Args:
-        inputs: a [batch, height_in, width_in, channels] float tensor representing
-            a batch of images with values between 0 and 255.0.
-        Returns:
-        preprocessed_inputs: a [batch, height_out, width_out, channels] float
-            tensor representing a batch of images.
-        true_image_shapes: int32 tensor of shape [batch, 3] where each row is
-            of the form [height, width, channels] indicating the shapes
-            of true images in the resized images, as resized images can be padded
-            with zeros.
-        Raises:
-        ValueError: if inputs tensor does not have type tf.float32
-        """
-        with tf.name_scope('Preprocessor'):
-            (resized_inputs,
-            true_image_shapes) = shape_utils.resize_images_and_return_shapes(
-                inputs, self._image_resizer_fn)
-        return (self.feature_extractor.preprocess(resized_inputs),
-                true_image_shapes)
-    def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
-        raise NotImplementedError("Model restoration implemented yet.")
-    def restore_map(self,
-                    fine_tune_checkpoint_type='detection',
-                    load_all_detection_checkpoint_vars=False):
-        raise NotImplementedError("Model restoration implemented yet.")
-    def loss(self, prediction_dict, true_image_shapes, scope=None):
-    """Compute scalar loss tensors given prediction tensors.
-    If number_of_stages=1, only RPN related losses are computed (i.e.,
-    `rpn_localization_loss` and `rpn_objectness_loss`).  Otherwise all
-    losses are computed.
-    Args:
-      prediction_dict: a dictionary holding prediction tensors (see the
-        documentation for the predict method.  If number_of_stages=1, we
-        expect prediction_dict to contain `rpn_box_encodings`,
-        `rpn_objectness_predictions_with_background`, `rpn_features_to_crop`,
-        `image_shape`, and `anchors` fields.  Otherwise we expect
-        prediction_dict to additionally contain `refined_box_encodings`,
-        `class_predictions_with_background`, `num_proposals`, and
-        `proposal_boxes` fields.
-      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
-        of the form [height, width, channels] indicating the shapes
-        of true images in the resized images, as resized images can be padded
-        with zeros.
-      scope: Optional scope name.
-    Returns:
-      a dictionary mapping loss keys (`first_stage_localization_loss`,
-        `first_stage_objectness_loss`, 'second_stage_localization_loss',
-        'second_stage_classification_loss') to scalar tensors representing
-        corresponding loss values.
-    """
-    with tf.name_scope(scope, 'Loss', prediction_dict.values()):
-      (groundtruth_boxlists, groundtruth_classes_with_background_list,
-       groundtruth_masks_list, groundtruth_weights_list
-      ) = self._format_groundtruth_data(
-          self._image_batch_shape_2d(prediction_dict['image_shape']))
-      loss_dict = self._loss_box_classifier(
-            prediction_dict['refined_box_encodings'],
-            prediction_dict['class_predictions_with_background'],
-            prediction_dict['proposal_boxes'],
-            prediction_dict['num_proposals'], groundtruth_boxlists,
-            groundtruth_classes_with_background_list,
-            groundtruth_weights_list, prediction_dict['image_shape'],
-            prediction_dict.get('mask_predictions'), groundtruth_masks_list,
-            prediction_dict.get(
-                fields.DetectionResultFields.detection_boxes),
-            prediction_dict.get(
-                fields.DetectionResultFields.num_detections))
-    return loss_dict
-    def _loss_box_classifier(self,
-                           refined_box_encodings,
-                           class_predictions_with_background,
-                           proposal_boxes,
-                           num_proposals,
-                           groundtruth_boxlists,
-                           groundtruth_classes_with_background_list,
-                           groundtruth_weights_list,
-                           image_shape,
-                           prediction_masks=None,
-                           groundtruth_masks_list=None,
-                           detection_boxes=None,
-                           num_detections=None):
-    """Computes scalar box classifier loss tensors.
-    Uses self._detector_target_assigner to obtain regression and classification
-    targets for the second stage box classifier, optionally performs
-    hard mining, and returns losses.  All losses are computed independently
-    for each image and then averaged across the batch.
-    Please note that for boxes and masks with multiple labels, the box
-    regression and mask prediction losses are only computed for one label.
-    This function assumes that the proposal boxes in the "padded" regions are
-    actually zero (and thus should not be matched to).
-    Args:
-      refined_box_encodings: a 3-D tensor with shape
-        [total_num_proposals, num_classes, box_coder.code_size] representing
-        predicted (final) refined box encodings. If using a shared box across
-        classes this will instead have shape
-        [total_num_proposals, 1, box_coder.code_size].
-      class_predictions_with_background: a 2-D tensor with shape
-        [total_num_proposals, num_classes + 1] containing class
-        predictions (logits) for each of the anchors.  Note that this tensor
-        *includes* background class predictions (at class index 0).
-      proposal_boxes: [batch_size, self.max_num_proposals, 4] representing
-        decoded proposal bounding boxes.
-      num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch]
-        representing the number of proposals predicted for each image in
-        the batch.
-      groundtruth_boxlists: a list of BoxLists containing coordinates of the
-        groundtruth boxes.
-      groundtruth_classes_with_background_list: a list of 2-D one-hot
-        (or k-hot) tensors of shape [num_boxes, num_classes + 1] containing the
-        class targets with the 0th index assumed to map to the background class.
-      groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
-        [num_boxes] containing weights for groundtruth boxes.
-      image_shape: a 1-D tensor of shape [4] representing the image shape.
-      prediction_masks: an optional 4-D tensor with shape [total_num_proposals,
-        num_classes, mask_height, mask_width] containing the instance masks for
-        each box.
-      groundtruth_masks_list: an optional list of 3-D tensors of shape
-        [num_boxes, image_height, image_width] containing the instance masks for
-        each of the boxes.
-      detection_boxes: 3-D float tensor of shape [batch,
-        max_total_detections, 4] containing post-processed detection boxes in
-        normalized co-ordinates.
-      num_detections: 1-D int32 tensor of shape [batch] containing number of
-        valid detections in `detection_boxes`.
-    Returns:
-      a dictionary mapping loss keys ('second_stage_localization_loss',
-        'second_stage_classification_loss') to scalar tensors representing
-        corresponding loss values.
-    Raises:
-      ValueError: if `predict_instance_masks` in
-        second_stage_mask_rcnn_box_predictor is True and
-        `groundtruth_masks_list` is not provided.
-    """
-    with tf.name_scope('BoxClassifierLoss'):
-      paddings_indicator = self._padded_batched_proposals_indicator(
-          num_proposals, proposal_boxes.shape[1])
-      proposal_boxlists = [
-          box_list.BoxList(proposal_boxes_single_image)
-          for proposal_boxes_single_image in tf.unstack(proposal_boxes)]
-      batch_size = len(proposal_boxlists)
-      num_proposals_or_one = tf.cast(tf.expand_dims(
-          tf.maximum(num_proposals, tf.ones_like(num_proposals)), 1),
-                                     dtype=tf.float32)
-      normalizer = tf.tile(num_proposals_or_one,
-                           [1, self.max_num_proposals]) * batch_size
-      (batch_cls_targets_with_background, batch_cls_weights, batch_reg_targets,
-       batch_reg_weights, _) = target_assigner.batch_assign_targets(
-           target_assigner=self._detector_target_assigner,
-           anchors_batch=proposal_boxlists,
-           gt_box_batch=groundtruth_boxlists,
-           gt_class_targets_batch=groundtruth_classes_with_background_list,
-           unmatched_class_label=tf.constant(
-               [1] + self._num_classes * [0], dtype=tf.float32),
-           gt_weights_batch=groundtruth_weights_list)
-      class_predictions_with_background = tf.reshape(
-          class_predictions_with_background,
-          [batch_size, self.max_num_proposals, -1])
-      flat_cls_targets_with_background = tf.reshape(
-          batch_cls_targets_with_background,
-          [batch_size * self.max_num_proposals, -1])
-      one_hot_flat_cls_targets_with_background = tf.argmax(
-          flat_cls_targets_with_background, axis=1)
-      one_hot_flat_cls_targets_with_background = tf.one_hot(
-          one_hot_flat_cls_targets_with_background,
-          flat_cls_targets_with_background.get_shape()[1])
-      # If using a shared box across classes use directly
-      if refined_box_encodings.shape[1] == 1:
-        reshaped_refined_box_encodings = tf.reshape(
-            refined_box_encodings,
-            [batch_size, self.max_num_proposals, self._box_coder.code_size])
-      # For anchors with multiple labels, picks refined_location_encodings
-      # for just one class to avoid over-counting for regression loss and
-      # (optionally) mask loss.
-      else:
-        reshaped_refined_box_encodings = (
-            self._get_refined_encodings_for_postitive_class(
-                refined_box_encodings,
-                one_hot_flat_cls_targets_with_background, batch_size))
-      losses_mask = None
-      if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
-        losses_mask = tf.stack(self.groundtruth_lists(
-            fields.InputDataFields.is_annotated))
-      second_stage_loc_losses = self._second_stage_localization_loss(
-          reshaped_refined_box_encodings,
-          batch_reg_targets,
-          weights=batch_reg_weights,
-          losses_mask=losses_mask) / normalizer
-      second_stage_cls_losses = ops.reduce_sum_trailing_dimensions(
-          self._second_stage_classification_loss(
-              class_predictions_with_background,
-              batch_cls_targets_with_background,
-              weights=batch_cls_weights,
-              losses_mask=losses_mask),
-          ndims=2) / normalizer
-      second_stage_loc_loss = tf.reduce_sum(
-          second_stage_loc_losses * tf.cast(paddings_indicator,
-                                            dtype=tf.float32))
-      second_stage_cls_loss = tf.reduce_sum(
-          second_stage_cls_losses * tf.cast(paddings_indicator,
-                                            dtype=tf.float32))
-      if self._hard_example_miner:
-        (second_stage_loc_loss, second_stage_cls_loss
-        ) = self._unpad_proposals_and_apply_hard_mining(
-            proposal_boxlists, second_stage_loc_losses,
-            second_stage_cls_losses, num_proposals)
-      localization_loss = tf.multiply(self._second_stage_loc_loss_weight,
-                                      second_stage_loc_loss,
-                                      name='localization_loss')
-      classification_loss = tf.multiply(self._second_stage_cls_loss_weight,
-                                        second_stage_cls_loss,
-                                        name='classification_loss')
-      loss_dict = {'Loss/BoxClassifierLoss/localization_loss':
-                       localization_loss,
-                   'Loss/BoxClassifierLoss/classification_loss':
-                       classification_loss}
-    return loss_dict
\ No newline at end of file