Commit bb124157 authored by stephenwu

Merge branch 'master' of https://github.com/tensorflow/models into RTESuperGLUE

parents 2e9bb539 0edeb7f6
@@ -237,9 +237,12 @@ if tf_version.is_tf1():
frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor,
}
CENTER_NET_EXTRACTOR_FUNCTION_MAP = {}
FEATURE_EXTRACTOR_MAPS = [
SSD_FEATURE_EXTRACTOR_CLASS_MAP,
FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP
FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP,
CENTER_NET_EXTRACTOR_FUNCTION_MAP
]
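This hunk registers the (empty, under TF1) CenterNet map alongside the SSD and Faster R-CNN maps so that `_check_feature_extractor_exists` can look every configured extractor type up in one place. A hedged, simplified sketch of such a registry check (stand-in entries; the real helper may differ in detail):

```python
SSD_FEATURE_EXTRACTOR_CLASS_MAP = {'ssd_resnet50_v1_fpn': object}  # stand-in
FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {'faster_rcnn_resnet101': object}
CENTER_NET_EXTRACTOR_FUNCTION_MAP = {}

FEATURE_EXTRACTOR_MAPS = [
    SSD_FEATURE_EXTRACTOR_CLASS_MAP,
    FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP,
    CENTER_NET_EXTRACTOR_FUNCTION_MAP,
]


def check_feature_extractor_exists(feature_extractor_type):
  # Search every registered map; raise a clear error for unsupported types.
  if not any(feature_extractor_type in m for m in FEATURE_EXTRACTOR_MAPS):
    raise ValueError(
        '{} is not supported for this TF version.'.format(
            feature_extractor_type))
```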
@@ -996,7 +999,7 @@ def _build_center_net_model(center_net_config, is_training, add_summaries):
center_net_config.image_resizer)
_check_feature_extractor_exists(center_net_config.feature_extractor.type)
feature_extractor = _build_center_net_feature_extractor(
center_net_config.feature_extractor)
center_net_config.feature_extractor, is_training)
object_center_params = object_center_proto_to_params(
center_net_config.object_center_params)
@@ -1067,19 +1070,21 @@ def _build_center_net_model(center_net_config, is_training, add_summaries):
non_max_suppression_fn=non_max_suppression_fn)
def _build_center_net_feature_extractor(
feature_extractor_config):
def _build_center_net_feature_extractor(feature_extractor_config, is_training):
"""Build a CenterNet feature extractor from the given config."""
if feature_extractor_config.type not in CENTER_NET_EXTRACTOR_FUNCTION_MAP:
raise ValueError('\'{}\' is not a known CenterNet feature extractor type'
.format(feature_extractor_config.type))
kwargs = {
'channel_means': list(feature_extractor_config.channel_means),
'channel_stds': list(feature_extractor_config.channel_stds),
'bgr_ordering': feature_extractor_config.bgr_ordering,
}
return CENTER_NET_EXTRACTOR_FUNCTION_MAP[feature_extractor_config.type](
channel_means=list(feature_extractor_config.channel_means),
channel_stds=list(feature_extractor_config.channel_stds),
bgr_ordering=feature_extractor_config.bgr_ordering
)
**kwargs)
META_ARCH_BUILDER_MAP = {
......
@@ -33,6 +33,8 @@ from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner as cn_assigner
from object_detection.utils import shape_utils
from object_detection.utils import target_assigner_utils as ta_utils
from object_detection.utils import tf_version
# Number of channels needed to predict size and offsets.
NUM_OFFSET_CHANNELS = 2
@@ -166,16 +168,26 @@ def make_prediction_net(num_out_channels, kernel_size=3, num_filters=256,
else:
conv_fn = tf.keras.layers.Conv2D
out_conv = tf.keras.layers.Conv2D(num_out_channels, kernel_size=1)
# We name the convolution operations explicitly because Keras, by default,
# uses different names during training and evaluation. By setting the names
# here, we avoid unexpected pipeline breakage in TF1.
out_conv = tf.keras.layers.Conv2D(
num_out_channels,
kernel_size=1,
name='conv1' if tf_version.is_tf1() else None)
if bias_fill is not None:
out_conv.bias_initializer = tf.keras.initializers.constant(bias_fill)
net = tf.keras.Sequential(
[conv_fn(num_filters, kernel_size=kernel_size, padding='same'),
tf.keras.layers.ReLU(),
out_conv],
name=name)
net = tf.keras.Sequential([
conv_fn(
num_filters,
kernel_size=kernel_size,
padding='same',
name='conv2' if tf_version.is_tf1() else None),
tf.keras.layers.ReLU(), out_conv
],
name=name)
return net
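The comment in this hunk is about TF1 op-name stability: Keras auto-generates layer names, so the same head can end up with different op names in the training and evaluation graphs. A minimal standalone sketch of the naming pattern, with `is_tf1` standing in for `tf_version.is_tf1()`:

```python
import tensorflow as tf


def tiny_prediction_net(num_out_channels, num_filters=256, is_tf1=False):
  # Fixed layer names keep TF1 graph-mode op names identical between the
  # training and evaluation graphs; TF2 can rely on auto-generated names.
  return tf.keras.Sequential([
      tf.keras.layers.Conv2D(num_filters, kernel_size=3, padding='same',
                             name='conv2' if is_tf1 else None),
      tf.keras.layers.ReLU(),
      tf.keras.layers.Conv2D(num_out_channels, kernel_size=1,
                             name='conv1' if is_tf1 else None),
  ], name='center')


logits = tiny_prediction_net(num_out_channels=90)(tf.zeros([1, 64, 64, 256]))
```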
@@ -2096,6 +2108,21 @@ class CenterNetMetaArch(model.DetectionModel):
'tensor names.')
return self._batched_prediction_tensor_names
def _make_prediction_net_list(self, num_feature_outputs, num_out_channels,
kernel_size=3, num_filters=256, bias_fill=None,
name=None):
prediction_net_list = []
for i in range(num_feature_outputs):
prediction_net_list.append(
make_prediction_net(
num_out_channels,
kernel_size=kernel_size,
num_filters=num_filters,
bias_fill=bias_fill,
use_depthwise=self._use_depthwise,
name='{}_{}'.format(name, i) if name else name))
return prediction_net_list
def _construct_prediction_heads(self, num_classes, num_feature_outputs,
class_prediction_bias_init):
"""Constructs the prediction heads based on the specific parameters.
@@ -2116,86 +2143,72 @@ class CenterNetMetaArch(model.DetectionModel):
learning the tracking task.
"""
prediction_heads = {}
prediction_heads[OBJECT_CENTER] = [
make_prediction_net(num_classes, bias_fill=class_prediction_bias_init,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
prediction_heads[OBJECT_CENTER] = self._make_prediction_net_list(
num_feature_outputs, num_classes, bias_fill=class_prediction_bias_init,
name='center')
if self._od_params is not None:
prediction_heads[BOX_SCALE] = [
make_prediction_net(
NUM_SIZE_CHANNELS, use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
prediction_heads[BOX_OFFSET] = [
make_prediction_net(
NUM_OFFSET_CHANNELS, use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
prediction_heads[BOX_SCALE] = self._make_prediction_net_list(
num_feature_outputs, NUM_SIZE_CHANNELS, name='box_scale')
prediction_heads[BOX_OFFSET] = self._make_prediction_net_list(
num_feature_outputs, NUM_OFFSET_CHANNELS, name='box_offset')
if self._kp_params_dict is not None:
for task_name, kp_params in self._kp_params_dict.items():
num_keypoints = len(kp_params.keypoint_indices)
# pylint: disable=g-complex-comprehension
prediction_heads[get_keypoint_name(task_name, KEYPOINT_HEATMAP)] = [
make_prediction_net(
prediction_heads[get_keypoint_name(
task_name, KEYPOINT_HEATMAP)] = self._make_prediction_net_list(
num_feature_outputs,
num_keypoints,
bias_fill=kp_params.heatmap_bias_init,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
# pylint: enable=g-complex-comprehension
prediction_heads[get_keypoint_name(task_name, KEYPOINT_REGRESSION)] = [
make_prediction_net(NUM_OFFSET_CHANNELS * num_keypoints,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
name='kpt_heatmap')
prediction_heads[get_keypoint_name(
task_name, KEYPOINT_REGRESSION)] = self._make_prediction_net_list(
num_feature_outputs,
NUM_OFFSET_CHANNELS * num_keypoints,
name='kpt_regress')
if kp_params.per_keypoint_offset:
prediction_heads[get_keypoint_name(task_name, KEYPOINT_OFFSET)] = [
make_prediction_net(NUM_OFFSET_CHANNELS * num_keypoints,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
prediction_heads[get_keypoint_name(
task_name, KEYPOINT_OFFSET)] = self._make_prediction_net_list(
num_feature_outputs,
NUM_OFFSET_CHANNELS * num_keypoints,
name='kpt_offset')
else:
prediction_heads[get_keypoint_name(task_name, KEYPOINT_OFFSET)] = [
make_prediction_net(NUM_OFFSET_CHANNELS,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
prediction_heads[get_keypoint_name(
task_name, KEYPOINT_OFFSET)] = self._make_prediction_net_list(
num_feature_outputs, NUM_OFFSET_CHANNELS, name='kpt_offset')
if kp_params.predict_depth:
num_depth_channel = (
num_keypoints if kp_params.per_keypoint_depth else 1)
prediction_heads[get_keypoint_name(task_name, KEYPOINT_DEPTH)] = [
make_prediction_net(
num_depth_channel, use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
# pylint: disable=g-complex-comprehension
prediction_heads[get_keypoint_name(
task_name, KEYPOINT_DEPTH)] = self._make_prediction_net_list(
num_feature_outputs, num_depth_channel, name='kpt_depth')
if self._mask_params is not None:
prediction_heads[SEGMENTATION_HEATMAP] = [
make_prediction_net(
num_classes,
bias_fill=self._mask_params.heatmap_bias_init,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)]
prediction_heads[SEGMENTATION_HEATMAP] = self._make_prediction_net_list(
num_feature_outputs,
num_classes,
bias_fill=self._mask_params.heatmap_bias_init,
name='seg_heatmap')
if self._densepose_params is not None:
prediction_heads[DENSEPOSE_HEATMAP] = [
make_prediction_net(
self._densepose_params.num_parts,
bias_fill=self._densepose_params.heatmap_bias_init,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)]
prediction_heads[DENSEPOSE_REGRESSION] = [
make_prediction_net(2 * self._densepose_params.num_parts,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
# pylint: enable=g-complex-comprehension
prediction_heads[DENSEPOSE_HEATMAP] = self._make_prediction_net_list(
num_feature_outputs,
self._densepose_params.num_parts,
bias_fill=self._densepose_params.heatmap_bias_init,
name='dense_pose_heatmap')
prediction_heads[DENSEPOSE_REGRESSION] = self._make_prediction_net_list(
num_feature_outputs,
2 * self._densepose_params.num_parts,
name='dense_pose_regress')
if self._track_params is not None:
prediction_heads[TRACK_REID] = [
make_prediction_net(self._track_params.reid_embed_size,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)]
prediction_heads[TRACK_REID] = self._make_prediction_net_list(
num_feature_outputs,
self._track_params.reid_embed_size,
name='track_reid')
# Creates a classification network to train object embeddings by learning
# a projection from embedding space to object track ID space.
@@ -2213,11 +2226,8 @@ class CenterNetMetaArch(model.DetectionModel):
input_shape=(
self._track_params.reid_embed_size,)))
if self._temporal_offset_params is not None:
prediction_heads[TEMPORAL_OFFSET] = [
make_prediction_net(NUM_OFFSET_CHANNELS,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
prediction_heads[TEMPORAL_OFFSET] = self._make_prediction_net_list(
num_feature_outputs, NUM_OFFSET_CHANNELS, name='temporal_offset')
return prediction_heads
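As a point of orientation, `_make_prediction_net_list` builds one small convolutional head per backbone feature output, so applying a head group is a per-level loop. A hedged sketch with made-up shapes and names:

```python
import tensorflow as tf

# Two hypothetical backbone outputs (e.g. two hourglass stacks).
feature_maps = [tf.random.normal([2, 128, 128, 64]),
                tf.random.normal([2, 128, 128, 64])]
num_classes = 90
center_heads = [tf.keras.layers.Conv2D(num_classes, 1, name='center_%d' % i)
                for i in range(len(feature_maps))]
# One prediction per feature output, mirroring prediction_heads[OBJECT_CENTER].
center_logits = [head(fmap) for head, fmap in zip(center_heads, feature_maps)]
```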
def _initialize_target_assigners(self, stride, min_box_overlap_iou):
@@ -3524,6 +3534,37 @@ class CenterNetMetaArch(model.DetectionModel):
return embeddings
def _scatter_keypoints_to_batch(self, num_ind, kpt_coords_for_example,
kpt_scores_for_example,
instance_inds_for_example, max_detections,
total_num_keypoints):
"""Helper function to convert scattered keypoints into batch."""
def left_fn(kpt_coords_for_example, kpt_scores_for_example,
instance_inds_for_example):
# Scatter into tensor where instances align with original detection
# instances. New shape of keypoint coordinates and scores are
# [1, max_detections, num_total_keypoints, 2] and
# [1, max_detections, num_total_keypoints], respectively.
return _pad_to_full_instance_dim(
kpt_coords_for_example, kpt_scores_for_example,
instance_inds_for_example,
self._center_params.max_box_predictions)
def right_fn():
kpt_coords_for_example_all_det = tf.zeros(
[1, max_detections, total_num_keypoints, 2], dtype=tf.float32)
kpt_scores_for_example_all_det = tf.zeros(
[1, max_detections, total_num_keypoints], dtype=tf.float32)
return (kpt_coords_for_example_all_det,
kpt_scores_for_example_all_det)
left_fn = functools.partial(left_fn, kpt_coords_for_example,
kpt_scores_for_example,
instance_inds_for_example)
# Use dimension values instead of tf.size for tf.lite compatibility.
return tf.cond(num_ind[0] > 0, left_fn, right_fn)
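The helper above binds the data-dependent tensors with `functools.partial` so that both `tf.cond` branches are zero-argument callables returning identically shaped tensors. A self-contained sketch of that pattern (hypothetical shapes, with a trivial stand-in for the real scatter):

```python
import functools

import tensorflow as tf


def scatter_or_zeros(num_ind, coords, max_detections=20, num_keypoints=17):

  def left_fn(coords):
    # Stand-in for the real scatter: broadcast the mean into the padded shape.
    padded = tf.zeros([1, max_detections, num_keypoints, 2], tf.float32)
    return padded + tf.reduce_mean(coords)

  def right_fn():
    return tf.zeros([1, max_detections, num_keypoints, 2], tf.float32)

  # Bind the data-dependent argument so both branches take no arguments.
  return tf.cond(num_ind > 0, functools.partial(left_fn, coords), right_fn)


out = scatter_or_zeros(tf.constant(3), tf.random.normal([3, 17, 2]))
```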
def _postprocess_keypoints_multi_class(self, prediction_dict, classes,
y_indices, x_indices, boxes,
num_detections):
@@ -3630,26 +3671,13 @@ class CenterNetMetaArch(model.DetectionModel):
instance_inds_for_example = tf.concat(instance_inds_for_class_list,
axis=0)
# Use dimension values instead of tf.size for tf.lite compatibility.
num_inds = _get_shape(instance_inds_for_example, 1)
if num_inds[0] > 0:
# Scatter into tensor where instances align with original detection
# instances. New shape of keypoint coordinates and scores are
# [1, max_detections, num_total_keypoints, 2] and
# [1, max_detections, num_total_keypoints], respectively.
kpt_coords_for_example_all_det, kpt_scores_for_example_all_det = (
_pad_to_full_instance_dim(
kpt_coords_for_example, kpt_scores_for_example,
instance_inds_for_example,
self._center_params.max_box_predictions))
else:
kpt_coords_for_example_all_det = tf.zeros(
[1, max_detections, total_num_keypoints, 2], dtype=tf.float32)
kpt_scores_for_example_all_det = tf.zeros(
[1, max_detections, total_num_keypoints], dtype=tf.float32)
(kpt_coords_for_example_all_det,
kpt_scores_for_example_all_det) = self._scatter_keypoints_to_batch(
num_ind, kpt_coords_for_example, kpt_scores_for_example,
instance_inds_for_example, max_detections, total_num_keypoints)
kpt_coords_for_example_list.append(kpt_coords_for_example_all_det)
kpt_scores_for_example_list.append(kpt_scores_for_example_all_det)
kpt_coords_for_example_list.append(kpt_coords_for_example_all_det)
kpt_scores_for_example_list.append(kpt_scores_for_example_all_det)
# Concatenate all keypoints and scores from all examples in the batch.
# Shapes are [batch_size, max_detections, num_total_keypoints, 2] and
@@ -3951,5 +3979,13 @@ class CenterNetMetaArch(model.DetectionModel):
fine_tune_checkpoint_type)}
def updates(self):
raise RuntimeError('This model is intended to be used with model_lib_v2 '
'which does not support updates()')
if tf_version.is_tf2():
raise RuntimeError('This model is intended to be used with model_lib_v2 '
'which does not support updates()')
else:
update_ops = []
slim_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
# Copy the slim ops to avoid modifying the collection
if slim_update_ops:
update_ops.extend(slim_update_ops)
return update_ops
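The TF1 branch above returns a copy of the `UPDATE_OPS` collection, which is where graph-mode batch normalization registers its moving-average updates; callers are then expected to run those ops together with the train op. A hedged TF1 graph-mode sketch of that contract:

```python
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

images = tf.placeholder(tf.float32, [None, 8, 8, 3])
# tf.layers.batch_normalization registers its moving-average update ops in
# tf.GraphKeys.UPDATE_OPS while the graph is being built.
net = tf.layers.batch_normalization(images, training=True)
loss = tf.reduce_mean(net)

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
  train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
```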
@@ -795,7 +795,8 @@ def eager_eval_loop(
eval_dataset,
use_tpu=False,
postprocess_on_cpu=False,
global_step=None):
global_step=None,
):
"""Evaluate the model eagerly on the evaluation dataset.
This method will compute the evaluation metrics specified in the configs on
@@ -968,11 +969,10 @@ def eager_eval_loop(
eval_metrics[loss_key] = tf.reduce_mean(loss_metrics[loss_key])
eval_metrics = {str(k): v for k, v in eval_metrics.items()}
tf.logging.info('Eval metrics at step %d', global_step)
tf.logging.info('Eval metrics at step %d', global_step.numpy())
for k in eval_metrics:
tf.compat.v2.summary.scalar(k, eval_metrics[k], step=global_step)
tf.logging.info('\t+ %s: %f', k, eval_metrics[k])
return eval_metrics
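The `tf.compat.v2.summary.scalar` calls above write against whichever default summary writer is active for the current step. A minimal TF2 sketch of that mechanism (hypothetical log directory and metric values):

```python
import tensorflow as tf

writer = tf.summary.create_file_writer('/tmp/eval_logs')  # hypothetical path
global_step = tf.Variable(1000, dtype=tf.int64)
eval_metrics = {'Loss/total_loss': 0.42}  # made-up value

with writer.as_default():
  for name, value in eval_metrics.items():
    tf.summary.scalar(name, value, step=global_step)
```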
@@ -1026,6 +1026,8 @@ def eval_continuously(
"""
get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[
'get_configs_from_pipeline_file']
create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP[
'create_pipeline_proto_from_configs']
merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[
'merge_external_params_with_configs']
@@ -1043,6 +1045,10 @@ def eval_continuously(
'Forced number of epochs for all eval validations to be 1.')
configs = merge_external_params_with_configs(
configs, None, kwargs_dict=kwargs)
if model_dir:
pipeline_config_final = create_pipeline_proto_from_configs(configs)
config_util.save_pipeline_config(pipeline_config_final, model_dir)
model_config = configs['model']
train_input_config = configs['train_input_config']
eval_config = configs['eval_config']
@@ -1109,4 +1115,5 @@ def eval_continuously(
eval_input,
use_tpu=use_tpu,
postprocess_on_cpu=postprocess_on_cpu,
global_step=global_step)
global_step=global_step,
)
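The `save_pipeline_config` call added to `eval_continuously` above writes the merged configs back to `model_dir`. A hedged sketch of what that amounts to, assuming the standard protobuf text-format serialization (the helper name below is hypothetical):

```python
import os

from google.protobuf import text_format


def save_config_text(pipeline_config, directory, filename='pipeline.config'):
  # Serialize the merged pipeline proto as human-readable text next to the
  # checkpoints so the exact evaluation configuration is reproducible.
  os.makedirs(directory, exist_ok=True)
  with open(os.path.join(directory, filename), 'w') as f:
    f.write(text_format.MessageToString(pipeline_config))
```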
@@ -305,7 +305,7 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
# this net must be appended only once it's been filled with layers
self.convolutions.append(net)
def call(self, image_features, training=None):
def call(self, image_features):
"""Generate the multi-resolution feature maps.
Executed when calling the `.__call__` method on input.
@@ -313,11 +313,6 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
Args:
image_features: A dictionary of handles to activation tensors from the
base feature extractor.
training: A boolean, True when in training mode. If not specified,
defaults to (in order of priority): the training mode of the outer
`Layer.call`; the default mode set by
`tf.keras.backend.set_learning_phase`; or the default value for
`training` in the call signature.
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
@@ -333,7 +328,7 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
else:
feature_map = feature_maps[-1]
for layer in self.convolutions[index]:
feature_map = layer(feature_map, training=training)
feature_map = layer(feature_map)
layer_name = self.convolutions[index][-1].name
feature_map_keys.append(layer_name)
feature_maps.append(feature_map)
@@ -621,7 +616,7 @@ class KerasFpnTopDownFeatureMaps(tf.keras.Model):
self.reshape_blocks.append(reshaped_residual)
self.conv_layers.append(conv_net)
def call(self, image_features, training=None):
def call(self, image_features):
"""Generate the multi-resolution feature maps.
Executed when calling the `.__call__` method on input.
@@ -630,11 +625,6 @@ class KerasFpnTopDownFeatureMaps(tf.keras.Model):
image_features: list of tuples of (tensor_name, image_feature_tensor).
Spatial resolutions of successive tensors must reduce exactly by a factor
of 2.
training: A boolean, True when in training mode. If not specified,
defaults to (in order of priority): the training mode of the outer
`Layer.call`; the default mode set by
`tf.keras.backend.set_learning_phase`; or the default value for
`training` in the call signature.
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
@@ -646,7 +636,7 @@ class KerasFpnTopDownFeatureMaps(tf.keras.Model):
with tf.name_scope(self.scope):
top_down = image_features[-1][1]
for layer in self.top_layers:
top_down = layer(top_down, training=training)
top_down = layer(top_down)
output_feature_maps_list.append(top_down)
output_feature_map_keys.append('top_down_%s' % image_features[-1][0])
@@ -655,14 +645,14 @@ class KerasFpnTopDownFeatureMaps(tf.keras.Model):
residual = image_features[level][1]
top_down = output_feature_maps_list[-1]
for layer in self.residual_blocks[index]:
residual = layer(residual, training=training)
residual = layer(residual)
for layer in self.top_down_blocks[index]:
top_down = layer(top_down, training=training)
top_down = layer(top_down)
for layer in self.reshape_blocks[index]:
top_down = layer([residual, top_down], training=training)
top_down = layer([residual, top_down])
top_down += residual
for layer in self.conv_layers[index]:
top_down = layer(top_down, training=training)
top_down = layer(top_down)
output_feature_maps_list.append(top_down)
output_feature_map_keys.append('top_down_%s' % image_features[level][0])
return collections.OrderedDict(reversed(
......
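Dropping the explicit `training` plumbing in these `call` methods leans on Keras's training-mode propagation: a sublayer invoked without a `training` argument inherits the mode of the enclosing call. A small sketch of that behavior (hypothetical layer names):

```python
import tensorflow as tf


class TinyBlock(tf.keras.Model):

  def __init__(self):
    super().__init__()
    self.bn = tf.keras.layers.BatchNormalization()

  def call(self, x, training=None):
    # No `training=` is passed to the sublayer; Keras propagates the mode of
    # the enclosing call, so batch norm switches behavior automatically.
    return self.bn(x)


block = TinyBlock()
x = tf.random.normal([4, 8])
train_out = block(x, training=True)   # uses batch statistics
eval_out = block(x, training=False)   # uses moving averages
```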
@@ -197,7 +197,7 @@ class ConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
# Apply shared conv layers before the head predictors.
for layer in self._shared_nets[index]:
net = layer(net, training=self._is_training)
net = layer(net)
for head_name in self._sorted_head_names:
head_obj = self._prediction_heads[head_name][index]
@@ -458,13 +458,13 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
def _apply_layers(base_tower_layers, image_feature):
for layer in base_tower_layers:
image_feature = layer(image_feature, training=self._is_training)
image_feature = layer(image_feature)
return image_feature
for (index, image_feature) in enumerate(image_features):
# Apply additional projection layers to image features
for layer in self._additional_projection_layers[index]:
image_feature = layer(image_feature, training=self._is_training)
image_feature = layer(image_feature)
# Apply box tower layers.
box_tower_feature = _apply_layers(
......
@@ -334,4 +334,6 @@ message CenterNetFeatureExtractor {
// separable convolutions. This is typically applied to feature pyramid
// network if any.
optional bool use_depthwise = 5 [default = false];
}