Unverified commit 0f0c7745 authored by vivek rathod, committed by GitHub

Merged commit includes the following changes: (#8739)

318417714  by jonathanhuang:

    Internal change.

--
318367213  by sbeery:

    Pointing users to more documentation for Apache Beam

--
318358685  by sbeery:

    Context R-CNN sample config for GPU

--
318309800  by rathodv:

    Internal

--
318303364  by ronnyvotel:

    Adding the option for parsing and including DensePose annotations. http://densepose.org/

--
318291319  by aom:

    Adds a conv_bn_act option for conv_block, and naming convention changes for BiFPN utils.

--
318200598  by ronnyvotel:

    Updating the TF Example Decoder to parse DensePose annotations.

--
318174065  by jonathanhuang:

    Internal change.

--
318167805  by rathodv:

    Add use_tpu flag to TF2 binary.

--
318145285  by aom:

    Adds option for convolutional keras box predictor to force use_bias.

--

PiperOrigin-RevId: 318417714
parent 1e4fd825
@@ -16,14 +16,6 @@
 r"""Creates and runs TF2 object detection models.
-##################################
-NOTE: This module has not been fully tested; please bear with us while we iron
-out the kinks.
-##################################
-When a TPU device is available, this binary uses TPUStrategy. Otherwise, it uses
-GPUS with MirroredStrategy/MultiWorkerMirroredStrategy.
 For local training/evaluation run:
 PIPELINE_CONFIG_PATH=path/to/pipeline.config
 MODEL_DIR=/tmp/model_outputs
@@ -60,6 +52,8 @@ flags.DEFINE_string(
 flags.DEFINE_integer('eval_timeout', 3600, 'Number of seconds to wait for an'
                      'evaluation checkpoint before exiting.')
+flags.DEFINE_bool('use_tpu', False, 'Whether the job is executing on a TPU.')
 flags.DEFINE_integer(
     'num_workers', 1, 'When num_workers > 1, training uses '
     'MultiWorkerMirroredStrategy. When num_workers = 1 it uses '
@@ -84,7 +78,7 @@ def main(unused_argv):
         checkpoint_dir=FLAGS.checkpoint_dir,
         wait_interval=300, timeout=FLAGS.eval_timeout)
   else:
-    if tf.config.get_visible_devices('TPU'):
+    if FLAGS.use_tpu:
       resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
       tf.config.experimental_connect_to_cluster(resolver)
       tf.tpu.experimental.initialize_tpu_system(resolver)
......
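The hunk above replaces TPU device auto-detection with an explicit flag. As a hedged illustration of why, here is a minimal sketch of how such a flag typically drives TF2 strategy selection; the make_strategy helper is hypothetical, and only the TPU branch mirrors the code shown above.

# Sketch (hypothetical helper, not the binary's actual structure): an explicit
# use_tpu flag works even when no TPU device is locally visible, e.g. with a
# remote TPU worker, where tf.config.get_visible_devices('TPU') would return
# an empty list.
import tensorflow as tf

def make_strategy(use_tpu, num_workers):
  if use_tpu:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    return tf.distribute.experimental.TPUStrategy(resolver)
  if num_workers > 1:
    return tf.distribute.experimental.MultiWorkerMirroredStrategy()
  return tf.distribute.MirroredStrategy()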
@@ -73,7 +73,7 @@ class FasterRcnnInceptionResnetV2KerasFeatureExtractorTest(tf.test.TestCase):
     proposal_classifier_features = (
         model(proposal_feature_maps))
     features_shape = tf.shape(proposal_classifier_features)
-    self.assertAllEqual(features_shape.numpy(), [2, 8, 8, 1536])
+    self.assertAllEqual(features_shape.numpy(), [2, 9, 9, 1536])
 if __name__ == '__main__':
......
@@ -175,23 +175,6 @@ class FasterRCNNResnetKerasFeatureExtractor(
         self._variable_dict[variable.name[:-2]] = variable
       return keras_model
-  def restore_from_classification_checkpoint_fn(
-      self,
-      first_stage_feature_extractor_scope,
-      second_stage_feature_extractor_scope):
-    """Returns a map for restoring from an (object-based) checkpoint.
-    Args:
-      first_stage_feature_extractor_scope: A scope name for the first stage
-        feature extractor (unused).
-      second_stage_feature_extractor_scope: A scope name for the second stage
-        feature extractor (unused).
-    Returns:
-      A dict mapping keys to Keras models
-    """
-    return {'feature_extractor': self.classification_backbone}
 class FasterRCNNResnet50KerasFeatureExtractor(
     FasterRCNNResnetKerasFeatureExtractor):
......
@@ -163,14 +163,3 @@ class SSDMobileNetV1KerasFeatureExtractor(
         'Conv2d_13_pointwise': image_features[1]})
     return list(feature_maps.values())
-  def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
-    """Returns a map for restoring from an (object-based) checkpoint.
-    Args:
-      feature_extractor_scope: A scope name for the feature extractor (unused).
-    Returns:
-      A dict mapping keys to Keras models
-    """
-    return {'feature_extractor': self.classification_backbone}
@@ -241,14 +241,3 @@ class SSDMobileNetV2FpnKerasFeatureExtractor(
         last_feature_map = layer(last_feature_map)
       feature_maps.append(last_feature_map)
     return feature_maps
-  def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
-    """Returns a map for restoring from an (object-based) checkpoint.
-    Args:
-      feature_extractor_scope: A scope name for the feature extractor (unused).
-    Returns:
-      A dict mapping keys to Keras models
-    """
-    return {'feature_extractor': self.classification_backbone}
@@ -166,14 +166,3 @@ class SSDMobileNetV2KerasFeatureExtractor(
         'layer_19': image_features[1]})
     return list(feature_maps.values())
-  def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
-    """Returns a map for restoring from an (object-based) checkpoint.
-    Args:
-      feature_extractor_scope: A scope name for the feature extractor (unused).
-    Returns:
-      A dict mapping keys to Keras models
-    """
-    return {'feature_extractor': self.classification_backbone}
@@ -246,17 +246,6 @@ class SSDResNetV1FpnKerasFeatureExtractor(
       feature_maps.append(last_feature_map)
     return feature_maps
-  def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
-    """Returns a map for restoring from an (object-based) checkpoint.
-    Args:
-      feature_extractor_scope: A scope name for the feature extractor (unused).
-    Returns:
-      A dict mapping keys to Keras models
-    """
-    return {'feature_extractor': self.classification_backbone}
 class SSDResNet50V1FpnKerasFeatureExtractor(
     SSDResNetV1FpnKerasFeatureExtractor):
......
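The five deletions above remove identical restore_from_classification_checkpoint_fn methods, each of which returned {'feature_extractor': self.classification_backbone}. As a hedged sketch of how such an object-based restore map is typically consumed, here is hypothetical driver code (not from this commit):

import tensorflow as tf

def restore_backbone(feature_extractor, checkpoint_path):
  # Wrap the backbone under the same attribute name the removed map used,
  # then restore. expect_partial() silences warnings about detection-specific
  # variables that a classification checkpoint does not contain.
  ckpt = tf.train.Checkpoint(
      feature_extractor=feature_extractor.classification_backbone)
  ckpt.restore(checkpoint_path).expect_partial()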
@@ -314,7 +314,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
       self, inserted_layer_counter, target_channel):
     projection_layers = []
     if inserted_layer_counter >= 0:
-      use_bias = False if self._apply_batch_norm else True
+      use_bias = False if (self._apply_batch_norm and not
+                           self._conv_hyperparams.force_use_bias()) else True
       projection_layers.append(keras.Conv2D(
           target_channel, [1, 1], strides=1, padding='SAME',
           name='ProjectionLayer/conv2d_{}'.format(inserted_layer_counter),
@@ -331,7 +332,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
     conv_layers = []
     batch_norm_layers = []
     activation_layers = []
-    use_bias = False if self._apply_batch_norm else True
+    use_bias = False if (self._apply_batch_norm and not
+                         self._conv_hyperparams.force_use_bias()) else True
     for additional_conv_layer_idx in range(self._num_layers_before_predictor):
       layer_name = '{}/conv2d_{}'.format(
           tower_name_scope, additional_conv_layer_idx)
@@ -363,7 +365,9 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
               training=(self._is_training and not self._freeze_batchnorm),
               name='{}/conv2d_{}/BatchNorm/feature_{}'.format(
                   tower_name_scope, additional_conv_layer_idx, feature_index)))
-      activation_layers.append(tf.keras.layers.Lambda(tf.nn.relu6))
+      activation_layers.append(self._conv_hyperparams.build_activation_layer(
+          name='{}/conv2d_{}/activation_{}'.format(
+              tower_name_scope, additional_conv_layer_idx, feature_index)))
     # Set conv layers as the shared conv layers for different feature maps with
     # the same tower_name_scope.
......
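Both edited use_bias expressions above compute the same condition. An equivalent restatement (assuming the surrounding class attributes) may make the intent clearer:

# Biases are now dropped only when batch norm is applied AND the new
# force_use_bias option is off; by De Morgan's law this is equivalent:
use_bias = (not self._apply_batch_norm
            or self._conv_hyperparams.force_use_bias())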
@@ -31,7 +31,7 @@ enum InputType {
   TF_SEQUENCE_EXAMPLE = 2;  // TfSequenceExample Input
 }
-// Next id: 31
+// Next id: 32
 message InputReader {
   // Name of input reader. Typically used to describe the dataset that is read
   // by this input reader.
@@ -119,6 +119,10 @@ message InputReader {
   // Type of instance mask.
   optional InstanceMaskType mask_type = 10 [default = NUMERICAL_MASKS];
+  // Whether to load DensePose data. If set, must also set load_instance_masks
+  // to true.
+  optional bool load_dense_pose = 31 [default = false];
   // Whether to use the display name when decoding examples. This is only used
   // when mapping class text strings to integers.
   optional bool use_display_name = 17 [default = false];
......
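As the new comment notes, load_dense_pose depends on instance masks being loaded. A hedged example of how the field might appear in a pipeline config; paths are placeholders, and load_instance_masks is the existing InputReader field the comment refers to:

train_input_reader {
  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.pbtxt"
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/train.record-?????-of-00100"
  }
  # DensePose annotations ride along with instance masks, so both are set.
  load_instance_masks: true
  load_dense_pose: true
}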
@@ -59,7 +59,8 @@ message TrainConfig {
   // Whether to load all checkpoint vars that match model variable names and
   // sizes. This option is only available if `from_detection_checkpoint` is
-  // True.
+  // True. This option is *not* supported for TF2 --- setting it to true
+  // will raise an error.
   optional bool load_all_detection_checkpoint_vars = 19 [default = false];
   // Number of steps to train the DetectionModel for. If 0, will train the model
......
# Context R-CNN configuration for Snapshot Serengeti Dataset, with sequence
# example input data with context_features.
# This model uses attention into contextual features within the Faster R-CNN
# object detection framework to improve object detection performance.
# See https://arxiv.org/abs/1912.03538 for more information.
# Search for "PATH_TO_BE_CONFIGURED" to find the fields that should be
# configured.
model {
faster_rcnn {
num_classes: 48
image_resizer {
fixed_shape_resizer {
height: 640
width: 640
}
}
feature_extractor {
type: "faster_rcnn_resnet101"
first_stage_features_stride: 16
batch_norm_trainable: true
}
first_stage_anchor_generator {
grid_anchor_generator {
height_stride: 16
width_stride: 16
scales: 0.25
scales: 0.5
scales: 1.0
scales: 2.0
aspect_ratios: 0.5
aspect_ratios: 1.0
aspect_ratios: 2.0
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.00999999977648
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.699999988079
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
use_dropout: false
dropout_keep_probability: 1.0
share_box_across_classes: true
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.600000023842
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
use_matmul_crop_and_resize: true
clip_anchors_to_image: true
use_matmul_gather_in_matcher: true
use_static_balanced_label_sampler: true
use_static_shapes: true
context_config {
max_num_context_features: 2000
context_feature_length: 2057
}
}
}
train_config {
batch_size: 8
data_augmentation_options {
random_horizontal_flip {
}
}
sync_replicas: true
optimizer {
momentum_optimizer {
learning_rate {
manual_step_learning_rate {
initial_learning_rate: 0.0
schedule {
step: 400000
learning_rate: 0.002
}
schedule {
step: 500000
learning_rate: 0.0002
}
schedule {
step: 600000
learning_rate: 0.00002
}
warmup: true
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/faster_rcnn_resnet101_coco_2018_08_14/model.ckpt"
from_detection_checkpoint: true
num_steps: 5000000
replicas_to_aggregate: 8
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
use_bfloat16: true
}
train_input_reader {
label_map_path: "PATH_TO_BE_CONFIGURED/ss_label_map.pbtxt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/snapshot_serengeti_train-?????-of-?????"
}
load_context_features: true
input_type: TF_SEQUENCE_EXAMPLE
}
eval_config {
max_evals: 50
metrics_set: "coco_detection_metrics"
use_moving_averages: false
batch_size: 1
}
eval_input_reader {
label_map_path: "PATH_TO_BE_CONFIGURED/ss_label_map.pbtxt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/snapshot_serengeti_val-?????-of-?????"
}
load_context_features: true
input_type: TF_SEQUENCE_EXAMPLE
}
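To try this sample config, a launch along the following lines should work once the PATH_TO_BE_CONFIGURED values are filled in. The exact entry point is an assumption on our part: the config's TF_SEQUENCE_EXAMPLE input and max_evals field follow the TF1 binary's conventions, so the standard model_main.py flags are shown.

# Hypothetical invocation; flag names follow the standard
# object_detection/model_main.py entry point.
python object_detection/model_main.py \
  --pipeline_config_path=path/to/context_rcnn_resnet101_snapshot_serengeti.config \
  --model_dir=/tmp/context_rcnn \
  --alsologtostderr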
@@ -26,7 +26,8 @@ from object_detection.utils import shape_utils
 def create_conv_block(name, num_filters, kernel_size, strides, padding,
                       use_separable, apply_batchnorm, apply_activation,
-                      conv_hyperparams, is_training, freeze_batchnorm):
+                      conv_hyperparams, is_training, freeze_batchnorm,
+                      conv_bn_act_pattern=True):
   """Create Keras layers for regular or separable convolutions.
   Args:
@@ -50,6 +51,9 @@ def create_conv_block(name, num_filters, kernel_size, strides, padding,
       training or not. When training with a small batch size (e.g. 1), it is
       desirable to freeze batch norm update and use pretrained batch norm
       params.
+    conv_bn_act_pattern: Bool. By default, when True, the layers returned by
+      this function are in the order [conv, batchnorm, activation]. Otherwise,
+      when False, the order of the layers is [activation, conv, batchnorm].
   Returns:
     A list of keras layers, including (regular or seperable) convolution, and
@@ -73,7 +77,7 @@ def create_conv_block(name, num_filters, kernel_size, strides, padding,
             depth_multiplier=1,
             padding=padding,
             strides=strides,
-            name=name + '_separable_conv',
+            name=name + 'separable_conv',
             **kwargs))
   else:
     layers.append(
@@ -82,18 +86,22 @@ def create_conv_block(name, num_filters, kernel_size, strides, padding,
             kernel_size=kernel_size,
             padding=padding,
             strides=strides,
-            name=name + '_conv',
+            name=name + 'conv',
             **conv_hyperparams.params()))
   if apply_batchnorm:
     layers.append(
         conv_hyperparams.build_batch_norm(
             training=(is_training and not freeze_batchnorm),
-            name=name + '_batchnorm'))
+            name=name + 'batchnorm'))
   if apply_activation:
-    layers.append(
-        conv_hyperparams.build_activation_layer(name=name + '_activation'))
+    activation_layer = conv_hyperparams.build_activation_layer(
+        name=name + 'activation')
+    if conv_bn_act_pattern:
+      layers.append(activation_layer)
+    else:
+      layers = [activation_layer] + layers
   return layers
@@ -133,28 +141,28 @@ def create_downsample_feature_map_ops(scale, downsample_method,
             pool_size=kernel_size,
             strides=stride,
             padding=padding,
-            name=name + '_downsample_max_x{}'.format(stride)))
+            name=name + 'downsample_max_x{}'.format(stride)))
   elif downsample_method == 'avg_pooling':
     layers.append(
         tf.keras.layers.AveragePooling2D(
             pool_size=kernel_size,
             strides=stride,
             padding=padding,
-            name=name + '_downsample_avg_x{}'.format(stride)))
+            name=name + 'downsample_avg_x{}'.format(stride)))
   elif downsample_method == 'depthwise_conv':
     layers.append(
         tf.keras.layers.DepthwiseConv2D(
             kernel_size=kernel_size,
             strides=stride,
             padding=padding,
-            name=name + '_downsample_depthwise_x{}'.format(stride)))
+            name=name + 'downsample_depthwise_x{}'.format(stride)))
     layers.append(
         conv_hyperparams.build_batch_norm(
             training=(is_training and not freeze_batchnorm),
-            name=name + '_downsample_batchnorm'))
+            name=name + 'downsample_batchnorm'))
     layers.append(
         conv_hyperparams.build_activation_layer(name=name +
-                                                '_downsample_activation'))
+                                                'downsample_activation'))
   else:
     raise ValueError('Unknown downsample method: {}'.format(downsample_method))
......
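A hedged usage sketch for the new conv_bn_act_pattern argument of create_conv_block. The conv_hyperparams object is assumed to come from the usual hyperparams builder, and the trailing separator in the name reflects the naming-convention change above (callers now supply it themselves):

# Ordering returned for each setting, per the docstring added above:
#   conv_bn_act_pattern=True  -> [conv, batchnorm, activation]
#   conv_bn_act_pattern=False -> [activation, conv, batchnorm]
block = create_conv_block(
    name='node_0/',  # the name prefix now carries the separator itself
    num_filters=64, kernel_size=3, strides=1, padding='SAME',
    use_separable=False, apply_batchnorm=True, apply_activation=True,
    conv_hyperparams=conv_hyperparams, is_training=True,
    freeze_batchnorm=False, conv_bn_act_pattern=False)

The False ordering is useful when the activation should run before the convolution, as in some BiFPN variants.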
@@ -147,6 +147,7 @@ def clear_fine_tune_checkpoint(pipeline_config_path,
   """Clears fine_tune_checkpoint and writes a new pipeline config file."""
   configs = get_configs_from_pipeline_file(pipeline_config_path)
   configs["train_config"].fine_tune_checkpoint = ""
+  configs["train_config"].load_all_detection_checkpoint_vars = False
   pipeline_proto = create_pipeline_proto_from_configs(configs)
   with tf.gfile.Open(new_pipeline_config_path, "wb") as f:
     f.write(text_format.MessageToString(pipeline_proto))
......
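A hedged usage sketch of the updated helper; paths are placeholders:

# After this change, the rewritten config is safe to train with under TF2:
# both fine_tune_checkpoint and load_all_detection_checkpoint_vars (which
# TF2 rejects when true, per the train.proto comment above) are cleared.
clear_fine_tune_checkpoint('path/to/pipeline.config',
                           '/tmp/pipeline_cleared.config')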