Commit e2385734 authored by Kaushik Shivakumar

Merge branch 'master' of https://github.com/tensorflow/models into latest

parents 30c14aa9 1bfb577d
# SSD with ResNet-152 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a. RetinaNet).
# See Lin et al., https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an ImageNet classification checkpoint
# Train on TPU-8
#
# Achieves 39.6 mAP on COCO17 Val
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 2
}
}
image_resizer {
fixed_shape_resizer {
height: 1024
width: 1024
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 256
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_resnet152_v1_fpn_keras'
fpn {
min_level: 3
max_level: 7
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet152.ckpt-1"
fine_tune_checkpoint_type: "classification"
batch_size: 64
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 100000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .04
total_steps: 100000
warmup_learning_rate: .013333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
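
As a quick sanity check, a pipeline file like the one above can be parsed and inspected with the Object Detection API's config utilities before launching training. A minimal sketch, assuming the TF2 OD API is installed; the filename is a placeholder:

```python
# Sketch: parse a pipeline config and inspect a few of the fields set above.
from object_detection.utils import config_util

configs = config_util.get_configs_from_pipeline_file(
    'ssd_resnet152_v1_fpn_1024x1024.config')  # placeholder path

model_config = configs['model']
train_config = configs['train_config']
print(model_config.ssd.num_classes)                               # 90
print(model_config.ssd.image_resizer.fixed_shape_resizer.height)  # 1024
print(train_config.batch_size)                                    # 64
```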
# SSD with ResNet-152 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a. RetinaNet).
# See Lin et al., https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an ImageNet classification checkpoint
# Train on TPU-8
#
# Achieves 35.6 mAP on COCO17 Val
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 2
}
}
image_resizer {
fixed_shape_resizer {
height: 640
width: 640
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 256
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_resnet152_v1_fpn_keras'
fpn {
min_level: 3
max_level: 7
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet152.ckpt-1"
fine_tune_checkpoint_type: "classification"
batch_size: 64
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 25000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .04
total_steps: 25000
warmup_learning_rate: .013333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
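
All four configs use the sigmoid focal classification loss from the RetinaNet paper cited in the headers, with alpha = 0.25 and gamma = 2.0. As a reference for what `weighted_sigmoid_focal` computes per anchor, here is the bare formula from Lin et al. in plain NumPy (a sketch, not the API's implementation):

```python
import numpy as np

def sigmoid_focal_loss(logit, is_positive, alpha=0.25, gamma=2.0):
  """Focal loss FL(p_t) = -alpha_t * (1 - p_t)**gamma * log(p_t)."""
  p = 1.0 / (1.0 + np.exp(-logit))                # sigmoid score
  p_t = p if is_positive else 1.0 - p             # prob. of the true class
  alpha_t = alpha if is_positive else 1.0 - alpha
  return -alpha_t * (1.0 - p_t) ** gamma * np.log(p_t)

# class_prediction_bias_init: -4.6 starts logits near sigmoid(-4.6) ~= 0.01,
# so the many easy negatives contribute almost nothing to the initial loss:
print(sigmoid_focal_loss(-4.6, is_positive=False))  # ~7e-7, vanishingly small
```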
# SSD with ResNet-50 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a. RetinaNet).
# See Lin et al., https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an ImageNet classification checkpoint
# Train on TPU-8
#
# Achieves 38.3 mAP on COCO17 Val
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 2
}
}
image_resizer {
fixed_shape_resizer {
height: 1024
width: 1024
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 256
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_resnet50_v1_fpn_keras'
fpn {
min_level: 3
max_level: 7
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet50.ckpt-1"
fine_tune_checkpoint_type: "classification"
batch_size: 64
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 100000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .04
total_steps: 100000
warmup_learning_rate: .013333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
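
The `multiscale_anchor_generator` settings shared by these configs place 3 aspect ratios × 2 scales per octave = 6 anchors at every location of feature levels 3 through 7 (strides 8 through 128). A quick sketch of the anchor-count arithmetic for the two input resolutions used here (both sizes divide all strides evenly, so integer division matches the API's grids):

```python
# Sketch: anchors implied by min_level=3, max_level=7, aspect_ratios of
# [1.0, 2.0, 0.5], and scales_per_octave=2.
def count_anchors(image_size, min_level=3, max_level=7,
                  num_aspect_ratios=3, scales_per_octave=2):
  per_location = num_aspect_ratios * scales_per_octave   # 6
  total = 0
  for level in range(min_level, max_level + 1):
    grid = image_size // 2 ** level   # feature-map side length at this level
    total += grid * grid * per_location
  return total

print(count_anchors(1024))  # 130944 anchors for the 1024x1024 configs
print(count_anchors(640))   # 51150 anchors for the 640x640 configs
```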
# SSD with ResNet-50 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a. RetinaNet).
# See Lin et al., https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an ImageNet classification checkpoint
# Train on TPU-8
#
# Achieves 34.3 mAP on COCO17 Val
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 2
}
}
image_resizer {
fixed_shape_resizer {
height: 640
width: 640
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 256
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_resnet50_v1_fpn_keras'
fpn {
min_level: 3
max_level: 7
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet50.ckpt-1"
fine_tune_checkpoint_type: "classification"
batch_size: 64
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 25000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .04
total_steps: 25000
warmup_learning_rate: .013333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
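
Each `train_config` above pairs momentum SGD (momentum 0.9) with a linear-warmup cosine schedule; the 1024×1024 variants train for 100000 steps and the 640×640 variants for 25000. A sketch of the learning-rate curve those `cosine_decay_learning_rate` fields describe (an approximation of the API's schedule, with defaults taken from the 640×640 configs; exact boundary handling may differ):

```python
import math

def lr_at_step(step, learning_rate_base=0.04, total_steps=25000,
               warmup_learning_rate=0.013333, warmup_steps=2000):
  """Approximates cosine_decay_learning_rate from the configs above."""
  if step < warmup_steps:
    # Linear ramp from warmup_learning_rate up to learning_rate_base.
    slope = (learning_rate_base - warmup_learning_rate) / warmup_steps
    return warmup_learning_rate + slope * step
  # Cosine decay from learning_rate_base down to 0 over the remaining steps.
  progress = (step - warmup_steps) / (total_steps - warmup_steps)
  return 0.5 * learning_rate_base * (1.0 + math.cos(math.pi * progress))

print(lr_at_step(0))       # 0.013333 (start of warmup)
print(lr_at_step(2000))    # 0.04     (peak, end of warmup)
print(lr_at_step(25000))   # ~0.0     (fully decayed)
```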
......@@ -42,9 +42,6 @@ PART_NAMES = [
b'left_face',
]
_SRC_PATH = ('google3/third_party/tensorflow_models/object_detection/'
'dataset_tools/densepose')
def scale(dp_surface_coords, y_scale, x_scale, scope=None):
"""Scales DensePose coordinates in y and x dimensions.
......@@ -266,10 +263,14 @@ class DensePoseHorizontalFlip(object):
def __init__(self):
"""Constructor."""
uv_symmetry_transforms_path = os.path.join(
tf.resource_loader.get_data_files_path(), '..', 'dataset_tools',
'densepose', 'UV_symmetry_transforms.mat')
data = scipy.io.loadmat(uv_symmetry_transforms_path)
path = os.path.dirname(os.path.abspath(__file__))
uv_symmetry_transforms_path = tf.resource_loader.get_path_to_datafile(
os.path.join(path, '..', 'dataset_tools', 'densepose',
'UV_symmetry_transforms.mat'))
tf.logging.info('Loading DensePose symmetry transforms file from {}'.format(
uv_symmetry_transforms_path))
with tf.io.gfile.GFile(uv_symmetry_transforms_path, 'rb') as f:
data = scipy.io.loadmat(f)
# Create lookup maps which indicate how a VU coordinate changes after a
# horizontal flip.
......
......@@ -102,7 +102,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
Args:
field: a string key, options are
fields.BoxListFields.{boxes,classes,masks,keypoints,
keypoint_visibilities} or
keypoint_visibilities, densepose_*}
fields.InputDataFields.is_annotated.
Returns:
......@@ -123,7 +123,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
Args:
field: a string key, options are
fields.BoxListFields.{boxes,classes,masks,keypoints,
keypoint_visibilities} or
keypoint_visibilities, densepose_*} or
fields.InputDataFields.is_annotated.
Returns:
......@@ -251,9 +251,14 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
detection_classes: [batch, max_detections]
(If a model is producing class-agnostic detections, this field may be
missing)
instance_masks: [batch, max_detections, image_height, image_width]
detection_masks: [batch, max_detections, mask_height, mask_width]
(optional)
keypoints: [batch, max_detections, num_keypoints, 2] (optional)
detection_keypoints: [batch, max_detections, num_keypoints, 2]
(optional)
detection_keypoint_scores: [batch, max_detections, num_keypoints]
(optional)
detection_surface_coords: [batch, max_detections, mask_height,
mask_width, 2] (optional)
num_detections: [batch]
In addition to the above fields this stage also outputs the following
......@@ -288,19 +293,23 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
"""
pass
def provide_groundtruth(self,
groundtruth_boxes_list,
groundtruth_classes_list,
groundtruth_masks_list=None,
groundtruth_keypoints_list=None,
groundtruth_keypoint_visibilities_list=None,
groundtruth_weights_list=None,
groundtruth_confidences_list=None,
groundtruth_is_crowd_list=None,
groundtruth_group_of_list=None,
groundtruth_area_list=None,
is_annotated_list=None,
groundtruth_labeled_classes=None):
def provide_groundtruth(
self,
groundtruth_boxes_list,
groundtruth_classes_list,
groundtruth_masks_list=None,
groundtruth_keypoints_list=None,
groundtruth_keypoint_visibilities_list=None,
groundtruth_dp_num_points_list=None,
groundtruth_dp_part_ids_list=None,
groundtruth_dp_surface_coords_list=None,
groundtruth_weights_list=None,
groundtruth_confidences_list=None,
groundtruth_is_crowd_list=None,
groundtruth_group_of_list=None,
groundtruth_area_list=None,
is_annotated_list=None,
groundtruth_labeled_classes=None):
"""Provide groundtruth tensors.
Args:
......@@ -324,6 +333,15 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
`groundtruth_keypoint_visibilities_list`).
groundtruth_keypoint_visibilities_list: a list of 3-D tf.bool tensors
of shape [num_boxes, num_keypoints] containing keypoint visibilities.
groundtruth_dp_num_points_list: a list of 1-D tf.int32 tensors of shape
[num_boxes] containing the number of DensePose sampled points.
groundtruth_dp_part_ids_list: a list of 2-D tf.int32 tensors of shape
[num_boxes, max_sampled_points] containing the DensePose part ids
(0-indexed) for each sampled point. Note that there may be padding.
groundtruth_dp_surface_coords_list: a list of 3-D tf.float32 tensors of
shape [num_boxes, max_sampled_points, 4] containing the DensePose
surface coordinates for each sampled point. Note that there may be
padding.
groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
groundtruth_confidences_list: A list of 2-D tf.float32 tensors of shape
......@@ -361,6 +379,18 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
self._groundtruth_lists[
fields.BoxListFields.keypoint_visibilities] = (
groundtruth_keypoint_visibilities_list)
if groundtruth_dp_num_points_list:
self._groundtruth_lists[
fields.BoxListFields.densepose_num_points] = (
groundtruth_dp_num_points_list)
if groundtruth_dp_part_ids_list:
self._groundtruth_lists[
fields.BoxListFields.densepose_part_ids] = (
groundtruth_dp_part_ids_list)
if groundtruth_dp_surface_coords_list:
self._groundtruth_lists[
fields.BoxListFields.densepose_surface_coords] = (
groundtruth_dp_surface_coords_list)
if groundtruth_is_crowd_list:
self._groundtruth_lists[
fields.BoxListFields.is_crowd] = groundtruth_is_crowd_list
......
......@@ -3984,7 +3984,7 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
Args:
image: rank 3 float32 tensor containing 1 image ->
[height, width,channels].
[height, width, channels].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax].
......@@ -4128,6 +4128,131 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
return return_values
def random_scale_crop_and_pad_to_square(
image,
boxes,
labels,
label_weights,
masks=None,
keypoints=None,
scale_min=0.1,
scale_max=2.0,
output_size=512,
resize_method=tf.image.ResizeMethod.BILINEAR,
seed=None):
"""Randomly scale, crop, and then pad an image to fixed square dimensions.
Randomly scale, crop, and then pad an image to the desired square output
dimensions. Specifically, this method first samples a random_scale factor
from a uniform distribution between scale_min and scale_max, and then resizes
the image such that its maximum dimension is (output_size * random_scale).
Secondly, a square output_size crop is extracted from the resized image
(note, this will only occur when random_scale > 1.0). Lastly, the cropped
region is padded to the desired square output_size, by filling with zeros.
The augmentation is borrowed from [1]
[1]: https://arxiv.org/abs/1911.09070
Args:
image: rank 3 float32 tensor containing 1 image ->
[height, width, channels].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. Boxes
are in normalized form meaning their coordinates vary between [0, 1]. Each
row is in the form of [ymin, xmin, ymax, xmax]. Boxes on the crop boundary
are clipped to the boundary and boxes falling outside the crop are
ignored.
labels: rank 1 int32 tensor containing the object classes.
label_weights: float32 tensor of shape [num_instances] representing the
weight for each box.
masks: (optional) rank 3 float32 tensor with shape [num_instances, height,
width] containing instance masks. The masks are of the same height, width
as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape [num_instances,
num_keypoints, 2]. The keypoints are in y-x normalized coordinates.
scale_min: float, the minimum value for the random scale factor.
scale_max: float, the maximum value for the random scale factor.
output_size: int, the desired (square) output image size.
resize_method: tf.image.ResizeMethod, resize method to use when scaling the
input images.
seed: random seed.
Returns:
image: image which is the same rank as input image.
boxes: boxes which is the same rank as input boxes.
Boxes are in normalized form.
labels: new labels.
label_weights: rank 1 float32 tensor with shape [num_instances].
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks (only returned when input masks are provided).
keypoints: rank 3 float32 tensor with shape [num_instances, num_keypoints, 2]
in normalized coordinates (only returned when input keypoints are provided).
"""
img_shape = tf.shape(image)
input_height, input_width = img_shape[0], img_shape[1]
random_scale = tf.random_uniform([], scale_min, scale_max, seed=seed)
# Compute the scaled height and width from the random scale.
max_input_dim = tf.cast(tf.maximum(input_height, input_width), tf.float32)
input_ar_y = tf.cast(input_height, tf.float32) / max_input_dim
input_ar_x = tf.cast(input_width, tf.float32) / max_input_dim
scaled_height = tf.cast(random_scale * output_size * input_ar_y, tf.int32)
scaled_width = tf.cast(random_scale * output_size * input_ar_x, tf.int32)
# Compute the offsets:
offset_y = tf.cast(scaled_height - output_size, tf.float32)
offset_x = tf.cast(scaled_width - output_size, tf.float32)
offset_y = tf.maximum(0.0, offset_y) * tf.random_uniform([], 0, 1, seed=seed)
offset_x = tf.maximum(0.0, offset_x) * tf.random_uniform([], 0, 1, seed=seed)
offset_y = tf.cast(offset_y, tf.int32)
offset_x = tf.cast(offset_x, tf.int32)
# Scale, crop, and pad the input image.
scaled_image = tf.image.resize_images(
image, [scaled_height, scaled_width], method=resize_method)
scaled_image = scaled_image[offset_y:offset_y + output_size,
offset_x:offset_x + output_size, :]
output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0, output_size,
output_size)
# Update the boxes.
new_window = tf.cast(
tf.stack([offset_y, offset_x,
offset_y + output_size, offset_x + output_size]),
dtype=tf.float32)
new_window /= tf.cast(
tf.stack([scaled_height, scaled_width, scaled_height, scaled_width]),
dtype=tf.float32)
boxlist = box_list.BoxList(boxes)
boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window)
boxlist, indices = box_list_ops.prune_completely_outside_window(
boxlist, [0.0, 0.0, 1.0, 1.0])
boxlist = box_list_ops.clip_to_window(
boxlist, [0.0, 0.0, 1.0, 1.0], filter_nonoverlapping=False)
return_values = [output_image, boxlist.get(),
tf.gather(labels, indices),
tf.gather(label_weights, indices)]
if masks is not None:
new_masks = tf.expand_dims(masks, -1)
new_masks = tf.image.resize_images(
new_masks, [scaled_height, scaled_width], method=resize_method)
new_masks = new_masks[:, offset_y:offset_y + output_size,
offset_x:offset_x + output_size, :]
new_masks = tf.image.pad_to_bounding_box(
new_masks, 0, 0, output_size, output_size)
new_masks = tf.squeeze(new_masks, [-1])
return_values.append(tf.gather(new_masks, indices))
if keypoints is not None:
keypoints = tf.gather(keypoints, indices)
keypoints = keypoint_ops.change_coordinate_frame(keypoints, new_window)
keypoints = keypoint_ops.prune_outside_window(
keypoints, [0.0, 0.0, 1.0, 1.0])
return_values.append(keypoints)
return return_values
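# Illustrative usage sketch (hypothetical shapes and values; assumes the
# TF1-style graph mode used elsewhere in this file):
#
#   image = tf.random_uniform([512, 256, 3])
#   boxes = tf.constant([[0.2, 0.2, 0.4, 0.4]])
#   labels = tf.constant([1], dtype=tf.int32)
#   weights = tf.constant([1.0])
#   (new_image, new_boxes, new_labels,
#    new_weights) = random_scale_crop_and_pad_to_square(
#        image, boxes, labels, weights,
#        scale_min=0.5, scale_max=1.5, output_size=512)
#   # new_image is 512x512x3; boxes falling outside the sampled crop are
#   # pruned, and the survivors are re-normalized to the output frame.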
def get_default_func_arg_map(include_label_weights=True,
include_label_confidences=False,
include_multiclass_scores=False,
......@@ -4230,15 +4355,14 @@ def get_default_func_arg_map(include_label_weights=True,
random_adjust_saturation: (fields.InputDataFields.image,),
random_distort_color: (fields.InputDataFields.image,),
random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
random_crop_image: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights,
groundtruth_label_confidences, multiclass_scores,
groundtruth_instance_masks, groundtruth_keypoints,
groundtruth_keypoint_visibilities,
groundtruth_dp_num_points, groundtruth_dp_part_ids,
groundtruth_dp_surface_coords),
random_crop_image:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights, groundtruth_label_confidences,
multiclass_scores, groundtruth_instance_masks, groundtruth_keypoints,
groundtruth_keypoint_visibilities, groundtruth_dp_num_points,
groundtruth_dp_part_ids, groundtruth_dp_surface_coords),
random_pad_image:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes, groundtruth_instance_masks,
......@@ -4361,6 +4485,12 @@ def get_default_func_arg_map(include_label_weights=True,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights, groundtruth_instance_masks,
groundtruth_keypoints),
random_scale_crop_and_pad_to_square:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights, groundtruth_instance_masks,
groundtruth_keypoints),
}
return prep_func_arg_map
......
......@@ -712,76 +712,6 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
test_masks=True,
test_keypoints=True)
@parameterized.parameters(
{'include_dense_pose': False},
{'include_dense_pose': True}
)
def testRunRandomHorizontalFlipWithMaskAndKeypoints(self, include_dense_pose):
def graph_fn():
preprocess_options = [(preprocessor.random_horizontal_flip, {})]
image_height = 3
image_width = 3
images = tf.random_uniform([1, image_height, image_width, 3])
boxes = self.createTestBoxes()
masks = self.createTestMasks()
keypoints, keypoint_visibilities = self.createTestKeypoints()
dp_num_point, dp_part_ids, dp_surface_coords = self.createTestDensePose()
keypoint_flip_permutation = self.createKeypointFlipPermutation()
tensor_dict = {
fields.InputDataFields.image:
images,
fields.InputDataFields.groundtruth_boxes:
boxes,
fields.InputDataFields.groundtruth_instance_masks:
masks,
fields.InputDataFields.groundtruth_keypoints:
keypoints,
fields.InputDataFields.groundtruth_keypoint_visibilities:
keypoint_visibilities
}
if include_dense_pose:
tensor_dict.update({
fields.InputDataFields.groundtruth_dp_num_points: dp_num_point,
fields.InputDataFields.groundtruth_dp_part_ids: dp_part_ids,
fields.InputDataFields.groundtruth_dp_surface_coords:
dp_surface_coords
})
preprocess_options = [(preprocessor.random_horizontal_flip, {
'keypoint_flip_permutation': keypoint_flip_permutation
})]
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_instance_masks=True,
include_keypoints=True,
include_keypoint_visibilities=True,
include_dense_pose=include_dense_pose)
tensor_dict = preprocessor.preprocess(
tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
keypoint_visibilities = tensor_dict[
fields.InputDataFields.groundtruth_keypoint_visibilities]
output_tensors = [boxes, masks, keypoints, keypoint_visibilities]
if include_dense_pose:
dp_num_points = tensor_dict[
fields.InputDataFields.groundtruth_dp_num_points]
dp_part_ids = tensor_dict[
fields.InputDataFields.groundtruth_dp_part_ids]
dp_surface_coords = tensor_dict[
fields.InputDataFields.groundtruth_dp_surface_coords]
output_tensors.extend([dp_num_points, dp_part_ids, dp_surface_coords])
return output_tensors
output_tensors = self.execute_cpu(graph_fn, [])
self.assertIsNotNone(output_tensors[0]) # Boxes.
self.assertIsNotNone(output_tensors[1]) # Masks.
self.assertIsNotNone(output_tensors[2]) # Keypoints
self.assertIsNotNone(output_tensors[3]) # Keypoint Visibilities.
if include_dense_pose:
self.assertIsNotNone(output_tensors[4]) # DensePose Num Points.
self.assertIsNotNone(output_tensors[5]) # DensePose Part IDs.
self.assertIsNotNone(output_tensors[6]) # DensePose Surface Coords
def testRandomVerticalFlip(self):
......@@ -2380,7 +2310,6 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
@parameterized.parameters(
{'include_dense_pose': False},
{'include_dense_pose': True}
)
def testRandomPadImageWithKeypointsAndMasks(self, include_dense_pose):
def graph_fn():
......@@ -3912,6 +3841,90 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
size = max(image.shape)
self.assertAlmostEqual(scale * 256.0, size)
self.assertAllClose(image[:, :, 0], masks[0, :, :])
@parameterized.named_parameters(('scale_0_1', 0.1), ('scale_1_0', 1.0),
('scale_2_0', 2.0))
def test_random_scale_crop_and_pad_to_square(self, scale):
def graph_fn():
image = np.random.randn(512, 256, 1)
box_centers = [0.25, 0.5, 0.75]
box_size = 0.1
box_corners = []
box_labels = []
box_label_weights = []
keypoints = []
masks = []
for center_y in box_centers:
for center_x in box_centers:
box_corners.append(
[center_y - box_size / 2.0, center_x - box_size / 2.0,
center_y + box_size / 2.0, center_x + box_size / 2.0])
box_labels.append([1])
box_label_weights.append([1.])
keypoints.append(
[[center_y - box_size / 2.0, center_x - box_size / 2.0],
[center_y + box_size / 2.0, center_x + box_size / 2.0]])
masks.append(image[:, :, 0].reshape(512, 256))
image = tf.constant(image)
boxes = tf.constant(box_corners)
labels = tf.constant(box_labels)
label_weights = tf.constant(box_label_weights)
keypoints = tf.constant(keypoints)
masks = tf.constant(np.stack(masks))
(new_image, new_boxes, _, _, new_masks,
new_keypoints) = preprocessor.random_scale_crop_and_pad_to_square(
image,
boxes,
labels,
label_weights,
masks=masks,
keypoints=keypoints,
scale_min=scale,
scale_max=scale,
output_size=512)
return new_image, new_boxes, new_masks, new_keypoints
image, boxes, masks, keypoints = self.execute_cpu(graph_fn, [])
# Since random_scale_crop_and_pad_to_square may prune and clip boxes,
# we only need to find one of the boxes that was not clipped and check
# that it matches the expected dimensions. Note, assertAlmostEqual(a, b)
# is equivalent to round(a-b, 7) == 0.
any_box_has_correct_size = False
effective_scale_y = int(scale * 512) / 512.0
effective_scale_x = int(scale * 256) / 512.0
expected_size_y = 0.1 * effective_scale_y
expected_size_x = 0.1 * effective_scale_x
for box in boxes:
ymin, xmin, ymax, xmax = box
any_box_has_correct_size |= (
(round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
(round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
(round((ymax - ymin) - expected_size_y, 7) == 0.0) and
(round((xmax - xmin) - expected_size_x, 7) == 0.0))
self.assertTrue(any_box_has_correct_size)
# Similar to the approach above where we check for at least one box with the
# expected dimensions, we check for at least one pair of keypoints whose
# distance matches the expected dimensions.
any_keypoint_pair_has_correct_dist = False
for keypoint_pair in keypoints:
ymin, xmin = keypoint_pair[0]
ymax, xmax = keypoint_pair[1]
any_keypoint_pair_has_correct_dist |= (
(round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
(round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
(round((ymax - ymin) - expected_size_y, 7) == 0.0) and
(round((xmax - xmin) - expected_size_x, 7) == 0.0))
self.assertTrue(any_keypoint_pair_has_correct_dist)
self.assertAlmostEqual(512.0, image.shape[0])
self.assertAlmostEqual(512.0, image.shape[1])
self.assertAllClose(image[:, :, 0],
masks[0, :, :])
......
......@@ -141,6 +141,8 @@ class DetectionResultFields(object):
for detection boxes in the image including background class.
detection_classes: detection-level class labels.
detection_masks: contains a segmentation mask for each detection box.
detection_surface_coords: contains DensePose surface coordinates for each
box.
detection_boundaries: contains an object boundary for each detection box.
detection_keypoints: contains detection keypoints for each detection box.
detection_keypoint_scores: contains detection keypoint scores.
......@@ -161,6 +163,7 @@ class DetectionResultFields(object):
detection_features = 'detection_features'
detection_classes = 'detection_classes'
detection_masks = 'detection_masks'
detection_surface_coords = 'detection_surface_coords'
detection_boundaries = 'detection_boundaries'
detection_keypoints = 'detection_keypoints'
detection_keypoint_scores = 'detection_keypoint_scores'
......@@ -182,7 +185,11 @@ class BoxListFields(object):
masks: masks per bounding box.
boundaries: boundaries per bounding box.
keypoints: keypoints per bounding box.
keypoint_visibilities: keypoint visibilities per bounding box.
keypoint_heatmaps: keypoint heatmaps per bounding box.
densepose_num_points: number of DensePose points per bounding box.
densepose_part_ids: DensePose part ids per bounding box.
densepose_surface_coords: DensePose surface coordinates per bounding box.
is_crowd: is_crowd annotation per bounding box.
"""
boxes = 'boxes'
......@@ -196,6 +203,9 @@ class BoxListFields(object):
keypoints = 'keypoints'
keypoint_visibilities = 'keypoint_visibilities'
keypoint_heatmaps = 'keypoint_heatmaps'
densepose_num_points = 'densepose_num_points'
densepose_part_ids = 'densepose_part_ids'
densepose_surface_coords = 'densepose_surface_coords'
is_crowd = 'is_crowd'
group_of = 'group_of'
......
......@@ -51,25 +51,25 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
feature_map = {
standard_fields.TfExampleFields.object_bbox_ymin:
dataset_util.float_list_feature(
filtered_data_frame_boxes.YMin.as_matrix()),
filtered_data_frame_boxes.YMin.to_numpy()),
standard_fields.TfExampleFields.object_bbox_xmin:
dataset_util.float_list_feature(
filtered_data_frame_boxes.XMin.as_matrix()),
filtered_data_frame_boxes.XMin.to_numpy()),
standard_fields.TfExampleFields.object_bbox_ymax:
dataset_util.float_list_feature(
filtered_data_frame_boxes.YMax.as_matrix()),
filtered_data_frame_boxes.YMax.to_numpy()),
standard_fields.TfExampleFields.object_bbox_xmax:
dataset_util.float_list_feature(
filtered_data_frame_boxes.XMax.as_matrix()),
filtered_data_frame_boxes.XMax.to_numpy()),
standard_fields.TfExampleFields.object_class_text:
dataset_util.bytes_list_feature([
six.ensure_binary(label_text)
for label_text in filtered_data_frame_boxes.LabelName.as_matrix()
for label_text in filtered_data_frame_boxes.LabelName.to_numpy()
]),
standard_fields.TfExampleFields.object_class_label:
dataset_util.int64_list_feature(
filtered_data_frame_boxes.LabelName.map(
lambda x: label_map[x]).as_matrix()),
lambda x: label_map[x]).to_numpy()),
standard_fields.TfExampleFields.filename:
dataset_util.bytes_feature(
six.ensure_binary('{}.jpg'.format(image_id))),
......@@ -82,31 +82,31 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
if 'IsGroupOf' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_group_of] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsGroupOf.as_matrix().astype(int))
filtered_data_frame_boxes.IsGroupOf.to_numpy().astype(int))
if 'IsOccluded' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_occluded] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsOccluded.as_matrix().astype(
filtered_data_frame_boxes.IsOccluded.to_numpy().astype(
int))
if 'IsTruncated' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_truncated] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsTruncated.as_matrix().astype(
filtered_data_frame_boxes.IsTruncated.to_numpy().astype(
int))
if 'IsDepiction' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_depiction] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsDepiction.as_matrix().astype(
filtered_data_frame_boxes.IsDepiction.to_numpy().astype(
int))
if 'ConfidenceImageLabel' in filtered_data_frame_labels.columns:
feature_map[standard_fields.TfExampleFields.
image_class_label] = dataset_util.int64_list_feature(
filtered_data_frame_labels.LabelName.map(
lambda x: label_map[x]).as_matrix())
lambda x: label_map[x]).to_numpy())
feature_map[standard_fields.TfExampleFields
.image_class_text] = dataset_util.bytes_list_feature([
six.ensure_binary(label_text) for label_text in
filtered_data_frame_labels.LabelName.as_matrix()
filtered_data_frame_labels.LabelName.to_numpy()
])
return tf.train.Example(features=tf.train.Features(feature=feature_map))
......@@ -25,20 +25,17 @@ RUN useradd -ms /bin/bash tensorflow
USER tensorflow
WORKDIR /home/tensorflow
# Install pip dependencies
RUN pip3 install --user absl-py
RUN pip3 install --user contextlib2
RUN pip3 install --user Cython
RUN pip3 install --user jupyter
RUN pip3 install --user matplotlib
RUN pip3 install --user pycocotools
RUN pip3 install --user tf-slim
# Copy this version of the model garden into the image
COPY --chown=tensorflow . /home/tensorflow/models
# Compile protobuf configs
RUN (cd /home/tensorflow/models/research/ && protoc object_detection/protos/*.proto --python_out=.)
WORKDIR /home/tensorflow/models/research/
RUN cp object_detection/packages/tf1/setup.py ./
ENV PATH="/home/tensorflow/.local/bin:${PATH}"
RUN python -m pip install --user -U pip
RUN python -m pip install --user .
ENV PYTHONPATH $PYTHONPATH:/home/tensorflow/models/research/:/home/tensorflow/models/research/slim
ENV TF_CPP_MIN_LOG_LEVEL 3
# Tensorflow Object Detection on Docker
# TensorFlow Object Detection on Docker
These instructions are experimental.
......@@ -6,6 +6,6 @@ These instructions are experimental.
```bash
# From the root of the git repository
docker build -f research/object_detection/dockerfiles/1.15/Dockerfile -t od .
docker build -f research/object_detection/dockerfiles/tf1/Dockerfile -t od .
docker run -it od
```
......@@ -25,20 +25,17 @@ RUN useradd -ms /bin/bash tensorflow
USER tensorflow
WORKDIR /home/tensorflow
# Install pip dependencies
RUN pip3 install --user absl-py
RUN pip3 install --user contextlib2
RUN pip3 install --user Cython
RUN pip3 install --user jupyter
RUN pip3 install --user matplotlib
RUN pip3 install --user pycocotools
RUN pip3 install --user tf-slim
# Copy this version of the model garden into the image
COPY --chown=tensorflow . /home/tensorflow/models
# Compile protobuf configs
RUN (cd /home/tensorflow/models/research/ && protoc object_detection/protos/*.proto --python_out=.)
WORKDIR /home/tensorflow/models/research/
RUN cp object_detection/packages/tf2/setup.py ./
ENV PATH="/home/tensorflow/.local/bin:${PATH}"
RUN python -m pip install -U pip
RUN python -m pip install .
ENV PYTHONPATH $PYTHONPATH:/home/tensorflow/models/research/:/home/tensorflow/models/research/slim
ENV TF_CPP_MIN_LOG_LEVEL 3
# Tensorflow Object Detection on Docker
# TensorFlow Object Detection on Docker
These instructions are experimental.
......@@ -6,6 +6,6 @@ These instructions are experimental.
```bash
# From the root of the git repository
docker build -f research/object_detection/dockerfiles/2.2/Dockerfile -t od .
docker build -f research/object_detection/dockerfiles/tf2/Dockerfile -t od .
docker run -it od
```
......@@ -552,7 +552,11 @@ def _resize_detection_masks(args):
detection_boxes, detection_masks, image_shape = args
detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
detection_masks, detection_boxes, image_shape[0], image_shape[1])
return tf.cast(tf.greater(detection_masks_reframed, 0.5), tf.uint8)
# If the masks are currently float, binarize them. Otherwise keep them as
# integers, since they have already been thresholded.
if detection_masks_reframed.dtype == tf.float32:
detection_masks_reframed = tf.greater(detection_masks_reframed, 0.5)
return tf.cast(detection_masks_reframed, tf.uint8)
def _resize_groundtruth_masks(args):
......@@ -570,6 +574,17 @@ def _resize_groundtruth_masks(args):
return tf.cast(tf.squeeze(mask, 3), tf.uint8)
def _resize_surface_coordinate_masks(args):
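  """Resizes DensePose surface coordinates to the full image frame.

  Unstacks the V/U channels and reframes each from box-relative to absolute
  image coordinates using the same op applied to detection masks above.
  """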
detection_boxes, surface_coords, image_shape = args
surface_coords_v, surface_coords_u = tf.unstack(surface_coords, axis=-1)
surface_coords_v_reframed = ops.reframe_box_masks_to_image_masks(
surface_coords_v, detection_boxes, image_shape[0], image_shape[1])
surface_coords_u_reframed = ops.reframe_box_masks_to_image_masks(
surface_coords_u, detection_boxes, image_shape[0], image_shape[1])
return tf.stack([surface_coords_v_reframed, surface_coords_u_reframed],
axis=-1)
def _scale_keypoint_to_absolute(args):
keypoints, image_shape = args
return keypoint_ops.scale(keypoints, image_shape[0], image_shape[1])
......@@ -720,6 +735,12 @@ def result_dict_for_batched_example(images,
num_keypoints] bool tensor with keypoint visibilities (Optional).
'groundtruth_labeled_classes': [batch_size, num_classes] int64
tensor of 1-indexed classes. (Optional)
'groundtruth_dp_num_points': [batch_size, max_number_of_boxes] int32
tensor. (Optional)
'groundtruth_dp_part_ids': [batch_size, max_number_of_boxes,
max_sampled_points] int32 tensor. (Optional)
'groundtruth_dp_surface_coords': [batch_size, max_number_of_boxes,
max_sampled_points, 4] float32 tensor. (Optional)
class_agnostic: Boolean indicating whether the detections are class-agnostic
(i.e. binary). Default False.
scale_to_absolute: Boolean indicating whether boxes and keypoints should be
......@@ -747,12 +768,16 @@ def result_dict_for_batched_example(images,
'detection_scores': [batch_size, max_detections] float32 tensor of scores.
'detection_classes': [batch_size, max_detections] int64 tensor of 1-indexed
classes.
'detection_masks': [batch_size, max_detections, H, W] float32 tensor of
binarized masks, reframed to full image masks. (Optional)
'detection_masks': [batch_size, max_detections, H, W] uint8 tensor of
instance masks, reframed to full image masks. Note that these may be
binarized (e.g. {0, 1}), or may contain 1-indexed part labels. (Optional)
'detection_keypoints': [batch_size, max_detections, num_keypoints, 2]
float32 tensor containing keypoint coordinates. (Optional)
'detection_keypoint_scores': [batch_size, max_detections, num_keypoints]
float32 tensor containing keypoint scores. (Optional)
'detection_surface_coords': [batch_size, max_detections, H, W, 2] float32
tensor with normalized surface coordinates (e.g. DensePose UV
coordinates). (Optional)
'num_detections': [batch_size] int64 tensor containing number of valid
detections.
'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes, in
......@@ -844,14 +869,21 @@ def result_dict_for_batched_example(images,
if detection_fields.detection_masks in detections:
detection_masks = detections[detection_fields.detection_masks]
# TODO(rathodv): This should be done in model's postprocess
# function ideally.
output_dict[detection_fields.detection_masks] = (
shape_utils.static_or_dynamic_map_fn(
_resize_detection_masks,
elems=[detection_boxes, detection_masks,
original_image_spatial_shapes],
dtype=tf.uint8))
if detection_fields.detection_surface_coords in detections:
detection_surface_coords = detections[
detection_fields.detection_surface_coords]
output_dict[detection_fields.detection_surface_coords] = (
shape_utils.static_or_dynamic_map_fn(
_resize_surface_coordinate_masks,
elems=[detection_boxes, detection_surface_coords,
original_image_spatial_shapes],
dtype=tf.float32))
if detection_fields.detection_keypoints in detections:
detection_keypoints = detections[detection_fields.detection_keypoints]
......@@ -1074,3 +1106,8 @@ def evaluator_options_from_eval_config(eval_config):
'recall_upper_bound': (eval_config.recall_upper_bound)
}
return evaluator_options
def has_densepose(eval_dict):
return (fields.DetectionResultFields.detection_masks in eval_dict and
fields.DetectionResultFields.detection_surface_coords in eval_dict)
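# Example (illustrative): downstream eval/visualization code can gate its
# DensePose handling on this check, e.g.:
#
#   if has_densepose(eval_dict):
#     surface_coords = eval_dict[
#         fields.DetectionResultFields.detection_surface_coords]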
......@@ -47,7 +47,7 @@ python object_detection/dataset_tools/oid_hierarchical_labels_expansion.py \
--annotation_type=2
```
1. If you are not using Tensorflow, you can run evaluation directly using your
1. If you are not using TensorFlow, you can run evaluation directly using your
algorithm's output and generated ground-truth files. {value=4}
After step 3 you produced the ground-truth files suitable for running 'OID
......@@ -73,7 +73,7 @@ For the Object Detection Track, the participants will be ranked on:
- "OpenImagesDetectionChallenge_Precision/mAP@0.5IOU"
To use evaluation within Tensorflow training, use metric name
To use evaluation within TensorFlow training, use metric name
`oid_challenge_detection_metrics` in the evaluation config.
## Instance Segmentation Track
......@@ -130,7 +130,7 @@ python object_detection/dataset_tools/oid_hierarchical_labels_expansion.py \
--annotation_type=1
```
1. If you are not using Tensorflow, you can run evaluation directly using your
1. If you are not using TensorFlow, you can run evaluation directly using your
algorithm's output and generated ground-truth files. {value=4}
```
......
......@@ -2,7 +2,7 @@
## Overview
The Tensorflow Object Detection API uses protobuf files to configure the
The TensorFlow Object Detection API uses protobuf files to configure the
training and evaluation process. The schema for the training pipeline can be
found in object_detection/protos/pipeline.proto. At a high level, the config
file is split into 5 parts:
......@@ -60,7 +60,7 @@ to a value suited for the dataset the user is training on.
## Defining Inputs
The Tensorflow Object Detection API accepts inputs in the TFRecord file format.
The TensorFlow Object Detection API accepts inputs in the TFRecord file format.
Users must specify the locations of both the training and evaluation files.
Additionally, users should also specify a label map, which defines the mapping
between a class id and class name. The label map should be identical between
......@@ -126,24 +126,6 @@ data_augmentation_options {
}
```
### Model Parameter Initialization
While optional, it is highly recommended that users utilize other object
detection checkpoints. Training an object detector from scratch can take days.
To speed up the training process, it is recommended that users re-use the
feature extractor parameters from a pre-existing image classification or
object detection checkpoint. `train_config` provides two fields to specify
pre-existing checkpoints: `fine_tune_checkpoint` and
`from_detection_checkpoint`. `fine_tune_checkpoint` should provide a path to
the pre-existing checkpoint
(ie:"/usr/home/username/checkpoint/model.ckpt-#####").
`from_detection_checkpoint` is a boolean value. If false, it assumes the
checkpoint was from an object classification checkpoint. Note that starting
from a detection checkpoint will usually result in a faster training job than
a classification checkpoint.
The list of provided checkpoints can be found [here](detection_model_zoo.md).
### Input Preprocessing
The `data_augmentation_options` in `train_config` can be used to specify
......
# Context R-CNN
[![TensorFlow 1.15](https://img.shields.io/badge/TensorFlow-1.15-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v1.15.0)
Context R-CNN is an object detection model that uses contextual features to
improve detection accuracy. See https://arxiv.org/abs/1912.03538 for more details.
......
......@@ -2,14 +2,14 @@
In this section, we discuss some of the abstractions that we use
for defining detection models. If you would like to define a new model
architecture for detection and use it in the Tensorflow Detection API,
architecture for detection and use it in the TensorFlow Detection API,
then this section should also serve as a high level guide to the files that you
will need to edit to get your new model working.
## DetectionModels (`object_detection/core/model.py`)
In order to be trained, evaluated, and exported for serving using our
provided binaries, all models under the Tensorflow Object Detection API must
provided binaries, all models under the TensorFlow Object Detection API must
implement the `DetectionModel` interface (see the full definition in `object_detection/core/model.py`). In particular,
each of these models is responsible for implementing 5 functions:
......@@ -20,7 +20,7 @@ each of these models are responsible for implementing 5 functions:
postprocess functions.
* `postprocess`: Convert predicted output tensors to final detections.
* `loss`: Compute scalar loss tensors with respect to provided groundtruth.
* `restore`: Load a checkpoint into the Tensorflow graph.
* `restore`: Load a checkpoint into the TensorFlow graph.
Given a `DetectionModel` at training time, we pass each image batch through
the following sequence of functions to compute a loss which can be optimized via
......@@ -87,7 +87,7 @@ functions:
* `_extract_box_classifier_features`: Extract second stage Box Classifier
features.
* `restore_from_classification_checkpoint_fn`: Load a checkpoint into the
Tensorflow graph.
TensorFlow graph.
See the `object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py`
definition as one example. Some remarks:
......
# Supported object detection evaluation protocols
The Tensorflow Object Detection API currently supports three evaluation protocols,
The TensorFlow Object Detection API currently supports three evaluation protocols
that can be configured in `EvalConfig` by setting `metrics_set` to the
corresponding value.
......