Commit 44e7092c authored by stephenwu

Merge branch 'master' of https://github.com/tensorflow/models into AXg

parents 431a9ca3 59434199
......@@ -94,22 +94,37 @@ class DetectionInferenceModule(tf.Module):
def _get_side_names_from_zip(self, zipped_side_inputs):
return [side[2] for side in zipped_side_inputs]
def _run_inference_on_images(self, image, **kwargs):
def _preprocess_input(self, batch_input, decode_fn):
# Input preprocessing happens on the CPU. We don't need to specify the device
# placement explicitly as it is automatically handled by TF.
def _decode_and_preprocess(single_input):
image = decode_fn(single_input)
image = tf.cast(image, tf.float32)
image, true_shape = self._model.preprocess(image[tf.newaxis, :, :, :])
return image[0], true_shape[0]
images, true_shapes = tf.map_fn(
_decode_and_preprocess,
elems=batch_input,
parallel_iterations=32,
back_prop=False,
fn_output_signature=(tf.float32, tf.int32))
return images, true_shapes
def _run_inference_on_images(self, images, true_shapes, **kwargs):
"""Cast image to float and run inference.
Args:
image: uint8 Tensor of shape [1, None, None, 3].
images: float32 Tensor of shape [None, None, None, 3].
true_shapes: int32 Tensor of shape [batch, 3].
**kwargs: additional keyword arguments.
Returns:
Tensor dictionary holding detections.
"""
label_id_offset = 1
image = tf.cast(image, tf.float32)
image, shapes = self._model.preprocess(image)
prediction_dict = self._model.predict(image, shapes, **kwargs)
detections = self._model.postprocess(prediction_dict, shapes)
prediction_dict = self._model.predict(images, true_shapes, **kwargs)
detections = self._model.postprocess(prediction_dict, true_shapes)
classes_field = fields.DetectionResultFields.detection_classes
detections[classes_field] = (
tf.cast(detections[classes_field], tf.float32) + label_id_offset)
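The per-example decode-and-preprocess step above relies on `tf.map_fn` returning a pair of tensors via `fn_output_signature`. Below is a minimal, self-contained sketch of that pattern, with a fixed-size resize standing in for the model's `preprocess`; all names in the sketch are illustrative and not part of this change:

```python
import tensorflow as tf

def _toy_decode_and_preprocess(single_image):
  # Stand-in for decode_fn + model.preprocess: resize to a fixed size and
  # report the original (pre-resize) shape, mirroring the
  # (image, true_shape) contract used by _preprocess_input.
  resized = tf.image.resize(tf.cast(single_image, tf.float32), (320, 320))
  return resized, tf.shape(single_image)

batch = tf.random.uniform([4, 200, 100, 3], maxval=255.0)
images, true_shapes = tf.map_fn(
    _toy_decode_and_preprocess,
    elems=batch,
    parallel_iterations=32,
    fn_output_signature=(tf.float32, tf.int32))
print(images.shape)         # (4, 320, 320, 3)
print(true_shapes.numpy())  # [200 100 3] repeated for each example
```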
......@@ -144,7 +159,8 @@ class DetectionFromImageModule(DetectionInferenceModule):
def call_func(input_tensor, *side_inputs):
kwargs = dict(zip(self._side_input_names, side_inputs))
return self._run_inference_on_images(input_tensor, **kwargs)
images, true_shapes = self._preprocess_input(input_tensor, lambda x: x)
return self._run_inference_on_images(images, true_shapes, **kwargs)
self.__call__ = tf.function(call_func, input_signature=sig)
......@@ -154,44 +170,43 @@ class DetectionFromImageModule(DetectionInferenceModule):
zipped_side_inputs)
def get_true_shapes(input_tensor):
input_shape = tf.shape(input_tensor)
batch = input_shape[0]
image_shape = input_shape[1:]
true_shapes = tf.tile(image_shape[tf.newaxis, :], [batch, 1])
return true_shapes
class DetectionFromFloatImageModule(DetectionInferenceModule):
"""Detection Inference Module for float image inputs."""
@tf.function(
input_signature=[
tf.TensorSpec(shape=[1, None, None, 3], dtype=tf.float32)])
tf.TensorSpec(shape=[None, None, None, 3], dtype=tf.float32)])
def __call__(self, input_tensor):
return self._run_inference_on_images(input_tensor)
images, true_shapes = self._preprocess_input(input_tensor, lambda x: x)
return self._run_inference_on_images(images,
true_shapes)
class DetectionFromEncodedImageModule(DetectionInferenceModule):
"""Detection Inference Module for encoded image string inputs."""
@tf.function(input_signature=[tf.TensorSpec(shape=[1], dtype=tf.string)])
@tf.function(input_signature=[tf.TensorSpec(shape=[None], dtype=tf.string)])
def __call__(self, input_tensor):
with tf.device('cpu:0'):
image = tf.map_fn(
_decode_image,
elems=input_tensor,
dtype=tf.uint8,
parallel_iterations=32,
back_prop=False)
return self._run_inference_on_images(image)
images, true_shapes = self._preprocess_input(input_tensor, _decode_image)
return self._run_inference_on_images(images, true_shapes)
class DetectionFromTFExampleModule(DetectionInferenceModule):
"""Detection Inference Module for TF.Example inputs."""
@tf.function(input_signature=[tf.TensorSpec(shape=[1], dtype=tf.string)])
@tf.function(input_signature=[tf.TensorSpec(shape=[None], dtype=tf.string)])
def __call__(self, input_tensor):
with tf.device('cpu:0'):
image = tf.map_fn(
_decode_tf_example,
elems=input_tensor,
dtype=tf.uint8,
parallel_iterations=32,
back_prop=False)
return self._run_inference_on_images(image)
images, true_shapes = self._preprocess_input(input_tensor,
_decode_tf_example)
return self._run_inference_on_images(images, true_shapes)
DETECTION_MODULE_MAP = {
'image_tensor': DetectionFromImageModule,
......
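Because each `__call__` signature above now uses a dynamic batch dimension (`shape=[None, ...]`) instead of a fixed batch of 1, a model exported with any of these modules accepts arbitrary batch sizes at inference time. A hedged usage sketch for the float-image case; the export directory and the export step itself are assumptions, not part of this diff:

```python
import tensorflow as tf

# Assumed: a model was previously exported with the float-image module
# (DetectionFromFloatImageModule) to this directory.
detect_fn = tf.saved_model.load('/tmp/exported_model/saved_model')

images = tf.zeros([8, 320, 320, 3], dtype=tf.float32)  # batch of 8 now allowed
detections = detect_fn(images)
print(detections['num_detections'].shape)  # (8,)
```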
......@@ -307,6 +307,14 @@ def transform_input_data(tensor_dict,
out_tensor_dict[flds_gt_kpt_vis] = tf.ones_like(
out_tensor_dict[flds_gt_kpt][:, :, 0],
dtype=tf.bool)
flds_gt_kpt_depth = fields.InputDataFields.groundtruth_keypoint_depths
flds_gt_kpt_depth_weight = (
fields.InputDataFields.groundtruth_keypoint_depth_weights)
if flds_gt_kpt_depth in out_tensor_dict:
out_tensor_dict[flds_gt_kpt_depth] = out_tensor_dict[flds_gt_kpt_depth]
out_tensor_dict[flds_gt_kpt_depth_weight] = out_tensor_dict[
flds_gt_kpt_depth_weight]
out_tensor_dict[flds_gt_kpt_weights] = (
keypoint_ops.keypoint_weights_from_visibilities(
out_tensor_dict[flds_gt_kpt_vis],
......@@ -506,6 +514,15 @@ def pad_input_data_to_static_shapes(tensor_dict,
padding_shapes[input_fields.
groundtruth_keypoint_visibilities] = padding_shape
if fields.InputDataFields.groundtruth_keypoint_depths in tensor_dict:
tensor_shape = tensor_dict[fields.InputDataFields.
groundtruth_keypoint_depths].shape
padding_shape = [max_num_boxes, shape_utils.get_dim_as_int(tensor_shape[1])]
padding_shapes[fields.InputDataFields.
groundtruth_keypoint_depths] = padding_shape
padding_shapes[fields.InputDataFields.
groundtruth_keypoint_depth_weights] = padding_shape
if input_fields.groundtruth_keypoint_weights in tensor_dict:
tensor_shape = (
tensor_dict[input_fields.groundtruth_keypoint_weights].shape)
......@@ -587,6 +604,8 @@ def augment_input_data(tensor_dict, data_augmentation_options):
in tensor_dict)
include_keypoint_visibilities = (
fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict)
include_keypoint_depths = (
fields.InputDataFields.groundtruth_keypoint_depths in tensor_dict)
include_label_weights = (fields.InputDataFields.groundtruth_weights
in tensor_dict)
include_label_confidences = (fields.InputDataFields.groundtruth_confidences
......@@ -606,7 +625,8 @@ def augment_input_data(tensor_dict, data_augmentation_options):
include_instance_masks=include_instance_masks,
include_keypoints=include_keypoints,
include_keypoint_visibilities=include_keypoint_visibilities,
include_dense_pose=include_dense_pose))
include_dense_pose=include_dense_pose,
include_keypoint_depths=include_keypoint_depths))
tensor_dict[fields.InputDataFields.image] = tf.squeeze(
tensor_dict[fields.InputDataFields.image], axis=0)
return tensor_dict
......@@ -628,6 +648,8 @@ def _get_labels_dict(input_dict):
fields.InputDataFields.groundtruth_confidences,
fields.InputDataFields.groundtruth_labeled_classes,
fields.InputDataFields.groundtruth_keypoints,
fields.InputDataFields.groundtruth_keypoint_depths,
fields.InputDataFields.groundtruth_keypoint_depth_weights,
fields.InputDataFields.groundtruth_instance_masks,
fields.InputDataFields.groundtruth_area,
fields.InputDataFields.groundtruth_is_crowd,
......
......@@ -1420,6 +1420,49 @@ class DataTransformationFnTest(test_case.TestCase, parameterized.TestCase):
[[[0., 0., 0., 0.,], [0., 0., 0., 0.,]],
[[0.1, 0.1, 0.3, 0.4,], [0.6, 0.4, 0.6, 0.7,]]])
def test_groundtruth_keypoint_depths(self):
def graph_fn():
tensor_dict = {
fields.InputDataFields.image:
tf.constant(np.random.rand(100, 50, 3).astype(np.float32)),
fields.InputDataFields.groundtruth_boxes:
tf.constant(np.array([[.5, .5, 1, 1], [.0, .0, .5, .5]],
np.float32)),
fields.InputDataFields.groundtruth_classes:
tf.constant(np.array([1, 2], np.int32)),
fields.InputDataFields.groundtruth_keypoints:
tf.constant([[[0.1, 0.2], [0.3, 0.4]],
[[0.5, 0.6], [0.7, 0.8]]]),
fields.InputDataFields.groundtruth_keypoint_visibilities:
tf.constant([[True, False], [True, True]]),
fields.InputDataFields.groundtruth_keypoint_depths:
tf.constant([[1.0, 0.9], [0.8, 0.7]]),
fields.InputDataFields.groundtruth_keypoint_depth_weights:
tf.constant([[0.7, 0.8], [0.9, 1.0]]),
}
num_classes = 3
keypoint_type_weight = [1.0, 2.0]
input_transformation_fn = functools.partial(
inputs.transform_input_data,
model_preprocess_fn=_fake_resize50_preprocess_fn,
image_resizer_fn=_fake_image_resizer_fn,
num_classes=num_classes,
keypoint_type_weight=keypoint_type_weight)
transformed_inputs = input_transformation_fn(tensor_dict=tensor_dict)
return (transformed_inputs[
fields.InputDataFields.groundtruth_keypoint_depths],
transformed_inputs[
fields.InputDataFields.groundtruth_keypoint_depth_weights])
keypoint_depths, keypoint_depth_weights = self.execute_cpu(graph_fn, [])
self.assertAllClose(
keypoint_depths,
[[1.0, 0.9], [0.8, 0.7]])
self.assertAllClose(
keypoint_depth_weights,
[[0.7, 0.8], [0.9, 1.0]])
class PadInputDataToStaticShapesFnTest(test_case.TestCase):
......
......@@ -32,6 +32,7 @@ from object_detection.core import model
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner as cn_assigner
from object_detection.utils import shape_utils
from object_detection.utils import target_assigner_utils as ta_utils
# Number of channels needed to predict size and offsets.
NUM_OFFSET_CHANNELS = 2
......@@ -526,6 +527,125 @@ def prediction_tensors_to_keypoint_candidates(
return keypoint_candidates, keypoint_scores, num_candidates
def prediction_to_single_instance_keypoints(object_heatmap, keypoint_heatmap,
keypoint_offset,
keypoint_regression, stride,
object_center_std_dev,
keypoint_std_dev, kp_params):
"""Postprocess function to predict single instance keypoints.
This is a simplified postprocessing function based on the assumption that
there is only one instance in the image. If there are multiple instances in
the image, the model prefers to predict the one that is closest to the image
center. Here is a high-level description of what this function does:
1) Object heatmap re-weighted by image center Gaussian is used to determine
the instance center.
2) Regressed keypoint locations are retrieved from the instance center. The
Gaussian kernel is applied to the regressed keypoint locations to
re-weight the keypoint heatmap. This is to select the keypoints that are
associated with the center instance without using top_k op.
3) The keypoint locations are computed by the re-weighted keypoint heatmap
and the keypoint offset.
Args:
object_heatmap: A float tensor of shape [1, height, width, 1] representing
the heatmap of the class.
keypoint_heatmap: A float tensor of shape [1, height, width, num_keypoints]
representing the per-keypoint heatmaps.
keypoint_offset: A float tensor of shape [1, height, width, 2] (or [1,
height, width, 2 * num_keypoints] if 'per_keypoint_offset' is set True)
representing the per-keypoint offsets.
keypoint_regression: A float tensor of shape [1, height, width, 2 *
num_keypoints] representing the joint regression prediction.
stride: The stride in the output space.
object_center_std_dev: The standard deviation of the Gaussian mask which is
applied to the object_heatmap. The goal is to upweight the instance that
is closer to the image center. Expressed in units of input image pixels.
keypoint_std_dev: The standard deviation of the Gaussian masks which are
applied to the keypoint_heatmap based on the regressed joint location. It
is used to upweight the keypoint joints that belong to the targeted
instance. If keypoint_std_dev contains 1 element, all keypoint joints will
share the same value. Otherwise, it must contain num_keypoints elements,
representing the standard deviation corresponding to each joint.
kp_params: A `KeypointEstimationParams` object with parameters for a single
keypoint class.
Returns:
A tuple of two tensors:
keypoint_candidates: A float tensor with shape [1, 1, num_keypoints, 2]
representing the yx-coordinates of the keypoints in the output feature
map space.
keypoint_scores: A float tensor with shape [1, 1, num_keypoints]
representing the keypoint prediction scores.
Raises:
ValueError: if the input keypoint_std_dev doesn't have valid number of
elements (1 or num_keypoints).
"""
num_keypoints = len(kp_params.keypoint_std_dev)
batch_size, height, width, _ = _get_shape(keypoint_heatmap, 4)
# Apply the Gaussian mask to the image center.
image_center_y = tf.convert_to_tensor([0.5 * height], dtype=tf.float32)
image_center_x = tf.convert_to_tensor([0.5 * width], dtype=tf.float32)
(y_grid, x_grid) = ta_utils.image_shape_to_grids(height, width)
# Mask shape: [1, height, width, 1]
object_mask = tf.expand_dims(
ta_utils.coordinates_to_heatmap(y_grid, x_grid, image_center_y,
image_center_x,
object_center_std_dev / stride,
tf.one_hot(tf.range(1), depth=1)), axis=0)
object_heatmap = tf.math.multiply(object_heatmap, object_mask)
# Pick the highest score and location of the weighted object heatmap.
_, y_indices, x_indices, _ = (
top_k_feature_map_locations(
object_heatmap, max_pool_kernel_size=1, k=1, per_channel=True))
_, num_indices = _get_shape(y_indices, 2)
combined_indices = tf.stack([
_multi_range(batch_size, value_repetitions=num_indices),
tf.reshape(y_indices, [-1]),
tf.reshape(x_indices, [-1])
], axis=1)
# Select the regression vectors from the object center.
selected_regression_flat = tf.gather_nd(keypoint_regression, combined_indices)
# shape: [num_keypoints, 2]
regression_offsets = tf.reshape(selected_regression_flat, [num_keypoints, -1])
(y_reg, x_reg) = tf.unstack(regression_offsets, axis=1)
y_regressed = tf.cast(y_indices, dtype=tf.float32) + y_reg
x_regressed = tf.cast(x_indices, dtype=tf.float32) + x_reg
# Prepare and apply the keypoint heatmap masks.
keypoint_std_dev = [x / stride for x in keypoint_std_dev]
if len(keypoint_std_dev) == 1:
std_dev = tf.convert_to_tensor(
keypoint_std_dev * num_keypoints, dtype=tf.float32)
elif len(keypoint_std_dev) == num_keypoints:
std_dev = tf.convert_to_tensor(
keypoint_std_dev, dtype=tf.float32)
else:
raise ValueError('keypoint_std_dev needs to have length either '
'equal to 1 or num_keypoints.')
channel_onehot = tf.one_hot(tf.range(num_keypoints), depth=num_keypoints)
keypoint_mask = tf.expand_dims(
ta_utils.coordinates_to_heatmap(y_grid, x_grid, y_regressed, x_regressed,
std_dev, channel_onehot), axis=0)
keypoint_predictions = tf.math.multiply(keypoint_heatmap, keypoint_mask)
# Get the keypoint locations/scores:
# keypoint_candidates: [1, 1, num_keypoints, 2]
# keypoint_scores: [1, 1, num_keypoints]
(keypoint_candidates, keypoint_scores,
_) = prediction_tensors_to_keypoint_candidates(
keypoint_predictions,
keypoint_offset,
keypoint_score_threshold=kp_params.keypoint_candidate_score_threshold,
max_pool_kernel_size=kp_params.peak_max_pool_kernel_size,
max_candidates=1)
return keypoint_candidates, keypoint_scores
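A small NumPy sketch of step 1 above: multiplying the object heatmap by a Gaussian centered at the image center lets a plain argmax pick the center-most instance, which is how the function avoids top_k. The numbers are illustrative and mirror the unit test later in this change:

```python
import numpy as np

height, width = 9, 9
heatmap = np.zeros((height, width), np.float32)
heatmap[2, 2] = 1.0   # stronger peak, but far from the image center
heatmap[4, 4] = 0.9   # weaker peak at the image center

y, x = np.mgrid[0:height, 0:width]
std_dev = 2.0
center_gaussian = np.exp(
    -((y - 0.5 * height) ** 2 + (x - 0.5 * width) ** 2) / (2.0 * std_dev ** 2))

reweighted = heatmap * center_gaussian
print(np.unravel_index(np.argmax(heatmap), heatmap.shape))        # (2, 2)
print(np.unravel_index(np.argmax(reweighted), reweighted.shape))  # (4, 4)
```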
def regressed_keypoints_at_object_centers(regressed_keypoint_predictions,
y_indices, x_indices):
"""Returns the regressed keypoints at specified object centers.
......@@ -1776,7 +1896,8 @@ class CenterNetMetaArch(model.DetectionModel):
track_params=None,
temporal_offset_params=None,
use_depthwise=False,
compute_heatmap_sparse=False):
compute_heatmap_sparse=False,
non_max_suppression_fn=None):
"""Initializes a CenterNet model.
Args:
......@@ -1819,6 +1940,7 @@ class CenterNetMetaArch(model.DetectionModel):
the Op that computes the center heatmaps. The sparse version scales
better with number of channels in the heatmap, but in some cases is
known to cause an OOM error. See b/170989061.
non_max_suppression_fn: Optional Non Max Suppression function to apply.
"""
assert object_detection_params or keypoint_params_dict
# Shorten the name for convenience and better formatting.
......@@ -1857,6 +1979,7 @@ class CenterNetMetaArch(model.DetectionModel):
# Will be used in VOD single_frame_meta_arch for tensor reshape.
self._batched_prediction_tensor_names = []
self._non_max_suppression_fn = non_max_suppression_fn
super(CenterNetMetaArch, self).__init__(num_classes)
......@@ -2988,6 +3111,117 @@ class CenterNetMetaArch(model.DetectionModel):
prediction_dict[TEMPORAL_OFFSET][-1])
postprocess_dict[fields.DetectionResultFields.detection_offsets] = offsets
if self._non_max_suppression_fn:
boxes = tf.expand_dims(
postprocess_dict.pop(fields.DetectionResultFields.detection_boxes),
axis=-2)
multiclass_scores = postprocess_dict[
fields.DetectionResultFields.detection_multiclass_scores]
num_valid_boxes = postprocess_dict.pop(
fields.DetectionResultFields.num_detections)
# Remove scores and classes as NMS will compute these from multiclass
# scores.
postprocess_dict.pop(fields.DetectionResultFields.detection_scores)
postprocess_dict.pop(fields.DetectionResultFields.detection_classes)
(nmsed_boxes, nmsed_scores, nmsed_classes, _, nmsed_additional_fields,
num_detections) = self._non_max_suppression_fn(
boxes,
multiclass_scores,
additional_fields=postprocess_dict,
num_valid_boxes=num_valid_boxes)
postprocess_dict = nmsed_additional_fields
postprocess_dict[
fields.DetectionResultFields.detection_boxes] = nmsed_boxes
postprocess_dict[
fields.DetectionResultFields.detection_scores] = nmsed_scores
postprocess_dict[
fields.DetectionResultFields.detection_classes] = nmsed_classes
postprocess_dict[
fields.DetectionResultFields.num_detections] = num_detections
postprocess_dict.update(nmsed_additional_fields)
return postprocess_dict
def postprocess_single_instance_keypoints(self, prediction_dict,
true_image_shapes,
object_center_std_dev,
keypoint_std_dev):
"""Postprocess for predicting single instance keypoints.
This postprocess function is a special case of predicting the keypoint of
a single instance in the image (original CenterNet postprocess supports
multi-instance prediction). Due to these simplifying assumptions, this
postprocessing function achieves a much faster inference time.
Here is a short list of the modifications made in this function:
1) Assume the model predicts keypoints for only a single class.
2) Assume there is only one instance in the image. If multiple instances
appear in the image, the model tends to predict the one that is closer
to the image center (the other ones are considered as background and
are rejected by the model).
3) Avoid using top_k ops in the postprocessing logic since they are slower
than argmax.
4) Predictions other than keypoints (e.g. boxes) are ignored.
5) The input batch size is assumed to be 1.
Args:
prediction_dict: a dictionary holding predicted tensors from "predict"
function.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is of
the form [height, width, channels] indicating the shapes of true images
in the resized images, as resized images can be padded with zeros.
object_center_std_dev: The standard deviation of the Gaussian mask which
is applied to the object_heatmap. The goal is to upweight the instance
that is closer to the image center. Expressed in units of input image
pixels.
keypoint_std_dev: The standard deviation of the Gaussian masks which are
applied to the keypoint_heatmap based on the regressed joint location.
It is used to upweight the keypoint joints that belong to the targeted
instance. If keypoint_std_dev contains one value, then we assume the
same value is applied to all keypoint joints. If keypoint_std_dev is a
list, it must contain num_keypoints elements, representing the standard
deviation corresponding to each joint.
Returns:
detections: a dictionary containing the following fields
detection_keypoints: A float tensor of shape
[1, 1, num_keypoints, 2] with normalized keypoints. Any invalid
keypoints have their coordinates and scores set to 0.0.
detection_keypoint_scores: A float tensor of shape
[1, 1, num_keypoints] with scores for each keypoint.
"""
# The number of keypoint tasks is expected to be 1.
assert len(self._kp_params_dict) == 1
task_name, kp_params = next(iter(self._kp_params_dict.items()))
keypoint_heatmap = tf.nn.sigmoid(prediction_dict[get_keypoint_name(
task_name, KEYPOINT_HEATMAP)][-1])
keypoint_offset = prediction_dict[get_keypoint_name(task_name,
KEYPOINT_OFFSET)][-1]
keypoint_regression = prediction_dict[get_keypoint_name(
task_name, KEYPOINT_REGRESSION)][-1]
object_heatmap = tf.nn.sigmoid(prediction_dict[OBJECT_CENTER][-1])
keypoints, keypoint_scores = (
prediction_to_single_instance_keypoints(
object_heatmap=object_heatmap,
keypoint_heatmap=keypoint_heatmap,
keypoint_offset=keypoint_offset,
keypoint_regression=keypoint_regression,
stride=self._stride,
object_center_std_dev=object_center_std_dev,
keypoint_std_dev=keypoint_std_dev,
kp_params=kp_params))
keypoints, keypoint_scores = (
convert_strided_predictions_to_normalized_keypoints(
keypoints,
keypoint_scores,
self._stride,
true_image_shapes,
clip_out_of_frame_keypoints=False))
postprocess_dict = {
fields.DetectionResultFields.detection_keypoints: keypoints,
fields.DetectionResultFields.detection_keypoint_scores: keypoint_scores
}
return postprocess_dict
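A hedged usage sketch of this fast path, assuming a CenterNet model configured with exactly one keypoint task; the std-dev values and variable names are illustrative and mirror the unit test later in this change:

```python
# Assumed to exist: `model` (CenterNetMetaArch with a single keypoint task)
# and `prediction_dict` returned by model.predict(images, true_image_shapes).
detections = model.postprocess_single_instance_keypoints(
    prediction_dict,
    true_image_shapes=tf.constant([[128, 128, 3]]),
    object_center_std_dev=32.0,
    keypoint_std_dev=[16.0])
# detections['detection_keypoints']:       [1, 1, num_keypoints, 2] (normalized)
# detections['detection_keypoint_scores']: [1, 1, num_keypoints]
```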
def _postprocess_embeddings(self, prediction_dict, y_indices, x_indices):
......
......@@ -24,12 +24,14 @@ from absl.testing import parameterized
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.builders import post_processing_builder
from object_detection.core import losses
from object_detection.core import preprocessor
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner as cn_assigner
from object_detection.meta_architectures import center_net_meta_arch as cnma
from object_detection.models import center_net_resnet_feature_extractor
from object_detection.protos import post_processing_pb2
from object_detection.utils import test_case
from object_detection.utils import tf_version
......@@ -734,6 +736,75 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
np.testing.assert_array_equal(expected_num_keypoint_candidates,
num_keypoint_candidates)
def test_prediction_to_single_instance_keypoints(self):
image_size = (9, 9)
object_heatmap_np = np.zeros((1, image_size[0], image_size[1], 1),
dtype=np.float32)
# This should be picked.
object_heatmap_np[0, 4, 4, 0] = 0.9
# This shouldn't be picked since it's farther away from the center.
object_heatmap_np[0, 2, 2, 0] = 1.0
keypoint_heatmap_np = np.zeros((1, image_size[0], image_size[1], 4),
dtype=np.float32)
# Top-left corner should be picked.
keypoint_heatmap_np[0, 1, 1, 0] = 0.9
keypoint_heatmap_np[0, 4, 4, 0] = 1.0
# Top-right corner should be picked.
keypoint_heatmap_np[0, 1, 7, 1] = 0.9
keypoint_heatmap_np[0, 4, 4, 1] = 1.0
# Bottom-left corner should be picked.
keypoint_heatmap_np[0, 7, 1, 2] = 0.9
keypoint_heatmap_np[0, 4, 4, 2] = 1.0
# Bottom-right corner should be picked.
keypoint_heatmap_np[0, 7, 7, 3] = 0.9
keypoint_heatmap_np[0, 4, 4, 3] = 1.0
keypoint_offset_np = np.zeros((1, image_size[0], image_size[1], 2),
dtype=np.float32)
keypoint_offset_np[0, 1, 1] = [0.5, 0.5]
keypoint_offset_np[0, 1, 7] = [0.5, -0.5]
keypoint_offset_np[0, 7, 1] = [-0.5, 0.5]
keypoint_offset_np[0, 7, 7] = [-0.5, -0.5]
keypoint_regression_np = np.zeros((1, image_size[0], image_size[1], 8),
dtype=np.float32)
keypoint_regression_np[0, 4, 4] = [-3, -3, -3, 3, 3, -3, 3, 3]
kp_params = get_fake_kp_params(num_candidates_per_keypoint=1)
def graph_fn():
object_heatmap = tf.constant(object_heatmap_np, dtype=tf.float32)
keypoint_heatmap = tf.constant(keypoint_heatmap_np, dtype=tf.float32)
keypoint_offset = tf.constant(keypoint_offset_np, dtype=tf.float32)
keypoint_regression = tf.constant(
keypoint_regression_np, dtype=tf.float32)
(keypoint_cands, keypoint_scores) = (
cnma.prediction_to_single_instance_keypoints(
object_heatmap,
keypoint_heatmap,
keypoint_offset,
keypoint_regression,
stride=4,
object_center_std_dev=image_size[0] / 2,
keypoint_std_dev=[image_size[0] / 10],
kp_params=kp_params))
return keypoint_cands, keypoint_scores
(keypoint_cands, keypoint_scores) = self.execute(graph_fn, [])
expected_keypoint_candidates = [[[
[1.5, 1.5], # top-left
[1.5, 6.5], # top-right
[6.5, 1.5], # bottom-left
[6.5, 6.5], # bottom-right
]]]
expected_keypoint_scores = [[[0.9, 0.9, 0.9, 0.9]]]
np.testing.assert_allclose(expected_keypoint_candidates, keypoint_cands)
np.testing.assert_allclose(expected_keypoint_scores, keypoint_scores)
def test_keypoint_candidate_prediction_per_keypoints(self):
keypoint_heatmap_np = np.zeros((2, 3, 3, 2), dtype=np.float32)
keypoint_heatmap_np[0, 0, 0, 0] = 1.0
......@@ -1280,7 +1351,9 @@ def get_fake_temporal_offset_params():
def build_center_net_meta_arch(build_resnet=False,
num_classes=_NUM_CLASSES,
max_box_predictions=5):
max_box_predictions=5,
apply_non_max_suppression=False,
detection_only=False):
"""Builds the CenterNet meta architecture."""
if build_resnet:
feature_extractor = (
......@@ -1299,7 +1372,31 @@ def build_center_net_meta_arch(build_resnet=False,
max_dimension=128,
pad_to_max_dimesnion=True)
if num_classes == 1:
non_max_suppression_fn = None
if apply_non_max_suppression:
post_processing_proto = post_processing_pb2.PostProcessing()
post_processing_proto.batch_non_max_suppression.iou_threshold = 1.0
post_processing_proto.batch_non_max_suppression.score_threshold = 0.6
(post_processing_proto.batch_non_max_suppression.max_total_detections
) = max_box_predictions
(post_processing_proto.batch_non_max_suppression.max_detections_per_class
) = max_box_predictions
(post_processing_proto.batch_non_max_suppression.change_coordinate_frame
) = False
non_max_suppression_fn, _ = post_processing_builder.build(
post_processing_proto)
if detection_only:
return cnma.CenterNetMetaArch(
is_training=True,
add_summaries=False,
num_classes=num_classes,
feature_extractor=feature_extractor,
image_resizer_fn=image_resizer_fn,
object_center_params=get_fake_center_params(max_box_predictions),
object_detection_params=get_fake_od_params(),
non_max_suppression_fn=non_max_suppression_fn)
elif num_classes == 1:
num_candidates_per_keypoint = 100 if max_box_predictions > 1 else 1
return cnma.CenterNetMetaArch(
is_training=True,
......@@ -1311,7 +1408,8 @@ def build_center_net_meta_arch(build_resnet=False,
object_detection_params=get_fake_od_params(),
keypoint_params_dict={
_TASK_NAME: get_fake_kp_params(num_candidates_per_keypoint)
})
},
non_max_suppression_fn=non_max_suppression_fn)
else:
return cnma.CenterNetMetaArch(
is_training=True,
......@@ -1325,7 +1423,8 @@ def build_center_net_meta_arch(build_resnet=False,
mask_params=get_fake_mask_params(),
densepose_params=get_fake_densepose_params(),
track_params=get_fake_track_params(),
temporal_offset_params=get_fake_temporal_offset_params())
temporal_offset_params=get_fake_temporal_offset_params(),
non_max_suppression_fn=non_max_suppression_fn)
def _logit(p):
......@@ -1659,7 +1758,6 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
return detections
detections = self.execute_cpu(graph_fn, [])
self.assertAllClose(detections['detection_boxes'][0, 0],
np.array([55, 46, 75, 86]) / 128.0)
self.assertAllClose(detections['detection_scores'][0],
......@@ -1732,6 +1830,49 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
detections['detection_surface_coords'][0, 0, :, :],
np.zeros_like(detections['detection_surface_coords'][0, 0, :, :]))
def test_non_max_suppression(self):
"""Tests application of NMS on CenterNet detections."""
target_class_id = 1
model = build_center_net_meta_arch(apply_non_max_suppression=True,
detection_only=True)
class_center = np.zeros((1, 32, 32, 10), dtype=np.float32)
height_width = np.zeros((1, 32, 32, 2), dtype=np.float32)
offset = np.zeros((1, 32, 32, 2), dtype=np.float32)
class_probs = np.ones(10) * _logit(0.25)
class_probs[target_class_id] = _logit(0.75)
class_center[0, 16, 16] = class_probs
height_width[0, 16, 16] = [5, 10]
offset[0, 16, 16] = [.25, .5]
class_center = tf.constant(class_center)
height_width = tf.constant(height_width)
offset = tf.constant(offset)
prediction_dict = {
cnma.OBJECT_CENTER: [class_center],
cnma.BOX_SCALE: [height_width],
cnma.BOX_OFFSET: [offset],
}
def graph_fn():
detections = model.postprocess(prediction_dict,
tf.constant([[128, 128, 3]]))
return detections
detections = self.execute_cpu(graph_fn, [])
num_detections = int(detections['num_detections'])
self.assertEqual(num_detections, 1)
self.assertAllClose(detections['detection_boxes'][0, 0],
np.array([55, 46, 75, 86]) / 128.0)
self.assertAllClose(detections['detection_scores'][0][:num_detections],
[.75])
expected_multiclass_scores = [.25] * 10
expected_multiclass_scores[target_class_id] = .75
self.assertAllClose(expected_multiclass_scores,
detections['detection_multiclass_scores'][0][0])
def test_postprocess_single_class(self):
"""Test the postprocess function."""
model = build_center_net_meta_arch(num_classes=1)
......@@ -1798,6 +1939,59 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
self.assertAllEqual([1, max_detection, num_keypoints],
detections['detection_keypoint_scores'].shape)
def test_postprocess_single_instance(self):
"""Test the postprocess single instance function."""
model = build_center_net_meta_arch(num_classes=1)
num_keypoints = len(model._kp_params_dict[_TASK_NAME].keypoint_indices)
class_center = np.zeros((1, 32, 32, 1), dtype=np.float32)
keypoint_heatmaps = np.zeros((1, 32, 32, num_keypoints), dtype=np.float32)
keypoint_offsets = np.zeros((1, 32, 32, 2), dtype=np.float32)
keypoint_regression = np.random.randn(1, 32, 32, num_keypoints * 2)
class_probs = np.zeros(1)
class_probs[0] = _logit(0.75)
class_center[0, 16, 16] = class_probs
keypoint_regression[0, 16, 16] = [
-1., -1.,
-1., 1.,
1., -1.,
1., 1.]
keypoint_heatmaps[0, 14, 14, 0] = _logit(0.9)
keypoint_heatmaps[0, 14, 18, 1] = _logit(0.9)
keypoint_heatmaps[0, 18, 14, 2] = _logit(0.9)
keypoint_heatmaps[0, 18, 18, 3] = _logit(0.05) # Note the low score.
class_center = tf.constant(class_center)
keypoint_heatmaps = tf.constant(keypoint_heatmaps, dtype=tf.float32)
keypoint_offsets = tf.constant(keypoint_offsets, dtype=tf.float32)
keypoint_regression = tf.constant(keypoint_regression, dtype=tf.float32)
prediction_dict = {
cnma.OBJECT_CENTER: [class_center],
cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_HEATMAP):
[keypoint_heatmaps],
cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_OFFSET):
[keypoint_offsets],
cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_REGRESSION):
[keypoint_regression],
}
def graph_fn():
detections = model.postprocess_single_instance_keypoints(
prediction_dict,
tf.constant([[128, 128, 3]]),
object_center_std_dev=32,
keypoint_std_dev=[32])
return detections
detections = self.execute_cpu(graph_fn, [])
self.assertAllEqual([1, 1, num_keypoints, 2],
detections['detection_keypoints'].shape)
self.assertAllEqual([1, 1, num_keypoints],
detections['detection_keypoint_scores'].shape)
def test_get_instance_indices(self):
classes = tf.constant([[0, 1, 2, 0], [2, 1, 2, 2]], dtype=tf.int32)
num_detections = tf.constant([1, 3], dtype=tf.int32)
......
......@@ -971,12 +971,12 @@ def _evaluate_checkpoint(estimator,
raise e
def continuous_eval(estimator,
model_dir,
input_fn,
train_steps,
name,
max_retries=0):
def continuous_eval_generator(estimator,
model_dir,
input_fn,
train_steps,
name,
max_retries=0):
"""Perform continuous evaluation on checkpoints written to a model directory.
Args:
......@@ -989,6 +989,9 @@ def continuous_eval(estimator,
max_retries: Maximum number of times to retry the evaluation on encountering
a tf.errors.InvalidArgumentError. If negative, will always retry the
evaluation.
Yields:
Pair of current step and eval_results.
"""
def terminate_eval():
......@@ -1011,6 +1014,7 @@ def continuous_eval(estimator,
# Terminate eval job when final checkpoint is reached
current_step = int(os.path.basename(ckpt).split('-')[1])
yield (current_step, eval_results)
if current_step >= train_steps:
tf.logging.info(
'Evaluation finished after training step %d' % current_step)
......@@ -1021,6 +1025,30 @@ def continuous_eval(estimator,
'Checkpoint %s no longer exists, skipping checkpoint' % ckpt)
def continuous_eval(estimator,
model_dir,
input_fn,
train_steps,
name,
max_retries=0):
"""Performs continuous evaluation on checkpoints written to a model directory.
Args:
estimator: Estimator object to use for evaluation.
model_dir: Model directory to read checkpoints for continuous evaluation.
input_fn: Input function to use for evaluation.
train_steps: Number of training steps. This is used to infer the last
checkpoint and stop evaluation loop.
name: Namescope for eval summary.
max_retries: Maximum number of times to retry the evaluation on encountering
a tf.errors.InvalidArgumentError. If negative, will always retry the
evaluation.
"""
for current_step, eval_results in continuous_eval_generator(
estimator, model_dir, input_fn, train_steps, name, max_retries):
tf.logging.info('Step %s, Eval results: %s', current_step, eval_results)
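Splitting the loop into a generator lets callers interleave their own logic with each evaluation round; `continuous_eval` above is now just one such consumer. A hedged sketch of another consumer that stops early on a metric threshold (the metric key and threshold are illustrative):

```python
for current_step, eval_results in continuous_eval_generator(
    estimator, model_dir, input_fn, train_steps, name='validation_data'):
  tf.logging.info('Step %d eval results: %s', current_step, eval_results)
  # Stop once a (hypothetical) target metric is reached.
  if eval_results.get('DetectionBoxes_Precision/mAP', 0.0) >= 0.5:
    break
```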
def populate_experiment(run_config,
hparams,
pipeline_config_path,
......
......@@ -4,6 +4,7 @@ package object_detection.protos;
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/losses.proto";
import "object_detection/protos/post_processing.proto";
// Configuration for the CenterNet meta architecture from the "Objects as
// Points" paper [1]
......@@ -271,6 +272,13 @@ message CenterNet {
optional TemporalOffsetEstimation temporal_offset_task = 12;
// CenterNet does not apply conventional post processing operations such as
// non max suppression as it applies a max-pool operator on box centers.
// However, in some cases we observe the need to remove duplicate predictions
// from CenterNet. Use this optional parameter to apply traditional non max
// suppression and score thresholding.
optional PostProcessing post_processing = 24;
}
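On the Python side, the new field corresponds to a `PostProcessing` proto that `post_processing_builder.build` turns into the callable passed to `CenterNetMetaArch` as `non_max_suppression_fn`, as exercised by the unit tests elsewhere in this change. A hedged sketch with illustrative thresholds:

```python
from object_detection.builders import post_processing_builder
from object_detection.protos import post_processing_pb2

post_processing_proto = post_processing_pb2.PostProcessing()
nms = post_processing_proto.batch_non_max_suppression
nms.iou_threshold = 0.5
nms.score_threshold = 0.3
nms.max_total_detections = 100
nms.max_detections_per_class = 100
nms.change_coordinate_frame = False
non_max_suppression_fn, _ = post_processing_builder.build(post_processing_proto)
# Then: CenterNetMetaArch(..., non_max_suppression_fn=non_max_suppression_fn)
```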
message CenterNetFeatureExtractor {
......
......@@ -42,6 +42,8 @@ message Hyperparams {
// Note that if nothing below is selected, then no normalization is applied
// BatchNorm hyperparameters.
BatchNorm batch_norm = 5;
// SyncBatchNorm hyperparameters (KerasLayerHyperparams only).
BatchNorm sync_batch_norm = 9;
// GroupNorm hyperparameters. This is only supported on a subset of models.
// Note that the current implementation of group norm instantiated in
// tf.contrib.group.layers.group_norm() only supports fixed_size_resizer
......
......@@ -30,7 +30,7 @@ enum InputType {
TF_SEQUENCE_EXAMPLE = 2; // TfSequenceExample Input
}
// Next id: 37
// Next id: 38
message InputReader {
// Name of input reader. Typically used to describe the dataset that is read
// by this input reader.
......@@ -134,6 +134,9 @@ message InputReader {
// Whether to load track information.
optional bool load_track_id = 33 [default = false];
// Whether to load keypoint depth features.
optional bool load_keypoint_depth_features = 37 [default = false];
// Whether to use the display name when decoding examples. This is only used
// when mapping class text strings to integers.
optional bool use_display_name = 17 [default = false];
......@@ -158,12 +161,17 @@ message InputReader {
//
// The number of weights must match the number of input files configured.
//
// When set, shuffling, shuffle buffer size, and num_readers settings are
// The number of input readers per dataset is num_readers, scaled relative to
// the dataset weight.
//
// When set, shuffling and shuffle buffer size settings are
// applied individually to each dataset.
//
// Implementation follows tf.data.experimental.sample_from_datasets sampling
// strategy. Weights may take any value - only relative weights matter.
// Zero weights will result in a dataset not being sampled.
//
// Zero weights will result in a dataset not being sampled and no input
// readers spawned.
//
// Examples, assuming two input files configured:
//
......
......@@ -254,7 +254,7 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
"""
for image_id in image_ids:
if image_id in self._image_ids:
raise ValueError('Image with id {} already added.'.format(image_id))
logging.warning('Image with id %s already added.', image_id)
self._evaluation.merge_internal_state(state_tuple)
......@@ -321,7 +321,7 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
raise error if instance masks are not in groundtruth dictionary.
"""
if image_id in self._image_ids:
raise ValueError('Image with id {} already added.'.format(image_id))
logging.warning('Image with id %s already added.', image_id)
groundtruth_classes = (
groundtruth_dict[standard_fields.InputDataFields.groundtruth_classes] -
......@@ -729,7 +729,7 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
ValueError: On adding groundtruth for an image more than once.
"""
if image_id in self._image_ids:
raise ValueError('Image with id {} already added.'.format(image_id))
logging.warning('Image with id %s already added.', image_id)
groundtruth_classes = (
groundtruth_dict[standard_fields.InputDataFields.groundtruth_classes] -
......
......@@ -524,30 +524,6 @@ class PascalEvaluationTest(tf.test.TestCase):
pascal_evaluator.clear()
self.assertFalse(pascal_evaluator._image_ids)
def test_value_error_on_duplicate_images(self):
categories = [{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'},
{'id': 3, 'name': 'elephant'}]
# Add groundtruth
pascal_evaluator = object_detection_evaluation.PascalDetectionEvaluator(
categories)
image_key1 = 'img1'
groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int)
pascal_evaluator.add_single_ground_truth_image_info(
image_key1,
{standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1})
with self.assertRaises(ValueError):
pascal_evaluator.add_single_ground_truth_image_info(
image_key1,
{standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1})
class WeightedPascalEvaluationTest(tf.test.TestCase):
......@@ -659,28 +635,6 @@ class WeightedPascalEvaluationTest(tf.test.TestCase):
self.wp_eval.clear()
self.assertFalse(self.wp_eval._image_ids)
def test_value_error_on_duplicate_images(self):
# Add groundtruth
self.wp_eval = (
object_detection_evaluation.WeightedPascalDetectionEvaluator(
self.categories))
image_key1 = 'img1'
groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int)
self.wp_eval.add_single_ground_truth_image_info(
image_key1,
{standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1})
with self.assertRaises(ValueError):
self.wp_eval.add_single_ground_truth_image_info(
image_key1,
{standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1})
class PrecisionAtRecallEvaluationTest(tf.test.TestCase):
......@@ -807,31 +761,6 @@ class PrecisionAtRecallEvaluationTest(tf.test.TestCase):
self.wp_eval.clear()
self.assertFalse(self.wp_eval._image_ids)
def test_value_error_on_duplicate_images(self):
# Add groundtruth
self.wp_eval = (
object_detection_evaluation.PrecisionAtRecallDetectionEvaluator(
self.categories, recall_lower_bound=0.0, recall_upper_bound=0.5))
image_key1 = 'img1'
groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int)
self.wp_eval.add_single_ground_truth_image_info(
image_key1, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1
})
with self.assertRaises(ValueError):
self.wp_eval.add_single_ground_truth_image_info(
image_key1, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1
})
class ObjectDetectionEvaluationTest(tf.test.TestCase):
......
"""Setup script for object_detection."""
from setuptools import find_packages
from setuptools import setup
REQUIRED_PACKAGES = ['Pillow>=1.0', 'Matplotlib>=2.1', 'Cython>=0.28.1']
setup(
name='object_detection',
version='0.1',
install_requires=REQUIRED_PACKAGES,
include_package_data=True,
packages=[p for p in find_packages() if p.startswith('object_detection')],
description='Tensorflow Object Detection Library',
)
......@@ -65,15 +65,6 @@ You will need to register in order to download the data. Download the following
* leftImg8bit_sequence_trainvaltest.zip
* camera_trainvaltest.zip
### Download Bike dataset (17GB) (optional)
```shell
mkdir -p ~/vid2depth/bike-uncompressed
cd ~/vid2depth/bike-uncompressed
wget https://storage.googleapis.com/brain-robotics-data/bike/BikeVideoDataset.tar
tar xvf BikeVideoDataset.tar
```
## 3. Inference
### Download trained model
......@@ -122,18 +113,6 @@ python dataset/gen_data.py \
--seq_length 3
```
### Prepare Bike training sequences (optional)
```shell
# Prepare training sequences.
cd tensorflow/models/research/vid2depth
python dataset/gen_data.py \
--dataset_name bike \
--dataset_dir ~/vid2depth/bike-uncompressed \
--data_dir ~/vid2depth/data/bike \
--seq_length 3
```
### Compile the ICP op (work in progress)
The ICP op depends on multiple software packages (TensorFlow, Point Cloud
......