Unverified Commit 8518d053 authored by pkulzc's avatar pkulzc Committed by GitHub
Browse files

Open source MnasFPN and minor fixes to OD API (#8484)

310447280  by lzc:

    Internal change

310420845  by Zhichao Lu:

    Open source the internal Context RCNN code.

--
310362339  by Zhichao Lu:

    Internal change

310259448  by lzc:

    Update required TF version for OD API.

--
310252159  by Zhichao Lu:

    Port patch_ops_test to TF1/TF2 as TPUs.

--
310247180  by Zhichao Lu:

    Ignore keypoint heatmap loss in the regions/bounding boxes with target keypoint
    class but no valid keypoint annotations.

--
310178294  by Zhichao Lu:

    Opensource MnasFPN
    https://arxiv.org/abs/1912.01106

--
310094222  by lzc:

    Internal changes.

--
310085250  by lzc:

    Internal Change.

--
310016447  by huizhongc:

    Remove unrecognized classes from labeled_classes.

--
310009470  by rathodv:

    Mark batcher.py as TF1 only.

--
310001984  by rathodv:

    Update core/preprocessor.py to be compatible with TF1/TF2.

--
309455035  by Zhi...
parent ac5fff19
...@@ -32,6 +32,9 @@ message Hyperparams { ...@@ -32,6 +32,9 @@ message Hyperparams {
// Use tf.nn.relu6 // Use tf.nn.relu6
RELU_6 = 2; RELU_6 = 2;
// Use tf.nn.swish
SWISH = 3;
} }
optional Activation activation = 4 [default = RELU]; optional Activation activation = 4 [default = RELU];
......
...@@ -10,6 +10,7 @@ message ImageResizer { ...@@ -10,6 +10,7 @@ message ImageResizer {
FixedShapeResizer fixed_shape_resizer = 2; FixedShapeResizer fixed_shape_resizer = 2;
IdentityResizer identity_resizer = 3; IdentityResizer identity_resizer = 3;
ConditionalShapeResizer conditional_shape_resizer = 4; ConditionalShapeResizer conditional_shape_resizer = 4;
PadToMultipleResizer pad_to_multiple_resizer = 5;
} }
} }
...@@ -90,3 +91,18 @@ message ConditionalShapeResizer { ...@@ -90,3 +91,18 @@ message ConditionalShapeResizer {
optional bool convert_to_grayscale = 4 [default = false]; optional bool convert_to_grayscale = 4 [default = false];
} }
// An image resizer which resizes inputs by zero padding them such that their
// spatial dimensions are divisible by a specified multiple. This is useful
// when you want to concatenate or compare the input to an output of a
// fully convolutional network.
message PadToMultipleResizer {
// The multiple to which the spatial dimensions will be padded to.
// A multiple of 1 (the default) leaves the image unchanged.
optional int32 multiple = 1 [default = 1];
// Whether to also resize the image channels from 3 to 1 (RGB to grayscale).
// NOTE(review): field number skips 2-3, presumably to keep
// convert_to_grayscale aligned with the other resizer messages in this
// file — confirm before reusing numbers 2 or 3 here.
optional bool convert_to_grayscale = 4 [default = false];
}
...@@ -2,6 +2,8 @@ syntax = "proto2"; ...@@ -2,6 +2,8 @@ syntax = "proto2";
package object_detection.protos; package object_detection.protos;
import "object_detection/protos/image_resizer.proto";
// Configuration proto for defining input readers that generate Object Detection // Configuration proto for defining input readers that generate Object Detection
// Examples from input sources. Input readers are expected to generate a // Examples from input sources. Input readers are expected to generate a
// dictionary of tensors, with the following fields populated: // dictionary of tensors, with the following fields populated:
...@@ -22,19 +24,19 @@ enum InstanceMaskType { ...@@ -22,19 +24,19 @@ enum InstanceMaskType {
PNG_MASKS = 2; // Encoded PNG masks. PNG_MASKS = 2; // Encoded PNG masks.
} }
// Next id: 25 // Next id: 29
message InputReader { message InputReader {
// Name of input reader. Typically used to describe the dataset that is read // Name of input reader. Typically used to describe the dataset that is read
// by this input reader. // by this input reader.
optional string name = 23 [default=""]; optional string name = 23 [default = ""];
// Path to StringIntLabelMap pbtxt file specifying the mapping from string // Path to StringIntLabelMap pbtxt file specifying the mapping from string
// labels to integer ids. // labels to integer ids.
optional string label_map_path = 1 [default=""]; optional string label_map_path = 1 [default = ""];
// Whether data should be processed in the order they are read in, or // Whether data should be processed in the order they are read in, or
// shuffled randomly. // shuffled randomly.
optional bool shuffle = 2 [default=true]; optional bool shuffle = 2 [default = true];
// Buffer size to be used when shuffling. // Buffer size to be used when shuffling.
optional uint32 shuffle_buffer_size = 11 [default = 2048]; optional uint32 shuffle_buffer_size = 11 [default = 2048];
...@@ -44,43 +46,43 @@ message InputReader { ...@@ -44,43 +46,43 @@ message InputReader {
// The number of times a data source is read. If set to zero, the data source // The number of times a data source is read. If set to zero, the data source
// will be reused indefinitely. // will be reused indefinitely.
optional uint32 num_epochs = 5 [default=0]; optional uint32 num_epochs = 5 [default = 0];
// Integer representing how often an example should be sampled. To feed // Integer representing how often an example should be sampled. To feed
// only 1/3 of your data into your model, set `sample_1_of_n_examples` to 3. // only 1/3 of your data into your model, set `sample_1_of_n_examples` to 3.
// This is particularly useful for evaluation, where you might not prefer to // This is particularly useful for evaluation, where you might not prefer to
// evaluate all of your samples. // evaluate all of your samples.
optional uint32 sample_1_of_n_examples = 22 [default=1]; optional uint32 sample_1_of_n_examples = 22 [default = 1];
// Number of file shards to read in parallel. // Number of file shards to read in parallel.
optional uint32 num_readers = 6 [default=64]; optional uint32 num_readers = 6 [default = 64];
// Number of batches to produce in parallel. If this is run on a 2x2 TPU set // Number of batches to produce in parallel. If this is run on a 2x2 TPU set
// this to 8. // this to 8.
optional uint32 num_parallel_batches = 19 [default=8]; optional uint32 num_parallel_batches = 19 [default = 8];
// Number of batches to prefetch. Prefetch decouples input pipeline and // Number of batches to prefetch. Prefetch decouples input pipeline and
// model so they can be pipelined resulting in higher throughput. Set this // model so they can be pipelined resulting in higher throughput. Set this
// to a small constant and increment linearly until the improvements become // to a small constant and increment linearly until the improvements become
// marginal or you exceed your cpu memory budget. Setting this to -1, // marginal or you exceed your cpu memory budget. Setting this to -1,
// automatically tunes this value for you. // automatically tunes this value for you.
optional int32 num_prefetch_batches = 20 [default=2]; optional int32 num_prefetch_batches = 20 [default = 2];
// Maximum number of records to keep in reader queue. // Maximum number of records to keep in reader queue.
optional uint32 queue_capacity = 3 [default=2000, deprecated=true]; optional uint32 queue_capacity = 3 [default = 2000, deprecated = true];
// Minimum number of records to keep in reader queue. A large value is needed // Minimum number of records to keep in reader queue. A large value is needed
// to generate a good random shuffle. // to generate a good random shuffle.
optional uint32 min_after_dequeue = 4 [default=1000, deprecated=true]; optional uint32 min_after_dequeue = 4 [default = 1000, deprecated = true];
// Number of records to read from each reader at once. // Number of records to read from each reader at once.
optional uint32 read_block_length = 15 [default=32]; optional uint32 read_block_length = 15 [default = 32];
// Number of decoded records to prefetch before batching. // Number of decoded records to prefetch before batching.
optional uint32 prefetch_size = 13 [default = 512, deprecated=true]; optional uint32 prefetch_size = 13 [default = 512, deprecated = true];
// Number of parallel decode ops to apply. // Number of parallel decode ops to apply.
optional uint32 num_parallel_map_calls = 14 [default = 64, deprecated=true]; optional uint32 num_parallel_map_calls = 14 [default = 64, deprecated = true];
// If positive, TfExampleDecoder will try to decode rasters of additional // If positive, TfExampleDecoder will try to decode rasters of additional
// channels from tf.Examples. // channels from tf.Examples.
...@@ -89,14 +91,21 @@ message InputReader { ...@@ -89,14 +91,21 @@ message InputReader {
// Number of groundtruth keypoints per object. // Number of groundtruth keypoints per object.
optional uint32 num_keypoints = 16 [default = 0]; optional uint32 num_keypoints = 16 [default = 0];
// Keypoint weights. These weights can be used to apply per-keypoint loss
// multipliers. The size of this field should agree with `num_keypoints`.
repeated float keypoint_type_weight = 26;
// Maximum number of boxes to pad to during training / evaluation. // Maximum number of boxes to pad to during training / evaluation.
// Set this to at least the maximum amount of boxes in the input data, // Set this to at least the maximum amount of boxes in the input data,
// otherwise some groundtruth boxes may be clipped. // otherwise some groundtruth boxes may be clipped.
optional int32 max_number_of_boxes = 21 [default=100]; optional int32 max_number_of_boxes = 21 [default = 100];
// Whether to load multiclass scores from the dataset. // Whether to load multiclass scores from the dataset.
optional bool load_multiclass_scores = 24 [default = false]; optional bool load_multiclass_scores = 24 [default = false];
// Whether to load context features from the dataset.
optional bool load_context_features = 25 [default = false];
// Whether to load groundtruth instance masks. // Whether to load groundtruth instance masks.
optional bool load_instance_masks = 7 [default = false]; optional bool load_instance_masks = 7 [default = false];
...@@ -107,10 +116,15 @@ message InputReader { ...@@ -107,10 +116,15 @@ message InputReader {
// when mapping class text strings to integers. // when mapping class text strings to integers.
optional bool use_display_name = 17 [default = false]; optional bool use_display_name = 17 [default = false];
// Whether to include the source_id string in the input features.
optional bool include_source_id = 27 [default = false];
oneof input_reader { oneof input_reader {
TFRecordInputReader tf_record_input_reader = 8; TFRecordInputReader tf_record_input_reader = 8;
ExternalInputReader external_input_reader = 9; ExternalInputReader external_input_reader = 9;
} }
} }
// An input reader that reads TF Example protos from local TFRecord files. // An input reader that reads TF Example protos from local TFRecord files.
......
...@@ -69,6 +69,7 @@ message LocalizationLoss { ...@@ -69,6 +69,7 @@ message LocalizationLoss {
WeightedL2LocalizationLoss weighted_l2 = 1; WeightedL2LocalizationLoss weighted_l2 = 1;
WeightedSmoothL1LocalizationLoss weighted_smooth_l1 = 2; WeightedSmoothL1LocalizationLoss weighted_smooth_l1 = 2;
WeightedIOULocalizationLoss weighted_iou = 3; WeightedIOULocalizationLoss weighted_iou = 3;
L1LocalizationLoss l1_localization_loss = 4;
} }
} }
...@@ -96,6 +97,10 @@ message WeightedSmoothL1LocalizationLoss { ...@@ -96,6 +97,10 @@ message WeightedSmoothL1LocalizationLoss {
message WeightedIOULocalizationLoss { message WeightedIOULocalizationLoss {
} }
// L1 Localization Loss.
message L1LocalizationLoss {
// Intentionally empty: this loss takes no configuration parameters.
}
// Configuration for class prediction loss function. // Configuration for class prediction loss function.
message ClassificationLoss { message ClassificationLoss {
oneof classification_loss { oneof classification_loss {
...@@ -104,6 +109,7 @@ message ClassificationLoss { ...@@ -104,6 +109,7 @@ message ClassificationLoss {
WeightedSoftmaxClassificationAgainstLogitsLoss weighted_logits_softmax = 5; WeightedSoftmaxClassificationAgainstLogitsLoss weighted_logits_softmax = 5;
BootstrappedSigmoidClassificationLoss bootstrapped_sigmoid = 3; BootstrappedSigmoidClassificationLoss bootstrapped_sigmoid = 3;
SigmoidFocalClassificationLoss weighted_sigmoid_focal = 4; SigmoidFocalClassificationLoss weighted_sigmoid_focal = 4;
PenaltyReducedLogisticFocalLoss penalty_reduced_logistic_focal_loss = 6;
} }
} }
...@@ -162,6 +168,17 @@ message BootstrappedSigmoidClassificationLoss { ...@@ -162,6 +168,17 @@ message BootstrappedSigmoidClassificationLoss {
optional bool anchorwise_output = 3 [default=false]; optional bool anchorwise_output = 3 [default=false];
} }
// Pixelwise logistic focal loss with pixels near the target having a reduced
// penalty.
message PenaltyReducedLogisticFocalLoss {
// Focussing parameter of the focal loss.
// NOTE(review): no default is declared, so an unset value reads as 0.0,
// which would disable focusing entirely — confirm callers always set it.
optional float alpha = 1;
// Penalty reduction factor.
// NOTE(review): likewise defaults to 0.0 when unset — verify against the
// loss implementation's expected range.
optional float beta = 2;
}
// Configuration for hard example miner. // Configuration for hard example miner.
message HardExampleMiner { message HardExampleMiner {
// Maximum number of hard examples to be selected per image (prior to // Maximum number of hard examples to be selected per image (prior to
......
...@@ -13,9 +13,10 @@ message DetectionModel { ...@@ -13,9 +13,10 @@ message DetectionModel {
// This can be used to define experimental models. To define your own // This can be used to define experimental models. To define your own
// experimental meta architecture, populate a key in the // experimental meta architecture, populate a key in the
// model_builder.EXPERIMENTAL_META_ARCHITECURE_BUILDER_MAP dict and set its // model_builder.EXPERIMENTAL_META_ARCH_BUILDER_MAP dict and set its
// value to a function that builds your model. // value to a function that builds your model.
ExperimentalModel experimental_model = 3; ExperimentalModel experimental_model = 3;
} }
} }
......
...@@ -36,6 +36,10 @@ message MomentumOptimizer { ...@@ -36,6 +36,10 @@ message MomentumOptimizer {
// See: https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer // See: https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer
message AdamOptimizer { message AdamOptimizer {
optional LearningRate learning_rate = 1; optional LearningRate learning_rate = 1;
// Default value for epsilon (1e-8) matches default value in
// tf.compat.v1.train.AdamOptimizer. This differs from tf2 default of 1e-7
// in tf.keras.optimizers.Adam .
optional float epsilon = 2 [default = 1e-8];
} }
......
...@@ -10,7 +10,7 @@ import "object_detection/protos/train.proto"; ...@@ -10,7 +10,7 @@ import "object_detection/protos/train.proto";
// Convenience message for configuring a training and eval pipeline. Allows all // Convenience message for configuring a training and eval pipeline. Allows all
// of the pipeline parameters to be configured from one file. // of the pipeline parameters to be configured from one file.
// Next id: 7 // Next id: 8
message TrainEvalPipelineConfig { message TrainEvalPipelineConfig {
optional DetectionModel model = 1; optional DetectionModel model = 1;
optional TrainConfig train_config = 2; optional TrainConfig train_config = 2;
......
...@@ -45,6 +45,10 @@ message BatchNonMaxSuppression { ...@@ -45,6 +45,10 @@ message BatchNonMaxSuppression {
// Whether to use tf.image.combined_non_max_suppression. // Whether to use tf.image.combined_non_max_suppression.
optional bool use_combined_nms = 11 [default = false]; optional bool use_combined_nms = 11 [default = false];
// Whether to change coordinate frame of the boxlist to be relative to
// window's frame.
optional bool change_coordinate_frame = 12 [default = true];
} }
// Configuration proto for post-processing predicted boxes and // Configuration proto for post-processing predicted boxes and
......
...@@ -4,6 +4,7 @@ package object_detection.protos; ...@@ -4,6 +4,7 @@ package object_detection.protos;
// Message for defining a preprocessing operation on input data. // Message for defining a preprocessing operation on input data.
// See: //third_party/tensorflow_models/object_detection/core/preprocessor.py // See: //third_party/tensorflow_models/object_detection/core/preprocessor.py
// Next ID: 38
message PreprocessingStep { message PreprocessingStep {
oneof preprocessing_step { oneof preprocessing_step {
NormalizeImage normalize_image = 1; NormalizeImage normalize_image = 1;
...@@ -42,6 +43,7 @@ message PreprocessingStep { ...@@ -42,6 +43,7 @@ message PreprocessingStep {
RandomJpegQuality random_jpeg_quality = 34; RandomJpegQuality random_jpeg_quality = 34;
RandomDownscaleToTargetPixels random_downscale_to_target_pixels = 35; RandomDownscaleToTargetPixels random_downscale_to_target_pixels = 35;
RandomPatchGaussian random_patch_gaussian = 36; RandomPatchGaussian random_patch_gaussian = 36;
RandomSquareCropByScale random_square_crop_by_scale = 37;
} }
} }
...@@ -533,3 +535,26 @@ message RandomPatchGaussian { ...@@ -533,3 +535,26 @@ message RandomPatchGaussian {
optional float min_gaussian_stddev = 4 [default = 0.0]; optional float min_gaussian_stddev = 4 [default = 0.0];
optional float max_gaussian_stddev = 5 [default = 1.0]; optional float max_gaussian_stddev = 5 [default = 1.0];
} }
// Extract a square sized crop from an image whose side length is sampled by
// randomly scaling the maximum spatial dimension of the image. If part of the
// crop falls outside the image, it is filled with zeros.
// The augmentation is borrowed from [1]
// [1]: https://arxiv.org/abs/1904.07850
message RandomSquareCropByScale {
// The maximum size of the border. The border defines distance in pixels to
// the image boundaries that will not be considered as a center of a crop.
// To make sure that the border does not go over the center of the image,
// we chose the border value by computing the minimum k, such that
// (max_border / (2**k)) < image_dimension/2
optional int32 max_border = 1 [default = 128];
// The minimum and maximum values of scale.
optional float scale_min = 2 [default=0.6];
optional float scale_max = 3 [default=1.3];
// The number of discrete scale values to randomly sample between
// [scale_min, scale_max].
optional int32 num_scales = 4 [default=8];
}
...@@ -145,6 +145,7 @@ message Ssd { ...@@ -145,6 +145,7 @@ message Ssd {
optional MaskHead mask_head_config = 25; optional MaskHead mask_head_config = 25;
} }
// Next id: 18.
message SsdFeatureExtractor { message SsdFeatureExtractor {
reserved 6; reserved 6;
......
...@@ -17,6 +17,22 @@ message StringIntLabelMapItem { ...@@ -17,6 +17,22 @@ message StringIntLabelMapItem {
// Human readable string label. // Human readable string label.
optional string display_name = 3; optional string display_name = 3;
// Name of class specific keypoints for each class object and their respective
// keypoint IDs.
message KeypointMap {
// Id for the keypoint. Id must be unique within a given class, however, it
// could be shared across classes. For example "nose" keypoint can occur
// in both "face" and "person" classes. Hence they can be mapped to the same
// id.
//
// Note: It is advised to assign ids in range [1, num_unique_keypoints] to
// encode keypoint targets efficiently.
optional int32 id = 1;
// Human-readable label for the keypoint, e.g. "nose" or "left_elbow".
optional string label = 2;
}
repeated KeypointMap keypoints = 4;
}; };
message StringIntLabelMap { message StringIntLabelMap {
......
...@@ -5,8 +5,16 @@ package object_detection.protos; ...@@ -5,8 +5,16 @@ package object_detection.protos;
import "object_detection/protos/optimizer.proto"; import "object_detection/protos/optimizer.proto";
import "object_detection/protos/preprocessor.proto"; import "object_detection/protos/preprocessor.proto";
enum CheckpointVersion {
UNKNOWN = 0;
V1 = 1;
V2 = 2;
}
// Message for configuring DetectionModel training jobs (train.py). // Message for configuring DetectionModel training jobs (train.py).
// Next id: 28 // Next id: 30
message TrainConfig { message TrainConfig {
// Effective batch size to use for training. // Effective batch size to use for training.
// For TPU (or sync SGD jobs), the batch size per core (or GPU) is going to be // For TPU (or sync SGD jobs), the batch size per core (or GPU) is going to be
...@@ -37,6 +45,11 @@ message TrainConfig { ...@@ -37,6 +45,11 @@ message TrainConfig {
// Typically used to load feature extractor variables from trained models. // Typically used to load feature extractor variables from trained models.
optional string fine_tune_checkpoint_type = 22 [default=""]; optional string fine_tune_checkpoint_type = 22 [default=""];
// Either "v1" or "v2". If v1, restores the checkpoint using the tensorflow
// v1 style of restoring checkpoints. If v2, uses the eager mode checkpoint
// restoration API.
optional CheckpointVersion fine_tune_checkpoint_version = 28 [default=V1];
// [Deprecated]: use fine_tune_checkpoint_type instead. // [Deprecated]: use fine_tune_checkpoint_type instead.
// Specifies if the finetune checkpoint is from an object detection model. // Specifies if the finetune checkpoint is from an object detection model.
// If from an object detection model, the model being trained should have // If from an object detection model, the model being trained should have
...@@ -119,4 +132,6 @@ message TrainConfig { ...@@ -119,4 +132,6 @@ message TrainConfig {
// Whether to summarize gradients. // Whether to summarize gradients.
optional bool summarize_gradients = 27 [default=false]; optional bool summarize_gradients = 27 [default=false];
} }
# SSD with MnasFPN feature extractor, shared box predictor
# See Chen et al, https://arxiv.org/abs/1912.01106
# Trained on COCO, initialized from scratch.
#
# 0.92B MulAdds, 2.5M Parameters. Latency is 193ms on Pixel 1.
# Achieves 26.6 mAP on COCO14 minival dataset.
# This config is TPU compatible
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 6
anchor_scale: 3.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 3
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 64
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
share_prediction_tower: true
use_depthwise: true
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_mobilenet_v2_mnasfpn'
fpn {
min_level: 3
max_level: 6
additional_layer_depth: 48
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.97,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
batch_size: 1024
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 32
num_steps: 50000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 4.
total_steps: 50000
warmup_learning_rate: .026666
warmup_steps: 5000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
num_examples: 8000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
shuffle: false
num_readers: 1
}
model {
ssd {
num_classes: 2
box_coder {
keypoint_box_coder {
num_keypoints: 23
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
height_stride: 16
height_stride: 32
height_stride: 64
height_stride: 128
height_stride: 256
height_stride: 512
width_stride: 16
width_stride: 32
width_stride: 64
width_stride: 128
width_stride: 256
width_stride: 512
height_offset: 0
height_offset: 0
height_offset: 0
height_offset: 0
height_offset: 0
height_offset: 0
width_offset: 0
width_offset: 0
width_offset: 0
width_offset: 0
width_offset: 0
width_offset: 0
}
}
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 320
max_dimension: 640
convert_to_grayscale: true
}
}
box_predictor {
convolutional_box_predictor {
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: false
kernel_size: 3
box_code_size: 50
apply_sigmoid_to_scores: false
conv_hyperparams {
activation: RELU_6
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true
scale: true
center: true
decay: 0.9997
epsilon: 0.001
}
}
}
}
feature_extractor {
type: "ssd_mobilenet_v1"
min_depth: 16
depth_multiplier: 0.25
use_explicit_padding: true
conv_hyperparams {
activation: RELU_6
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true
scale: true
center: true
decay: 0.9997
epsilon: 0.001
}
}
}
loss {
classification_loss {
weighted_sigmoid {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
hard_example_miner {
num_hard_examples: 3000
iou_threshold: 0.99
loss_type: CLASSIFICATION
max_negatives_per_positive: 3
min_negatives_per_image: 10
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.5
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint: ""
num_steps: 10000000
batch_size: 32
data_augmentation_options {
random_horizontal_flip {
keypoint_flip_permutation: 1
keypoint_flip_permutation: 0
keypoint_flip_permutation: 2
keypoint_flip_permutation: 3
keypoint_flip_permutation: 5
keypoint_flip_permutation: 4
keypoint_flip_permutation: 6
keypoint_flip_permutation: 8
keypoint_flip_permutation: 7
keypoint_flip_permutation: 10
keypoint_flip_permutation: 9
keypoint_flip_permutation: 12
keypoint_flip_permutation: 11
keypoint_flip_permutation: 14
keypoint_flip_permutation: 13
keypoint_flip_permutation: 16
keypoint_flip_permutation: 15
keypoint_flip_permutation: 18
keypoint_flip_permutation: 17
keypoint_flip_permutation: 20
keypoint_flip_permutation: 19
keypoint_flip_permutation: 22
keypoint_flip_permutation: 21
}
}
data_augmentation_options {
ssd_random_crop_fixed_aspect_ratio {
}
}
optimizer {
rms_prop_optimizer {
learning_rate {
exponential_decay_learning_rate {
initial_learning_rate: 0.0004
decay_steps: 800720
decay_factor: 0.95
}
}
momentum_optimizer_value: 0.9
decay: 0.9
epsilon: 1.0
}
}
}
train_input_reader {
label_map_path: "PATH_TO_BE_CONFIGURED/face_person_with_keypoints_label_map.pbtxt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/faces_train.record-?????-of-00010"
}
num_keypoints: 23
}
eval_config {
num_visualizations: 10
metrics_set: "coco_detection_metrics"
use_moving_averages: true
parameterized_metric {
coco_keypoint_metrics {
class_label: "face"
}
}
parameterized_metric {
coco_keypoint_metrics {
class_label: "PERSON"
}
}
}
eval_input_reader {
label_map_path: "PATH_TO_BE_CONFIGURED/face_person_with_keypoints_label_map.pbtxt"
shuffle: true
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/faces_val.record-?????-of-00010"
}
num_keypoints: 23
}
graph_rewriter {
quantization {
delay: 2000000
activation_bits: 8
weight_bits: 8
}
}
...@@ -24,6 +24,7 @@ from google.protobuf import text_format ...@@ -24,6 +24,7 @@ from google.protobuf import text_format
from tensorflow.python.saved_model import loader from tensorflow.python.saved_model import loader
from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import tag_constants from tensorflow.python.saved_model import tag_constants
from tensorflow.python.tpu import tpu
# pylint: enable=g-direct-tensorflow-import # pylint: enable=g-direct-tensorflow-import
from object_detection.protos import pipeline_pb2 from object_detection.protos import pipeline_pb2
from object_detection.tpu_exporters import faster_rcnn from object_detection.tpu_exporters import faster_rcnn
...@@ -160,7 +161,7 @@ def run_inference(inputs, ...@@ -160,7 +161,7 @@ def run_inference(inputs,
saver = tf.train.Saver() saver = tf.train.Saver()
init_op = tf.global_variables_initializer() init_op = tf.global_variables_initializer()
sess.run(tf.contrib.tpu.initialize_system()) sess.run(tpu.initialize_system())
sess.run(init_op) sess.run(init_op)
if ckpt_path is not None: if ckpt_path is not None:
...@@ -170,7 +171,7 @@ def run_inference(inputs, ...@@ -170,7 +171,7 @@ def run_inference(inputs,
tensor_dict_out = sess.run( tensor_dict_out = sess.run(
result_tensor_dict, feed_dict={placeholder_tensor: [inputs]}) result_tensor_dict, feed_dict={placeholder_tensor: [inputs]})
sess.run(tf.contrib.tpu.shutdown_system()) sess.run(tpu.shutdown_system())
return tensor_dict_out return tensor_dict_out
...@@ -194,7 +195,7 @@ def run_inference_from_saved_model(inputs, ...@@ -194,7 +195,7 @@ def run_inference_from_saved_model(inputs,
meta_graph = loader.load(sess, [tag_constants.SERVING, tag_constants.TPU], meta_graph = loader.load(sess, [tag_constants.SERVING, tag_constants.TPU],
saved_model_dir) saved_model_dir)
sess.run(tf.contrib.tpu.initialize_system()) sess.run(tpu.initialize_system())
key_prediction = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY key_prediction = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
...@@ -210,6 +211,6 @@ def run_inference_from_saved_model(inputs, ...@@ -210,6 +211,6 @@ def run_inference_from_saved_model(inputs,
tensor_dict_out = sess.run( tensor_dict_out = sess.run(
tensor_name_output, feed_dict={tensor_name_input: [inputs]}) tensor_name_output, feed_dict={tensor_name_input: [inputs]})
sess.run(tf.contrib.tpu.shutdown_system()) sess.run(tpu.shutdown_system())
return tensor_dict_out return tensor_dict_out
...@@ -31,6 +31,8 @@ if int(major) < 1 or (int(major == 1) and int(minor) < 14): ...@@ -31,6 +31,8 @@ if int(major) < 1 or (int(major == 1) and int(minor) < 14):
from tensorflow.python.framework import function from tensorflow.python.framework import function
from tensorflow.python.tpu import functional as tpu_functional from tensorflow.python.tpu import functional as tpu_functional
from tensorflow.python.tpu import tpu
from tensorflow.python.tpu.bfloat16 import bfloat16_scope
from tensorflow.python.tpu.ops import tpu_ops from tensorflow.python.tpu.ops import tpu_ops
from object_detection import exporter from object_detection import exporter
from object_detection.builders import model_builder from object_detection.builders import model_builder
...@@ -169,12 +171,12 @@ def build_graph(pipeline_config, ...@@ -169,12 +171,12 @@ def build_graph(pipeline_config,
@function.Defun(capture_resource_var_by_value=False) @function.Defun(capture_resource_var_by_value=False)
def tpu_subgraph_predict(): def tpu_subgraph_predict():
if use_bfloat16: if use_bfloat16:
with tf.contrib.tpu.bfloat16_scope(): with bfloat16_scope():
return tf.contrib.tpu.rewrite(tpu_subgraph_predict_fn, return tpu.rewrite(tpu_subgraph_predict_fn,
[preprocessed_inputs, true_image_shapes]) [preprocessed_inputs, true_image_shapes])
else: else:
return tf.contrib.tpu.rewrite(tpu_subgraph_predict_fn, return tpu.rewrite(tpu_subgraph_predict_fn,
[preprocessed_inputs, true_image_shapes]) [preprocessed_inputs, true_image_shapes])
(rpn_box_encodings, rpn_objectness_predictions_with_background, anchors, (rpn_box_encodings, rpn_objectness_predictions_with_background, anchors,
refined_box_encodings, class_predictions_with_background, num_proposals, refined_box_encodings, class_predictions_with_background, num_proposals,
......
...@@ -30,6 +30,8 @@ if int(major) < 1 or (int(major == 1) and int(minor) < 14): ...@@ -30,6 +30,8 @@ if int(major) < 1 or (int(major == 1) and int(minor) < 14):
from tensorflow.python.framework import function from tensorflow.python.framework import function
from tensorflow.python.tpu import functional as tpu_functional from tensorflow.python.tpu import functional as tpu_functional
from tensorflow.python.tpu import tpu
from tensorflow.python.tpu.bfloat16 import bfloat16_scope
from tensorflow.python.tpu.ops import tpu_ops from tensorflow.python.tpu.ops import tpu_ops
from object_detection import exporter from object_detection import exporter
from object_detection.builders import model_builder from object_detection.builders import model_builder
...@@ -171,7 +173,7 @@ def build_graph(pipeline_config, ...@@ -171,7 +173,7 @@ def build_graph(pipeline_config,
# Dimshuffle: (b, c, h, w) -> (b, h, w, c) # Dimshuffle: (b, c, h, w) -> (b, h, w, c)
preprocessed_inputs = tf.transpose(preprocessed_inputs, perm=[0, 2, 3, 1]) preprocessed_inputs = tf.transpose(preprocessed_inputs, perm=[0, 2, 3, 1])
if use_bfloat16: if use_bfloat16:
with tf.contrib.tpu.bfloat16_scope(): with bfloat16_scope():
prediction_dict = detection_model.predict(preprocessed_inputs, prediction_dict = detection_model.predict(preprocessed_inputs,
true_image_shapes) true_image_shapes)
else: else:
...@@ -188,8 +190,8 @@ def build_graph(pipeline_config, ...@@ -188,8 +190,8 @@ def build_graph(pipeline_config,
@function.Defun(capture_resource_var_by_value=False) @function.Defun(capture_resource_var_by_value=False)
def predict_tpu(): def predict_tpu():
return tf.contrib.tpu.rewrite(predict_tpu_subgraph, return tpu.rewrite(predict_tpu_subgraph,
[preprocessed_inputs, true_image_shapes]) [preprocessed_inputs, true_image_shapes])
prediction_outputs = tpu_functional.TPUPartitionedCall( prediction_outputs = tpu_functional.TPUPartitionedCall(
args=predict_tpu.captured_inputs, args=predict_tpu.captured_inputs,
......
# Lint as: python2, python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved. # Copyright 2019 The TensorFlow Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
...@@ -18,6 +19,7 @@ from __future__ import absolute_import ...@@ -18,6 +19,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from six.moves import range
import tensorflow as tf import tensorflow as tf
from object_detection.tpu_exporters import utils from object_detection.tpu_exporters import utils
......
...@@ -24,6 +24,14 @@ import math ...@@ -24,6 +24,14 @@ import math
import six import six
import tensorflow as tf import tensorflow as tf
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import image as contrib_image
from tensorflow.contrib import training as contrib_training
except ImportError:
# TF 2.0 doesn't ship with contrib.
pass
# pylint: enable=g-import-not-at-top
# This signifies the max integer that the controller RNN could predict for the # This signifies the max integer that the controller RNN could predict for the
# augmentation scheme. # augmentation scheme.
...@@ -315,7 +323,7 @@ def rotate(image, degrees, replace): ...@@ -315,7 +323,7 @@ def rotate(image, degrees, replace):
# In practice, we should randomize the rotation degrees by flipping # In practice, we should randomize the rotation degrees by flipping
# it negatively half the time, but that's done on 'degrees' outside # it negatively half the time, but that's done on 'degrees' outside
# of the function. # of the function.
image = tf.contrib.image.rotate(wrap(image), radians) image = contrib_image.rotate(wrap(image), radians)
return unwrap(image, replace) return unwrap(image, replace)
...@@ -870,13 +878,13 @@ def rotate_with_bboxes(image, bboxes, degrees, replace): ...@@ -870,13 +878,13 @@ def rotate_with_bboxes(image, bboxes, degrees, replace):
def translate_x(image, pixels, replace): def translate_x(image, pixels, replace):
"""Equivalent of PIL Translate in X dimension.""" """Equivalent of PIL Translate in X dimension."""
image = tf.contrib.image.translate(wrap(image), [-pixels, 0]) image = contrib_image.translate(wrap(image), [-pixels, 0])
return unwrap(image, replace) return unwrap(image, replace)
def translate_y(image, pixels, replace): def translate_y(image, pixels, replace):
"""Equivalent of PIL Translate in Y dimension.""" """Equivalent of PIL Translate in Y dimension."""
image = tf.contrib.image.translate(wrap(image), [0, -pixels]) image = contrib_image.translate(wrap(image), [0, -pixels])
return unwrap(image, replace) return unwrap(image, replace)
...@@ -961,7 +969,7 @@ def shear_x(image, level, replace): ...@@ -961,7 +969,7 @@ def shear_x(image, level, replace):
# with a matrix form of: # with a matrix form of:
# [1 level # [1 level
# 0 1]. # 0 1].
image = tf.contrib.image.transform( image = contrib_image.transform(
wrap(image), [1., level, 0., 0., 1., 0., 0., 0.]) wrap(image), [1., level, 0., 0., 1., 0., 0., 0.])
return unwrap(image, replace) return unwrap(image, replace)
...@@ -972,7 +980,7 @@ def shear_y(image, level, replace): ...@@ -972,7 +980,7 @@ def shear_y(image, level, replace):
# with a matrix form of: # with a matrix form of:
# [1 0 # [1 0
# level 1]. # level 1].
image = tf.contrib.image.transform( image = contrib_image.transform(
wrap(image), [1., 0., 0., level, 1., 0., 0., 0.]) wrap(image), [1., 0., 0., level, 1., 0., 0., 0.])
return unwrap(image, replace) return unwrap(image, replace)
...@@ -1628,9 +1636,12 @@ def distort_image_with_autoaugment(image, bboxes, augmentation_name): ...@@ -1628,9 +1636,12 @@ def distort_image_with_autoaugment(image, bboxes, augmentation_name):
policy = available_policies[augmentation_name]() policy = available_policies[augmentation_name]()
# Hparams that will be used for AutoAugment. # Hparams that will be used for AutoAugment.
augmentation_hparams = tf.contrib.training.HParams( augmentation_hparams = contrib_training.HParams(
cutout_max_pad_fraction=0.75, cutout_bbox_replace_with_mean=False, cutout_max_pad_fraction=0.75,
cutout_const=100, translate_const=250, cutout_bbox_const=50, cutout_bbox_replace_with_mean=False,
cutout_const=100,
translate_const=250,
cutout_bbox_const=50,
translate_bbox_const=120) translate_bbox_const=120)
augmented_image, augmented_bbox = ( augmented_image, augmented_bbox = (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment