resolve merge conflicts

31ca3b97 · Kaushik Shivakumar · 3e9d886d · 7fcd7cba · 31ca3b97 · 31ca3b97
Commit 31ca3b97 authored Jul 23, 2020 by Kaushik Shivakumar
20 changed files
--- a/research/object_detection/configs/tf2/ssd_efficientdet_d0_512x512_coco17_tpu-8.config
+++ b/research/object_detection/configs/tf2/ssd_efficientdet_d0_512x512_coco17_tpu-8.config
+# SSD with EfficientNet-b0 + BiFPN feature extractor,
+# shared box predictor and focal loss (a.k.a. EfficientDet-d0).
+# See EfficientDet, Tan et al., https://arxiv.org/abs/1911.09070
+# See Focal Loss, Lin et al., https://arxiv.org/abs/1708.02002
+# Trained on COCO, initialized from an EfficientNet-b0 checkpoint.
+#
+# Train on TPU-8
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    add_background_class: false
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      multiscale_anchor_generator {
+        min_level: 3
+        max_level: 7
+        anchor_scale: 4.0
+        aspect_ratios: [1.0, 2.0, 0.5]
+        scales_per_octave: 3
+      }
+    }
+    image_resizer {
+      keep_aspect_ratio_resizer {
+        min_dimension: 512
+        max_dimension: 512
+        pad_to_max_dimension: true
+        }
+    }
+    box_predictor {
+      weight_shared_convolutional_box_predictor {
+        depth: 64
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          force_use_bias: true
+          activation: SWISH
+          regularizer {
+            l2_regularizer {
+              weight: 0.00004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            scale: true
+            decay: 0.99
+            epsilon: 0.001
+          }
+        }
+        num_layers_before_predictor: 3
+        kernel_size: 3
+        use_depthwise: true
+      }
+    }
+    feature_extractor {
+      type: 'ssd_efficientnet-b0_bifpn_keras'
+      bifpn {
+        min_level: 3
+        max_level: 7
+        num_iterations: 3
+        num_filters: 64
+      }
+      conv_hyperparams {
+        force_use_bias: true
+        activation: SWISH
+        regularizer {
+          l2_regularizer {
+            weight: 0.00004
+          }
+        }
+        initializer {
+          truncated_normal_initializer {
+            stddev: 0.03
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          scale: true,
+          decay: 0.99,
+          epsilon: 0.001,
+        }
+      }
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.25
+          gamma: 1.5
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.5
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-0"
+  fine_tune_checkpoint_version: V2
+  fine_tune_checkpoint_type: "classification"
+  batch_size: 128
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 8
+  use_bfloat16: true
+  num_steps: 300000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    random_scale_crop_and_pad_to_square {
+      output_size: 512
+      scale_min: 0.1
+      scale_max: 2.0
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: 8e-2
+          total_steps: 300000
+          warmup_learning_rate: .001
+          warmup_steps: 2500
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+  batch_size: 1;
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
--- a/research/object_detection/configs/tf2/ssd_efficientdet_d1_640x640_coco17_tpu-8.config
+++ b/research/object_detection/configs/tf2/ssd_efficientdet_d1_640x640_coco17_tpu-8.config
+# SSD with EfficientNet-b1 + BiFPN feature extractor,
+# shared box predictor and focal loss (a.k.a. EfficientDet-d1).
+# See EfficientDet, Tan et al., https://arxiv.org/abs/1911.09070
+# See Focal Loss, Lin et al., https://arxiv.org/abs/1708.02002
+# Trained on COCO, initialized from an EfficientNet-b1 checkpoint.
+#
+# Train on TPU-8
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    add_background_class: false
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      multiscale_anchor_generator {
+        min_level: 3
+        max_level: 7
+        anchor_scale: 4.0
+        aspect_ratios: [1.0, 2.0, 0.5]
+        scales_per_octave: 3
+      }
+    }
+    image_resizer {
+      keep_aspect_ratio_resizer {
+        min_dimension: 640
+        max_dimension: 640
+        pad_to_max_dimension: true
+        }
+    }
+    box_predictor {
+      weight_shared_convolutional_box_predictor {
+        depth: 88
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          force_use_bias: true
+          activation: SWISH
+          regularizer {
+            l2_regularizer {
+              weight: 0.00004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            scale: true
+            decay: 0.99
+            epsilon: 0.001
+          }
+        }
+        num_layers_before_predictor: 3
+        kernel_size: 3
+        use_depthwise: true
+      }
+    }
+    feature_extractor {
+      type: 'ssd_efficientnet-b1_bifpn_keras'
+      bifpn {
+        min_level: 3
+        max_level: 7
+        num_iterations: 4
+        num_filters: 88
+      }
+      conv_hyperparams {
+        force_use_bias: true
+        activation: SWISH
+        regularizer {
+          l2_regularizer {
+            weight: 0.00004
+          }
+        }
+        initializer {
+          truncated_normal_initializer {
+            stddev: 0.03
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          scale: true,
+          decay: 0.99,
+          epsilon: 0.001,
+        }
+      }
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.25
+          gamma: 1.5
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.5
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-0"
+  fine_tune_checkpoint_version: V2
+  fine_tune_checkpoint_type: "classification"
+  batch_size: 128
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 8
+  use_bfloat16: true
+  num_steps: 300000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    random_scale_crop_and_pad_to_square {
+      output_size: 640
+      scale_min: 0.1
+      scale_max: 2.0
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: 8e-2
+          total_steps: 300000
+          warmup_learning_rate: .001
+          warmup_steps: 2500
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+  batch_size: 1;
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
--- a/research/object_detection/configs/tf2/ssd_efficientdet_d2_768x768_coco17_tpu-8.config
+++ b/research/object_detection/configs/tf2/ssd_efficientdet_d2_768x768_coco17_tpu-8.config
+# SSD with EfficientNet-b2 + BiFPN feature extractor,
+# shared box predictor and focal loss (a.k.a. EfficientDet-d2).
+# See EfficientDet, Tan et al., https://arxiv.org/abs/1911.09070
+# See Focal Loss, Lin et al., https://arxiv.org/abs/1708.02002
+# Trained on COCO, initialized from an EfficientNet-b2 checkpoint.
+#
+# Train on TPU-8
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    add_background_class: false
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      multiscale_anchor_generator {
+        min_level: 3
+        max_level: 7
+        anchor_scale: 4.0
+        aspect_ratios: [1.0, 2.0, 0.5]
+        scales_per_octave: 3
+      }
+    }
+    image_resizer {
+      keep_aspect_ratio_resizer {
+        min_dimension: 768
+        max_dimension: 768
+        pad_to_max_dimension: true
+        }
+    }
+    box_predictor {
+      weight_shared_convolutional_box_predictor {
+        depth: 112
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          force_use_bias: true
+          activation: SWISH
+          regularizer {
+            l2_regularizer {
+              weight: 0.00004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            scale: true
+            decay: 0.99
+            epsilon: 0.001
+          }
+        }
+        num_layers_before_predictor: 3
+        kernel_size: 3
+        use_depthwise: true
+      }
+    }
+    feature_extractor {
+      type: 'ssd_efficientnet-b2_bifpn_keras'
+      bifpn {
+        min_level: 3
+        max_level: 7
+        num_iterations: 5
+        num_filters: 112
+      }
+      conv_hyperparams {
+        force_use_bias: true
+        activation: SWISH
+        regularizer {
+          l2_regularizer {
+            weight: 0.00004
+          }
+        }
+        initializer {
+          truncated_normal_initializer {
+            stddev: 0.03
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          scale: true,
+          decay: 0.99,
+          epsilon: 0.001,
+        }
+      }
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.25
+          gamma: 1.5
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.5
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-0"
+  fine_tune_checkpoint_version: V2
+  fine_tune_checkpoint_type: "classification"
+  batch_size: 128
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 8
+  use_bfloat16: true
+  num_steps: 300000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    random_scale_crop_and_pad_to_square {
+      output_size: 768
+      scale_min: 0.1
+      scale_max: 2.0
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: 8e-2
+          total_steps: 300000
+          warmup_learning_rate: .001
+          warmup_steps: 2500
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+  batch_size: 1;
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
--- a/research/object_detection/configs/tf2/ssd_efficientdet_d3_896x896_coco17_tpu-32.config
+++ b/research/object_detection/configs/tf2/ssd_efficientdet_d3_896x896_coco17_tpu-32.config
+# SSD with EfficientNet-b3 + BiFPN feature extractor,
+# shared box predictor and focal loss (a.k.a. EfficientDet-d3).
+# See EfficientDet, Tan et al., https://arxiv.org/abs/1911.09070
+# See Focal Loss, Lin et al., https://arxiv.org/abs/1708.02002
+# Trained on COCO, initialized from an EfficientNet-b3 checkpoint.
+#
+# Train on TPU-32
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    add_background_class: false
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      multiscale_anchor_generator {
+        min_level: 3
+        max_level: 7
+        anchor_scale: 4.0
+        aspect_ratios: [1.0, 2.0, 0.5]
+        scales_per_octave: 3
+      }
+    }
+    image_resizer {
+      keep_aspect_ratio_resizer {
+        min_dimension: 896
+        max_dimension: 896
+        pad_to_max_dimension: true
+        }
+    }
+    box_predictor {
+      weight_shared_convolutional_box_predictor {
+        depth: 160
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          force_use_bias: true
+          activation: SWISH
+          regularizer {
+            l2_regularizer {
+              weight: 0.00004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            scale: true
+            decay: 0.99
+            epsilon: 0.001
+          }
+        }
+        num_layers_before_predictor: 4
+        kernel_size: 3
+        use_depthwise: true
+      }
+    }
+    feature_extractor {
+      type: 'ssd_efficientnet-b3_bifpn_keras'
+      bifpn {
+        min_level: 3
+        max_level: 7
+        num_iterations: 6
+        num_filters: 160
+      }
+      conv_hyperparams {
+        force_use_bias: true
+        activation: SWISH
+        regularizer {
+          l2_regularizer {
+            weight: 0.00004
+          }
+        }
+        initializer {
+          truncated_normal_initializer {
+            stddev: 0.03
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          scale: true,
+          decay: 0.99,
+          epsilon: 0.001,
+        }
+      }
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.25
+          gamma: 1.5
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.5
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-0"
+  fine_tune_checkpoint_version: V2
+  fine_tune_checkpoint_type: "classification"
+  batch_size: 128
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 8
+  use_bfloat16: true
+  num_steps: 300000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    random_scale_crop_and_pad_to_square {
+      output_size: 896
+      scale_min: 0.1
+      scale_max: 2.0
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: 8e-2
+          total_steps: 300000
+          warmup_learning_rate: .001
+          warmup_steps: 2500
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+  batch_size: 1;
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
--- a/research/object_detection/configs/tf2/ssd_efficientdet_d4_1024x1024_coco17_tpu-32.config
+++ b/research/object_detection/configs/tf2/ssd_efficientdet_d4_1024x1024_coco17_tpu-32.config
+# SSD with EfficientNet-b4 + BiFPN feature extractor,
+# shared box predictor and focal loss (a.k.a. EfficientDet-d4).
+# See EfficientDet, Tan et al., https://arxiv.org/abs/1911.09070
+# See Focal Loss, Lin et al., https://arxiv.org/abs/1708.02002
+# Trained on COCO, initialized from an EfficientNet-b4 checkpoint.
+#
+# Train on TPU-32
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    add_background_class: false
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      multiscale_anchor_generator {
+        min_level: 3
+        max_level: 7
+        anchor_scale: 4.0
+        aspect_ratios: [1.0, 2.0, 0.5]
+        scales_per_octave: 3
+      }
+    }
+    image_resizer {
+      keep_aspect_ratio_resizer {
+        min_dimension: 1024
+        max_dimension: 1024
+        pad_to_max_dimension: true
+        }
+    }
+    box_predictor {
+      weight_shared_convolutional_box_predictor {
+        depth: 224
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          force_use_bias: true
+          activation: SWISH
+          regularizer {
+            l2_regularizer {
+              weight: 0.00004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            scale: true
+            decay: 0.99
+            epsilon: 0.001
+          }
+        }
+        num_layers_before_predictor: 4
+        kernel_size: 3
+        use_depthwise: true
+      }
+    }
+    feature_extractor {
+      type: 'ssd_efficientnet-b4_bifpn_keras'
+      bifpn {
+        min_level: 3
+        max_level: 7
+        num_iterations: 7
+        num_filters: 224
+      }
+      conv_hyperparams {
+        force_use_bias: true
+        activation: SWISH
+        regularizer {
+          l2_regularizer {
+            weight: 0.00004
+          }
+        }
+        initializer {
+          truncated_normal_initializer {
+            stddev: 0.03
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          scale: true,
+          decay: 0.99,
+          epsilon: 0.001,
+        }
+      }
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.25
+          gamma: 1.5
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.5
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-0"
+  fine_tune_checkpoint_version: V2
+  fine_tune_checkpoint_type: "classification"
+  batch_size: 128
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 8
+  use_bfloat16: true
+  num_steps: 300000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    random_scale_crop_and_pad_to_square {
+      output_size: 1024
+      scale_min: 0.1
+      scale_max: 2.0
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: 8e-2
+          total_steps: 300000
+          warmup_learning_rate: .001
+          warmup_steps: 2500
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+  batch_size: 1;
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
--- a/research/object_detection/configs/tf2/ssd_efficientdet_d5_1280x1280_coco17_tpu-32.config
+++ b/research/object_detection/configs/tf2/ssd_efficientdet_d5_1280x1280_coco17_tpu-32.config
+# SSD with EfficientNet-b5 + BiFPN feature extractor,
+# shared box predictor and focal loss (a.k.a. EfficientDet-d5).
+# See EfficientDet, Tan et al., https://arxiv.org/abs/1911.09070
+# See Focal Loss, Lin et al., https://arxiv.org/abs/1708.02002
+# Trained on COCO, initialized from an EfficientNet-b5 checkpoint.
+#
+# Train on TPU-32
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    add_background_class: false
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      multiscale_anchor_generator {
+        min_level: 3
+        max_level: 7
+        anchor_scale: 4.0
+        aspect_ratios: [1.0, 2.0, 0.5]
+        scales_per_octave: 3
+      }
+    }
+    image_resizer {
+      keep_aspect_ratio_resizer {
+        min_dimension: 1280
+        max_dimension: 1280
+        pad_to_max_dimension: true
+        }
+    }
+    box_predictor {
+      weight_shared_convolutional_box_predictor {
+        depth: 288
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          force_use_bias: true
+          activation: SWISH
+          regularizer {
+            l2_regularizer {
+              weight: 0.00004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            scale: true
+            decay: 0.99
+            epsilon: 0.001
+          }
+        }
+        num_layers_before_predictor: 4
+        kernel_size: 3
+        use_depthwise: true
+      }
+    }
+    feature_extractor {
+      type: 'ssd_efficientnet-b5_bifpn_keras'
+      bifpn {
+        min_level: 3
+        max_level: 7
+        num_iterations: 7
+        num_filters: 288
+      }
+      conv_hyperparams {
+        force_use_bias: true
+        activation: SWISH
+        regularizer {
+          l2_regularizer {
+            weight: 0.00004
+          }
+        }
+        initializer {
+          truncated_normal_initializer {
+            stddev: 0.03
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          scale: true,
+          decay: 0.99,
+          epsilon: 0.001,
+        }
+      }
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.25
+          gamma: 1.5
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.5
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-0"
+  fine_tune_checkpoint_version: V2
+  fine_tune_checkpoint_type: "classification"
+  batch_size: 128
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 8
+  use_bfloat16: true
+  num_steps: 300000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    random_scale_crop_and_pad_to_square {
+      output_size: 1280
+      scale_min: 0.1
+      scale_max: 2.0
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: 8e-2
+          total_steps: 300000
+          warmup_learning_rate: .001
+          warmup_steps: 2500
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+  batch_size: 1;
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
--- a/research/object_detection/configs/tf2/ssd_efficientdet_d6_1408x1408_coco17_tpu-32.config
+++ b/research/object_detection/configs/tf2/ssd_efficientdet_d6_1408x1408_coco17_tpu-32.config
+# SSD with EfficientNet-b6 + BiFPN feature extractor,
+# shared box predictor and focal loss (a.k.a. EfficientDet-d6).
+# See EfficientDet, Tan et al., https://arxiv.org/abs/1911.09070
+# See Focal Loss, Lin et al., https://arxiv.org/abs/1708.02002
+# Trained on COCO, initialized from an EfficientNet-b6 checkpoint.
+#
+# Train on TPU-32
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    add_background_class: false
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      multiscale_anchor_generator {
+        min_level: 3
+        max_level: 7
+        anchor_scale: 4.0
+        aspect_ratios: [1.0, 2.0, 0.5]
+        scales_per_octave: 3
+      }
+    }
+    image_resizer {
+      keep_aspect_ratio_resizer {
+        min_dimension: 1408
+        max_dimension: 1408
+        pad_to_max_dimension: true
+        }
+    }
+    box_predictor {
+      weight_shared_convolutional_box_predictor {
+        depth: 384
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          force_use_bias: true
+          activation: SWISH
+          regularizer {
+            l2_regularizer {
+              weight: 0.00004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            scale: true
+            decay: 0.99
+            epsilon: 0.001
+          }
+        }
+        num_layers_before_predictor: 5
+        kernel_size: 3
+        use_depthwise: true
+      }
+    }
+    feature_extractor {
+      type: 'ssd_efficientnet-b6_bifpn_keras'
+      bifpn {
+        min_level: 3
+        max_level: 7
+        num_iterations: 8
+        num_filters: 384
+        # Use unweighted sum for stability.
+        combine_method: 'sum'
+      }
+      conv_hyperparams {
+        force_use_bias: true
+        activation: SWISH
+        regularizer {
+          l2_regularizer {
+            weight: 0.00004
+          }
+        }
+        initializer {
+          truncated_normal_initializer {
+            stddev: 0.03
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          scale: true,
+          decay: 0.99,
+          epsilon: 0.001,
+        }
+      }
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.25
+          gamma: 1.5
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.5
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-0"
+  fine_tune_checkpoint_version: V2
+  fine_tune_checkpoint_type: "classification"
+  batch_size: 128
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 8
+  use_bfloat16: true
+  num_steps: 300000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    random_scale_crop_and_pad_to_square {
+      output_size: 1408
+      scale_min: 0.1
+      scale_max: 2.0
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: 8e-2
+          total_steps: 300000
+          warmup_learning_rate: .001
+          warmup_steps: 2500
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+  batch_size: 1;
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
--- a/research/object_detection/configs/tf2/ssd_efficientdet_d7_1536x1536_coco17_tpu-32.config
+++ b/research/object_detection/configs/tf2/ssd_efficientdet_d7_1536x1536_coco17_tpu-32.config
+# SSD with EfficientNet-b6 + BiFPN feature extractor,
+# shared box predictor and focal loss (a.k.a EfficientDet-d7).
+# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
+# See Lin et al, https://arxiv.org/abs/1708.02002
+# Trained on COCO, initialized from an EfficientNet-b6 checkpoint.
+#
+# Train on TPU-32
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    add_background_class: false
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      multiscale_anchor_generator {
+        min_level: 3
+        max_level: 7
+        anchor_scale: 4.0
+        aspect_ratios: [1.0, 2.0, 0.5]
+        scales_per_octave: 3
+      }
+    }
+    image_resizer {
+      keep_aspect_ratio_resizer {
+        min_dimension: 1536
+        max_dimension: 1536
+        pad_to_max_dimension: true
+        }
+    }
+    box_predictor {
+      weight_shared_convolutional_box_predictor {
+        depth: 384
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          force_use_bias: true
+          activation: SWISH
+          regularizer {
+            l2_regularizer {
+              weight: 0.00004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            scale: true
+            decay: 0.99
+            epsilon: 0.001
+          }
+        }
+        num_layers_before_predictor: 5
+        kernel_size: 3
+        use_depthwise: true
+      }
+    }
+    feature_extractor {
+      type: 'ssd_efficientnet-b6_bifpn_keras'
+      bifpn {
+        min_level: 3
+        max_level: 7
+        num_iterations: 8
+        num_filters: 384
+        # Use unweighted sum for stability.
+        combine_method: 'sum'
+      }
+      conv_hyperparams {
+        force_use_bias: true
+        activation: SWISH
+        regularizer {
+          l2_regularizer {
+            weight: 0.00004
+          }
+        }
+        initializer {
+          truncated_normal_initializer {
+            stddev: 0.03
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          scale: true,
+          decay: 0.99,
+          epsilon: 0.001,
+        }
+      }
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.25
+          gamma: 1.5
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.5
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-0"
+  fine_tune_checkpoint_version: V2
+  fine_tune_checkpoint_type: "classification"
+  batch_size: 128
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 8
+  use_bfloat16: true
+  num_steps: 300000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    random_scale_crop_and_pad_to_square {
+      output_size: 1536
+      scale_min: 0.1
+      scale_max: 2.0
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: 8e-2
+          total_steps: 300000
+          warmup_learning_rate: .001
+          warmup_steps: 2500
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+  batch_size: 1;
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
--- a/research/object_detection/configs/tf2/ssd_mobilenet_v1_fpn_640x640_coco17_tpu-8.config
+++ b/research/object_detection/configs/tf2/ssd_mobilenet_v1_fpn_640x640_coco17_tpu-8.config
+# SSD with Mobilenet v1 FPN feature extractor, shared box predictor and focal
+# loss (a.k.a Retinanet).
+# See Lin et al, https://arxiv.org/abs/1708.02002
+# Trained on COCO, initialized from Imagenet classification checkpoint
+# Train on TPU-8
+#
+# Achieves 29.1 mAP on COCO17 Val
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      multiscale_anchor_generator {
+        min_level: 3
+        max_level: 7
+        anchor_scale: 4.0
+        aspect_ratios: [1.0, 2.0, 0.5]
+        scales_per_octave: 2
+      }
+    }
+    image_resizer {
+      fixed_shape_resizer {
+        height: 640
+        width: 640
+      }
+    }
+    box_predictor {
+      weight_shared_convolutional_box_predictor {
+        depth: 256
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          activation: RELU_6,
+          regularizer {
+            l2_regularizer {
+              weight: 0.00004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            scale: true,
+            decay: 0.997,
+            epsilon: 0.001,
+          }
+        }
+        num_layers_before_predictor: 4
+        kernel_size: 3
+      }
+    }
+    feature_extractor {
+      type: 'ssd_mobilenet_v1_fpn_keras'
+      fpn {
+        min_level: 3
+        max_level: 7
+      }
+      min_depth: 16
+      depth_multiplier: 1.0
+      conv_hyperparams {
+        activation: RELU_6,
+        regularizer {
+          l2_regularizer {
+            weight: 0.00004
+          }
+        }
+        initializer {
+          random_normal_initializer {
+            stddev: 0.01
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          scale: true,
+          decay: 0.997,
+          epsilon: 0.001,
+        }
+      }
+      override_base_feature_extractor_hyperparams: true
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.25
+          gamma: 2.0
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.6
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  fine_tune_checkpoint_version: V2
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/mobilenet_v1.ckpt-1"
+  fine_tune_checkpoint_type: "classification"
+  batch_size: 64
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 8
+  num_steps: 25000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    random_crop_image {
+      min_object_covered: 0.0
+      min_aspect_ratio: 0.75
+      max_aspect_ratio: 3.0
+      min_area: 0.75
+      max_area: 1.0
+      overlap_thresh: 0.0
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: .04
+          total_steps: 25000
+          warmup_learning_rate: .013333
+          warmup_steps: 2000
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+  batch_size: 1;
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
--- a/research/object_detection/configs/tf2/ssd_mobilenet_v2_320x320_coco17_tpu-8.config
+++ b/research/object_detection/configs/tf2/ssd_mobilenet_v2_320x320_coco17_tpu-8.config
+# SSD with Mobilenet v2
+# Trained on COCO17, initialized from Imagenet classification checkpoint
+# Train on TPU-8
+#
+# Achieves 22.2 mAP on COCO17 Val
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      ssd_anchor_generator {
+        num_layers: 6
+        min_scale: 0.2
+        max_scale: 0.95
+        aspect_ratios: 1.0
+        aspect_ratios: 2.0
+        aspect_ratios: 0.5
+        aspect_ratios: 3.0
+        aspect_ratios: 0.3333
+      }
+    }
+    image_resizer {
+      fixed_shape_resizer {
+        height: 300
+        width: 300
+      }
+    }
+    box_predictor {
+      convolutional_box_predictor {
+        min_depth: 0
+        max_depth: 0
+        num_layers_before_predictor: 0
+        use_dropout: false
+        dropout_keep_probability: 0.8
+        kernel_size: 1
+        box_code_size: 4
+        apply_sigmoid_to_scores: false
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          activation: RELU_6,
+          regularizer {
+            l2_regularizer {
+              weight: 0.00004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            train: true,
+            scale: true,
+            center: true,
+            decay: 0.97,
+            epsilon: 0.001,
+          }
+        }
+      }
+    }
+    feature_extractor {
+      type: 'ssd_mobilenet_v2_keras'
+      min_depth: 16
+      depth_multiplier: 1.0
+      conv_hyperparams {
+        activation: RELU_6,
+        regularizer {
+          l2_regularizer {
+            weight: 0.00004
+          }
+        }
+        initializer {
+          truncated_normal_initializer {
+            stddev: 0.03
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          train: true,
+          scale: true,
+          center: true,
+          decay: 0.97,
+          epsilon: 0.001,
+        }
+      }
+      override_base_feature_extractor_hyperparams: true
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.75,
+          gamma: 2.0
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+          delta: 1.0
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.6
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  fine_tune_checkpoint_version: V2
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/mobilenet_v2.ckpt-1"
+  fine_tune_checkpoint_type: "classification"
+  batch_size: 512
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 8
+  num_steps: 50000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    ssd_random_crop {
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: .8
+          total_steps: 50000
+          warmup_learning_rate: 0.13333
+          warmup_steps: 2000
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
--- a/research/object_detection/configs/tf2/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.config
+++ b/research/object_detection/configs/tf2/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.config
+# SSD with Mobilenet v2 FPN-lite (go/fpn-lite) feature extractor, shared box
+# predictor and focal loss (a mobile version of Retinanet).
+# Retinanet: see Lin et al, https://arxiv.org/abs/1708.02002
+# Trained on COCO, initialized from Imagenet classification checkpoint
+# Train on TPU-8
+#
+# Achieves 22.2 mAP on COCO17 Val
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      multiscale_anchor_generator {
+        min_level: 3
+        max_level: 7
+        anchor_scale: 4.0
+        aspect_ratios: [1.0, 2.0, 0.5]
+        scales_per_octave: 2
+      }
+    }
+    image_resizer {
+      fixed_shape_resizer {
+        height: 320
+        width: 320
+      }
+    }
+    box_predictor {
+      weight_shared_convolutional_box_predictor {
+        depth: 128
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          activation: RELU_6,
+          regularizer {
+            l2_regularizer {
+              weight: 0.00004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            scale: true,
+            decay: 0.997,
+            epsilon: 0.001,
+          }
+        }
+        num_layers_before_predictor: 4
+        share_prediction_tower: true
+        use_depthwise: true
+        kernel_size: 3
+      }
+    }
+    feature_extractor {
+      type: 'ssd_mobilenet_v2_fpn_keras'
+      use_depthwise: true
+      fpn {
+        min_level: 3
+        max_level: 7
+        additional_layer_depth: 128
+      }
+      min_depth: 16
+      depth_multiplier: 1.0
+      conv_hyperparams {
+        activation: RELU_6,
+        regularizer {
+          l2_regularizer {
+            weight: 0.00004
+          }
+        }
+        initializer {
+          random_normal_initializer {
+            stddev: 0.01
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          scale: true,
+          decay: 0.997,
+          epsilon: 0.001,
+        }
+      }
+      override_base_feature_extractor_hyperparams: true
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.25
+          gamma: 2.0
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.6
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  fine_tune_checkpoint_version: V2
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/mobilenet_v2.ckpt-1"
+  fine_tune_checkpoint_type: "classification"
+  batch_size: 128
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 8
+  num_steps: 50000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    random_crop_image {
+      min_object_covered: 0.0
+      min_aspect_ratio: 0.75
+      max_aspect_ratio: 3.0
+      min_area: 0.75
+      max_area: 1.0
+      overlap_thresh: 0.0
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: .08
+          total_steps: 50000
+          warmup_learning_rate: .026666
+          warmup_steps: 1000
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
+
--- a/research/object_detection/configs/tf2/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.config
+++ b/research/object_detection/configs/tf2/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.config
+# SSD with Mobilenet v2 FPN-lite (go/fpn-lite) feature extractor, shared box
+# predictor and focal loss (a mobile version of Retinanet).
+# Retinanet: see Lin et al, https://arxiv.org/abs/1708.02002
+# Trained on COCO, initialized from Imagenet classification checkpoint
+# Train on TPU-8
+#
+# Achieves 28.2 mAP on COCO17 Val
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      multiscale_anchor_generator {
+        min_level: 3
+        max_level: 7
+        anchor_scale: 4.0
+        aspect_ratios: [1.0, 2.0, 0.5]
+        scales_per_octave: 2
+      }
+    }
+    image_resizer {
+      fixed_shape_resizer {
+        height: 640
+        width: 640
+      }
+    }
+    box_predictor {
+      weight_shared_convolutional_box_predictor {
+        depth: 128
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          activation: RELU_6,
+          regularizer {
+            l2_regularizer {
+              weight: 0.00004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            scale: true,
+            decay: 0.997,
+            epsilon: 0.001,
+          }
+        }
+        num_layers_before_predictor: 4
+        share_prediction_tower: true
+        use_depthwise: true
+        kernel_size: 3
+      }
+    }
+    feature_extractor {
+      type: 'ssd_mobilenet_v2_fpn_keras'
+      use_depthwise: true
+      fpn {
+        min_level: 3
+        max_level: 7
+        additional_layer_depth: 128
+      }
+      min_depth: 16
+      depth_multiplier: 1.0
+      conv_hyperparams {
+        activation: RELU_6,
+        regularizer {
+          l2_regularizer {
+            weight: 0.00004
+          }
+        }
+        initializer {
+          random_normal_initializer {
+            stddev: 0.01
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          scale: true,
+          decay: 0.997,
+          epsilon: 0.001,
+        }
+      }
+      override_base_feature_extractor_hyperparams: true
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.25
+          gamma: 2.0
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.6
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  fine_tune_checkpoint_version: V2
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/mobilenet_v2.ckpt-1"
+  fine_tune_checkpoint_type: "classification"
+  batch_size: 128
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 8
+  num_steps: 50000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    random_crop_image {
+      min_object_covered: 0.0
+      min_aspect_ratio: 0.75
+      max_aspect_ratio: 3.0
+      min_area: 0.75
+      max_area: 1.0
+      overlap_thresh: 0.0
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: .08
+          total_steps: 50000
+          warmup_learning_rate: .026666
+          warmup_steps: 1000
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
+
--- a/research/object_detection/configs/tf2/ssd_resnet101_v1_fpn_1024x1024_coco17_tpu-8.config
+++ b/research/object_detection/configs/tf2/ssd_resnet101_v1_fpn_1024x1024_coco17_tpu-8.config
+# SSD with Resnet 101 v1 FPN feature extractor, shared box predictor and focal
+# loss (a.k.a Retinanet).
+# See Lin et al, https://arxiv.org/abs/1708.02002
+# Trained on COCO, initialized from Imagenet classification checkpoint
+# Train on TPU-8
+#
+# Achieves 39.5 mAP on COCO17 Val
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      multiscale_anchor_generator {
+        min_level: 3
+        max_level: 7
+        anchor_scale: 4.0
+        aspect_ratios: [1.0, 2.0, 0.5]
+        scales_per_octave: 2
+      }
+    }
+    image_resizer {
+      fixed_shape_resizer {
+        height: 1024
+        width: 1024
+      }
+    }
+    box_predictor {
+      weight_shared_convolutional_box_predictor {
+        depth: 256
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          activation: RELU_6,
+          regularizer {
+            l2_regularizer {
+              weight: 0.0004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            scale: true,
+            decay: 0.997,
+            epsilon: 0.001,
+          }
+        }
+        num_layers_before_predictor: 4
+        kernel_size: 3
+      }
+    }
+    feature_extractor {
+      type: 'ssd_resnet101_v1_fpn_keras'
+      fpn {
+        min_level: 3
+        max_level: 7
+      }
+      min_depth: 16
+      depth_multiplier: 1.0
+      conv_hyperparams {
+        activation: RELU_6,
+        regularizer {
+          l2_regularizer {
+            weight: 0.0004
+          }
+        }
+        initializer {
+          truncated_normal_initializer {
+            stddev: 0.03
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          scale: true,
+          decay: 0.997,
+          epsilon: 0.001,
+        }
+      }
+      override_base_feature_extractor_hyperparams: true
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.25
+          gamma: 2.0
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.6
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  fine_tune_checkpoint_version: V2
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet101.ckpt-1"
+  fine_tune_checkpoint_type: "classification"
+  batch_size: 64
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 8
+  use_bfloat16: true
+  num_steps: 100000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    random_crop_image {
+      min_object_covered: 0.0
+      min_aspect_ratio: 0.75
+      max_aspect_ratio: 3.0
+      min_area: 0.75
+      max_area: 1.0
+      overlap_thresh: 0.0
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: .04
+          total_steps: 100000
+          warmup_learning_rate: .013333
+          warmup_steps: 2000
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
--- a/research/object_detection/configs/tf2/ssd_resnet101_v1_fpn_640x640_coco17_tpu-8.config
+++ b/research/object_detection/configs/tf2/ssd_resnet101_v1_fpn_640x640_coco17_tpu-8.config
+# SSD with ResNet-101 v1 FPN feature extractor, shared box predictor and focal
+# loss (a.k.a. RetinaNet).
+# See Lin et al., https://arxiv.org/abs/1708.02002
+# Trained on COCO, initialized from an ImageNet classification checkpoint.
+# Train on TPU-8
+#
+# Achieves 35.4 mAP on COCO17 Val
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      multiscale_anchor_generator {
+        min_level: 3
+        max_level: 7
+        anchor_scale: 4.0
+        aspect_ratios: [1.0, 2.0, 0.5]
+        scales_per_octave: 2
+      }
+    }
+    image_resizer {
+      fixed_shape_resizer {
+        height: 640
+        width: 640
+      }
+    }
+    box_predictor {
+      weight_shared_convolutional_box_predictor {
+        depth: 256
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          activation: RELU_6,
+          regularizer {
+            l2_regularizer {
+              weight: 0.0004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            scale: true,
+            decay: 0.997,
+            epsilon: 0.001,
+          }
+        }
+        num_layers_before_predictor: 4
+        kernel_size: 3
+      }
+    }
+    feature_extractor {
+      type: 'ssd_resnet101_v1_fpn_keras'
+      fpn {
+        min_level: 3
+        max_level: 7
+      }
+      min_depth: 16
+      depth_multiplier: 1.0
+      conv_hyperparams {
+        activation: RELU_6,
+        regularizer {
+          l2_regularizer {
+            weight: 0.0004
+          }
+        }
+        initializer {
+          truncated_normal_initializer {
+            stddev: 0.03
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          scale: true,
+          decay: 0.997,
+          epsilon: 0.001,
+        }
+      }
+      override_base_feature_extractor_hyperparams: true
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.25
+          gamma: 2.0
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.6
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  fine_tune_checkpoint_version: V2
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet101.ckpt-1"
+  fine_tune_checkpoint_type: "classification"
+  batch_size: 64
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 8
+  use_bfloat16: true
+  num_steps: 25000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    random_crop_image {
+      min_object_covered: 0.0
+      min_aspect_ratio: 0.75
+      max_aspect_ratio: 3.0
+      min_area: 0.75
+      max_area: 1.0
+      overlap_thresh: 0.0
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: .04
+          total_steps: 25000
+          warmup_learning_rate: .013333
+          warmup_steps: 2000
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
--- a/research/object_detection/configs/tf2/ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8.config
+++ b/research/object_detection/configs/tf2/ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8.config
+# SSD with ResNet-152 v1 FPN feature extractor, shared box predictor and focal
+# loss (a.k.a. RetinaNet).
+# See Lin et al., https://arxiv.org/abs/1708.02002
+# Trained on COCO, initialized from an ImageNet classification checkpoint.
+# Train on TPU-8
+#
+# Achieves 39.6 mAP on COCO17 Val
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      multiscale_anchor_generator {
+        min_level: 3
+        max_level: 7
+        anchor_scale: 4.0
+        aspect_ratios: [1.0, 2.0, 0.5]
+        scales_per_octave: 2
+      }
+    }
+    image_resizer {
+      fixed_shape_resizer {
+        height: 1024
+        width: 1024
+      }
+    }
+    box_predictor {
+      weight_shared_convolutional_box_predictor {
+        depth: 256
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          activation: RELU_6,
+          regularizer {
+            l2_regularizer {
+              weight: 0.0004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            scale: true,
+            decay: 0.997,
+            epsilon: 0.001,
+          }
+        }
+        num_layers_before_predictor: 4
+        kernel_size: 3
+      }
+    }
+    feature_extractor {
+      type: 'ssd_resnet152_v1_fpn_keras'
+      fpn {
+        min_level: 3
+        max_level: 7
+      }
+      min_depth: 16
+      depth_multiplier: 1.0
+      conv_hyperparams {
+        activation: RELU_6,
+        regularizer {
+          l2_regularizer {
+            weight: 0.0004
+          }
+        }
+        initializer {
+          truncated_normal_initializer {
+            stddev: 0.03
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          scale: true,
+          decay: 0.997,
+          epsilon: 0.001,
+        }
+      }
+      override_base_feature_extractor_hyperparams: true
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.25
+          gamma: 2.0
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.6
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  fine_tune_checkpoint_version: V2
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet152.ckpt-1"
+  fine_tune_checkpoint_type: "classification"
+  batch_size: 64
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 8
+  use_bfloat16: true
+  num_steps: 100000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    random_crop_image {
+      min_object_covered: 0.0
+      min_aspect_ratio: 0.75
+      max_aspect_ratio: 3.0
+      min_area: 0.75
+      max_area: 1.0
+      overlap_thresh: 0.0
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: .04
+          total_steps: 100000
+          warmup_learning_rate: .013333
+          warmup_steps: 2000
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
--- a/research/object_detection/configs/tf2/ssd_resnet152_v1_fpn_640x640_coco17_tpu-8.config
+++ b/research/object_detection/configs/tf2/ssd_resnet152_v1_fpn_640x640_coco17_tpu-8.config
+# SSD with ResNet-152 v1 FPN feature extractor, shared box predictor and focal
+# loss (a.k.a. RetinaNet).
+# See Lin et al., https://arxiv.org/abs/1708.02002
+# Trained on COCO, initialized from an ImageNet classification checkpoint.
+# Train on TPU-8
+#
+# Achieves 35.6 mAP on COCO17 Val
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      multiscale_anchor_generator {
+        min_level: 3
+        max_level: 7
+        anchor_scale: 4.0
+        aspect_ratios: [1.0, 2.0, 0.5]
+        scales_per_octave: 2
+      }
+    }
+    image_resizer {
+      fixed_shape_resizer {
+        height: 640
+        width: 640
+      }
+    }
+    box_predictor {
+      weight_shared_convolutional_box_predictor {
+        depth: 256
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          activation: RELU_6,
+          regularizer {
+            l2_regularizer {
+              weight: 0.0004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            scale: true,
+            decay: 0.997,
+            epsilon: 0.001,
+          }
+        }
+        num_layers_before_predictor: 4
+        kernel_size: 3
+      }
+    }
+    feature_extractor {
+      type: 'ssd_resnet152_v1_fpn_keras'
+      fpn {
+        min_level: 3
+        max_level: 7
+      }
+      min_depth: 16
+      depth_multiplier: 1.0
+      conv_hyperparams {
+        activation: RELU_6,
+        regularizer {
+          l2_regularizer {
+            weight: 0.0004
+          }
+        }
+        initializer {
+          truncated_normal_initializer {
+            stddev: 0.03
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          scale: true,
+          decay: 0.997,
+          epsilon: 0.001,
+        }
+      }
+      override_base_feature_extractor_hyperparams: true
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.25
+          gamma: 2.0
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.6
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  fine_tune_checkpoint_version: V2
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet152.ckpt-1"
+  fine_tune_checkpoint_type: "classification"
+  batch_size: 64
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 8
+  use_bfloat16: true
+  num_steps: 25000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    random_crop_image {
+      min_object_covered: 0.0
+      min_aspect_ratio: 0.75
+      max_aspect_ratio: 3.0
+      min_area: 0.75
+      max_area: 1.0
+      overlap_thresh: 0.0
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: .04
+          total_steps: 25000
+          warmup_learning_rate: .013333
+          warmup_steps: 2000
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
--- a/research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_1024x1024_coco17_tpu-8.config
+++ b/research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_1024x1024_coco17_tpu-8.config
+# SSD with ResNet-50 v1 FPN feature extractor, shared box predictor and focal
+# loss (a.k.a. RetinaNet).
+# See Lin et al., https://arxiv.org/abs/1708.02002
+# Trained on COCO, initialized from an ImageNet classification checkpoint.
+# Train on TPU-8
+#
+# Achieves 38.3 mAP on COCO17 Val
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      multiscale_anchor_generator {
+        min_level: 3
+        max_level: 7
+        anchor_scale: 4.0
+        aspect_ratios: [1.0, 2.0, 0.5]
+        scales_per_octave: 2
+      }
+    }
+    image_resizer {
+      fixed_shape_resizer {
+        height: 1024
+        width: 1024
+      }
+    }
+    box_predictor {
+      weight_shared_convolutional_box_predictor {
+        depth: 256
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          activation: RELU_6,
+          regularizer {
+            l2_regularizer {
+              weight: 0.0004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            scale: true,
+            decay: 0.997,
+            epsilon: 0.001,
+          }
+        }
+        num_layers_before_predictor: 4
+        kernel_size: 3
+      }
+    }
+    feature_extractor {
+      type: 'ssd_resnet50_v1_fpn_keras'
+      fpn {
+        min_level: 3
+        max_level: 7
+      }
+      min_depth: 16
+      depth_multiplier: 1.0
+      conv_hyperparams {
+        activation: RELU_6,
+        regularizer {
+          l2_regularizer {
+            weight: 0.0004
+          }
+        }
+        initializer {
+          truncated_normal_initializer {
+            stddev: 0.03
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          scale: true,
+          decay: 0.997,
+          epsilon: 0.001,
+        }
+      }
+      override_base_feature_extractor_hyperparams: true
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.25
+          gamma: 2.0
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.6
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  fine_tune_checkpoint_version: V2
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet50.ckpt-1"
+  fine_tune_checkpoint_type: "classification"
+  batch_size: 64
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 8
+  use_bfloat16: true
+  num_steps: 100000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    random_crop_image {
+      min_object_covered: 0.0
+      min_aspect_ratio: 0.75
+      max_aspect_ratio: 3.0
+      min_area: 0.75
+      max_area: 1.0
+      overlap_thresh: 0.0
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: .04
+          total_steps: 100000
+          warmup_learning_rate: .013333
+          warmup_steps: 2000
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
--- a/research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.config
+++ b/research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.config
+# SSD with ResNet-50 v1 FPN feature extractor, shared box predictor and focal
+# loss (a.k.a. RetinaNet).
+# See Lin et al., https://arxiv.org/abs/1708.02002
+# Trained on COCO, initialized from an ImageNet classification checkpoint.
+# Train on TPU-8
+#
+# Achieves 34.3 mAP on COCO17 Val
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      multiscale_anchor_generator {
+        min_level: 3
+        max_level: 7
+        anchor_scale: 4.0
+        aspect_ratios: [1.0, 2.0, 0.5]
+        scales_per_octave: 2
+      }
+    }
+    image_resizer {
+      fixed_shape_resizer {
+        height: 640
+        width: 640
+      }
+    }
+    box_predictor {
+      weight_shared_convolutional_box_predictor {
+        depth: 256
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          activation: RELU_6,
+          regularizer {
+            l2_regularizer {
+              weight: 0.0004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            scale: true,
+            decay: 0.997,
+            epsilon: 0.001,
+          }
+        }
+        num_layers_before_predictor: 4
+        kernel_size: 3
+      }
+    }
+    feature_extractor {
+      type: 'ssd_resnet50_v1_fpn_keras'
+      fpn {
+        min_level: 3
+        max_level: 7
+      }
+      min_depth: 16
+      depth_multiplier: 1.0
+      conv_hyperparams {
+        activation: RELU_6,
+        regularizer {
+          l2_regularizer {
+            weight: 0.0004
+          }
+        }
+        initializer {
+          truncated_normal_initializer {
+            stddev: 0.03
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          scale: true,
+          decay: 0.997,
+          epsilon: 0.001,
+        }
+      }
+      override_base_feature_extractor_hyperparams: true
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.25
+          gamma: 2.0
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.6
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  fine_tune_checkpoint_version: V2
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet50.ckpt-1"
+  fine_tune_checkpoint_type: "classification"
+  batch_size: 64
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 8
+  use_bfloat16: true
+  num_steps: 25000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    random_crop_image {
+      min_object_covered: 0.0
+      min_aspect_ratio: 0.75
+      max_aspect_ratio: 3.0
+      min_area: 0.75
+      max_area: 1.0
+      overlap_thresh: 0.0
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: .04
+          total_steps: 25000
+          warmup_learning_rate: .013333
+          warmup_steps: 2000
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
--- a/research/object_detection/core/box_predictor.py
+++ b/research/object_detection/core/box_predictor.py
@@ -134,7 +134,7 @@ class BoxPredictor(object):
    pass


-class KerasBoxPredictor(tf.keras.Model):
+class KerasBoxPredictor(tf.keras.layers.Layer):
  """Keras-based BoxPredictor."""

  def __init__(self, is_training, num_classes, freeze_batchnorm,

--- a/research/object_detection/core/densepose_ops.py
+++ b/research/object_detection/core/densepose_ops.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""DensePose operations.
+
+DensePose part ids are represented as tensors of shape
+[num_instances, num_points] and coordinates are represented as tensors of shape
+[num_instances, num_points, 4] where each point holds (y, x, v, u). The location
+of the DensePose sampled point is (y, x) in normalized coordinates. The surface
+coordinate (in the part coordinate frame) is (v, u). Note that dim 1 of both
+tensors may contain padding, since the number of sampled points per instance
+is not fixed. The value `num_points` represents the maximum number of sampled
+points for an instance in the example.
+"""
+import os
+
+import scipy.io
+import tensorflow.compat.v1 as tf
+
+from object_detection.utils import shape_utils
+
+PART_NAMES = [
+    b'torso_back', b'torso_front', b'right_hand', b'left_hand', b'left_foot',
+    b'right_foot', b'right_upper_leg_back', b'left_upper_leg_back',
+    b'right_upper_leg_front', b'left_upper_leg_front', b'right_lower_leg_back',
+    b'left_lower_leg_back', b'right_lower_leg_front', b'left_lower_leg_front',
+    b'left_upper_arm_back', b'right_upper_arm_back', b'left_upper_arm_front',
+    b'right_upper_arm_front', b'left_lower_arm_back', b'right_lower_arm_back',
+    b'left_lower_arm_front', b'right_lower_arm_front', b'right_face',
+    b'left_face',
+]
+
+
+def scale(dp_surface_coords, y_scale, x_scale, scope=None):
+  """Multiplies the (y, x) channels of DensePose coordinates by given factors.
+
+  The surface channels (v, u) are multiplied by 1 and therefore pass through
+  unchanged.
+
+  Args:
+    dp_surface_coords: a tensor of shape [num_instances, num_points, 4], with
+      coordinates in (y, x, v, u) format.
+    y_scale: (float) scalar tensor, factor applied to the y channel.
+    x_scale: (float) scalar tensor, factor applied to the x channel.
+    scope: name scope.
+
+  Returns:
+    new_dp_surface_coords: a tensor of shape [num_instances, num_points, 4]
+  """
+  with tf.name_scope(scope, 'DensePoseScale'):
+    # Per-channel multipliers in (y, x, v, u) order. The triple nesting gives
+    # the factors shape [1, 1, 4] so they broadcast over instances and points.
+    channel_factors = [[[tf.cast(y_scale, tf.float32),
+                         tf.cast(x_scale, tf.float32),
+                         1,
+                         1]]]
+    return dp_surface_coords * channel_factors
+
+
+def clip_to_window(dp_surface_coords, window, scope=None):
+  """Clamps the (y, x) location of DensePose points into a window.
+
+  Only the image-location channels are clamped; the surface channels (v, u)
+  are passed through untouched.
+
+  Args:
+    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
+      DensePose surface coordinates in (y, x, v, u) format.
+    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
+      window to which the op should clip the points.
+    scope: name scope.
+
+  Returns:
+    new_dp_surface_coords: a tensor of shape [num_instances, num_points, 4].
+  """
+  with tf.name_scope(scope, 'DensePoseClipToWindow'):
+    y_lower, x_lower, y_upper, x_upper = tf.unstack(window)
+    # One [num_instances, num_points, 1] slice per channel.
+    ys, xs, vs, us = tf.split(
+        value=dp_surface_coords, num_or_size_splits=4, axis=2)
+    # Apply the upper bound first, then the lower bound.
+    ys_clipped = tf.maximum(tf.minimum(ys, y_upper), y_lower)
+    xs_clipped = tf.maximum(tf.minimum(xs, x_upper), x_lower)
+    return tf.concat([ys_clipped, xs_clipped, vs, us], 2)
+
+
+def prune_outside_window(dp_num_points, dp_part_ids, dp_surface_coords, window,
+                         scope=None):
+  """Prunes DensePose points that fall outside a given window.
+
+  This function replaces points that fall outside the given window with zeros.
+  See also clip_to_window which clips any DensePose points that fall outside the
+  given window.
+
+  A point is kept only if it is a non-padded entry (its index along dim 1 is
+  smaller than the instance's entry in `dp_num_points`) and its (y, x) location
+  lies inside the window, bounds included. The kept points of every instance
+  are gathered in their original order and each instance is then padded or
+  clipped to `max_num_points`, the largest number of kept points over all
+  instances.
+
+  Note that this operation uses dynamic shapes, and therefore is not currently
+  suitable for TPU.
+
+  Args:
+    dp_num_points: a tensor of shape [num_instances] that indicates how many
+      (non-padded) DensePose points there are per instance.
+    dp_part_ids: a tensor of shape [num_instances, num_points] with DensePose
+      part ids. These part_ids are 0-indexed, where the first non-background
+      part has index 0.
+    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
+      DensePose surface coordinates in (y, x, v, u) format.
+    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
+      window outside of which the op should prune the points.
+    scope: name scope.
+
+  Returns:
+    new_dp_num_points: a tensor of shape [num_instances] that indicates how many
+      (non-padded) DensePose points there are per instance after pruning.
+    new_dp_part_ids: a tensor of shape [num_instances, max_num_points] with
+      DensePose part ids, where max_num_points is the maximum of
+      new_dp_num_points. These part_ids are 0-indexed, where the first
+      non-background part has index 0.
+    new_dp_surface_coords: a tensor of shape [num_instances, max_num_points, 4]
+      with DensePose surface coordinates after pruning.
+  """
+  with tf.name_scope(scope, 'DensePosePruneOutsideWindow'):
+    # Only the image location (y, x) decides whether a point is kept.
+    y, x, _, _ = tf.unstack(dp_surface_coords, axis=-1)
+    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
+
+    num_instances, num_points = shape_utils.combined_static_and_dynamic_shape(
+        dp_part_ids)
+    # Build a [num_instances, num_points] mask of non-padded slots by comparing
+    # each slot index against the per-instance point count.
+    dp_num_points_tiled = tf.tile(dp_num_points[:, tf.newaxis],
+                                  multiples=[1, num_points])
+    range_tiled = tf.tile(tf.range(num_points)[tf.newaxis, :],
+                          multiples=[num_instances, 1])
+    valid_initial = range_tiled < dp_num_points_tiled
+    # Window test is inclusive on all four sides.
+    valid_in_window = tf.logical_and(
+        tf.logical_and(y >= win_y_min, y <= win_y_max),
+        tf.logical_and(x >= win_x_min, x <= win_x_max))
+    valid_indices = tf.logical_and(valid_initial, valid_in_window)
+
+    # Number of surviving points per instance, and the largest such count,
+    # which becomes the size of dim 1 in the outputs.
+    new_dp_num_points = tf.math.reduce_sum(
+        tf.cast(valid_indices, tf.int32), axis=1)
+    max_num_points = tf.math.reduce_max(new_dp_num_points)
+
+    def gather_and_reshuffle(elems):
+      """Moves the kept points of one instance to the front and pads the rest.
+
+      Args:
+        elems: a [part_ids, surface_coords, valid_mask] triple for a single
+          instance.
+
+      Returns:
+        A [part_ids, surface_coords] pair resized to max_num_points entries.
+      """
+      dp_part_ids, dp_surface_coords, valid_indices = elems
+      # Positions of the kept points, in their original order.
+      locs = tf.where(valid_indices)[:, 0]
+      valid_part_ids = tf.gather(dp_part_ids, locs, axis=0)
+      # NOTE(review): pad_or_clip_nd presumably pads with zeros -- confirm
+      # against shape_utils.
+      valid_part_ids_padded = shape_utils.pad_or_clip_nd(
+          valid_part_ids, output_shape=[max_num_points])
+      valid_surface_coords = tf.gather(dp_surface_coords, locs, axis=0)
+      valid_surface_coords_padded = shape_utils.pad_or_clip_nd(
+          valid_surface_coords, output_shape=[max_num_points, 4])
+      return [valid_part_ids_padded, valid_surface_coords_padded]
+
+    # Apply the per-instance gather over dim 0; no gradients are needed.
+    new_dp_part_ids, new_dp_surface_coords = (
+        shape_utils.static_or_dynamic_map_fn(
+            gather_and_reshuffle,
+            elems=[dp_part_ids, dp_surface_coords, valid_indices],
+            dtype=[tf.int32, tf.float32],
+            back_prop=False))
+    return new_dp_num_points, new_dp_part_ids, new_dp_surface_coords
+
+
+def change_coordinate_frame(dp_surface_coords, window, scope=None):
+  """Re-expresses DensePose point locations relative to a window.
+
+  The window's top-left corner is subtracted from the (y, x) location of every
+  point and the result is divided by the window's height and width
+  respectively. The surface channels (v, u) are not modified.
+
+  A typical use is data augmentation: after randomly cropping an image to some
+  window, the groundtruth points have to be expressed in the frame of that
+  crop.
+
+  Args:
+    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
+      DensePose surface coordinates in (y, x, v, u) format.
+    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
+      window we should change the coordinate frame to.
+    scope: name scope.
+
+  Returns:
+    new_dp_surface_coords: a tensor of shape [num_instances, num_points, 4].
+  """
+  with tf.name_scope(scope, 'DensePoseChangeCoordinateFrame'):
+    y_min, x_min, y_max, x_max = window[0], window[1], window[2], window[3]
+    inverse_height = 1.0 / (y_max - y_min)
+    inverse_width = 1.0 / (x_max - x_min)
+    # Shift only the location channels; the zeros leave (v, u) in place.
+    shifted_coords = dp_surface_coords - [y_min, x_min, 0, 0]
+    return scale(shifted_coords, inverse_height, inverse_width)
+
+
+def to_normalized_coordinates(dp_surface_coords, height, width,
+                              check_range=True, scope=None):
+  """Divides absolute DensePose (y, x) locations by the image size.
+
+  When `check_range` is set, an assertion is attached that fails at graph
+  execution time unless the largest (y, x) value is greater than 1.01, i.e. it
+  fails when the coordinates look like they are already normalized. The 1.01
+  threshold leaves room for small rounding errors.
+
+  Args:
+    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
+      DensePose absolute surface coordinates in (y, x, v, u) format.
+    height: Height of image.
+    width: Width of image.
+    check_range: If True, checks if the coordinates are already normalized.
+    scope: name scope.
+
+  Returns:
+    A tensor of shape [num_instances, num_points, 4] with normalized
+    coordinates.
+  """
+  with tf.name_scope(scope, 'DensePoseToNormalizedCoordinates'):
+    image_height = tf.cast(height, tf.float32)
+    image_width = tf.cast(width, tf.float32)
+
+    if check_range:
+      # Only the location channels (y, x) take part in the range check.
+      largest_yx = tf.reduce_max(dp_surface_coords[:, :, :2])
+      range_check = tf.Assert(
+          tf.greater(largest_yx, 1.01),
+          ['max value is lower than 1.01: ', largest_yx])
+      # Route the width through an identity op so that the assertion runs
+      # whenever the result is evaluated.
+      with tf.control_dependencies([range_check]):
+        image_width = tf.identity(image_width)
+
+    return scale(dp_surface_coords, 1.0 / image_height, 1.0 / image_width)
+
+
+def to_absolute_coordinates(dp_surface_coords, height, width,
+                            check_range=True, scope=None):
+  """Multiplies normalized DensePose (y, x) locations by the image size.
+
+  When `check_range` is set, an assertion is attached that fails at graph
+  execution time if the largest (y, x) value exceeds 1.01, i.e. when the
+  coordinates look like they are already absolute.
+
+  Args:
+    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
+      DensePose normalized surface coordinates in (y, x, v, u) format.
+    height: Height of image.
+    width: Width of image.
+    check_range: If True, checks if the coordinates are normalized or not.
+    scope: name scope.
+
+  Returns:
+    A tensor of shape [num_instances, num_points, 4] with absolute coordinates.
+  """
+  with tf.name_scope(scope, 'DensePoseToAbsoluteCoordinates'):
+    image_height = tf.cast(height, tf.float32)
+    image_width = tf.cast(width, tf.float32)
+
+    if check_range:
+      # Only the location channels (y, x) take part in the range check.
+      largest_yx = tf.reduce_max(dp_surface_coords[:, :, :2])
+      range_check = tf.Assert(
+          tf.greater_equal(1.01, largest_yx),
+          ['maximum coordinate value is larger than 1.01: ', largest_yx])
+      # Route the width through an identity op so that the assertion runs
+      # whenever the result is evaluated.
+      with tf.control_dependencies([range_check]):
+        image_width = tf.identity(image_width)
+
+    return scale(dp_surface_coords, image_height, image_width)
+
+
+class DensePoseHorizontalFlip(object):
+  """Class responsible for horizontal flipping of parts and surface coords."""
+
+  def __init__(self):
+    """Constructor.
+
+    Loads the UV symmetry lookup tables from `UV_symmetry_transforms.mat` and
+    builds the table that maps every part index to its mirrored part index.
+    """
+
+    path = os.path.dirname(os.path.abspath(__file__))
+    uv_symmetry_transforms_path = tf.resource_loader.get_path_to_datafile(
+        os.path.join(path, '..', 'dataset_tools', 'densepose',
+                     'UV_symmetry_transforms.mat'))
+    tf.logging.info('Loading DensePose symmetry transforms file from {}'.format(
+        uv_symmetry_transforms_path))
+    with tf.io.gfile.GFile(uv_symmetry_transforms_path, 'rb') as f:
+      data = scipy.io.loadmat(f)
+
+    # Create lookup maps which indicate how a VU coordinate changes after a
+    # horizontal flip.
+    uv_symmetry_map = {}
+    for key in ('U_transforms', 'V_transforms'):
+      uv_symmetry_map_per_part = []
+      for i in range(data[key].shape[1]):
+        # The following tensor has shape [256, 256].
+        map_per_part = tf.constant(data[key][0, i], dtype=tf.float32)
+        uv_symmetry_map_per_part.append(map_per_part)
+      uv_symmetry_map[key] = tf.reshape(
+          tf.stack(uv_symmetry_map_per_part, axis=0), [-1])
+    # The following dictionary contains flattened lookup maps for the U and V
+    # coordinates separately. The shape of each is [24 * 256 * 256].
+    self.uv_symmetries = uv_symmetry_map
+
+    # Create a list that maps part index to flipped part index (0-indexed).
+    # Parts whose name contains neither 'left' nor 'right' map to themselves.
+    part_symmetries = []
+    for i, part_name in enumerate(PART_NAMES):
+      if b'left' in part_name:
+        part_symmetries.append(PART_NAMES.index(
+            part_name.replace(b'left', b'right')))
+      elif b'right' in part_name:
+        part_symmetries.append(PART_NAMES.index(
+            part_name.replace(b'right', b'left')))
+      else:
+        part_symmetries.append(i)
+    self.part_symmetries = part_symmetries
+
+  def flip_parts_and_coords(self, part_ids, vu):
+    """Flips part ids and coordinates.
+
+    The vu coordinates are clipped to [0, 1] and quantized onto the 256 x 256
+    lookup grid of the pre-flip part. The grid index is clamped to 255 so that
+    a coordinate of exactly 1.0 reads the last row/column of its own part's
+    table instead of indexing past it.
+
+    Args:
+      part_ids: a [num_instances, num_points] int32 tensor with pre-flipped part
+        ids. These part_ids are 0-indexed, where the first non-background part
+        has index 0.
+      vu: a [num_instances, num_points, 2] float32 tensor with pre-flipped vu
+        normalized coordinates.
+
+    Returns:
+      new_part_ids: a [num_instances, num_points] int32 tensor with post-flipped
+        part ids. These part_ids are 0-indexed, where the first non-background
+        part has index 0.
+      new_vu: a [num_instances, num_points, 2] float32 tensor with post-flipped
+        vu coordinates.
+    """
+    num_instances, num_points = shape_utils.combined_static_and_dynamic_shape(
+        part_ids)
+    part_ids_flattened = tf.reshape(part_ids, [-1])
+    new_part_ids_flattened = tf.gather(self.part_symmetries, part_ids_flattened)
+    new_part_ids = tf.reshape(new_part_ids_flattened,
+                              [num_instances, num_points])
+
+    # Convert VU floating point coordinates to values in [256, 256] grid.
+    vu = tf.math.minimum(tf.math.maximum(vu, 0.0), 1.0)
+    # A coordinate of exactly 1.0 quantizes to 256, one past the last valid
+    # grid index. Left unclamped, the flattened index below would spill into
+    # the next part's table (or past the end of the table for the last part).
+    vu_locs = tf.math.minimum(tf.cast(vu * 256., dtype=tf.int32), 255)
+    vu_locs_flattened = tf.reshape(vu_locs, [-1, 2])
+    v_locs_flattened, u_locs_flattened = tf.unstack(vu_locs_flattened, axis=1)
+
+    # Convert vu_locs into lookup indices (in flattened part symmetries map).
+    # Each part owns a contiguous run of 256 * 256 = 65536 entries, laid out
+    # with v as the row index and u as the column index.
+    symmetry_lookup_inds = (
+        part_ids_flattened * 65536 + 256 * v_locs_flattened + u_locs_flattened)
+
+    # New VU coordinates.
+    v_new = tf.gather(self.uv_symmetries['V_transforms'], symmetry_lookup_inds)
+    u_new = tf.gather(self.uv_symmetries['U_transforms'], symmetry_lookup_inds)
+    new_vu_flattened = tf.stack([v_new, u_new], axis=1)
+    new_vu = tf.reshape(new_vu_flattened, [num_instances, num_points, 2])
+
+    return new_part_ids, new_vu
+
+
+def flip_horizontal(dp_part_ids, dp_surface_coords, scope=None):
+  """Mirrors DensePose annotations left-to-right.
+
+  The normalized x location of every point is replaced by `1.0 - x`, and the
+  part ids and (v, u) surface coordinates are remapped through
+  DensePoseHorizontalFlip. Note that part ids and surface coordinates may or
+  may not change as a result of the flip.
+
+  Args:
+    dp_part_ids: a tensor of shape [num_instances, num_points] with DensePose
+      part ids. These part_ids are 0-indexed, where the first non-background
+      part has index 0.
+    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
+      DensePose surface coordinates in (y, x, v, u) normalized format.
+    scope: name scope.
+
+  Returns:
+    new_dp_part_ids: a tensor of shape [num_instances, num_points] with
+      DensePose part ids after flipping.
+    new_dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
+      DensePose surface coordinates after flipping.
+  """
+  with tf.name_scope(scope, 'DensePoseFlipHorizontal'):
+    # Separate the location channels from the two surface channels.
+    y, x, vu = tf.split(dp_surface_coords, num_or_size_splits=[1, 1, 2], axis=2)
+    mirrored_x = 1.0 - x
+
+    # Remap part ids and surface coordinates to their mirrored counterparts.
+    flipper = DensePoseHorizontalFlip()
+    new_dp_part_ids, mirrored_vu = flipper.flip_parts_and_coords(
+        dp_part_ids, vu)
+    return new_dp_part_ids, tf.concat([y, mirrored_x, mirrored_vu], axis=2)
+