Commit b968a6ce authored by Mark Sandler, committed by Jonathan Huang

Merged commit includes the following changes: (#7800)

280142968  by Zhichao Lu:

    Open-source MobilenetEdgeTPU + ssdlite into the third-party object detection APIs on EdgeTPU.

--
280134001  by Zhichao Lu:

    Adds MobilenetEdgeTPU + ssdlite into the internal object detection APIs on EdgeTPU.

--
278941778  by Zhichao Lu:

    Add support for fixed input shapes for 'encoded_image_string_tensor' and 'tf_example' inputs.
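
    A rough sketch of how a fixed shape might be requested at export time; the
    call below mirrors the public object_detection exporter, but the exact
    flags and how the shape is applied to string inputs come from this commit
    and are assumed here, not reproduced:

        from google.protobuf import text_format
        from object_detection import exporter
        from object_detection.protos import pipeline_pb2

        # Hypothetical export with a fixed shape for the decoded image behind
        # the 'encoded_image_string_tensor' input (paths are placeholders).
        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        with open('PATH_TO_BE_CONFIGURED/pipeline.config') as f:
          text_format.Merge(f.read(), pipeline_config)
        exporter.export_inference_graph(
            input_type='encoded_image_string_tensor',
            pipeline_config=pipeline_config,
            trained_checkpoint_prefix='PATH_TO_BE_CONFIGURED/model.ckpt',
            output_directory='PATH_TO_BE_CONFIGURED/exported',
            input_shape=[1, 320, 320, 3])  # assumed fixed-shape argument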

--
278933274  by Zhichao Lu:

    Adding a foolproof check to avoid using a 1x1 depthwise conv op.

--
278762192  by Zhichao Lu:

    Ensure correct number of iterations after training resumes.

--
278746440  by Zhichao Lu:

    Internal change.

--
278006953  by Zhichao Lu:

    Internal changes to tf.contrib symbols

--
278006330  by Zhichao Lu:

    Internal changes to tf.contrib symbols

--
277593959  by Zhichao Lu:

    Make ssd_feature_extractor_test.py PY3-compatible. "six.moves.zip" resolves to "itertools.izip" in Python 2 and to the builtin "zip" in Python 3.
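
    For reference, the portable spelling lives in six.moves; a minimal example:

        from six.moves import zip  # itertools.izip on PY2, builtin zip on PY3

        for name, depth in zip(['layer_1', 'layer_2'], [256, 512]):
          print(name, depth)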

--
277344551  by Zhichao Lu:

    Internal change.

--
277154953  by Zhichao Lu:

    Conditionally use Keras-based optimizers so that checkpointing works
    correctly. This change also enables summaries on TPU, which were previously
    disabled due to a bug.
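
    A minimal sketch of the idea (the flag and function are illustrative, not
    the code in this commit): choose the Keras optimizer when object-based
    checkpointing is wanted, otherwise fall back to the tf.train one.

        import tensorflow as tf

        def build_momentum_optimizer(learning_rate, momentum=0.9,
                                     use_keras=True):
          """Returns a Keras or tf.train momentum optimizer (illustrative)."""
          if use_keras:
            # Keras optimizers track their slots as object attributes, so they
            # are picked up by object-based checkpointing.
            return tf.keras.optimizers.SGD(learning_rate=learning_rate,
                                           momentum=momentum)
          return tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                            momentum=momentum)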

--
277087572  by Zhichao Lu:

    Fix resizing of boxes when using keep_aspect_ratio_resizer with padding.
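
    The underlying issue: with keep_aspect_ratio_resizer plus
    pad_to_max_dimension, normalized boxes computed on the resized image have
    to be rescaled to the padded canvas. A rough numpy illustration of that
    correction (not the actual fix in this commit):

        import numpy as np

        def rescale_boxes_for_padding(boxes, resized_hw, padded_hw):
          """Rescales normalized [ymin, xmin, ymax, xmax] boxes from the
          resized image to the zero-padded canvas (illustrative only)."""
          rh, rw = resized_hw
          ph, pw = padded_hw
          return boxes * np.array([rh / ph, rw / pw, rh / ph, rw / pw])

        # A box covering the whole 320x240 resized image spans only part of
        # the 320x320 padded canvas.
        print(rescale_boxes_for_padding(
            np.array([[0.0, 0.0, 1.0, 1.0]]), (320, 240), (320, 320)))
        # -> [[0.   0.   1.   0.75]]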

--
275898543  by Zhichao Lu:

    Support label_map_proto as input in label_map_util.

--
275347137  by Zhichao Lu:

    Add a force_no_resize flag in eval.proto which replaces
    the resize config with an identity resizer. This is useful
    when evaluating at the original image resolution.
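
    For example, the new field can be switched on in the eval_config section
    of a pipeline config; a small sketch parsing it with the generated
    bindings:

        from google.protobuf import text_format
        from object_detection.protos import eval_pb2

        eval_config = text_format.Parse("""
            num_examples: 8000
            force_no_resize: true  # evaluate at the original image resolution
        """, eval_pb2.EvalConfig())
        print(eval_config.force_no_resize)  # True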

--

PiperOrigin-RevId: 280142968
parent c3bd5082
@@ -21,11 +21,12 @@ All the mask prediction heads have a predict function that receives the
"""
import math
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from object_detection.predictors.heads import head
from object_detection.utils import ops
slim = tf.contrib.slim
slim = contrib_slim
class MaskRCNNMaskHead(head.Head):
......
@@ -3,6 +3,7 @@ syntax = "proto2";
package object_detection.protos;
// Message for configuring DetectionModel evaluation jobs (eval.py).
// Next id - 30
message EvalConfig {
optional uint32 batch_size = 25 [default = 1];
// Number of visualization images to generate.
@@ -85,4 +86,9 @@ message EvalConfig {
// tensor dictionary, so that they can be displayed in Tensorboard.
optional bool retain_original_image_additional_channels = 28
[default = false];
// When this flag is set, images are not resized during evaluation.
// When this flag is not set (default case), images are resized according
// to the image_resizer config in the model during evaluation.
optional bool force_no_resize = 29 [default=false];
}
@@ -194,6 +194,7 @@ message SsdFeatureExtractor {
// The number of SSD layers.
optional int32 num_layers = 12 [default = 6];
}
// Configuration for Feature Pyramid Networks.
@@ -222,3 +223,4 @@ message FeaturePyramidNetworks {
optional int32 additional_layer_depth = 3 [default = 256];
}
# SSDLite with MobileNetEdgeTPU feature extractor.
# Trained on COCO14, initialized from scratch.
# TPU-compatible.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
ssd_anchor_generator {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
convolutional_box_predictor {
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: false
dropout_keep_probability: 0.8
kernel_size: 3
use_depthwise: true
box_code_size: 4
apply_sigmoid_to_scores: false
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.97,
epsilon: 0.001,
}
}
}
}
feature_extractor {
type: 'ssd_mobilenet_edgetpu'
min_depth: 16
depth_multiplier: 1.0
use_depthwise: true
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.97,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.75,
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
delta: 1.0
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
use_static_shapes: true
}
score_converter: SIGMOID
}
}
}
train_config: {
batch_size: 512
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 32
num_steps: 400000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
ssd_random_crop {
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 0.8
total_steps: 400000
warmup_learning_rate: 0.13333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
}
eval_config: {
num_examples: 8000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
shuffle: false
num_readers: 1
}
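
For readers sanity-checking the learning-rate schedule in the config above, a
rough numpy approximation of a cosine decay with linear warmup using the same
numbers (an illustration of what the config requests, not the library's own
implementation):

    import numpy as np

    def cosine_decay_with_warmup(step, lr_base=0.8, total_steps=400000,
                                 warmup_lr=0.13333, warmup_steps=2000):
      """Approximate learning rate at a given global step (illustrative)."""
      if step < warmup_steps:
        # Linear warmup from warmup_lr up to lr_base.
        return warmup_lr + (lr_base - warmup_lr) * step / warmup_steps
      # Cosine decay from lr_base down to 0 over the remaining steps.
      progress = (step - warmup_steps) / float(total_steps - warmup_steps)
      return 0.5 * lr_base * (1.0 + np.cos(np.pi * progress))

    for s in (0, 2000, 200000, 400000):
      print(s, round(cosine_decay_with_warmup(s), 5))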
# Quantized Training SSDLite with MobileNetEdgeTPU feature extractor.
# Trained on COCO14, initialized from scratch.
# TPU-compatible.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
ssd_anchor_generator {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
convolutional_box_predictor {
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: false
dropout_keep_probability: 0.8
kernel_size: 3
use_depthwise: true
box_code_size: 4
apply_sigmoid_to_scores: false
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.97,
epsilon: 0.001,
}
}
}
}
feature_extractor {
type: 'ssd_mobilenet_edgetpu'
min_depth: 16
depth_multiplier: 1.0
use_depthwise: true
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.97,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.75,
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
delta: 1.0
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
use_static_shapes: true
}
score_converter: SIGMOID
}
}
}
train_config: {
batch_size: 512
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 32
num_steps: 400000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
ssd_random_crop {
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 0.8
total_steps: 400000
warmup_learning_rate: 0.13333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
}
eval_config: {
num_examples: 8000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
shuffle: false
num_readers: 1
}
graph_rewriter {
quantization {
delay: 30000
activation_bits: 8
weight_bits: 8
}
}
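
The graph_rewriter block above turns on quantization-aware training after a
delay of 30000 steps. In TF 1.x terms this corresponds roughly to the contrib
quantize rewrite sketched below (a simplified stand-in for illustration, not
the Object Detection API's own rewriter plumbing):

    import tensorflow as tf

    g = tf.Graph()
    with g.as_default():
      # Stand-in for the float training graph built from the config.
      images = tf.placeholder(tf.float32, [1, 320, 320, 3])
      net = tf.layers.conv2d(images, filters=8, kernel_size=3)
      # Insert fake-quantization ops, but only activate them after 30000
      # steps so the float model stabilizes first; the defaults use 8-bit
      # weights and activations, matching the config.
      tf.contrib.quantize.experimental_create_training_graph(
          input_graph=g, quant_delay=30000)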
@@ -20,6 +20,7 @@ from __future__ import print_function
import logging
from six import string_types
from six.moves import range
import tensorflow as tf
from google.protobuf import text_format
@@ -145,13 +146,14 @@ def load_labelmap(path):
return label_map
def get_label_map_dict(label_map_path,
def get_label_map_dict(label_map_path_or_proto,
use_display_name=False,
fill_in_gaps_and_background=False):
"""Reads a label map and returns a dictionary of label names to id.
Args:
label_map_path: path to StringIntLabelMap proto text file.
label_map_path_or_proto: path to StringIntLabelMap proto text file or the
proto itself.
use_display_name: whether to use the label map items' display names as keys.
fill_in_gaps_and_background: whether to fill in gaps and background with
respect to the id field in the proto. The id: 0 is reserved for the
@@ -166,7 +168,12 @@ def get_label_map_dict(label_map_path,
ValueError: if fill_in_gaps_and_background and label_map has non-integer or
negative values.
"""
label_map = load_labelmap(label_map_path)
if isinstance(label_map_path_or_proto, string_types):
label_map = load_labelmap(label_map_path_or_proto)
else:
_validate_label_map(label_map_path_or_proto)
label_map = label_map_path_or_proto
label_map_dict = {}
for item in label_map.item:
if use_display_name:
......
@@ -57,6 +57,23 @@ class LabelMapUtilTest(tf.test.TestCase):
self.assertEqual(label_map_dict['dog'], 1)
self.assertEqual(label_map_dict['cat'], 2)
def test_get_label_map_dict_from_proto(self):
label_map_string = """
item {
id:2
name:'cat'
}
item {
id:1
name:'dog'
}
"""
label_map_proto = text_format.Parse(
label_map_string, string_int_label_map_pb2.StringIntLabelMap())
label_map_dict = label_map_util.get_label_map_dict(label_map_proto)
self.assertEqual(label_map_dict['dog'], 1)
self.assertEqual(label_map_dict['cat'], 2)
def test_get_label_map_dict_display(self):
label_map_string = """
item {
......
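
Putting the label_map_util change together: get_label_map_dict now accepts
either a path to a StringIntLabelMap text file or an already-parsed proto. A
short usage sketch:

    from google.protobuf import text_format
    from object_detection.protos import string_int_label_map_pb2
    from object_detection.utils import label_map_util

    label_map_proto = text_format.Parse(
        "item { id: 1 name: 'dog' } item { id: 2 name: 'cat' }",
        string_int_label_map_pb2.StringIntLabelMap())
    print(label_map_util.get_label_map_dict(label_map_proto))
    # {'dog': 1, 'cat': 2}; passing a file path still works as before.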