Add SpaghettiNet Feature Extractor

PiperOrigin-RevId: 402944074

Add SpaghettiNet Feature Extractor
PiperOrigin-RevId: 402944074
ac8d0651 · A. Unique TensorFlower · TF Object Detection Team · fccb57b1 · ac8d0651 · ac8d0651
Commit ac8d0651 authored Oct 13, 2021 by A. Unique TensorFlower Committed by TF Object Detection Team Oct 13, 2021
7 changed files
--- a/research/object_detection/README.md
+++ b/research/object_detection/README.md
@@ -73,6 +73,23 @@ documentation of the Object Detection API:

 ## Whats New

+### SpaghettiNet for Edge TPU
+
+We have released SpaghettiNet models optimized for the Edge TPU in the [Google Tensor SoC](https://blog.google/products/pixel/google-tensor-debuts-new-pixel-6-fall/).
+
+SpaghettiNet models are derived from a TuNAS search space that incorporates
+group convolution based [Inverted Bottleneck](https://arxiv.org/abs/1801.04381) blocks.
+The backbone and detection head are connected through [MnasFPN](https://arxiv.org/abs/1912.01106)-style feature map
+merging and searched jointly.
+
+When compared to MobileDet-EdgeTPU, SpaghettiNet models achieve +2.2% mAP
+(absolute) on COCO17 at the same latency. They also consume <70% of the energy
+used by MobileDet-EdgeTPU to achieve the same accuracy.
+
+A sample config is available [here](configs/tf1/ssd_spaghettinet_edgetpu_320x320_coco17_sync_4x4.config).
+
+<b>Thanks to contributors</b>: Marie White, Hao Xu, Hanxiao Liu and Suyog Gupta.
+
 ### DeepMAC architecture

 We have released our new architecture, **DeepMAC**, designed for partially

--- a/research/object_detection/builders/model_builder.py
+++ b/research/object_detection/builders/model_builder.py
@@ -93,6 +93,7 @@ if tf_version.is_tf1():
  from object_detection.models.ssd_mobiledet_feature_extractor import SSDMobileDetDSPFeatureExtractor
  from object_detection.models.ssd_mobiledet_feature_extractor import SSDMobileDetEdgeTPUFeatureExtractor
  from object_detection.models.ssd_mobiledet_feature_extractor import SSDMobileDetGPUFeatureExtractor
+  from object_detection.models.ssd_spaghettinet_feature_extractor import SSDSpaghettinetFeatureExtractor
  from object_detection.models.ssd_pnasnet_feature_extractor import SSDPNASNetFeatureExtractor
  from object_detection.predictors import rfcn_box_predictor
 # pylint: enable=g-import-not-at-top
@@ -229,6 +230,8 @@ if tf_version.is_tf1():
          SSDMobileDetEdgeTPUFeatureExtractor,
      'ssd_mobiledet_gpu':
          SSDMobileDetGPUFeatureExtractor,
+      'ssd_spaghettinet':
+          SSDSpaghettinetFeatureExtractor,
  }

  FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {
@@ -350,6 +353,12 @@ def _build_ssd_feature_extractor(feature_extractor_config,
    })


+  if feature_extractor_config.HasField('spaghettinet_arch_name'):
+    kwargs.update({
+        'spaghettinet_arch_name':
+            feature_extractor_config.spaghettinet_arch_name,
+    })
+
  if feature_extractor_config.HasField('fpn'):
    kwargs.update({
        'fpn_min_level':

--- a/research/object_detection/configs/tf1/ssd_spaghettinet_edgetpu_320x320_coco17_sync_4x4.config
+++ b/research/object_detection/configs/tf1/ssd_spaghettinet_edgetpu_320x320_coco17_sync_4x4.config
+# SpaghettiNet Feature Extractor optimized for EdgeTPU.
+# Trained on COCO17 from scratch.
+#
+# spaghettinet_edgetpu_s
+# Achieves 26.2% mAP on COCO17 at 400k steps.
+# 1.31ms Edge TPU latency at 1 billion MACs, 3.4 million params.
+#
+# spaghettinet_edgetpu_m
+# Achieves 27.4% mAP on COCO17 at 400k steps.
+# 1.55ms Edge TPU latency at 1.25 billion MACs, 4.1 million params.
+#
+# spaghettinet_edgetpu_l
+# Achieves 28.02% mAP on COCO17 at 400k steps.
+# 1.75ms Edge TPU latency at 1.57 billion MACs, 5.7 million params.
+#
+# TPU-compatible.
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 90
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      ssd_anchor_generator {
+        num_layers: 5
+        min_scale: 0.2
+        max_scale: 0.95
+        aspect_ratios: 1.0
+        aspect_ratios: 2.0
+        aspect_ratios: 0.5
+        aspect_ratios: 3.0
+        aspect_ratios: 0.3333333
+      }
+    }
+    image_resizer {
+      fixed_shape_resizer {
+        height: 320
+        width: 320
+      }
+    }
+    box_predictor {
+      convolutional_box_predictor {
+        min_depth: 0
+        max_depth: 0
+        num_layers_before_predictor: 0
+        use_dropout: false
+        dropout_keep_probability: 0.8
+        kernel_size: 3
+        use_depthwise: true
+        box_code_size: 4
+        apply_sigmoid_to_scores: false
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          activation: RELU_6,
+          regularizer {
+            l2_regularizer {
+              weight: 0.00004
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.03
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            train: true,
+            scale: true,
+            center: true,
+            decay: 0.97,
+            epsilon: 0.001,
+          }
+        }
+      }
+    }
+    feature_extractor {
+      type: 'ssd_spaghettinet'
+      # Three architectures are supported (uncomment exactly one line below);
+      # the performance of each is listed at the top of this config file.
+      #spaghettinet_arch_name: 'spaghettinet_edgetpu_s'
+      spaghettinet_arch_name: 'spaghettinet_edgetpu_m'
+      #spaghettinet_arch_name: 'spaghettinet_edgetpu_l'
+      use_explicit_padding: false
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.75,
+          gamma: 2.0
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+          delta: 1.0
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.6
+        max_detections_per_class: 100
+        max_total_detections: 100
+        use_static_shapes: true
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  batch_size: 512
+  sync_replicas: true
+  startup_delay_steps: 0
+  replicas_to_aggregate: 32
+  num_steps: 400000
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    ssd_random_crop {
+    }
+  }
+  optimizer {
+    momentum_optimizer: {
+      learning_rate: {
+        cosine_decay_learning_rate {
+          learning_rate_base: 0.8
+          total_steps: 400000
+          warmup_learning_rate: 0.13333
+          warmup_steps: 2000
+        }
+      }
+      momentum_optimizer_value: 0.9
+    }
+    use_moving_average: false
+  }
+  max_number_of_boxes: 100
+  unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
+  }
+}
+
+eval_config: {
+  metrics_set: "coco_detection_metrics"
+  use_moving_averages: false
+}
+
+eval_input_reader: {
+  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
+  shuffle: false
+  num_epochs: 1
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
+  }
+}
+
+graph_rewriter {
+  quantization {
+    delay: 40000
+    weight_bits: 8
+    activation_bits: 8
+  }
+}
--- a/research/object_detection/g3doc/tf1_detection_zoo.md
+++ b/research/object_detection/g3doc/tf1_detection_zoo.md
@@ -173,10 +173,19 @@ Model name
 [faster_rcnn_resnet101_snapshot_serengeti](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_snapshot_serengeti_2020_06_10.tar.gz)   | 38           | Boxes
 [context_rcnn_resnet101_snapshot_serengeti](http://download.tensorflow.org/models/object_detection/context_rcnn_resnet101_snapshot_serengeti_2020_06_10.tar.gz) | 56           | Boxes

+## Pixel 6 Edge TPU models
+
+Model name                                                                                                                    | Pixel 6 Edge TPU Speed (ms) | Pixel 6 Speed with Post-processing on CPU (ms) | COCO 2017 mAP (uint8) | Outputs
+----------------------------------------------------------------------------------------------------------------------------- | :-------------------------: | :--------------------------------------------: | :-------------------: | :-----:
+[spaghettinet_edgetpu_s](http://download.tensorflow.org/models/object_detection/tf1/spaghettinet_edgetpu_s_2021_10_13.tar.gz) | 1.3                         | 1.8                                            | 26.3                  | Boxes
+[spaghettinet_edgetpu_m](http://download.tensorflow.org/models/object_detection/tf1/spaghettinet_edgetpu_m_2021_10_13.tar.gz) | 1.4                         | 1.9                                            | 27.4                  | Boxes
+[spaghettinet_edgetpu_l](http://download.tensorflow.org/models/object_detection/tf1/spaghettinet_edgetpu_l_2021_10_13.tar.gz) | 1.7                         | 2.1                                            | 28.0                  | Boxes
+
 [^1]: See [MSCOCO evaluation protocol](http://cocodataset.org/#detections-eval).
-    The COCO mAP numbers here are evaluated on COCO 14 minival set (note that
-    our split is different from COCO 17 Val). A full list of image ids used in
-    our split could be fould
+    The COCO mAP numbers, with the exception of the Pixel 6 Edge TPU models,
+    are evaluated on COCO 14 minival set (note that our split is different
+    from COCO 17 Val). A full list of image ids used in our split could be
+    found
    [here](https://github.com/tensorflow/models/blob/master/research/object_detection/data/mscoco_minival_ids.txt).
 [^2]: This is PASCAL mAP with a slightly different way of true positives
    computation: see

--- a/research/object_detection/models/ssd_spaghettinet_feature_extractor.py
+++ b/research/object_detection/models/ssd_spaghettinet_feature_extractor.py
--- a/research/object_detection/models/ssd_spaghettinet_feature_extractor_tf1_test.py
+++ b/research/object_detection/models/ssd_spaghettinet_feature_extractor_tf1_test.py
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for ssd_spaghettinet_feature_extractor."""
+import os
+import unittest
+import tensorflow.compat.v1 as tf
+
+from object_detection.models import ssd_feature_extractor_test
+from object_detection.models import ssd_spaghettinet_feature_extractor
+from object_detection.utils import tf_version
+
+try:
+  from tensorflow.contrib import quantize as contrib_quantize  # pylint: disable=g-import-not-at-top
+except:  # pylint: disable=bare-except
+  pass
+
+
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
+class SSDSpaghettiNetFeatureExtractorTest(
+    ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
+
+  def _create_feature_extractor(self, arch_name, is_training=True):
+    return ssd_spaghettinet_feature_extractor.SSDSpaghettinetFeatureExtractor(
+        is_training=is_training,
+        spaghettinet_arch_name=arch_name,
+        depth_multiplier=1.0,
+        min_depth=4,
+        pad_to_multiple=1,
+        conv_hyperparams_fn=self.conv_hyperparams_fn)
+
+  def _test_spaghettinet_returns_correct_shapes(self, arch_name,
+                                                expected_feature_map_shapes):
+    image = tf.random.normal((1, 320, 320, 3))
+    feature_extractor = self._create_feature_extractor(arch_name)
+    feature_maps = feature_extractor.extract_features(image)
+
+    self.assertEqual(len(expected_feature_map_shapes), len(feature_maps))
+    for expected_shape, x in zip(expected_feature_map_shapes, feature_maps):
+      self.assertTrue(x.shape.is_compatible_with(expected_shape))
+
+  def test_spaghettinet_edgetpu_s(self):
+    expected_feature_map_shapes = [(1, 20, 20, 120), (1, 10, 10, 168),
+                                   (1, 5, 5, 136), (1, 3, 3, 136),
+                                   (1, 3, 3, 64)]
+    self._test_spaghettinet_returns_correct_shapes('spaghettinet_edgetpu_s',
+                                                   expected_feature_map_shapes)
+
+  def test_spaghettinet_edgetpu_m(self):
+    expected_feature_map_shapes = [(1, 20, 20, 120), (1, 10, 10, 168),
+                                   (1, 5, 5, 136), (1, 3, 3, 136),
+                                   (1, 3, 3, 64)]
+    self._test_spaghettinet_returns_correct_shapes('spaghettinet_edgetpu_m',
+                                                   expected_feature_map_shapes)
+
+  def test_spaghettinet_edgetpu_l(self):
+    expected_feature_map_shapes = [(1, 20, 20, 120), (1, 10, 10, 168),
+                                   (1, 5, 5, 112), (1, 3, 3, 128),
+                                   (1, 3, 3, 64)]
+    self._test_spaghettinet_returns_correct_shapes('spaghettinet_edgetpu_l',
+                                                   expected_feature_map_shapes)
+
+  def _check_quantization(self, model_fn):
+    checkpoint_dir = self.get_temp_dir()
+
+    with tf.Graph().as_default() as training_graph:
+      model_fn(is_training=True)
+      contrib_quantize.experimental_create_training_graph(training_graph)
+      with self.session(graph=training_graph) as sess:
+        sess.run(tf.global_variables_initializer())
+        tf.train.Saver().save(sess, checkpoint_dir)
+
+    with tf.Graph().as_default() as eval_graph:
+      model_fn(is_training=False)
+      contrib_quantize.experimental_create_eval_graph(eval_graph)
+      with self.session(graph=eval_graph) as sess:
+        tf.train.Saver().restore(sess, checkpoint_dir)
+
+  def _test_spaghettinet_quantization(self, arch_name):
+    def model_fn(is_training):
+      image = tf.random.normal((1, 320, 320, 3))
+      feature_extractor = self._create_feature_extractor(
+          arch_name, is_training=is_training)
+      feature_extractor.extract_features(image)
+    self._check_quantization(model_fn)
+
+  def test_spaghettinet_edgetpu_s_quantization(self):
+    self._test_spaghettinet_quantization('spaghettinet_edgetpu_s')
+
+  def test_spaghettinet_edgetpu_m_quantization(self):
+    self._test_spaghettinet_quantization('spaghettinet_edgetpu_m')
+
+  def test_spaghettinet_edgetpu_l_quantization(self):
+    self._test_spaghettinet_quantization('spaghettinet_edgetpu_l')
+
+
+if __name__ == '__main__':
+  tf.test.main()
--- a/research/object_detection/protos/ssd.proto
+++ b/research/object_detection/protos/ssd.proto
@@ -5,13 +5,13 @@ package object_detection.protos;
 import "object_detection/protos/anchor_generator.proto";
 import "object_detection/protos/box_coder.proto";
 import "object_detection/protos/box_predictor.proto";
+import "object_detection/protos/fpn.proto";
 import "object_detection/protos/hyperparams.proto";
 import "object_detection/protos/image_resizer.proto";
 import "object_detection/protos/losses.proto";
 import "object_detection/protos/matcher.proto";
 import "object_detection/protos/post_processing.proto";
 import "object_detection/protos/region_similarity_calculator.proto";
-import "object_detection/protos/fpn.proto";

 // Configuration for Single Shot Detection (SSD) models.
 // Next id: 27
@@ -146,7 +146,7 @@ message Ssd {
  optional MaskHead mask_head_config = 25;
 }

-// Next id: 20.
+// Next id: 21.
 message SsdFeatureExtractor {
  reserved 6;

@@ -202,5 +202,8 @@ message SsdFeatureExtractor {
  // The number of SSD layers.
  optional int32 num_layers = 12 [default = 6];

+
+  // The SpaghettiNet architecture name.
+  optional string spaghettinet_arch_name = 20;
 }