Commit ac8d0651 authored by A. Unique TensorFlower's avatar A. Unique TensorFlower Committed by TF Object Detection Team
Browse files

Add SpaghettiNet Feature Extractor

PiperOrigin-RevId: 402944074
parent fccb57b1
......@@ -73,6 +73,23 @@ documentation of the Object Detection API:
## What's New
### SpaghettiNet for Edge TPU
We have released SpaghettiNet models optimized for the Edge TPU in the [Google Tensor SoC](https://blog.google/products/pixel/google-tensor-debuts-new-pixel-6-fall/).
SpaghettiNet models are derived from a TuNAS search space that incorporates
group convolution based [Inverted Bottleneck](https://arxiv.org/abs/1801.04381) blocks.
The backbone and detection head are connected through [MnasFPN](https://arxiv.org/abs/1912.01106)-style feature map
merging and searched jointly.
When compared to MobileDet-EdgeTPU, SpaghettiNet models achieve +2.2% mAP
(absolute) on COCO17 at the same latency. They also consume <70% of the energy
used by MobileDet-EdgeTPU to achieve the same accuracy.
Sample config available [here](configs/tf1/ssd_spaghettinet_edgetpu_320x320_coco17_sync_4x4.config).
<b>Thanks to contributors</b>: Marie White, Hao Xu, Hanxiao Liu and Suyog Gupta.
### DeepMAC architecture
We have released our new architecture, **DeepMAC**, designed for partially
......
......@@ -93,6 +93,7 @@ if tf_version.is_tf1():
from object_detection.models.ssd_mobiledet_feature_extractor import SSDMobileDetDSPFeatureExtractor
from object_detection.models.ssd_mobiledet_feature_extractor import SSDMobileDetEdgeTPUFeatureExtractor
from object_detection.models.ssd_mobiledet_feature_extractor import SSDMobileDetGPUFeatureExtractor
from object_detection.models.ssd_spaghettinet_feature_extractor import SSDSpaghettinetFeatureExtractor
from object_detection.models.ssd_pnasnet_feature_extractor import SSDPNASNetFeatureExtractor
from object_detection.predictors import rfcn_box_predictor
# pylint: enable=g-import-not-at-top
......@@ -229,6 +230,8 @@ if tf_version.is_tf1():
SSDMobileDetEdgeTPUFeatureExtractor,
'ssd_mobiledet_gpu':
SSDMobileDetGPUFeatureExtractor,
'ssd_spaghettinet':
SSDSpaghettinetFeatureExtractor,
}
FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {
......@@ -350,6 +353,12 @@ def _build_ssd_feature_extractor(feature_extractor_config,
})
if feature_extractor_config.HasField('spaghettinet_arch_name'):
kwargs.update({
'spaghettinet_arch_name':
feature_extractor_config.spaghettinet_arch_name,
})
if feature_extractor_config.HasField('fpn'):
kwargs.update({
'fpn_min_level':
......
# SpaghettiNet Feature Extractor optimized for EdgeTPU.
# Trained on COCO17 from scratch.
#
# spaghettinet_edgetpu_s
# Achieves 26.2% mAP on COCO17 at 400k steps.
# 1.31ms Edge TPU latency at 1 billion MACs, 3.4 million params.
#
# spaghettinet_edgetpu_m
# Achieves 27.4% mAP on COCO17 at 400k steps.
# 1.55ms Edge TPU latency at 1.25 billion MACs, 4.1 million params.
#
# spaghettinet_edgetpu_l
# Achieves 28.02% mAP on COCO17 at 400k steps.
# 1.75ms Edge TPU latency at 1.57 billion MACs, 5.7 million params.
#
# TPU-compatible.
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
ssd_anchor_generator {
num_layers: 5
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333333
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
convolutional_box_predictor {
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: false
dropout_keep_probability: 0.8
kernel_size: 3
use_depthwise: true
box_code_size: 4
apply_sigmoid_to_scores: false
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.97,
epsilon: 0.001,
}
}
}
}
feature_extractor {
type: 'ssd_spaghettinet'
# 3 architectures are supported and performance for each is listed at the top of this config file.
#spaghettinet_arch_name: 'spaghettinet_edgetpu_s'
spaghettinet_arch_name: 'spaghettinet_edgetpu_m'
#spaghettinet_arch_name: 'spaghettinet_edgetpu_l'
use_explicit_padding: false
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.75,
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
delta: 1.0
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
use_static_shapes: true
}
score_converter: SIGMOID
}
}
}
train_config: {
batch_size: 512
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 32
num_steps: 400000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
ssd_random_crop {
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 0.8
total_steps: 400000
warmup_learning_rate: 0.13333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
graph_rewriter {
quantization {
delay: 40000
weight_bits: 8
activation_bits: 8
}
}
......@@ -173,10 +173,19 @@ Model name
[faster_rcnn_resnet101_snapshot_serengeti](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_snapshot_serengeti_2020_06_10.tar.gz) | 38 | Boxes
[context_rcnn_resnet101_snapshot_serengeti](http://download.tensorflow.org/models/object_detection/context_rcnn_resnet101_snapshot_serengeti_2020_06_10.tar.gz) | 56 | Boxes
## Pixel 6 Edge TPU models
Model name | Pixel 6 Edge TPU Speed (ms) | Pixel 6 Speed with Post-processing on CPU (ms) | COCO 2017 mAP (uint8) | Outputs
----------------------------------------------------------------------------------------------------------------------------- | :-------------------------: | :--------------------------------------------: | :-------------------: | :-----:
[spaghettinet_edgetpu_s](http://download.tensorflow.org/models/object_detection/tf1/spaghettinet_edgetpu_s_2021_10_13.tar.gz) | 1.3 | 1.8 | 26.3 | Boxes
[spaghettinet_edgetpu_m](http://download.tensorflow.org/models/object_detection/tf1/spaghettinet_edgetpu_m_2021_10_13.tar.gz) | 1.4 | 1.9 | 27.4 | Boxes
[spaghettinet_edgetpu_l](http://download.tensorflow.org/models/object_detection/tf1/spaghettinet_edgetpu_l_2021_10_13.tar.gz) | 1.7 | 2.1 | 28.0 | Boxes
[^1]: See [MSCOCO evaluation protocol](http://cocodataset.org/#detections-eval).
The COCO mAP numbers here are evaluated on COCO 14 minival set (note that
our split is different from COCO 17 Val). A full list of image ids used in
our split could be found
The COCO mAP numbers, with the exception of the Pixel 6 Edge TPU models,
are evaluated on COCO 14 minival set (note that our split is different
from COCO 17 Val). A full list of image ids used in our split could be
found
[here](https://github.com/tensorflow/models/blob/master/research/object_detection/data/mscoco_minival_ids.txt).
[^2]: This is PASCAL mAP with a slightly different way of true positives
computation: see
......
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for ssd_spaghettinet_feature_extractor."""
import unittest
import tensorflow.compat.v1 as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_spaghettinet_feature_extractor
from object_detection.utils import tf_version
try:
from tensorflow.contrib import quantize as contrib_quantize # pylint: disable=g-import-not-at-top
except: # pylint: disable=bare-except
pass
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class SSDSpaghettiNetFeatureExtractorTest(
    ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
  """Output-shape and quantization-rewrite tests for SSDSpaghettinetFeatureExtractor.

  The whole class is skipped when running under TF2.
  """

  def _create_feature_extractor(self, arch_name, is_training=True):
    """Constructs the feature extractor under test.

    Args:
      arch_name: value forwarded as `spaghettinet_arch_name`.
      is_training: value forwarded as `is_training`.

    Returns:
      A new SSDSpaghettinetFeatureExtractor instance.
    """
    extractor_cls = (
        ssd_spaghettinet_feature_extractor.SSDSpaghettinetFeatureExtractor)
    return extractor_cls(
        is_training=is_training,
        spaghettinet_arch_name=arch_name,
        depth_multiplier=1.0,
        min_depth=4,
        pad_to_multiple=1,
        conv_hyperparams_fn=self.conv_hyperparams_fn)

  def _test_spaghettinet_returns_correct_shapes(self, arch_name,
                                                expected_feature_map_shapes):
    """Checks the number and static shapes of the extracted feature maps.

    Args:
      arch_name: architecture name passed to `_create_feature_extractor`.
      expected_feature_map_shapes: one expected shape per feature map, in the
        order the extractor returns them.
    """
    inputs = tf.random.normal((1, 320, 320, 3))
    extractor = self._create_feature_extractor(arch_name)
    outputs = extractor.extract_features(inputs)

    self.assertEqual(len(expected_feature_map_shapes), len(outputs))
    for wanted_shape, feature_map in zip(expected_feature_map_shapes, outputs):
      # Compatibility check on the static shape, not strict tuple equality.
      self.assertTrue(feature_map.shape.is_compatible_with(wanted_shape))

  def test_spaghettinet_edgetpu_s(self):
    """Feature map shapes for 'spaghettinet_edgetpu_s' on a 320x320 input."""
    expected_feature_map_shapes = [
        (1, 20, 20, 120),
        (1, 10, 10, 168),
        (1, 5, 5, 136),
        (1, 3, 3, 136),
        (1, 3, 3, 64),
    ]
    self._test_spaghettinet_returns_correct_shapes(
        'spaghettinet_edgetpu_s', expected_feature_map_shapes)

  def test_spaghettinet_edgetpu_m(self):
    """Feature map shapes for 'spaghettinet_edgetpu_m' on a 320x320 input."""
    expected_feature_map_shapes = [
        (1, 20, 20, 120),
        (1, 10, 10, 168),
        (1, 5, 5, 136),
        (1, 3, 3, 136),
        (1, 3, 3, 64),
    ]
    self._test_spaghettinet_returns_correct_shapes(
        'spaghettinet_edgetpu_m', expected_feature_map_shapes)

  def test_spaghettinet_edgetpu_l(self):
    """Feature map shapes for 'spaghettinet_edgetpu_l' on a 320x320 input."""
    expected_feature_map_shapes = [
        (1, 20, 20, 120),
        (1, 10, 10, 168),
        (1, 5, 5, 112),
        (1, 3, 3, 128),
        (1, 3, 3, 64),
    ]
    self._test_spaghettinet_returns_correct_shapes(
        'spaghettinet_edgetpu_l', expected_feature_map_shapes)

  def _check_quantization(self, model_fn):
    """Round-trips a checkpoint between quantize-rewritten train/eval graphs.

    Builds the model in a fresh graph with `is_training=True`, applies
    `contrib_quantize.experimental_create_training_graph`, initializes the
    variables and saves them. Then builds the model again in a second fresh
    graph with `is_training=False`, applies
    `contrib_quantize.experimental_create_eval_graph`, and restores the saved
    variables into it.

    Args:
      model_fn: callable accepting an `is_training` keyword argument that
        builds the model in the current default graph.
    """
    ckpt_path = self.get_temp_dir()

    with tf.Graph().as_default() as train_g:
      model_fn(is_training=True)
      contrib_quantize.experimental_create_training_graph(train_g)
      with self.session(graph=train_g) as train_sess:
        train_sess.run(tf.global_variables_initializer())
        tf.train.Saver().save(train_sess, ckpt_path)

    with tf.Graph().as_default() as eval_g:
      model_fn(is_training=False)
      contrib_quantize.experimental_create_eval_graph(eval_g)
      with self.session(graph=eval_g) as eval_sess:
        tf.train.Saver().restore(eval_sess, ckpt_path)

  def _test_spaghettinet_quantization(self, arch_name):
    """Runs `_check_quantization` on the extractor built for `arch_name`."""

    def model_fn(is_training):
      # Build the extractor on a random 320x320 image; only graph
      # construction matters here, so the returned feature maps are dropped.
      inputs = tf.random.normal((1, 320, 320, 3))
      extractor = self._create_feature_extractor(
          arch_name, is_training=is_training)
      extractor.extract_features(inputs)

    self._check_quantization(model_fn)

  def test_spaghettinet_edgetpu_s_quantization(self):
    """Quantization round-trip for 'spaghettinet_edgetpu_s'."""
    self._test_spaghettinet_quantization('spaghettinet_edgetpu_s')

  def test_spaghettinet_edgetpu_m_quantization(self):
    """Quantization round-trip for 'spaghettinet_edgetpu_m'."""
    self._test_spaghettinet_quantization('spaghettinet_edgetpu_m')

  def test_spaghettinet_edgetpu_l_quantization(self):
    """Quantization round-trip for 'spaghettinet_edgetpu_l'."""
    self._test_spaghettinet_quantization('spaghettinet_edgetpu_l')
if __name__ == '__main__':
tf.test.main()
......@@ -5,13 +5,13 @@ package object_detection.protos;
import "object_detection/protos/anchor_generator.proto";
import "object_detection/protos/box_coder.proto";
import "object_detection/protos/box_predictor.proto";
import "object_detection/protos/fpn.proto";
import "object_detection/protos/hyperparams.proto";
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/losses.proto";
import "object_detection/protos/matcher.proto";
import "object_detection/protos/post_processing.proto";
import "object_detection/protos/region_similarity_calculator.proto";
import "object_detection/protos/fpn.proto";
// Configuration for Single Shot Detection (SSD) models.
// Next id: 27
......@@ -146,7 +146,7 @@ message Ssd {
optional MaskHead mask_head_config = 25;
}
// Next id: 20.
// Next id: 21.
message SsdFeatureExtractor {
reserved 6;
......@@ -202,5 +202,8 @@ message SsdFeatureExtractor {
// The number of SSD layers.
optional int32 num_layers = 12 [default = 6];
// The SpaghettiNet architecture name.
optional string spaghettinet_arch_name = 20;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment