Commit b968a6ce authored by Mark Sandler, committed by Jonathan Huang

Merged commit includes the following changes: (#7800)

280142968  by Zhichao Lu:

    Open-source MobilenetEdgeTPU + ssdlite into the third-party object detection APIs on EdgeTPU.

--
280134001  by Zhichao Lu:

    Adds MobilenetEdgeTPU + ssdlite into the internal object detection APIs on EdgeTPU.

--
278941778  by Zhichao Lu:

    Add support for fixed input shapes for 'encoded_image_string_tensor' and 'tf_example' inputs.
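
    A rough sketch of how a fixed shape might be requested at export time; the
    call below mirrors the public object_detection exporter, but the exact
    flags and how the shape is applied to string inputs come from this commit
    and are assumed here, not reproduced:

        from google.protobuf import text_format
        from object_detection import exporter
        from object_detection.protos import pipeline_pb2

        # Hypothetical export with a fixed shape for the decoded image behind
        # the 'encoded_image_string_tensor' input (paths are placeholders).
        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        with open('PATH_TO_BE_CONFIGURED/pipeline.config') as f:
          text_format.Merge(f.read(), pipeline_config)
        exporter.export_inference_graph(
            input_type='encoded_image_string_tensor',
            pipeline_config=pipeline_config,
            trained_checkpoint_prefix='PATH_TO_BE_CONFIGURED/model.ckpt',
            output_directory='PATH_TO_BE_CONFIGURED/exported',
            input_shape=[1, 320, 320, 3])  # assumed fixed-shape argument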

--
278933274  by Zhichao Lu:

    Adding a foolproof check to avoid using a 1x1 depthwise conv op.

--
278762192  by Zhichao Lu:

    Ensure correct number of iterations after training resumes.

--
278746440  by Zhichao Lu:

    Internal change.

--
278006953  by Zhichao Lu:

    Internal changes to tf.contrib symbols

--
278006330  by Zhichao Lu:

    Internal changes to tf.contrib symbols

--
277593959  by Zhichao Lu:

    Make ssd_feature_extractor_test.py PY3-compatible. "six.moves.zip" resolves to "itertools.izip" in Python 2 and to the builtin "zip" in Python 3.
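
    For reference, the portable spelling lives in six.moves; a minimal example:

        from six.moves import zip  # itertools.izip on PY2, builtin zip on PY3

        for name, depth in zip(['layer_1', 'layer_2'], [256, 512]):
          print(name, depth)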

--
277344551  by Zhichao Lu:

    Internal change.

--
277154953  by Zhichao Lu:

    Conditionally use Keras-based optimizers so that checkpointing works
    correctly. This change also enables summaries on TPU, which were previously
    disabled due to a bug.
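
    A minimal sketch of the idea (the flag and function are illustrative, not
    the code in this commit): choose the Keras optimizer when object-based
    checkpointing is wanted, otherwise fall back to the tf.train one.

        import tensorflow as tf

        def build_momentum_optimizer(learning_rate, momentum=0.9,
                                     use_keras=True):
          """Returns a Keras or tf.train momentum optimizer (illustrative)."""
          if use_keras:
            # Keras optimizers track their slots as object attributes, so they
            # are picked up by object-based checkpointing.
            return tf.keras.optimizers.SGD(learning_rate=learning_rate,
                                           momentum=momentum)
          return tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                            momentum=momentum)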

--
277087572  by Zhichao Lu:

    Fix resizing of boxes when using keep_aspect_ratio_resizer with padding.
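
    The underlying issue: with keep_aspect_ratio_resizer plus
    pad_to_max_dimension, normalized boxes computed on the resized image have
    to be rescaled to the padded canvas. A rough numpy illustration of that
    correction (not the actual fix in this commit):

        import numpy as np

        def rescale_boxes_for_padding(boxes, resized_hw, padded_hw):
          """Rescales normalized [ymin, xmin, ymax, xmax] boxes from the
          resized image to the zero-padded canvas (illustrative only)."""
          rh, rw = resized_hw
          ph, pw = padded_hw
          return boxes * np.array([rh / ph, rw / pw, rh / ph, rw / pw])

        # A box covering the whole 320x240 resized image spans only part of
        # the 320x320 padded canvas.
        print(rescale_boxes_for_padding(
            np.array([[0.0, 0.0, 1.0, 1.0]]), (320, 240), (320, 320)))
        # -> [[0.   0.   1.   0.75]]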

--
275898543  by Zhichao Lu:

    Support label_map_proto as input in label_map_util.

--
275347137  by Zhichao Lu:

    Add a force_no_resize flag in eval.proto which replaces
    the resize config with an identity resizer. This is useful
    when evaluating at the original image resolution.
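
    For example, the new field can be switched on in the eval_config section
    of a pipeline config; a small sketch parsing it with the generated
    bindings:

        from google.protobuf import text_format
        from object_detection.protos import eval_pb2

        eval_config = text_format.Parse("""
            num_examples: 8000
            force_no_resize: true  # evaluate at the original image resolution
        """, eval_pb2.EvalConfig())
        print(eval_config.force_no_resize)  # True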

--

PiperOrigin-RevId: 280142968
parent c3bd5082
@@ -21,11 +21,12 @@ All the mask prediction heads have a predict function that receives the
"""
import math
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from object_detection.predictors.heads import head
from object_detection.utils import ops
slim = tf.contrib.slim
slim = contrib_slim
class MaskRCNNMaskHead(head.Head):
......
@@ -3,6 +3,7 @@ syntax = "proto2";
package object_detection.protos;
// Message for configuring DetectionModel evaluation jobs (eval.py).
// Next id - 30
message EvalConfig {
optional uint32 batch_size = 25 [default = 1];
// Number of visualization images to generate.
@@ -85,4 +86,9 @@ message EvalConfig {
// tensor dictionary, so that they can be displayed in Tensorboard.
optional bool retain_original_image_additional_channels = 28
[default = false];
// When this flag is set, images are not resized during evaluation.
// When this flag is not set (default case), images are resized according
// to the image_resizer config in the model during evaluation.
optional bool force_no_resize = 29 [default=false];
}
@@ -194,6 +194,7 @@ message SsdFeatureExtractor {
// The number of SSD layers.
optional int32 num_layers = 12 [default = 6];
}
// Configuration for Feature Pyramid Networks.
@@ -222,3 +223,4 @@ message FeaturePyramidNetworks {
optional int32 additional_layer_depth = 3 [default = 256];
}
# SSDLite with MobileNetEdgeTPU feature extractor.
# Trained on COCO14, initialized from scratch.
# TPU-compatible.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
ssd_anchor_generator {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
convolutional_box_predictor {
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: false
dropout_keep_probability: 0.8
kernel_size: 3
use_depthwise: true
box_code_size: 4
apply_sigmoid_to_scores: false
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.97,
epsilon: 0.001,
}
}
}
}
feature_extractor {
type: 'ssd_mobilenet_edgetpu'
min_depth: 16
depth_multiplier: 1.0
use_depthwise: true
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.97,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.75,
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
delta: 1.0
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
use_static_shapes: true
}
score_converter: SIGMOID
}
}
}
train_config: {
batch_size: 512
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 32
num_steps: 400000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
ssd_random_crop {
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 0.8
total_steps: 400000
warmup_learning_rate: 0.13333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
}
eval_config: {
num_examples: 8000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
shuffle: false
num_readers: 1
}
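
For readers sanity-checking the learning-rate schedule in the config above, a
rough numpy approximation of a cosine decay with linear warmup using the same
numbers (an illustration of what the config requests, not the library's own
implementation):

    import numpy as np

    def cosine_decay_with_warmup(step, lr_base=0.8, total_steps=400000,
                                 warmup_lr=0.13333, warmup_steps=2000):
      """Approximate learning rate at a given global step (illustrative)."""
      if step < warmup_steps:
        # Linear warmup from warmup_lr up to lr_base.
        return warmup_lr + (lr_base - warmup_lr) * step / warmup_steps
      # Cosine decay from lr_base down to 0 over the remaining steps.
      progress = (step - warmup_steps) / float(total_steps - warmup_steps)
      return 0.5 * lr_base * (1.0 + np.cos(np.pi * progress))

    for s in (0, 2000, 200000, 400000):
      print(s, round(cosine_decay_with_warmup(s), 5))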
# Quantized Training SSDLite with MobileNetEdgeTPU feature extractor.
# Trained on COCO14, initialized from scratch.
# TPU-compatible.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
ssd_anchor_generator {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
convolutional_box_predictor {
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: false
dropout_keep_probability: 0.8
kernel_size: 3
use_depthwise: true
box_code_size: 4
apply_sigmoid_to_scores: false
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.97,
epsilon: 0.001,
}
}
}
}
feature_extractor {
type: 'ssd_mobilenet_edgetpu'
min_depth: 16
depth_multiplier: 1.0
use_depthwise: true
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.97,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.75,
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
delta: 1.0
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
use_static_shapes: true
}
score_converter: SIGMOID
}
}
}
train_config: {
batch_size: 512
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 32
num_steps: 400000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
ssd_random_crop {
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 0.8
total_steps: 400000
warmup_learning_rate: 0.13333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
}
eval_config: {
num_examples: 8000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
shuffle: false
num_readers: 1
}
graph_rewriter {
quantization {
delay: 30000
activation_bits: 8
weight_bits: 8
}
}
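
The graph_rewriter block above turns on quantization-aware training after a
delay of 30000 steps. In TF 1.x terms this corresponds roughly to the contrib
quantize rewrite sketched below (a simplified stand-in for illustration, not
the Object Detection API's own rewriter plumbing):

    import tensorflow as tf

    g = tf.Graph()
    with g.as_default():
      # Stand-in for the float training graph built from the config.
      images = tf.placeholder(tf.float32, [1, 320, 320, 3])
      net = tf.layers.conv2d(images, filters=8, kernel_size=3)
      # Insert fake-quantization ops, but only activate them after 30000
      # steps so the float model stabilizes first; the defaults use 8-bit
      # weights and activations, matching the config.
      tf.contrib.quantize.experimental_create_training_graph(
          input_graph=g, quant_delay=30000)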
@@ -20,6 +20,7 @@ from __future__ import print_function
import logging
from six import string_types
from six.moves import range
import tensorflow as tf
from google.protobuf import text_format
@@ -145,13 +146,14 @@ def load_labelmap(path):
return label_map
def get_label_map_dict(label_map_path,
def get_label_map_dict(label_map_path_or_proto,
use_display_name=False,
fill_in_gaps_and_background=False):
"""Reads a label map and returns a dictionary of label names to id.
Args:
label_map_path: path to StringIntLabelMap proto text file.
label_map_path_or_proto: path to StringIntLabelMap proto text file or the
proto itself.
use_display_name: whether to use the label map items' display names as keys.
fill_in_gaps_and_background: whether to fill in gaps and background with
respect to the id field in the proto. The id: 0 is reserved for the
@@ -166,7 +168,12 @@ def get_label_map_dict(label_map_path,
ValueError: if fill_in_gaps_and_background and label_map has non-integer or
negative values.
"""
label_map = load_labelmap(label_map_path)
if isinstance(label_map_path_or_proto, string_types):
label_map = load_labelmap(label_map_path_or_proto)
else:
_validate_label_map(label_map_path_or_proto)
label_map = label_map_path_or_proto
label_map_dict = {}
for item in label_map.item:
if use_display_name:
......
@@ -57,6 +57,23 @@ class LabelMapUtilTest(tf.test.TestCase):
self.assertEqual(label_map_dict['dog'], 1)
self.assertEqual(label_map_dict['cat'], 2)
def test_get_label_map_dict_from_proto(self):
label_map_string = """
item {
id:2
name:'cat'
}
item {
id:1
name:'dog'
}
"""
label_map_proto = text_format.Parse(
label_map_string, string_int_label_map_pb2.StringIntLabelMap())
label_map_dict = label_map_util.get_label_map_dict(label_map_proto)
self.assertEqual(label_map_dict['dog'], 1)
self.assertEqual(label_map_dict['cat'], 2)
def test_get_label_map_dict_display(self):
label_map_string = """
item {
......
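
Putting the label_map_util change together: get_label_map_dict now accepts
either a path to a StringIntLabelMap text file or an already-parsed proto. A
short usage sketch:

    from google.protobuf import text_format
    from object_detection.protos import string_int_label_map_pb2
    from object_detection.utils import label_map_util

    label_map_proto = text_format.Parse(
        "item { id: 1 name: 'dog' } item { id: 2 name: 'cat' }",
        string_int_label_map_pb2.StringIntLabelMap())
    print(label_map_util.get_label_map_dict(label_map_proto))
    # {'dog': 1, 'cat': 2}; passing a file path still works as before.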