Unverified Commit fd7b6887, authored by Jonathan Huang, committed by GitHub

Merge pull request #3293 from pkulzc/master

Internal changes of object_detection 
parents f98ec55e 1efe98bb
"""Tests for ssd resnet v1 FPN feature extractors."""
import tensorflow as tf
from object_detection.models import ssd_resnet_v1_fpn_feature_extractor
from object_detection.models import ssd_resnet_v1_fpn_feature_extractor_testbase
class SSDResnet50V1FeatureExtractorTest(
ssd_resnet_v1_fpn_feature_extractor_testbase.
SSDResnetFPNFeatureExtractorTestBase):
"""SSDResnet50v1Fpn feature extractor test."""
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple):
min_depth = 32
conv_hyperparams = {}
batch_norm_trainable = True
is_training = True
return ssd_resnet_v1_fpn_feature_extractor.SSDResnet50V1FpnFeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable)
def _resnet_scope_name(self):
return 'resnet_v1_50'
class SSDResnet101V1FeatureExtractorTest(
ssd_resnet_v1_fpn_feature_extractor_testbase.
SSDResnetFPNFeatureExtractorTestBase):
"""SSDResnet101v1Fpn feature extractor test."""
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple):
min_depth = 32
conv_hyperparams = {}
batch_norm_trainable = True
is_training = True
return (
ssd_resnet_v1_fpn_feature_extractor.SSDResnet101V1FpnFeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable))
def _resnet_scope_name(self):
return 'resnet_v1_101'
class SSDResnet152V1FeatureExtractorTest(
ssd_resnet_v1_fpn_feature_extractor_testbase.
SSDResnetFPNFeatureExtractorTestBase):
"""SSDResnet152v1Fpn feature extractor test."""
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple):
min_depth = 32
conv_hyperparams = {}
batch_norm_trainable = True
is_training = True
return (
ssd_resnet_v1_fpn_feature_extractor.SSDResnet152V1FpnFeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable))
def _resnet_scope_name(self):
return 'resnet_v1_152'
if __name__ == '__main__':
tf.test.main()
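For readers skimming the diff, here is a minimal usage sketch (not part of this PR) showing how one of these extractors is exercised, assuming the TF 1.x graph-mode API and the constructor signature used by the tests above:

# Usage sketch only; mirrors the constructor arguments the tests above pass.
import tensorflow as tf
from object_detection.models import ssd_resnet_v1_fpn_feature_extractor

extractor = ssd_resnet_v1_fpn_feature_extractor.SSDResnet50V1FpnFeatureExtractor(
    True,   # is_training
    1.0,    # depth_multiplier
    32,     # min_depth
    1,      # pad_to_multiple
    {},     # conv_hyperparams (the tests pass an empty dict)
    True)   # batch_norm_trainable

images = tf.placeholder(tf.float32, shape=(2, 256, 256, 3))
feature_maps = extractor.extract_features(extractor.preprocess(images))
# For a 256x256 input, the five FPN levels should be 32, 16, 8, 4 and 2 pixels
# on a side, matching expected_feature_map_shape in the test base below.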
"""Tests for ssd resnet v1 FPN feature extractors."""
import abc
import numpy as np
import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test
class SSDResnetFPNFeatureExtractorTestBase(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
"""Helper test class for SSD Resnet v1 FPN feature extractors."""
@abc.abstractmethod
def _resnet_scope_name(self):
pass
@abc.abstractmethod
def _fpn_scope_name(self):
return 'fpn'
def test_extract_features_returns_correct_shapes_256(self):
image_height = 256
image_width = 256
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 32, 32, 256), (2, 16, 16, 256),
(2, 8, 8, 256), (2, 4, 4, 256),
(2, 2, 2, 256)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self):
image_height = 256
image_width = 256
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 32, 32, 256), (2, 16, 16, 256),
(2, 8, 8, 256), (2, 4, 4, 256),
(2, 2, 2, 256)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
image_height = 254
image_width = 254
depth_multiplier = 1.0
pad_to_multiple = 32
expected_feature_map_shape = [(2, 32, 32, 256), (2, 16, 16, 256),
(2, 8, 8, 256), (2, 4, 4, 256),
(2, 2, 2, 256)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_raises_error_with_invalid_image_size(self):
image_height = 32
image_width = 32
depth_multiplier = 1.0
pad_to_multiple = 1
self.check_extract_features_raises_error_with_invalid_image_size(
image_height, image_width, depth_multiplier, pad_to_multiple)
def test_preprocess_returns_correct_value_range(self):
image_height = 128
image_width = 128
depth_multiplier = 1
pad_to_multiple = 1
test_image = np.random.rand(4, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertAllClose(preprocessed_image,
test_image - [[123.68, 116.779, 103.939]])
def test_variables_only_created_in_scope(self):
depth_multiplier = 1
pad_to_multiple = 1
g = tf.Graph()
with g.as_default():
feature_extractor = self._create_feature_extractor(
depth_multiplier, pad_to_multiple)
preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
feature_extractor.extract_features(preprocessed_inputs)
variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
for variable in variables:
self.assertTrue(
variable.name.startswith(self._resnet_scope_name())
or variable.name.startswith(self._fpn_scope_name()))
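The expected_feature_map_shape values above are not arbitrary: the FPN emits five levels with strides 8 through 128, each with 256 channels. A sketch of the arithmetic:

# Derivation of the expected shapes used in the tests above (plain arithmetic).
batch, image_size, fpn_depth = 2, 256, 256
strides = [8, 16, 32, 64, 128]  # five FPN levels
expected = [(batch, image_size // s, image_size // s, fpn_depth)
            for s in strides]
assert expected == [(2, 32, 32, 256), (2, 16, 16, 256), (2, 8, 8, 256),
                    (2, 4, 4, 256), (2, 2, 2, 256)]

The same arithmetic explains the pad_to_multiple test: a 254x254 input padded up to a multiple of 32 becomes 256x256 and yields identical shapes.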
@@ -35,6 +35,7 @@
"from io import StringIO\n",
"from matplotlib import pyplot as plt\n",
"from PIL import Image\n",
"from object_detection.utils import ops as utils_ops\n",
"\n",
"if tf.__version__ < '1.4.0':\n",
" raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')\n"
@@ -223,6 +224,59 @@
"IMAGE_SIZE = (12, 8)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def run_inference_for_single_image(image, graph):\n",
" with graph.as_default():\n",
" with tf.Session() as sess:\n",
" # Get handles to input and output tensors\n",
" ops = tf.get_default_graph().get_operations()\n",
" all_tensor_names = {output.name for op in ops for output in op.outputs}\n",
" tensor_dict = {}\n",
" for key in [\n",
" 'num_detections', 'detection_boxes', 'detection_scores',\n",
" 'detection_classes', 'detection_masks'\n",
" ]:\n",
" tensor_name = key + ':0'\n",
" if tensor_name in all_tensor_names:\n",
" tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(\n",
" tensor_name)\n",
" if 'detection_masks' in tensor_dict:\n",
" # The following processing is only for single image\n",
" detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])\n",
" detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])\n",
" # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.\n",
" real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)\n",
" detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])\n",
" detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])\n",
" detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(\n",
" detection_masks, detection_boxes, image.shape[0], image.shape[1])\n",
" detection_masks_reframed = tf.cast(\n",
" tf.greater(detection_masks_reframed, 0.5), tf.uint8)\n",
" # Follow the convention by adding back the batch dimension\n",
" tensor_dict['detection_masks'] = tf.expand_dims(\n",
" detection_masks_reframed, 0)\n",
" image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')\n",
"\n",
" # Run inference\n",
" output_dict = sess.run(tensor_dict,\n",
" feed_dict={image_tensor: np.expand_dims(image, 0)})\n",
"\n",
" # all outputs are float32 numpy arrays, so convert types as appropriate\n",
" output_dict['num_detections'] = int(output_dict['num_detections'][0])\n",
" output_dict['detection_classes'] = output_dict[\n",
" 'detection_classes'][0].astype(np.uint8)\n",
" output_dict['detection_boxes'] = output_dict['detection_boxes'][0]\n",
" output_dict['detection_scores'] = output_dict['detection_scores'][0]\n",
" if 'detection_masks' in output_dict:\n",
" output_dict['detection_masks'] = output_dict['detection_masks'][0]\n",
" return output_dict"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -231,39 +285,27 @@
},
"outputs": [],
"source": [
- "with detection_graph.as_default():\n",
- "  with tf.Session(graph=detection_graph) as sess:\n",
- "    # Definite input and output Tensors for detection_graph\n",
- "    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')\n",
- "    # Each box represents a part of the image where a particular object was detected.\n",
- "    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')\n",
- "    # Each score represent how level of confidence for each of the objects.\n",
- "    # Score is shown on the result image, together with the class label.\n",
- "    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')\n",
- "    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')\n",
- "    num_detections = detection_graph.get_tensor_by_name('num_detections:0')\n",
- "    for image_path in TEST_IMAGE_PATHS:\n",
- "      image = Image.open(image_path)\n",
- "      # the array based representation of the image will be used later in order to prepare the\n",
- "      # result image with boxes and labels on it.\n",
- "      image_np = load_image_into_numpy_array(image)\n",
- "      # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n",
- "      image_np_expanded = np.expand_dims(image_np, axis=0)\n",
- "      # Actual detection.\n",
- "      (boxes, scores, classes, num) = sess.run(\n",
- "          [detection_boxes, detection_scores, detection_classes, num_detections],\n",
- "          feed_dict={image_tensor: image_np_expanded})\n",
- "      # Visualization of the results of a detection.\n",
- "      vis_util.visualize_boxes_and_labels_on_image_array(\n",
- "          image_np,\n",
- "          np.squeeze(boxes),\n",
- "          np.squeeze(classes).astype(np.int32),\n",
- "          np.squeeze(scores),\n",
- "          category_index,\n",
- "          use_normalized_coordinates=True,\n",
- "          line_thickness=8)\n",
- "      plt.figure(figsize=IMAGE_SIZE)\n",
- "      plt.imshow(image_np)"
+ "for image_path in TEST_IMAGE_PATHS:\n",
+ "  image = Image.open(image_path)\n",
+ "  # the array based representation of the image will be used later in order to prepare the\n",
+ "  # result image with boxes and labels on it.\n",
+ "  image_np = load_image_into_numpy_array(image)\n",
+ "  # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n",
+ "  image_np_expanded = np.expand_dims(image_np, axis=0)\n",
+ "  # Actual detection.\n",
+ "  output_dict = run_inference_for_single_image(image_np, detection_graph)\n",
+ "  # Visualization of the results of a detection.\n",
+ "  vis_util.visualize_boxes_and_labels_on_image_array(\n",
+ "      image_np,\n",
+ "      output_dict['detection_boxes'],\n",
+ "      output_dict['detection_classes'],\n",
+ "      output_dict['detection_scores'],\n",
+ "      category_index,\n",
+ "      instance_masks=output_dict.get('detection_masks'),\n",
+ "      use_normalized_coordinates=True,\n",
+ "      line_thickness=8)\n",
+ "  plt.figure(figsize=IMAGE_SIZE)\n",
+ "  plt.imshow(image_np)"
]
},
{
@@ -275,6 +317,9 @@
}
],
"metadata": {
"colab": {
"version": "0.3.2"
},
"kernelspec": {
"display_name": "Python 2",
"language": "python",
......
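The loop above assumes a `detection_graph` built earlier in the notebook, which is elided from this hunk. For context, a hedged sketch of how that graph is typically loaded in this tutorial (the path is a placeholder):

# Sketch (assumed from the elided part of the notebook): load a frozen
# inference graph into `detection_graph`.
import tensorflow as tf

PATH_TO_FROZEN_GRAPH = 'frozen_inference_graph.pb'  # placeholder path

detection_graph = tf.Graph()
with detection_graph.as_default():
  od_graph_def = tf.GraphDef()
  with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
    od_graph_def.ParseFromString(fid.read())
    tf.import_graph_def(od_graph_def, name='')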
@@ -9,6 +9,7 @@ licenses(["notice"])
proto_library(
    name = "argmax_matcher_proto",
    srcs = ["argmax_matcher.proto"],
    cc_api_version = 2,
)

py_proto_library(
@@ -20,6 +21,7 @@ py_proto_library(
proto_library(
    name = "bipartite_matcher_proto",
    srcs = ["bipartite_matcher.proto"],
    cc_api_version = 2,
)

py_proto_library(
@@ -31,6 +33,7 @@ py_proto_library(
proto_library(
    name = "matcher_proto",
    srcs = ["matcher.proto"],
    cc_api_version = 2,
    deps = [
        ":argmax_matcher_proto",
        ":bipartite_matcher_proto",
@@ -46,6 +49,7 @@ py_proto_library(
proto_library(
    name = "faster_rcnn_box_coder_proto",
    srcs = ["faster_rcnn_box_coder.proto"],
    cc_api_version = 2,
)

py_proto_library(
@@ -57,6 +61,7 @@ py_proto_library(
proto_library(
    name = "keypoint_box_coder_proto",
    srcs = ["keypoint_box_coder.proto"],
    cc_api_version = 2,
)

py_proto_library(
@@ -68,6 +73,7 @@ py_proto_library(
proto_library(
    name = "mean_stddev_box_coder_proto",
    srcs = ["mean_stddev_box_coder.proto"],
    cc_api_version = 2,
)

py_proto_library(
@@ -79,6 +85,7 @@ py_proto_library(
proto_library(
    name = "square_box_coder_proto",
    srcs = ["square_box_coder.proto"],
    cc_api_version = 2,
)

py_proto_library(
@@ -90,6 +97,7 @@ py_proto_library(
proto_library(
    name = "box_coder_proto",
    srcs = ["box_coder.proto"],
    cc_api_version = 2,
    deps = [
        ":faster_rcnn_box_coder_proto",
        ":keypoint_box_coder_proto",
@@ -107,6 +115,7 @@ py_proto_library(
proto_library(
    name = "grid_anchor_generator_proto",
    srcs = ["grid_anchor_generator.proto"],
    cc_api_version = 2,
)

py_proto_library(
@@ -118,6 +127,7 @@ py_proto_library(
proto_library(
    name = "ssd_anchor_generator_proto",
    srcs = ["ssd_anchor_generator.proto"],
    cc_api_version = 2,
)

py_proto_library(
@@ -126,11 +136,25 @@ py_proto_library(
    deps = [":ssd_anchor_generator_proto"],
)

proto_library(
    name = "multiscale_anchor_generator_proto",
    srcs = ["multiscale_anchor_generator.proto"],
    cc_api_version = 2,
)

py_proto_library(
    name = "multiscale_anchor_generator_py_pb2",
    api_version = 2,
    deps = [":multiscale_anchor_generator_proto"],
)

proto_library(
    name = "anchor_generator_proto",
    srcs = ["anchor_generator.proto"],
    cc_api_version = 2,
    deps = [
        ":grid_anchor_generator_proto",
        ":multiscale_anchor_generator_proto",
        ":ssd_anchor_generator_proto",
    ],
)
@@ -144,6 +168,7 @@ py_proto_library(
proto_library(
    name = "input_reader_proto",
    srcs = ["input_reader.proto"],
    cc_api_version = 2,
)

py_proto_library(
@@ -155,6 +180,7 @@ py_proto_library(
proto_library(
    name = "losses_proto",
    srcs = ["losses.proto"],
    cc_api_version = 2,
)

py_proto_library(
@@ -166,6 +192,7 @@ py_proto_library(
proto_library(
    name = "optimizer_proto",
    srcs = ["optimizer.proto"],
    cc_api_version = 2,
)

py_proto_library(
@@ -177,6 +204,7 @@ py_proto_library(
proto_library(
    name = "post_processing_proto",
    srcs = ["post_processing.proto"],
    cc_api_version = 2,
)

py_proto_library(
@@ -188,6 +216,7 @@ py_proto_library(
proto_library(
    name = "hyperparams_proto",
    srcs = ["hyperparams.proto"],
    cc_api_version = 2,
)

py_proto_library(
@@ -199,6 +228,7 @@ py_proto_library(
proto_library(
    name = "box_predictor_proto",
    srcs = ["box_predictor.proto"],
    cc_api_version = 2,
    deps = [":hyperparams_proto"],
)
@@ -211,6 +241,7 @@ py_proto_library(
proto_library(
    name = "region_similarity_calculator_proto",
    srcs = ["region_similarity_calculator.proto"],
    cc_api_version = 2,
    deps = [],
)
@@ -223,6 +254,7 @@ py_proto_library(
proto_library(
    name = "preprocessor_proto",
    srcs = ["preprocessor.proto"],
    cc_api_version = 2,
)

py_proto_library(
@@ -234,6 +266,7 @@ py_proto_library(
proto_library(
    name = "train_proto",
    srcs = ["train.proto"],
    cc_api_version = 2,
    deps = [
        ":optimizer_proto",
        ":preprocessor_proto",
@@ -249,6 +282,7 @@ py_proto_library(
proto_library(
    name = "eval_proto",
    srcs = ["eval.proto"],
    cc_api_version = 2,
)

py_proto_library(
@@ -260,6 +294,7 @@ py_proto_library(
proto_library(
    name = "image_resizer_proto",
    srcs = ["image_resizer.proto"],
    cc_api_version = 2,
)

py_proto_library(
@@ -271,19 +306,21 @@ py_proto_library(
proto_library(
    name = "faster_rcnn_proto",
    srcs = ["faster_rcnn.proto"],
    cc_api_version = 2,
    deps = [
        ":box_predictor_proto",
-       "//object_detection/protos:anchor_generator_proto",
-       "//object_detection/protos:hyperparams_proto",
-       "//object_detection/protos:image_resizer_proto",
-       "//object_detection/protos:losses_proto",
-       "//object_detection/protos:post_processing_proto",
+       "//tensorflow/models/research/object_detection/protos:anchor_generator_proto",
+       "//tensorflow/models/research/object_detection/protos:hyperparams_proto",
+       "//tensorflow/models/research/object_detection/protos:image_resizer_proto",
+       "//tensorflow/models/research/object_detection/protos:losses_proto",
+       "//tensorflow/models/research/object_detection/protos:post_processing_proto",
    ],
)

proto_library(
    name = "ssd_proto",
    srcs = ["ssd.proto"],
    cc_api_version = 2,
    deps = [
        ":anchor_generator_proto",
        ":box_coder_proto",
@@ -300,6 +337,7 @@ proto_library(
proto_library(
    name = "model_proto",
    srcs = ["model.proto"],
    cc_api_version = 2,
    deps = [
        ":faster_rcnn_proto",
        ":ssd_proto",
@@ -315,6 +353,7 @@ py_proto_library(
proto_library(
    name = "pipeline_proto",
    srcs = ["pipeline.proto"],
    cc_api_version = 2,
    deps = [
        ":eval_proto",
        ":input_reader_proto",
@@ -332,6 +371,7 @@ py_proto_library(
proto_library(
    name = "string_int_label_map_proto",
    srcs = ["string_int_label_map.proto"],
    cc_api_version = 2,
)

py_proto_library(
......
@@ -4,6 +4,7 @@ package object_detection.protos;
import "object_detection/protos/grid_anchor_generator.proto";
import "object_detection/protos/ssd_anchor_generator.proto";
import "object_detection/protos/multiscale_anchor_generator.proto";

// Configuration proto for the anchor generator to use in the object detection
// pipeline. See core/anchor_generator.py for details.
@@ -11,5 +12,6 @@ message AnchorGenerator {
  oneof anchor_generator_oneof {
    GridAnchorGenerator grid_anchor_generator = 1;
    SsdAnchorGenerator ssd_anchor_generator = 2;
    MultiscaleAnchorGenerator multiscale_anchor_generator = 3;
  }
}
@@ -22,4 +22,8 @@ message ArgMaxMatcher {
  // Whether to ensure each row is matched to at least one column.
  optional bool force_match_for_each_row = 5 [default = false];

  // Force constructed match objects to use matrix multiplication based gather
  // instead of standard tf.gather.
  optional bool use_matmul_gather = 6 [default = false];
}
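A hedged example of setting the new field in practice, parsing a matcher config through the generated Python proto (this assumes the standard protobuf text_format API and the matcher_pb2 module built from these protos):

# Example only: enable the new use_matmul_gather option on an ArgMaxMatcher.
from google.protobuf import text_format
from object_detection.protos import matcher_pb2

matcher_config = text_format.Parse("""
  argmax_matcher {
    matched_threshold: 0.5
    unmatched_threshold: 0.5
    use_matmul_gather: true
  }
""", matcher_pb2.Matcher())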
@@ -5,4 +5,7 @@ package object_detection.protos;
// Configuration proto for bipartite matcher. See
// matchers/bipartite_matcher.py for details.
message BipartiteMatcher {
  // Force constructed match objects to use matrix multiplication based gather
  // instead of standard tf.gather.
  optional bool use_matmul_gather = 6 [default = false];
}
@@ -11,6 +11,7 @@ message BoxPredictor {
    ConvolutionalBoxPredictor convolutional_box_predictor = 1;
    MaskRCNNBoxPredictor mask_rcnn_box_predictor = 2;
    RfcnBoxPredictor rfcn_box_predictor = 3;
    WeightSharedConvolutionalBoxPredictor weight_shared_convolutional_box_predictor = 4;
  }
}
@@ -46,10 +47,40 @@ message ConvolutionalBoxPredictor {
  optional int32 box_code_size = 8 [default = 4];

  // Whether to apply sigmoid to the output of class predictions.
- // TODO: Do we need this since we have a post processing module.?
+ // TODO(jonathanhuang): Do we need this since we have a post processing
+ // module?
  optional bool apply_sigmoid_to_scores = 9 [default = false];

  optional float class_prediction_bias_init = 10 [default = 0.0];

  // Whether to use depthwise separable convolution for box predictor layers.
  optional bool use_depthwise = 11 [default = false];
}

// Configuration proto for the weight-shared convolutional box predictor.
message WeightSharedConvolutionalBoxPredictor {
  // Hyperparameters for convolution ops used in the box predictor.
  optional Hyperparams conv_hyperparams = 1;

  // Number of additional conv layers before the predictor.
  optional int32 num_layers_before_predictor = 4 [default = 0];

  // Output depth for the convolution ops prior to predicting box encodings
  // and class predictions.
  optional int32 depth = 2 [default = 0];

  // Size of the final convolution kernel. If the spatial resolution of the
  // feature map is smaller than the kernel size, the kernel size is set to
  // min(feature_width, feature_height).
  optional int32 kernel_size = 7 [default = 3];

  // Size of the encoding for boxes.
  optional int32 box_code_size = 8 [default = 4];

  // Bias initialization for class prediction. It has been shown to stabilize
  // training where there are a large number of negative boxes. See
  // https://arxiv.org/abs/1708.02002 for details.
  optional float class_prediction_bias_init = 10 [default = 0.0];
}

message MaskRCNNBoxPredictor {
@@ -71,12 +102,22 @@ message MaskRCNNBoxPredictor {
  // Whether to predict instance masks inside detection boxes.
  optional bool predict_instance_masks = 6 [default = false];

- // The depth for the first conv2d_transpose op applied to the
- // image_features in the mask prediciton branch
+ // The depth for the first conv2d_transpose op applied to the
+ // image_features in the mask prediction branch. If set to 0, the value
+ // will be set automatically based on the number of channels in the image
+ // features and the number of classes.
  optional int32 mask_prediction_conv_depth = 7 [default = 256];

  // Whether to predict keypoints inside detection boxes.
  optional bool predict_keypoints = 8 [default = false];

  // The height and the width of the predicted mask.
  optional int32 mask_height = 9 [default = 15];
  optional int32 mask_width = 10 [default = 15];

  // The number of convolutions applied to image_features in the mask
  // prediction branch.
  optional int32 mask_prediction_num_conv_layers = 11 [default = 2];
}

message RfcnBoxPredictor {
......
@@ -26,9 +26,8 @@ message EvalConfig {
  // BNS name of the TensorFlow master.
  optional string eval_master = 7 [default=""];

- // Type of metrics to use for evaluation. Currently supports only Pascal VOC
- // detection metrics.
- optional string metrics_set = 8 [default="pascal_voc_metrics"];
+ // Type of metrics to use for evaluation.
+ repeated string metrics_set = 8;

  // Path to export detections to COCO compatible JSON format.
  optional string export_path = 9 [default=''];
@@ -38,10 +37,35 @@ message EvalConfig {
  optional bool ignore_groundtruth = 10 [default=false];

  // Use exponential moving averages of variables for evaluation.
  // TODO(rathodv): When this is false make sure the model is constructed
  // without moving averages in restore_fn.
  optional bool use_moving_averages = 11 [default=false];

  // Whether to evaluate instance masks.
  // Note that since there is no evaluation code currently for instance
  // segmentation this option is unused.
  optional bool eval_instance_masks = 12 [default=false];

  // Minimum score threshold for a detected object box to be visualized.
  optional float min_score_threshold = 13 [default=0.5];

  // Maximum number of detections to visualize.
  optional int32 max_num_boxes_to_visualize = 14 [default=20];

  // When drawing a single detection, each label is by default visualized as
  // <label name> : <label score>. One can skip the name and/or score using the
  // following fields:
  optional bool skip_scores = 15 [default=false];
  optional bool skip_labels = 16 [default=false];

  // Whether to show groundtruth boxes in addition to detected boxes in
  // visualizations.
  optional bool visualize_groundtruth_boxes = 17 [default=false];

  // Box color for visualizing groundtruth boxes.
  optional string groundtruth_box_visualization_color = 18 [default="black"];

  // Whether to keep image identifier in filename when exported to
  // visualization_export_dir.
  optional bool keep_image_id_for_visualization_export = 19 [default=false];
}
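Since metrics_set is now repeated, several metric sets can be requested in one EvalConfig. A hedged sketch follows; the metric-set names are assumptions based on common values used by the API at the time, not taken from this diff:

# Example only: request multiple metric sets via the now-repeated field.
from google.protobuf import text_format
from object_detection.protos import eval_pb2

eval_config = text_format.Parse("""
  metrics_set: 'coco_detection_metrics'
  metrics_set: 'pascal_voc_metrics'
  num_examples: 8000
""", eval_pb2.EvalConfig())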
@@ -20,7 +20,7 @@ import "object_detection/protos/post_processing.proto";
message FasterRcnn {
  // Whether to construct only the Region Proposal Network (RPN).
- optional bool first_stage_only = 1 [default=false];
+ optional int32 number_of_stages = 1 [default=2];

  // Number of classes to predict.
  optional int32 num_classes = 3;
......
@@ -29,6 +29,14 @@ message KeepAspectRatioResizer {
  // Desired method when resizing image.
  optional ResizeType resize_method = 3 [default = BILINEAR];

  // Whether to pad the image with zeros so the output spatial size is
  // [max_dimension, max_dimension]. Note that the zeros are padded to the
  // bottom and the right of the resized image.
  optional bool pad_to_max_dimension = 4 [default = false];

  // Whether to also resize the image channels from 3 to 1 (RGB to grayscale).
  optional bool convert_to_grayscale = 5 [default = false];
}

// Configuration proto for image resizer that resizes to a fixed shape.
@@ -41,4 +49,7 @@ message FixedShapeResizer {
  // Desired method when resizing image.
  optional ResizeType resize_method = 3 [default = BILINEAR];

  // Whether to also resize the image channels from 3 to 1 (RGB to grayscale).
  optional bool convert_to_grayscale = 4 [default = false];
}
@@ -15,6 +15,13 @@ package object_detection.protos;
// 'groundtruth_instance_masks': (Optional), a [num_boxes, image_height,
// image_width] float tensor storing binary mask of the objects in boxes.

// Instance mask format. Note that PNG masks are much more space efficient.
enum InstanceMaskType {
  DEFAULT = 0;          // Default implementation, currently NUMERICAL_MASKS.
  NUMERICAL_MASKS = 1;  // [num_masks, H, W] float32 binary masks.
  PNG_MASKS = 2;        // Encoded PNG masks.
}

message InputReader {
  // Path to StringIntLabelMap pbtxt file specifying the mapping from string
  // labels to integer ids.
@@ -24,6 +31,12 @@ message InputReader {
  // shuffled randomly.
  optional bool shuffle = 2 [default=true];

  // Buffer size to be used when shuffling.
  optional uint32 shuffle_buffer_size = 11 [default = 100];

  // Buffer size to be used when shuffling file names.
  optional uint32 filenames_shuffle_buffer_size = 12 [default = 100];

  // Maximum number of records to keep in reader queue.
  optional uint32 queue_capacity = 3 [default=2000];
@@ -38,9 +51,15 @@ message InputReader {
  // Number of reader instances to create.
  optional uint32 num_readers = 6 [default=8];

  // Size of the buffer for prefetching (in batches).
  optional uint32 prefetch_buffer_size = 13 [default = 2];

  // Whether to load groundtruth instance masks.
  optional bool load_instance_masks = 7 [default = false];

  // Type of instance mask.
  optional InstanceMaskType mask_type = 10 [default = NUMERICAL_MASKS];

  oneof input_reader {
    TFRecordInputReader tf_record_input_reader = 8;
    ExternalInputReader external_input_reader = 9;
......
@@ -33,12 +33,14 @@ message LocalizationLoss {
// L2 location loss: 0.5 * ||weight * (a - b)|| ^ 2
message WeightedL2LocalizationLoss {
  // DEPRECATED, do not use.
  // Output loss per anchor.
  optional bool anchorwise_output = 1 [default=false];
}

// SmoothL1 (Huber) location loss: .5 * x ^ 2 if |x| < 1 else |x| - .5
message WeightedSmoothL1LocalizationLoss {
  // DEPRECATED, do not use.
  // Output loss per anchor.
  optional bool anchorwise_output = 1 [default=false];
}
@@ -59,6 +61,7 @@ message ClassificationLoss {
// Classification loss using a sigmoid function over class predictions.
message WeightedSigmoidClassificationLoss {
  // DEPRECATED, do not use.
  // Output loss per anchor.
  optional bool anchorwise_output = 1 [default=false];
}
@@ -66,6 +69,7 @@ message WeightedSigmoidClassificationLoss {
// Sigmoid Focal cross entropy loss as described in
// https://arxiv.org/abs/1708.02002
message SigmoidFocalClassificationLoss {
  // DEPRECATED, do not use.
  optional bool anchorwise_output = 1 [default = false];

  // Modulating factor for the loss.
  optional float gamma = 2 [default = 2.0];
@@ -75,6 +79,7 @@ message SigmoidFocalClassificationLoss {
// Classification loss using a softmax function over class predictions.
message WeightedSoftmaxClassificationLoss {
  // DEPRECATED, do not use.
  // Output loss per anchor.
  optional bool anchorwise_output = 1 [default=false];

  // Scale logit (input) value before calculating softmax classification loss.
@@ -93,6 +98,7 @@ message BootstrappedSigmoidClassificationLoss {
  // probabilities.
  optional bool hard_bootstrap = 2 [default=false];

  // DEPRECATED, do not use.
  // Output loss per anchor.
  optional bool anchorwise_output = 3 [default=false];
}
......
syntax = "proto2";
package object_detection.protos;
// Configuration proto for RetinaNet anchor generator described in
// https://arxiv.org/abs/1708.02002. See
// anchor_generators/multiscale_grid_anchor_generator.py for details.
message MultiscaleAnchorGenerator {
// minimum level in feature pyramid
optional int32 min_level = 1 [default = 3];
// maximum level in feature pyramid
optional int32 max_level = 2 [default = 7];
// Scale of anchor to feature stride
optional float anchor_scale = 3 [default = 4.0];
// Aspect ratios for anchors at each grid point.
repeated float aspect_ratios = 4;
// Number of intermediate scale each scale octave
optional int32 scales_per_octave = 5 [default = 2];
}
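How scales_per_octave expands into per-location anchor sizes, sketched as plain arithmetic rather than the library code:

# With anchor_scale = 4.0 at a stride-8 level and scales_per_octave = 2, the
# generator places anchors at scales 2^0 and 2^(1/2), i.e. base sizes of
# 32 and ~45.25 pixels per aspect ratio.
anchor_scale, stride, scales_per_octave = 4.0, 8, 2
octave_scales = [2 ** (i / float(scales_per_octave))
                 for i in range(scales_per_octave)]
base_sizes = [anchor_scale * stride * s for s in octave_scales]
# base_sizes == [32.0, 45.254833...]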
@@ -15,4 +15,5 @@ message TrainEvalPipelineConfig {
  optional InputReader train_input_reader = 3;
  optional EvalConfig eval_config = 4;
  optional InputReader eval_input_reader = 5;
  extensions 1000 to max;
}
@@ -3,7 +3,7 @@ syntax = "proto2";
package object_detection.protos;

// Message for defining a preprocessing operation on input data.
- // See: //object_detection/core/preprocessor.py
+ // See: //third_party/tensorflow_models/object_detection/core/preprocessor.py
message PreprocessingStep {
  oneof preprocessing_step {
    NormalizeImage normalize_image = 1;
@@ -32,6 +32,7 @@ message PreprocessingStep {
    SSDRandomCropPadFixedAspectRatio ssd_random_crop_pad_fixed_aspect_ratio = 24;
    RandomVerticalFlip random_vertical_flip = 25;
    RandomRotation90 random_rotation90 = 26;
    RGBtoGray rgb_to_gray = 27;
  }
}
@@ -202,7 +203,7 @@ message RandomCropPadImage {
  repeated float max_padded_size_ratio = 9;

  // Color of the padding. If unset, will pad using average color of the input
- // image.
+ // image. This field should be of length 3.
  repeated float pad_color = 10;
}
@@ -236,6 +237,11 @@ message RandomResizeMethod {
  optional float target_width = 2;
}

// Converts the RGB image to a grayscale image. This also converts the image
// depth from 3 to 1, unlike RandomRGBtoGray which does not change the image
// depth.
message RGBtoGray {}

// Scales boxes from normalized coordinates to pixel coordinates.
message ScaleBoxesToPixelCoordinates {
}
......
@@ -82,4 +82,12 @@ message SsdFeatureExtractor {
  // will apply only to the additional layers that are added and are outside the
  // canned arg_scope.
  optional bool batch_norm_trainable = 6 [default=true];

  // Whether to use explicit padding when extracting SSD multiresolution
  // features. Note that this does not apply to the base feature extractor.
  optional bool use_explicit_padding = 7 [default=false];

  // Whether to use depthwise separable convolutions to extract additional
  // feature maps added by SSD.
  optional bool use_depthwise = 8 [default=false];
}
@@ -35,6 +35,11 @@ message TrainConfig {
  // If false, it assumes the checkpoint was an object classification model.
  optional bool from_detection_checkpoint = 8 [default=false];

  // Whether to load all checkpoint vars that match model variable names and
  // sizes. This option is only available if `from_detection_checkpoint` is
  // True.
  optional bool load_all_detection_checkpoint_vars = 19 [default = false];

  // Number of steps to train the DetectionModel for. If 0, will train the
  // model indefinitely.
  optional uint32 num_steps = 9 [default=0];
@@ -66,4 +71,21 @@ message TrainConfig {
  // This is useful when each box can have multiple labels.
  // Note that only Sigmoid classification losses should be used.
  optional bool merge_multiple_label_boxes = 17 [default=false];

  // Whether to add regularization loss to `total_loss`. This is true by
  // default and adds all regularization losses defined in the model to
  // `total_loss`.
  // Setting this option to false is very useful while debugging the model and
  // losses.
  optional bool add_regularization_loss = 18 [default=true];

  // Maximum number of boxes used during training.
  // Set this to at least the maximum number of boxes in the input data.
  // Otherwise, it may cause "Data loss: Attempted to pad to a smaller size
  // than the input element" errors.
  optional int32 max_number_of_boxes = 20 [default=50];

  // Whether to remove padding along the `num_boxes` dimension of the
  // groundtruth tensors.
  optional bool unpad_groundtruth_tensors = 21 [default=true];
}
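A hedged TrainConfig fragment exercising the newly added fields; the values are illustrative, and load_all_detection_checkpoint_vars requires from_detection_checkpoint per the comment above:

# Example only: a TrainConfig using the fields added in this change.
from google.protobuf import text_format
from object_detection.protos import train_pb2

train_config = text_format.Parse("""
  batch_size: 32
  from_detection_checkpoint: true
  load_all_detection_checkpoint_vars: true
  add_regularization_loss: true
  max_number_of_boxes: 100
  unpad_groundtruth_tensors: true
""", train_pb2.TrainConfig())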
package(
    default_visibility = ["//visibility:public"],
)

licenses(["notice"])

exports_files([
    "faster_rcnn_resnet50_pets.config",
    "ssd_inception_v2_pets.config",
    "ssd_mobilenet_v1_focal_loss_pets.config",
])
# Embedded SSD with Mobilenet v1 configuration for MSCOCO Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
  ssd {
    num_classes: 90
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    anchor_generator {
      ssd_anchor_generator {
        num_layers: 5
        min_scale: 0.2
        max_scale: 0.95
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        aspect_ratios: 3.0
        aspect_ratios: 0.3333
      }
    }
    image_resizer {
      fixed_shape_resizer {
        height: 256
        width: 256
      }
    }
    box_predictor {
      convolutional_box_predictor {
        min_depth: 0
        max_depth: 0
        num_layers_before_predictor: 0
        use_dropout: false
        dropout_keep_probability: 0.8
        kernel_size: 1
        box_code_size: 4
        apply_sigmoid_to_scores: false
        conv_hyperparams {
          activation: RELU_6,
          regularizer {
            l2_regularizer {
              weight: 0.00004
            }
          }
          initializer {
            truncated_normal_initializer {
              stddev: 0.03
              mean: 0.0
            }
          }
          batch_norm {
            train: true,
            scale: true,
            center: true,
            decay: 0.9997,
            epsilon: 0.001,
          }
        }
      }
    }
    feature_extractor {
      type: 'embedded_ssd_mobilenet_v1'
      min_depth: 16
      depth_multiplier: 0.125
      conv_hyperparams {
        activation: RELU_6,
        regularizer {
          l2_regularizer {
            weight: 0.00004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          train: true,
          scale: true,
          center: true,
          decay: 0.9997,
          epsilon: 0.001,
        }
      }
    }
    loss {
      classification_loss {
        weighted_sigmoid {
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      hard_example_miner {
        num_hard_examples: 3000
        iou_threshold: 0.99
        loss_type: CLASSIFICATION
        max_negatives_per_positive: 3
        min_negatives_per_image: 0
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  batch_size: 32
  optimizer {
    rms_prop_optimizer: {
      learning_rate: {
        exponential_decay_learning_rate {
          initial_learning_rate: 0.004
          decay_steps: 800720
          decay_factor: 0.95
        }
      }
      momentum_optimizer_value: 0.9
      decay: 0.9
      epsilon: 1.0
    }
  }
  fine_tune_checkpoint: "/PATH_TO_BE_CONFIGURED/model.ckpt"
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    ssd_random_crop {
    }
  }
}

train_input_reader: {
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
  }
  label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
}

eval_config: {
  num_examples: 8000
  use_moving_averages: true
}

eval_input_reader: {
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record"
  }
  label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
  shuffle: false
  num_readers: 1
}
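To sanity-check a config like the one above, it can be parsed into the TrainEvalPipelineConfig message defined in pipeline.proto. A hedged sketch, with the file path as a placeholder:

# Example only: parse the sample config into the pipeline proto.
import tensorflow as tf
from google.protobuf import text_format
from object_detection.protos import pipeline_pb2

pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile('embedded_ssd_mobilenet_v1_coco.config', 'r') as f:  # placeholder path
  text_format.Merge(f.read(), pipeline_config)
print(pipeline_config.model.ssd.num_classes)  # 90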