ModelZoo / ResNet50_tensorflow / Commit fd7b6887 (unverified)

Authored Feb 09, 2018 by Jonathan Huang; committed via GitHub on Feb 09, 2018.

Merge pull request #3293 from pkulzc/master

Internal changes of object_detection

Parents: f98ec55e, 1efe98bb
Changes: 200 (diff paginated; this is one page)
Showing 20 changed files with 657 additions and 47 deletions (+657, -47):
research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_test.py (+65, -0)
research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_testbase.py (+93, -0)
research/object_detection/object_detection_tutorial.ipynb (+78, -33)
research/object_detection/protos/BUILD (+45, -5)
research/object_detection/protos/anchor_generator.proto (+2, -0)
research/object_detection/protos/argmax_matcher.proto (+4, -0)
research/object_detection/protos/bipartite_matcher.proto (+3, -0)
research/object_detection/protos/box_predictor.proto (+44, -3)
research/object_detection/protos/eval.proto (+27, -3)
research/object_detection/protos/faster_rcnn.proto (+1, -1)
research/object_detection/protos/image_resizer.proto (+11, -0)
research/object_detection/protos/input_reader.proto (+19, -0)
research/object_detection/protos/losses.proto (+6, -0)
research/object_detection/protos/multiscale_anchor_generator.proto (+23, -0)
research/object_detection/protos/pipeline.proto (+1, -0)
research/object_detection/protos/preprocessor.proto (+8, -2)
research/object_detection/protos/ssd.proto (+8, -0)
research/object_detection/protos/train.proto (+22, -0)
research/object_detection/samples/configs/BUILD (+11, -0)
research/object_detection/samples/configs/embedded_ssd_mobilenet_v1_coco.config (+186, -0)
research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_test.py
(new file, mode 100644)
"""Tests for ssd resnet v1 FPN feature extractors."""
import
tensorflow
as
tf
from
object_detection.models
import
ssd_resnet_v1_fpn_feature_extractor
from
object_detection.models
import
ssd_resnet_v1_fpn_feature_extractor_testbase
class
SSDResnet50V1FeatureExtractorTest
(
ssd_resnet_v1_fpn_feature_extractor_testbase
.
SSDResnetFPNFeatureExtractorTestBase
):
"""SSDResnet50v1Fpn feature extractor test."""
def
_create_feature_extractor
(
self
,
depth_multiplier
,
pad_to_multiple
):
min_depth
=
32
conv_hyperparams
=
{}
batch_norm_trainable
=
True
is_training
=
True
return
ssd_resnet_v1_fpn_feature_extractor
.
SSDResnet50V1FpnFeatureExtractor
(
is_training
,
depth_multiplier
,
min_depth
,
pad_to_multiple
,
conv_hyperparams
,
batch_norm_trainable
)
def
_resnet_scope_name
(
self
):
return
'resnet_v1_50'
class
SSDResnet101V1FeatureExtractorTest
(
ssd_resnet_v1_fpn_feature_extractor_testbase
.
SSDResnetFPNFeatureExtractorTestBase
):
"""SSDResnet101v1Fpn feature extractor test."""
def
_create_feature_extractor
(
self
,
depth_multiplier
,
pad_to_multiple
):
min_depth
=
32
conv_hyperparams
=
{}
batch_norm_trainable
=
True
is_training
=
True
return
(
ssd_resnet_v1_fpn_feature_extractor
.
SSDResnet101V1FpnFeatureExtractor
(
is_training
,
depth_multiplier
,
min_depth
,
pad_to_multiple
,
conv_hyperparams
,
batch_norm_trainable
))
def
_resnet_scope_name
(
self
):
return
'resnet_v1_101'
class
SSDResnet152V1FeatureExtractorTest
(
ssd_resnet_v1_fpn_feature_extractor_testbase
.
SSDResnetFPNFeatureExtractorTestBase
):
"""SSDResnet152v1Fpn feature extractor test."""
def
_create_feature_extractor
(
self
,
depth_multiplier
,
pad_to_multiple
):
min_depth
=
32
conv_hyperparams
=
{}
batch_norm_trainable
=
True
is_training
=
True
return
(
ssd_resnet_v1_fpn_feature_extractor
.
SSDResnet152V1FpnFeatureExtractor
(
is_training
,
depth_multiplier
,
min_depth
,
pad_to_multiple
,
conv_hyperparams
,
batch_norm_trainable
))
def
_resnet_scope_name
(
self
):
return
'resnet_v1_152'
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_testbase.py
(new file, mode 100644)
"""Tests for ssd resnet v1 FPN feature extractors."""
import
abc
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.models
import
ssd_feature_extractor_test
class
SSDResnetFPNFeatureExtractorTestBase
(
ssd_feature_extractor_test
.
SsdFeatureExtractorTestBase
):
"""Helper test class for SSD Resnet v1 FPN feature extractors."""
@
abc
.
abstractmethod
def
_resnet_scope_name
(
self
):
pass
@
abc
.
abstractmethod
def
_fpn_scope_name
(
self
):
return
'fpn'
def
test_extract_features_returns_correct_shapes_256
(
self
):
image_height
=
256
image_width
=
256
depth_multiplier
=
1.0
pad_to_multiple
=
1
expected_feature_map_shape
=
[(
2
,
32
,
32
,
256
),
(
2
,
16
,
16
,
256
),
(
2
,
8
,
8
,
256
),
(
2
,
4
,
4
,
256
),
(
2
,
2
,
2
,
256
)]
self
.
check_extract_features_returns_correct_shape
(
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
def
test_extract_features_returns_correct_shapes_with_dynamic_inputs
(
self
):
image_height
=
256
image_width
=
256
depth_multiplier
=
1.0
pad_to_multiple
=
1
expected_feature_map_shape
=
[(
2
,
32
,
32
,
256
),
(
2
,
16
,
16
,
256
),
(
2
,
8
,
8
,
256
),
(
2
,
4
,
4
,
256
),
(
2
,
2
,
2
,
256
)]
self
.
check_extract_features_returns_correct_shapes_with_dynamic_inputs
(
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
def
test_extract_features_returns_correct_shapes_with_pad_to_multiple
(
self
):
image_height
=
254
image_width
=
254
depth_multiplier
=
1.0
pad_to_multiple
=
32
expected_feature_map_shape
=
[(
2
,
32
,
32
,
256
),
(
2
,
16
,
16
,
256
),
(
2
,
8
,
8
,
256
),
(
2
,
4
,
4
,
256
),
(
2
,
2
,
2
,
256
)]
self
.
check_extract_features_returns_correct_shape
(
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
def
test_extract_features_raises_error_with_invalid_image_size
(
self
):
image_height
=
32
image_width
=
32
depth_multiplier
=
1.0
pad_to_multiple
=
1
self
.
check_extract_features_raises_error_with_invalid_image_size
(
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
)
def
test_preprocess_returns_correct_value_range
(
self
):
image_height
=
128
image_width
=
128
depth_multiplier
=
1
pad_to_multiple
=
1
test_image
=
np
.
random
.
rand
(
4
,
image_height
,
image_width
,
3
)
feature_extractor
=
self
.
_create_feature_extractor
(
depth_multiplier
,
pad_to_multiple
)
preprocessed_image
=
feature_extractor
.
preprocess
(
test_image
)
self
.
assertAllClose
(
preprocessed_image
,
test_image
-
[[
123.68
,
116.779
,
103.939
]])
def
test_variables_only_created_in_scope
(
self
):
depth_multiplier
=
1
pad_to_multiple
=
1
g
=
tf
.
Graph
()
with
g
.
as_default
():
feature_extractor
=
self
.
_create_feature_extractor
(
depth_multiplier
,
pad_to_multiple
)
preprocessed_inputs
=
tf
.
placeholder
(
tf
.
float32
,
(
4
,
None
,
None
,
3
))
feature_extractor
.
extract_features
(
preprocessed_inputs
)
variables
=
g
.
get_collection
(
tf
.
GraphKeys
.
GLOBAL_VARIABLES
)
for
variable
in
variables
:
self
.
assertTrue
(
variable
.
name
.
startswith
(
self
.
_resnet_scope_name
())
or
variable
.
name
.
startswith
(
self
.
_fpn_scope_name
()))
research/object_detection/object_detection_tutorial.ipynb
...
...
@@ -35,6 +35,7 @@
"from io import StringIO\n",
"from matplotlib import pyplot as plt\n",
"from PIL import Image\n",
"from object_detection.utils import ops as utils_ops\n",
"\n",
"if tf.__version__ < '1.4.0':\n",
" raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')\n"
...
...
@@ -223,6 +224,59 @@
"IMAGE_SIZE = (12, 8)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def run_inference_for_single_image(image, graph):\n",
" with graph.as_default():\n",
" with tf.Session() as sess:\n",
" # Get handles to input and output tensors\n",
" ops = tf.get_default_graph().get_operations()\n",
" all_tensor_names = {output.name for op in ops for output in op.outputs}\n",
" tensor_dict = {}\n",
" for key in [\n",
" 'num_detections', 'detection_boxes', 'detection_scores',\n",
" 'detection_classes', 'detection_masks'\n",
" ]:\n",
" tensor_name = key + ':0'\n",
" if tensor_name in all_tensor_names:\n",
" tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(\n",
" tensor_name)\n",
" if 'detection_masks' in tensor_dict:\n",
" # The following processing is only for single image\n",
" detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])\n",
" detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])\n",
" # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.\n",
" real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)\n",
" detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])\n",
" detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])\n",
" detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(\n",
" detection_masks, detection_boxes, image.shape[0], image.shape[1])\n",
" detection_masks_reframed = tf.cast(\n",
" tf.greater(detection_masks_reframed, 0.5), tf.uint8)\n",
" # Follow the convention by adding back the batch dimension\n",
" tensor_dict['detection_masks'] = tf.expand_dims(\n",
" detection_masks_reframed, 0)\n",
" image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')\n",
"\n",
" # Run inference\n",
" output_dict = sess.run(tensor_dict,\n",
" feed_dict={image_tensor: np.expand_dims(image, 0)})\n",
"\n",
" # all outputs are float32 numpy arrays, so convert types as appropriate\n",
" output_dict['num_detections'] = int(output_dict['num_detections'][0])\n",
" output_dict['detection_classes'] = output_dict[\n",
" 'detection_classes'][0].astype(np.uint8)\n",
" output_dict['detection_boxes'] = output_dict['detection_boxes'][0]\n",
" output_dict['detection_scores'] = output_dict['detection_scores'][0]\n",
" if 'detection_masks' in output_dict:\n",
" output_dict['detection_masks'] = output_dict['detection_masks'][0]\n",
" return output_dict"
]
},
{
"cell_type": "code",
"execution_count": null,
...
...
@@ -231,39 +285,27 @@
},
"outputs": [],
"source": [
"with detection_graph.as_default():\n",
" with tf.Session(graph=detection_graph) as sess:\n",
" # Definite input and output Tensors for detection_graph\n",
" image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')\n",
" # Each box represents a part of the image where a particular object was detected.\n",
" detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')\n",
" # Each score represent how level of confidence for each of the objects.\n",
" # Score is shown on the result image, together with the class label.\n",
" detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')\n",
" detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')\n",
" num_detections = detection_graph.get_tensor_by_name('num_detections:0')\n",
" for image_path in TEST_IMAGE_PATHS:\n",
" image = Image.open(image_path)\n",
" # the array based representation of the image will be used later in order to prepare the\n",
" # result image with boxes and labels on it.\n",
" image_np = load_image_into_numpy_array(image)\n",
" # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n",
" image_np_expanded = np.expand_dims(image_np, axis=0)\n",
" # Actual detection.\n",
" (boxes, scores, classes, num) = sess.run(\n",
" [detection_boxes, detection_scores, detection_classes, num_detections],\n",
" feed_dict={image_tensor: image_np_expanded})\n",
" # Visualization of the results of a detection.\n",
" vis_util.visualize_boxes_and_labels_on_image_array(\n",
" image_np,\n",
" np.squeeze(boxes),\n",
" np.squeeze(classes).astype(np.int32),\n",
" np.squeeze(scores),\n",
" category_index,\n",
" use_normalized_coordinates=True,\n",
" line_thickness=8)\n",
" plt.figure(figsize=IMAGE_SIZE)\n",
" plt.imshow(image_np)"
"for image_path in TEST_IMAGE_PATHS:\n",
" image = Image.open(image_path)\n",
" # the array based representation of the image will be used later in order to prepare the\n",
" # result image with boxes and labels on it.\n",
" image_np = load_image_into_numpy_array(image)\n",
" # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n",
" image_np_expanded = np.expand_dims(image_np, axis=0)\n",
" # Actual detection.\n",
" output_dict = run_inference_for_single_image(image_np, detection_graph)\n",
" # Visualization of the results of a detection.\n",
" vis_util.visualize_boxes_and_labels_on_image_array(\n",
" image_np,\n",
" output_dict['detection_boxes'],\n",
" output_dict['detection_classes'],\n",
" output_dict['detection_scores'],\n",
" category_index,\n",
" instance_masks=output_dict.get('detection_masks'),\n",
" use_normalized_coordinates=True,\n",
" line_thickness=8)\n",
" plt.figure(figsize=IMAGE_SIZE)\n",
" plt.imshow(image_np)"
]
},
{
...
...
@@ -275,6 +317,9 @@
}
],
"metadata": {
"colab": {
"version": "0.3.2"
},
"kernelspec": {
"display_name": "Python 2",
"language": "python",
...
...
research/object_detection/protos/BUILD
...
...
@@ -9,6 +9,7 @@ licenses(["notice"])
proto_library(
    name = "argmax_matcher_proto",
    srcs = ["argmax_matcher.proto"],
    cc_api_version = 2,
)

py_proto_library(
...
...
@@ -20,6 +21,7 @@ py_proto_library(
proto_library(
    name = "bipartite_matcher_proto",
    srcs = ["bipartite_matcher.proto"],
    cc_api_version = 2,
)

py_proto_library(
...
...
@@ -31,6 +33,7 @@ py_proto_library(
proto_library(
    name = "matcher_proto",
    srcs = ["matcher.proto"],
    cc_api_version = 2,
    deps = [
        ":argmax_matcher_proto",
        ":bipartite_matcher_proto",
...
...
@@ -46,6 +49,7 @@ py_proto_library(
proto_library(
    name = "faster_rcnn_box_coder_proto",
    srcs = ["faster_rcnn_box_coder.proto"],
    cc_api_version = 2,
)

py_proto_library(
...
...
@@ -57,6 +61,7 @@ py_proto_library(
proto_library(
    name = "keypoint_box_coder_proto",
    srcs = ["keypoint_box_coder.proto"],
    cc_api_version = 2,
)

py_proto_library(
...
...
@@ -68,6 +73,7 @@ py_proto_library(
proto_library(
    name = "mean_stddev_box_coder_proto",
    srcs = ["mean_stddev_box_coder.proto"],
    cc_api_version = 2,
)

py_proto_library(
...
...
@@ -79,6 +85,7 @@ py_proto_library(
proto_library(
    name = "square_box_coder_proto",
    srcs = ["square_box_coder.proto"],
    cc_api_version = 2,
)

py_proto_library(
...
...
@@ -90,6 +97,7 @@ py_proto_library(
proto_library(
    name = "box_coder_proto",
    srcs = ["box_coder.proto"],
    cc_api_version = 2,
    deps = [
        ":faster_rcnn_box_coder_proto",
        ":keypoint_box_coder_proto",
...
...
@@ -107,6 +115,7 @@ py_proto_library(
proto_library(
    name = "grid_anchor_generator_proto",
    srcs = ["grid_anchor_generator.proto"],
    cc_api_version = 2,
)

py_proto_library(
...
...
@@ -118,6 +127,7 @@ py_proto_library(
proto_library(
    name = "ssd_anchor_generator_proto",
    srcs = ["ssd_anchor_generator.proto"],
    cc_api_version = 2,
)

py_proto_library(
...
...
@@ -126,11 +136,25 @@ py_proto_library(
    deps = [":ssd_anchor_generator_proto"],
)

proto_library(
    name = "multiscale_anchor_generator_proto",
    srcs = ["multiscale_anchor_generator.proto"],
    cc_api_version = 2,
)

py_proto_library(
    name = "multiscale_anchor_generator_py_pb2",
    api_version = 2,
    deps = [":multiscale_anchor_generator_proto"],
)

proto_library(
    name = "anchor_generator_proto",
    srcs = ["anchor_generator.proto"],
    cc_api_version = 2,
    deps = [
        ":grid_anchor_generator_proto",
        ":multiscale_anchor_generator_proto",
        ":ssd_anchor_generator_proto",
    ],
)
...
...
@@ -144,6 +168,7 @@ py_proto_library(
proto_library(
    name = "input_reader_proto",
    srcs = ["input_reader.proto"],
    cc_api_version = 2,
)

py_proto_library(
...
...
@@ -155,6 +180,7 @@ py_proto_library(
proto_library(
    name = "losses_proto",
    srcs = ["losses.proto"],
    cc_api_version = 2,
)

py_proto_library(
...
...
@@ -166,6 +192,7 @@ py_proto_library(
proto_library(
    name = "optimizer_proto",
    srcs = ["optimizer.proto"],
    cc_api_version = 2,
)

py_proto_library(
...
...
@@ -177,6 +204,7 @@ py_proto_library(
proto_library(
    name = "post_processing_proto",
    srcs = ["post_processing.proto"],
    cc_api_version = 2,
)

py_proto_library(
...
...
@@ -188,6 +216,7 @@ py_proto_library(
proto_library(
    name = "hyperparams_proto",
    srcs = ["hyperparams.proto"],
    cc_api_version = 2,
)

py_proto_library(
...
...
@@ -199,6 +228,7 @@ py_proto_library(
proto_library(
    name = "box_predictor_proto",
    srcs = ["box_predictor.proto"],
    cc_api_version = 2,
    deps = [":hyperparams_proto"],
)
...
...
@@ -211,6 +241,7 @@ py_proto_library(
proto_library(
    name = "region_similarity_calculator_proto",
    srcs = ["region_similarity_calculator.proto"],
    cc_api_version = 2,
    deps = [],
)
...
...
@@ -223,6 +254,7 @@ py_proto_library(
proto_library(
    name = "preprocessor_proto",
    srcs = ["preprocessor.proto"],
    cc_api_version = 2,
)

py_proto_library(
...
...
@@ -234,6 +266,7 @@ py_proto_library(
proto_library(
    name = "train_proto",
    srcs = ["train.proto"],
    cc_api_version = 2,
    deps = [
        ":optimizer_proto",
        ":preprocessor_proto",
...
...
@@ -249,6 +282,7 @@ py_proto_library(
proto_library(
    name = "eval_proto",
    srcs = ["eval.proto"],
    cc_api_version = 2,
)

py_proto_library(
...
...
@@ -260,6 +294,7 @@ py_proto_library(
proto_library(
    name = "image_resizer_proto",
    srcs = ["image_resizer.proto"],
    cc_api_version = 2,
)

py_proto_library(
...
...
@@ -271,19 +306,21 @@ py_proto_library(
proto_library(
    name = "faster_rcnn_proto",
    srcs = ["faster_rcnn.proto"],
    cc_api_version = 2,
    deps = [
        ":box_predictor_proto",
-       "//object_detection/protos:anchor_generator_proto",
-       "//object_detection/protos:hyperparams_proto",
-       "//object_detection/protos:image_resizer_proto",
-       "//object_detection/protos:losses_proto",
-       "//object_detection/protos:post_processing_proto",
+       "//tensorflow/models/research/object_detection/protos:anchor_generator_proto",
+       "//tensorflow/models/research/object_detection/protos:hyperparams_proto",
+       "//tensorflow/models/research/object_detection/protos:image_resizer_proto",
+       "//tensorflow/models/research/object_detection/protos:losses_proto",
+       "//tensorflow/models/research/object_detection/protos:post_processing_proto",
    ],
)

proto_library(
    name = "ssd_proto",
    srcs = ["ssd.proto"],
    cc_api_version = 2,
    deps = [
        ":anchor_generator_proto",
        ":box_coder_proto",
...
...
@@ -300,6 +337,7 @@ proto_library(
proto_library(
    name = "model_proto",
    srcs = ["model.proto"],
    cc_api_version = 2,
    deps = [
        ":faster_rcnn_proto",
        ":ssd_proto",
...
...
@@ -315,6 +353,7 @@ py_proto_library(
proto_library(
    name = "pipeline_proto",
    srcs = ["pipeline.proto"],
    cc_api_version = 2,
    deps = [
        ":eval_proto",
        ":input_reader_proto",
...
...
@@ -332,6 +371,7 @@ py_proto_library(
proto_library(
    name = "string_int_label_map_proto",
    srcs = ["string_int_label_map.proto"],
    cc_api_version = 2,
)

py_proto_library(
...
...
research/object_detection/protos/anchor_generator.proto
...
...
@@ -4,6 +4,7 @@ package object_detection.protos;
import "object_detection/protos/grid_anchor_generator.proto";
import "object_detection/protos/ssd_anchor_generator.proto";
import "object_detection/protos/multiscale_anchor_generator.proto";

// Configuration proto for the anchor generator to use in the object detection
// pipeline. See core/anchor_generator.py for details.
...
...
@@ -11,5 +12,6 @@ message AnchorGenerator {
  oneof anchor_generator_oneof {
    GridAnchorGenerator grid_anchor_generator = 1;
    SsdAnchorGenerator ssd_anchor_generator = 2;
    MultiscaleAnchorGenerator multiscale_anchor_generator = 3;
  }
}
research/object_detection/protos/argmax_matcher.proto
...
...
@@ -22,4 +22,8 @@ message ArgMaxMatcher {
  // Whether to ensure each row is matched to at least one column.
  optional bool force_match_for_each_row = 5 [default = false];

  // Force constructed match objects to use matrix multiplication based gather
  // instead of standard tf.gather
  optional bool use_matmul_gather = 6 [default = false];
}
research/object_detection/protos/bipartite_matcher.proto
...
...
@@ -5,4 +5,7 @@ package object_detection.protos;
// Configuration proto for bipartite matcher. See
// matchers/bipartite_matcher.py for details.
message BipartiteMatcher {
  // Force constructed match objects to use matrix multiplication based gather
  // instead of standard tf.gather
  optional bool use_matmul_gather = 6 [default = false];
}
research/object_detection/protos/box_predictor.proto
...
...
@@ -11,6 +11,7 @@ message BoxPredictor {
    ConvolutionalBoxPredictor convolutional_box_predictor = 1;
    MaskRCNNBoxPredictor mask_rcnn_box_predictor = 2;
    RfcnBoxPredictor rfcn_box_predictor = 3;
    WeightSharedConvolutionalBoxPredictor weight_shared_convolutional_box_predictor = 4;
  }
}
...
...
@@ -46,10 +47,40 @@ message ConvolutionalBoxPredictor {
  optional int32 box_code_size = 8 [default = 4];

  // Whether to apply sigmoid to the output of class predictions.
- // TODO: Do we need this since we have a post processing module.?
+ // TODO(jonathanhuang): Do we need this since we have a post processing
+ // module.?
  optional bool apply_sigmoid_to_scores = 9 [default = false];

  optional float class_prediction_bias_init = 10 [default = 0.0];

  // Whether to use depthwise separable convolution for box predictor layers.
  optional bool use_depthwise = 11 [default = false];
}

// Configuration proto for weight shared convolutional box predictor.
message WeightSharedConvolutionalBoxPredictor {
  // Hyperparameters for convolution ops used in the box predictor.
  optional Hyperparams conv_hyperparams = 1;

  // Number of the additional conv layers before the predictor.
  optional int32 num_layers_before_predictor = 4 [default = 0];

  // Output depth for the convolution ops prior to predicting box encodings
  // and class predictions.
  optional int32 depth = 2 [default = 0];

  // Size of final convolution kernel. If the spatial resolution of the feature
  // map is smaller than the kernel size, then the kernel size is set to
  // min(feature_width, feature_height).
  optional int32 kernel_size = 7 [default = 3];

  // Size of the encoding for boxes.
  optional int32 box_code_size = 8 [default = 4];

  // Bias initialization for class prediction. It has been shown to stabilize
  // training where there are a large number of negative boxes. See
  // https://arxiv.org/abs/1708.02002 for details.
  optional float class_prediction_bias_init = 10 [default = 0.0];
}

message MaskRCNNBoxPredictor {
...
...
@@ -71,12 +102,22 @@ message MaskRCNNBoxPredictor {
  // Whether to predict instance masks inside detection boxes.
  optional bool predict_instance_masks = 6 [default = false];

- // The depth for the first conv2d_transpose op applied to the
- // image_features in the mask prediciton branch
+ // The depth for the first conv2d_transpose op applied to the
+ // image_features in the mask prediction branch. If set to 0, the value
+ // will be set automatically based on the number of channels in the image
+ // features and the number of classes.
  optional int32 mask_prediction_conv_depth = 7 [default = 256];

  // Whether to predict keypoints inside detection boxes.
  optional bool predict_keypoints = 8 [default = false];

  // The height and the width of the predicted mask.
  optional int32 mask_height = 9 [default = 15];
  optional int32 mask_width = 10 [default = 15];

  // The number of convolutions applied to image_features in the mask prediction
  // branch.
  optional int32 mask_prediction_num_conv_layers = 11 [default = 2];
}

message RfcnBoxPredictor {
...
...
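A hedged sketch of configuring the new predictor. The oneof field name comes from the BoxPredictor hunk above; the depth, layer count, and bias values are illustrative, and conv_hyperparams is elided:

  box_predictor {
    weight_shared_convolutional_box_predictor {
      depth: 256                      # illustrative head depth
      num_layers_before_predictor: 4  # illustrative
      kernel_size: 3
      box_code_size: 4
      # Illustrative value; a negative bias init skews initial class scores
      # toward background, per https://arxiv.org/abs/1708.02002.
      class_prediction_bias_init: -4.6
    }
  }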
research/object_detection/protos/eval.proto
...
...
@@ -26,9 +26,8 @@ message EvalConfig {
  // BNS name of the TensorFlow master.
  optional string eval_master = 7 [default = ""];

- // Type of metrics to use for evaluation. Currently supports only Pascal VOC
- // detection metrics.
- optional string metrics_set = 8 [default = "pascal_voc_metrics"];
+ // Type of metrics to use for evaluation.
+ repeated string metrics_set = 8;

  // Path to export detections to COCO compatible JSON format.
  optional string export_path = 9 [default = ''];
...
...
@@ -38,10 +37,35 @@ message EvalConfig {
  optional bool ignore_groundtruth = 10 [default = false];

  // Use exponential moving averages of variables for evaluation.
  // TODO(rathodv): When this is false make sure the model is constructed
  // without moving averages in restore_fn.
  optional bool use_moving_averages = 11 [default = false];

  // Whether to evaluate instance masks.
  // Note that since there is no evaluation code currently for instance
  // segmentation this option is unused.
  optional bool eval_instance_masks = 12 [default = false];

  // Minimum score threshold for a detected object box to be visualized
  optional float min_score_threshold = 13 [default = 0.5];

  // Maximum number of detections to visualize
  optional int32 max_num_boxes_to_visualize = 14 [default = 20];

  // When drawing a single detection, each label is by default visualized as
  // <label name> : <label score>. One can skip the name or/and score using the
  // following fields:
  optional bool skip_scores = 15 [default = false];
  optional bool skip_labels = 16 [default = false];

  // Whether to show groundtruth boxes in addition to detected boxes in
  // visualizations.
  optional bool visualize_groundtruth_boxes = 17 [default = false];

  // Box color for visualizing groundtruth boxes.
  optional string groundtruth_box_visualization_color = 18 [default = "black"];

  // Whether to keep image identifier in filename when exported to
  // visualization_export_dir.
  optional bool keep_image_id_for_visualization_export = 19 [default = false];
}
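Because metrics_set is now repeated, an eval_config can request more than one metric set. A sketch; only "pascal_voc_metrics" is confirmed by the old default above, and the second set name is hypothetical:

  eval_config: {
    metrics_set: "pascal_voc_metrics"
    metrics_set: "coco_detection_metrics"  # hypothetical second metric set
    # New visualization controls added in this commit:
    min_score_threshold: 0.5
    max_num_boxes_to_visualize: 20
    visualize_groundtruth_boxes: true
    groundtruth_box_visualization_color: "black"
  }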
research/object_detection/protos/faster_rcnn.proto
...
...
@@ -20,7 +20,7 @@ import "object_detection/protos/post_processing.proto";
message FasterRcnn {

  // Whether to construct only the Region Proposal Network (RPN).
- optional bool first_stage_only = 1 [default = false];
+ optional int32 number_of_stages = 1 [default = 2];

  // Number of classes to predict.
  optional int32 num_classes = 3;
...
...
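Reading the replacement, number_of_stages: 1 presumably reproduces the old first_stage_only: true (RPN-only) behavior, while the default of 2 keeps the full two-stage model. A sketch with an illustrative class count:

  model {
    faster_rcnn {
      number_of_stages: 1  # RPN only; 2 (the new default) runs both stages
      num_classes: 90      # illustrative
    }
  }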
research/object_detection/protos/image_resizer.proto
...
...
@@ -29,6 +29,14 @@ message KeepAspectRatioResizer {
  // Desired method when resizing image.
  optional ResizeType resize_method = 3 [default = BILINEAR];

  // Whether to pad the image with zeros so the output spatial size is
  // [max_dimension, max_dimension]. Note that the zeros are padded to the
  // bottom and the right of the resized image.
  optional bool pad_to_max_dimension = 4 [default = false];

  // Whether to also resize the image channels from 3 to 1 (RGB to grayscale).
  optional bool convert_to_grayscale = 5 [default = false];
}

// Configuration proto for image resizer that resizes to a fixed shape.
...
...
@@ -41,4 +49,7 @@ message FixedShapeResizer {
  // Desired method when resizing image.
  optional ResizeType resize_method = 3 [default = BILINEAR];

  // Whether to also resize the image channels from 3 to 1 (RGB to grayscale).
  optional bool convert_to_grayscale = 4 [default = false];
}
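A sketch of the new resizer options in a pipeline config; min_dimension and max_dimension predate this commit and are assumed here:

  image_resizer {
    keep_aspect_ratio_resizer {
      min_dimension: 600   # assumed pre-existing field
      max_dimension: 1024  # assumed pre-existing field
      # New: zero-pad bottom/right to a [max_dimension, max_dimension] output.
      pad_to_max_dimension: true
      # New: optionally collapse RGB to a single grayscale channel.
      convert_to_grayscale: false
    }
  }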
research/object_detection/protos/input_reader.proto
...
...
@@ -15,6 +15,13 @@ package object_detection.protos;
// 'groundtruth_instance_masks': (Optional), a [num_boxes, image_height,
// image_width] float tensor storing binary mask of the objects in boxes.

// Instance mask format. Note that PNG masks are much more space efficient.
enum InstanceMaskType {
  DEFAULT = 0;          // Default implementation, currently NUMERICAL_MASKS
  NUMERICAL_MASKS = 1;  // [num_masks, H, W] float32 binary masks.
  PNG_MASKS = 2;        // Encoded PNG masks.
}

message InputReader {
  // Path to StringIntLabelMap pbtxt file specifying the mapping from string
  // labels to integer ids.
...
...
@@ -24,6 +31,12 @@ message InputReader {
  // shuffled randomly.
  optional bool shuffle = 2 [default = true];

  // Buffer size to be used when shuffling.
  optional uint32 shuffle_buffer_size = 11 [default = 100];

  // Buffer size to be used when shuffling file names.
  optional uint32 filenames_shuffle_buffer_size = 12 [default = 100];

  // Maximum number of records to keep in reader queue.
  optional uint32 queue_capacity = 3 [default = 2000];
...
...
@@ -38,9 +51,15 @@ message InputReader {
  // Number of reader instances to create.
  optional uint32 num_readers = 6 [default = 8];

  // Size of the buffer for prefetching (in batches).
  optional uint32 prefetch_buffer_size = 13 [default = 2];

  // Whether to load groundtruth instance masks.
  optional bool load_instance_masks = 7 [default = false];

  // Type of instance mask.
  optional InstanceMaskType mask_type = 10 [default = NUMERICAL_MASKS];

  oneof input_reader {
    TFRecordInputReader tf_record_input_reader = 8;
    ExternalInputReader external_input_reader = 9;
...
...
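Putting the new reader fields together, a hedged input-reader sketch; the paths follow the PATH_TO_BE_CONFIGURED convention of the sample configs in this commit, and the buffer sizes are illustrative:

  train_input_reader: {
    tf_record_input_reader {
      input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
    }
    label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
    load_instance_masks: true
    mask_type: PNG_MASKS                # new enum; PNG masks save space
    shuffle_buffer_size: 2048           # new shuffling/prefetch knobs
    filenames_shuffle_buffer_size: 100
    prefetch_buffer_size: 2
  }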
research/object_detection/protos/losses.proto
...
...
@@ -33,12 +33,14 @@ message LocalizationLoss {
// L2 location loss: 0.5 * ||weight * (a - b)|| ^ 2
message WeightedL2LocalizationLoss {
  // DEPRECATED, do not use.
  // Output loss per anchor.
  optional bool anchorwise_output = 1 [default = false];
}

// SmoothL1 (Huber) location loss: .5 * x ^ 2 if |x| < 1 else |x| - .5
message WeightedSmoothL1LocalizationLoss {
  // DEPRECATED, do not use.
  // Output loss per anchor.
  optional bool anchorwise_output = 1 [default = false];
}
...
...
@@ -59,6 +61,7 @@ message ClassificationLoss {
// Classification loss using a sigmoid function over class predictions.
message WeightedSigmoidClassificationLoss {
  // DEPRECATED, do not use.
  // Output loss per anchor.
  optional bool anchorwise_output = 1 [default = false];
}
...
...
@@ -66,6 +69,7 @@ message WeightedSigmoidClassificationLoss {
// Sigmoid Focal cross entropy loss as described in
// https://arxiv.org/abs/1708.02002
message SigmoidFocalClassificationLoss {
  // DEPRECATED, do not use.
  optional bool anchorwise_output = 1 [default = false];
  // modulating factor for the loss.
  optional float gamma = 2 [default = 2.0];
...
...
@@ -75,6 +79,7 @@ message SigmoidFocalClassificationLoss {
// Classification loss using a softmax function over class predictions.
message WeightedSoftmaxClassificationLoss {
  // DEPRECATED, do not use.
  // Output loss per anchor.
  optional bool anchorwise_output = 1 [default = false];

  // Scale logit (input) value before calculating softmax classification loss.
...
...
@@ -93,6 +98,7 @@ message BootstrappedSigmoidClassificationLoss {
  // probabilities.
  optional bool hard_bootstrap = 2 [default = false];

  // DEPRECATED, do not use.
  // Output loss per anchor.
  optional bool anchorwise_output = 3 [default = false];
}
...
...
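Since anchorwise_output is deprecated across these losses, a current-style loss stanza would simply omit it. A sketch; the weighted_sigmoid_focal oneof name is an assumption, only the SigmoidFocalClassificationLoss message and its gamma default are confirmed above, and weighted_smooth_l1 appears in the sample config later in this commit:

  loss {
    classification_loss {
      weighted_sigmoid_focal {  # assumed oneof field name
        gamma: 2.0              # default from SigmoidFocalClassificationLoss
      }
    }
    localization_loss {
      weighted_smooth_l1 {
      }
    }
  }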
research/object_detection/protos/multiscale_anchor_generator.proto
(new file, mode 100644)
syntax = "proto2";

package object_detection.protos;

// Configuration proto for RetinaNet anchor generator described in
// https://arxiv.org/abs/1708.02002. See
// anchor_generators/multiscale_grid_anchor_generator.py for details.
message MultiscaleAnchorGenerator {
  // minimum level in feature pyramid
  optional int32 min_level = 1 [default = 3];

  // maximum level in feature pyramid
  optional int32 max_level = 2 [default = 7];

  // Scale of anchor to feature stride
  optional float anchor_scale = 3 [default = 4.0];

  // Aspect ratios for anchors at each grid point.
  repeated float aspect_ratios = 4;

  // Number of intermediate scales per scale octave
  optional int32 scales_per_octave = 5 [default = 2];
}
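A sketch of selecting the new generator through the AnchorGenerator oneof; the scalar values echo the defaults declared above, and the aspect ratios are illustrative:

  anchor_generator {
    multiscale_anchor_generator {
      min_level: 3
      max_level: 7
      anchor_scale: 4.0
      aspect_ratios: 1.0  # illustrative
      aspect_ratios: 2.0
      aspect_ratios: 0.5
      scales_per_octave: 2
    }
  }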
research/object_detection/protos/pipeline.proto
...
...
@@ -15,4 +15,5 @@ message TrainEvalPipelineConfig {
  optional InputReader train_input_reader = 3;
  optional EvalConfig eval_config = 4;
  optional InputReader eval_input_reader = 5;
  extensions 1000 to max;
}
research/object_detection/protos/preprocessor.proto
...
...
@@ -3,7 +3,7 @@ syntax = "proto2";
package object_detection.protos;

// Message for defining a preprocessing operation on input data.
- // See: //object_detection/core/preprocessor.py
+ // See: //third_party/tensorflow_models/object_detection/core/preprocessor.py
message PreprocessingStep {
  oneof preprocessing_step {
    NormalizeImage normalize_image = 1;
...
...
@@ -32,6 +32,7 @@ message PreprocessingStep {
    SSDRandomCropPadFixedAspectRatio ssd_random_crop_pad_fixed_aspect_ratio = 24;
    RandomVerticalFlip random_vertical_flip = 25;
    RandomRotation90 random_rotation90 = 26;
    RGBtoGray rgb_to_gray = 27;
  }
}
...
...
@@ -202,7 +203,7 @@ message RandomCropPadImage {
  repeated float max_padded_size_ratio = 9;

  // Color of the padding. If unset, will pad using average color of the input
- // image.
+ // image. This field should be of length 3.
  repeated float pad_color = 10;
}
...
...
@@ -236,6 +237,11 @@ message RandomResizeMethod {
  optional float target_width = 2;
}

// Converts the RGB image to a grayscale image. This also converts the image
// depth from 3 to 1, unlike RandomRGBtoGray which does not change the image
// depth.
message RGBtoGray {}

// Scales boxes from normalized coordinates to pixel coordinates.
message ScaleBoxesToPixelCoordinates {
}
...
...
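The new step can be wired in as a data augmentation option in train_config, mirroring how the sample config in this commit attaches other preprocessing steps. A minimal sketch:

  train_config: {
    data_augmentation_options {
      rgb_to_gray {
      }
    }
  }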
research/object_detection/protos/ssd.proto
...
...
@@ -82,4 +82,12 @@ message SsdFeatureExtractor {
  // will apply only to the additional layers that are added and are outside the
  // canned arg_scope.
  optional bool batch_norm_trainable = 6 [default = true];

  // Whether to use explicit padding when extracting SSD multiresolution
  // features. Note that this does not apply to the base feature extractor.
  optional bool use_explicit_padding = 7 [default = false];

  // Whether to use depthwise separable convolutions to extract additional
  // feature maps added by SSD.
  optional bool use_depthwise = 8 [default = false];
}
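A sketch of the new extractor flags in a feature_extractor stanza; the type and depth settings are taken from the sample config later in this commit, and conv_hyperparams is elided:

  feature_extractor {
    type: 'embedded_ssd_mobilenet_v1'
    depth_multiplier: 0.125
    min_depth: 16
    # New flags; both affect only the extra SSD feature layers,
    # not the base network.
    use_explicit_padding: true
    use_depthwise: true
  }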
research/object_detection/protos/train.proto
...
...
@@ -35,6 +35,11 @@ message TrainConfig {
  // If false, it assumes the checkpoint was an object classification model.
  optional bool from_detection_checkpoint = 8 [default = false];

  // Whether to load all checkpoint vars that match model variable names and
  // sizes. This option is only available if `from_detection_checkpoint` is
  // True.
  optional bool load_all_detection_checkpoint_vars = 19 [default = false];

  // Number of steps to train the DetectionModel for. If 0, will train the model
  // indefinitely.
  optional uint32 num_steps = 9 [default = 0];
...
...
@@ -66,4 +71,21 @@ message TrainConfig {
  // This is useful when each box can have multiple labels.
  // Note that only Sigmoid classification losses should be used.
  optional bool merge_multiple_label_boxes = 17 [default = false];

  // Whether to add regularization loss to `total_loss`. This is true by
  // default and adds all regularization losses defined in the model to
  // `total_loss`.
  // Setting this option to false is very useful while debugging the model and
  // losses.
  optional bool add_regularization_loss = 18 [default = true];

  // Maximum number of boxes used during training.
  // Set this to at least the maximum amount of boxes in the input data.
  // Otherwise, it may cause "Data loss: Attempted to pad to a smaller size
  // than the input element" errors.
  optional int32 max_number_of_boxes = 20 [default = 50];

  // Whether to remove padding along `num_boxes` dimension of the groundtruth
  // tensors.
  optional bool unpad_groundtruth_tensors = 21 [default = true];
}
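Combining the new training options, a hedged train_config sketch; the checkpoint path follows the sample-config convention, and values other than the declared defaults are illustrative:

  train_config: {
    batch_size: 32
    fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
    from_detection_checkpoint: true
    load_all_detection_checkpoint_vars: true  # requires from_detection_checkpoint
    add_regularization_loss: true
    max_number_of_boxes: 50  # set to at least the max boxes per input example
    unpad_groundtruth_tensors: true
  }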
research/object_detection/samples/configs/BUILD
(new file, mode 100644)
package(
    default_visibility = ["//visibility:public"],
)

licenses(["notice"])

exports_files([
    "faster_rcnn_resnet50_pets.config",
    "ssd_inception_v2_pets.config",
    "ssd_mobilenet_v1_focal_loss_pets.config",
])
research/object_detection/samples/configs/embedded_ssd_mobilenet_v1_coco.config
(new file, mode 100644)
# Embedded SSD with Mobilenet v1 configuration for MSCOCO Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
  ssd {
    num_classes: 90
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    anchor_generator {
      ssd_anchor_generator {
        num_layers: 5
        min_scale: 0.2
        max_scale: 0.95
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        aspect_ratios: 3.0
        aspect_ratios: 0.3333
      }
    }
    image_resizer {
      fixed_shape_resizer {
        height: 256
        width: 256
      }
    }
    box_predictor {
      convolutional_box_predictor {
        min_depth: 0
        max_depth: 0
        num_layers_before_predictor: 0
        use_dropout: false
        dropout_keep_probability: 0.8
        kernel_size: 1
        box_code_size: 4
        apply_sigmoid_to_scores: false
        conv_hyperparams {
          activation: RELU_6,
          regularizer {
            l2_regularizer {
              weight: 0.00004
            }
          }
          initializer {
            truncated_normal_initializer {
              stddev: 0.03
              mean: 0.0
            }
          }
          batch_norm {
            train: true,
            scale: true,
            center: true,
            decay: 0.9997,
            epsilon: 0.001,
          }
        }
      }
    }
    feature_extractor {
      type: 'embedded_ssd_mobilenet_v1'
      min_depth: 16
      depth_multiplier: 0.125
      conv_hyperparams {
        activation: RELU_6,
        regularizer {
          l2_regularizer {
            weight: 0.00004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          train: true,
          scale: true,
          center: true,
          decay: 0.9997,
          epsilon: 0.001,
        }
      }
    }
    loss {
      classification_loss {
        weighted_sigmoid {
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      hard_example_miner {
        num_hard_examples: 3000
        iou_threshold: 0.99
        loss_type: CLASSIFICATION
        max_negatives_per_positive: 3
        min_negatives_per_image: 0
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  batch_size: 32
  optimizer {
    rms_prop_optimizer: {
      learning_rate: {
        exponential_decay_learning_rate {
          initial_learning_rate: 0.004
          decay_steps: 800720
          decay_factor: 0.95
        }
      }
      momentum_optimizer_value: 0.9
      decay: 0.9
      epsilon: 1.0
    }
  }
  fine_tune_checkpoint: "/PATH_TO_BE_CONFIGURED/model.ckpt"
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    ssd_random_crop {
    }
  }
}

train_input_reader: {
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
  }
  label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
}

eval_config: {
  num_examples: 8000
  use_moving_averages: true
}

eval_input_reader: {
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record"
  }
  label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
  shuffle: false
  num_readers: 1
}