Commit b3247557 authored by Dheera Venkatraman's avatar Dheera Venkatraman

add flag for saving images to summary; strings moved to common.py

parents 75c931fd 2041d5ca
# Copyright 2018 Google, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import tensorflow as tf
import sonnet as snt
import itertools
import functools
from tensorflow.core.framework import node_def_pb2
from tensorflow.python.framework import device as pydev
from tensorflow.python.framework import errors
from tensorflow.python.ops import variable_scope as variable_scope_ops
from sonnet.python.modules import util as snt_util
from tensorflow.python.util import nest
def eqzip(*args):
"""Zip but raises error if lengths don't match.
Args:
*args: list of lists or tuples
Returns:
list: the result of zip
Raises:
ValueError: when the lengths don't match
"""
sizes = [len(x) for x in args]
if not all(sizes[0] == x for x in sizes):
raise ValueError("Lists are of different sizes.\n%s" % str(sizes))
return zip(*args)
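# Illustrative usage sketch (hypothetical values, not part of the original
# module):
#   eqzip([1, 2], ["a", "b"])  # -> [(1, "a"), (2, "b")]
#   eqzip([1, 2], ["a"])       # raises ValueError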
@contextlib.contextmanager
def assert_no_new_variables():
"""Ensure that no tf.Variables are constructed inside the context.
Yields:
None
Raises:
ValueError: if a variable is created.
"""
old_variables = tf.global_variables()
num_vars = len(old_variables)
yield
if len(tf.global_variables()) != num_vars:
new_vars = set(tf.global_variables()) - set(old_variables)
tf.logging.error("NEW VARIABLES CREATED")
tf.logging.error(10*"=")
for v in new_vars:
tf.logging.error(v)
raise ValueError("Variables created inside an "
"assert_no_new_variables context")
if old_variables != tf.global_variables():
raise ValueError("Variables somehow changed inside an "
"assert_no_new_variables context. "
"This means something modified tf.global_variables().")
def get_variables_in_modules(module_list):
var_list = []
for m in module_list:
var_list.extend(snt.get_variables_in_module(m))
return var_list
def state_barrier_context(state):
"""Return a context manager that prevents interior ops from running
unless the whole state has been computed.
This is to prevent assign race conditions.
"""
tensors = [x for x in nest.flatten(state) if isinstance(x, tf.Tensor)]
tarray = [x.flow for x in nest.flatten(state) if hasattr(x, "flow")]
return tf.control_dependencies(tensors + tarray)
def _identity_fn(tf_entity):
if hasattr(tf_entity, "identity"):
return tf_entity.identity()
else:
return tf.identity(tf_entity)
def state_barrier_result(state):
"""Return the same state, but with a control dependency to prevent it from
being partially computed
"""
with state_barrier_context(state):
return nest.map_structure(_identity_fn, state)
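# Illustrative usage sketch: `next_state` is a hypothetical nested structure
# of tensors produced by one update step. Wrapping the result ensures that
# downstream assign ops only ever see a fully computed state:
#   next_state = state_barrier_result(next_state)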
def train_iterator(num_iterations):
"""Iterator that returns an index of the current step.
This iterator runs forever if num_iterations is None
otherwise it runs for some fixed amount of steps.
"""
if num_iterations is None:
return itertools.count()
else:
return xrange(num_iterations)
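# Illustrative usage sketch (hypothetical `session` and `train_op`):
#   for step in train_iterator(100):  # 100 steps; pass None to run forever
#     session.run(train_op)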
def print_op(op, msg):
"""Print a string and return an op wrapped in a control dependency to make
sure it ran."""
print_op = tf.Print(tf.constant(0), [tf.constant(0)], msg)
return tf.group(op, print_op)
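# Illustrative usage sketch (hypothetical `train_op`):
#   train_op = print_op(train_op, "running train step")
#   session.run(train_op)  # prints the message each time the op runs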
class MultiQueueRunner(tf.train.QueueRunner):
"""A QueueRunner with multiple queues """
def __init__(self, queues, enqueue_ops):
close_op = tf.group(*[q.close() for q in queues])
cancel_op = tf.group(
*[q.close(cancel_pending_enqueues=True) for q in queues])
queue_closed_exception_types = (errors.OutOfRangeError,)
enqueue_op = tf.group(*enqueue_ops, name="multi_enqueue")
super(MultiQueueRunner, self).__init__(
queues[0],
enqueue_ops=[enqueue_op],
close_op=close_op,
cancel_op=cancel_op,
queue_closed_exception_types=queue_closed_exception_types)
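# Illustrative usage sketch, assuming `queues` and matching per-queue
# `enqueue_ops` were built elsewhere in the graph:
#   qr = MultiQueueRunner(queues=queues, enqueue_ops=enqueue_ops)
#   tf.train.add_queue_runner(qr)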
# This function is not elegant, but of the many other approaches tried, this
# is the only one that did not incur significant overhead or hit obscure
# tensorflow bugs.
def sample_n_per_class(dataset, samples_per_class):
"""Create a new callable / dataset object that returns batches of each with
samples_per_class per label.
Args:
dataset: fn
samples_per_class: int
Returns:
function, [] -> batch where batch is the same type as the return of
dataset().
"""
with tf.control_dependencies(None), tf.name_scope(None):
with tf.name_scope("queue_runner/sample_n_per_class"):
batch = dataset()
num_classes = batch.label_onehot.shape.as_list()[1]
batch_size = num_classes * samples_per_class
flatten = nest.flatten(batch)
queues = []
enqueue_ops = []
capacity = samples_per_class * 20
for i in xrange(num_classes):
queue = tf.FIFOQueue(
capacity=capacity,
shapes=[f.shape.as_list()[1:] for f in flatten],
dtypes=[f.dtype for f in flatten])
queues.append(queue)
idx = tf.where(tf.equal(batch.label, i))
sub_batch = []
to_enqueue = []
for elem in batch:
new_e = tf.gather(elem, idx)
new_e = tf.squeeze(new_e, 1)
to_enqueue.append(new_e)
remaining = (capacity - queue.size())
to_add = tf.minimum(tf.shape(idx)[0], remaining)
def _enqueue():
return queue.enqueue_many([t[:to_add] for t in to_enqueue])
enqueue_op = tf.cond(
tf.equal(to_add, 0), tf.no_op, _enqueue)
enqueue_ops.append(enqueue_op)
# This has caused many deadlocks / issues; this logging at least sheds some
# light on what is going on.
print_lam = lambda: tf.Print(
tf.constant(0.0), [q.size() for q in queues],
"MultiQueueRunner queues status. Has capacity %d" % capacity)
some_percent_of_time = tf.less(tf.random_uniform([]), 0.0005)
maybe_print = tf.cond(some_percent_of_time, print_lam,
lambda: tf.constant(0.0))
with tf.control_dependencies([maybe_print]):
enqueue_ops = [tf.group(e) for e in enqueue_ops]
qr = MultiQueueRunner(queues=queues, enqueue_ops=enqueue_ops)
tf.train.add_queue_runner(qr)
def dequeue_batch():
with tf.name_scope("sample_n_per_batch/dequeue/"):
entries = []
for q in queues:
entries.append(q.dequeue_many(samples_per_class))
flat_batch = [tf.concat(x, 0) for x in zip(*entries)]
idx = tf.random_shuffle(tf.range(batch_size))
flat_batch = [tf.gather(f, idx, axis=0) for f in flat_batch]
return nest.pack_sequence_as(batch, flat_batch)
return dequeue_batch
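# Illustrative usage sketch, assuming a hypothetical `mnist_dataset` callable
# whose batches carry `label` and `label_onehot` fields:
#   get_batch = sample_n_per_class(mnist_dataset, samples_per_class=5)
#   batch = get_batch()  # every dequeued batch has 5 examples of each class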
def structure_map_multi(func, values):
all_values = [nest.flatten(v) for v in values]
rets = []
for pair in zip(*all_values):
rets.append(func(pair))
return nest.pack_sequence_as(values[0], rets)
def structure_map_split(func, value):
vv = nest.flatten(value)
rets = []
for v in vv:
rets.append(func(v))
return [nest.pack_sequence_as(value, r) for r in zip(*rets)]
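# Illustrative sketches with plain tuples (any nest-compatible structure
# works the same way):
#   structure_map_multi(sum, [(1, 2), (10, 20)])    # -> (11, 22)
#   structure_map_split(lambda v: (v, -v), (1, 2))  # -> [(1, 2), (-1, -2)]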
def assign_variables(targets, values):
return tf.group(*[t.assign(v) for t,v in eqzip(targets, values)],
name="assign_variables")
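# Illustrative usage sketch: sync one list of variables from another, e.g.
# copying hypothetical `online_vars` into `target_vars` of matching shapes:
#   sync_op = assign_variables(target_vars, online_vars)
#   session.run(sync_op)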
def create_variables_in_class_scope(method):
"""Force the variables constructed in this class to live in the sonnet module.
Wraps a method on a sonnet module.
For example the following will create two different variables.
```
class Mod(snt.AbstractModule):
@create_variables_in_class_scope
def dynamic_thing(self, input, name):
return snt.Linear(name)(input)
mod.dynamic_thing(x, name="module_nameA")
mod.dynamic_thing(x, name="module_nameB")
# reuse
mod.dynamic_thing(y, name="module_nameA")
```
"""
@functools.wraps(method)
def wrapper(obj, *args, **kwargs):
def default_context_manager(reuse=None):
variable_scope = obj.variable_scope
return tf.variable_scope(variable_scope, reuse=reuse)
variable_scope_context_manager = getattr(obj, "_enter_variable_scope",
default_context_manager)
graph = tf.get_default_graph()
# Temporarily enter the variable scope to capture it
with variable_scope_context_manager() as tmp_variable_scope:
variable_scope = tmp_variable_scope
with variable_scope_ops._pure_variable_scope(
variable_scope, reuse=tf.AUTO_REUSE) as pure_variable_scope:
name_scope = variable_scope.original_name_scope
if name_scope[-1] != "/":
name_scope += "/"
with tf.name_scope(name_scope):
sub_scope = snt_util.to_snake_case(method.__name__)
with tf.name_scope(sub_scope) as scope:
out_ops = method(obj, *args, **kwargs)
return out_ops
return wrapper
# Copyright 2018 Google, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
import tensorflow as tf
from contextlib import contextmanager
from tensorflow.python.ops import variable_scope
# Sanity-check global state used to ensure this is not applied recursively.
_is_variable_replacing = [False]
def in_variable_replace_scope():
return _is_variable_replacing[0]
@contextmanager
def variable_replace(replacements, no_new=True):
""" A context manager that replaces variables.
This is a context manager that replaces all calls to
get_variable with the variable in replacements.
This function does not support recursive application.
Args:
replacements: dict
dictionary mapping a variable to replace (the key), with
the variable one wants to replace this variable with (the value).
no_new: bool
raise an error if variables were created.
This is for sanity checking.
Raises:
ValueError: if a new variable is created, or if not all replacements are used.
"""
# TODO(lmetz) This function is a bit scary, as it relies on monkey patching
# the call to get_variable. Ideally this can be done with variable_scope's
# custom_getter attribute, but when initially writing this that was not
# available.
replacements = {k: v for k, v in replacements.items() if not k == v}
init_vars = tf.trainable_variables()
old_get_variable = variable_scope.get_variable
old_tf_get_variable = tf.get_variable
names_replace = {}
has_replaced_names = []
tf.logging.vlog(2, "Trying to replace")
for k, v in replacements.items():
tf.logging.vlog(2, k.name + " >> " + v.name)
tf.logging.vlog(2, "===")
for k, v in replacements.items():
strip_name = k.name.replace("/read:0", "")
strip_name = strip_name.replace(":0", "")
names_replace[strip_name] = v
# TODO(lmetz) is there a cleaner way to do this?
def new_get_variable(name, *args, **kwargs):
# Resolve the full variable name and return the replacement if one exists.
n = tf.get_variable_scope().name + "/" + name
if n in names_replace:
has_replaced_names.append(n)
return names_replace[n]
else:
return old_get_variable(name, *args, **kwargs)
# perform the monkey patch
if _is_variable_replacing[0]:
raise ValueError("No recursive calling to variable replace allowed.")
variable_scope.get_variable = new_get_variable
tf.get_variable = new_get_variable
_is_variable_replacing[0] = True
yield
if set(has_replaced_names) != set(names_replace.keys()):
tf.logging.error("Did not use all replacements.")
tf.logging.error("Replaced variables that were not requested:")
tf.logging.error("===")
for n in list(set(has_replaced_names) - set(names_replace.keys())):
tf.logging.error(n)
tf.logging.error("Missed replacing variables:")
tf.logging.error("===")
for n in list(set(names_replace.keys()) - set(has_replaced_names)):
tf.logging.error(n + " ==> " + names_replace[n].name)
raise ValueError("Fix this -- see the error log above.")
# undo the monkey patch
tf.get_variable = old_tf_get_variable
variable_scope.get_variable = old_get_variable
_is_variable_replacing[0] = False
final_vars = tf.trainable_variables()
assert set(init_vars) == set(final_vars), "trainable variables changed"
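# Illustrative usage sketch: rebuild a graph against perturbed parameter
# values. `model_fn` is a hypothetical function that creates its parameters
# via tf.get_variable under reused variable scopes.
#   theta = tf.trainable_variables()
#   perturbed = {v: v + 0.01 for v in theta}  # tensors standing in for vars
#   with variable_replace(perturbed):
#     loss_at_perturbed = model_fn(inputs)  # get_variable yields replacements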
@@ -29,6 +29,7 @@ https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.go
* Jonathan Huang, github: [jch1](https://github.com/jch1)
* Vivek Rathod, github: [tombstone](https://github.com/tombstone)
* Ronny Votel, github: [ronnyvotel](https://github.com/ronnyvotel)
* Derek Chow, github: [derekjchow](https://github.com/derekjchow)
* Chen Sun, github: [jesu9](https://github.com/jesu9)
* Menglong Zhu, github: [dreamdragon](https://github.com/dreamdragon)
@@ -89,6 +90,16 @@ reporting an issue.
## Release information
### April 2, 2018
Supercharge your mobile phones with the next generation mobile object detector!
We are adding support for MobileNet V2 with SSDLite presented in
[MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381).
This model is 35% faster than Mobilenet V1 SSD on a Google Pixel phone CPU (200ms vs. 270ms) at the same accuracy.
Along with the model definition, we are also releasing a model checkpoint trained on the COCO dataset.
<b>Thanks to contributors</b>: Menglong Zhu, Mark Sandler, Zhichao Lu, Vivek Rathod, Jonathan Huang
### February 9, 2018
We now support instance segmentation!! In this API update we support a number of instance segmentation models similar to those discussed in the [Mask R-CNN paper](https://arxiv.org/abs/1703.06870). For further details refer to
@@ -30,6 +30,7 @@ from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2
from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas
from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
from object_detection.models import ssd_resnet_v1_fpn_feature_extractor as ssd_resnet_v1_fpn
from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor
@@ -55,6 +56,8 @@ SSD_FEATURE_EXTRACTOR_CLASS_MAP = {
FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {
'faster_rcnn_nas':
frcnn_nas.FasterRCNNNASFeatureExtractor,
'faster_rcnn_pnas':
frcnn_pnas.FasterRCNNPNASFeatureExtractor,
'faster_rcnn_inception_resnet_v2':
frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor,
'faster_rcnn_inception_v2':
@@ -95,13 +98,19 @@ def build(model_config, is_training, add_summaries=True):
def _build_ssd_feature_extractor(feature_extractor_config, is_training,
reuse_weights=None):
reuse_weights=None,
inplace_batchnorm_update=False):
"""Builds a ssd_meta_arch.SSDFeatureExtractor based on config.
Args:
feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto.
is_training: True if this feature extractor is being built for training.
reuse_weights: if the feature extractor should reuse weights.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs. When
this is false, the user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for the train/loss op in order to update the batch
norm moving average parameters.
Returns:
ssd_meta_arch.SSDFeatureExtractor based on config.
@@ -126,7 +135,8 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training,
return feature_extractor_class(is_training, depth_multiplier, min_depth,
pad_to_multiple, conv_hyperparams,
batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
use_explicit_padding, use_depthwise,
inplace_batchnorm_update)
def _build_ssd_model(ssd_config, is_training, add_summaries):
@@ -140,6 +150,7 @@ def _build_ssd_model(ssd_config, is_training, add_summaries):
Returns:
SSDMetaArch based on the config.
Raises:
ValueError: If ssd_config.type is not recognized (i.e. not registered in
model_class_map).
@@ -147,8 +158,10 @@ def _build_ssd_model(ssd_config, is_training, add_summaries):
num_classes = ssd_config.num_classes
# Feature extractor
feature_extractor = _build_ssd_feature_extractor(ssd_config.feature_extractor,
is_training)
feature_extractor = _build_ssd_feature_extractor(
feature_extractor_config=ssd_config.feature_extractor,
is_training=is_training,
inplace_batchnorm_update=ssd_config.inplace_batchnorm_update)
box_coder = box_coder_builder.build(ssd_config.box_coder)
matcher = matcher_builder.build(ssd_config.matcher)
@@ -194,7 +207,8 @@ def _build_ssd_model(ssd_config, is_training, add_summaries):
def _build_faster_rcnn_feature_extractor(
feature_extractor_config, is_training, reuse_weights=None):
feature_extractor_config, is_training, reuse_weights=None,
inplace_batchnorm_update=False):
"""Builds a faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config.
Args:
@@ -202,6 +216,11 @@ def _build_faster_rcnn_feature_extractor(
faster_rcnn.proto.
is_training: True if this feature extractor is being built for training.
reuse_weights: if the feature extractor should reuse weights.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs. When
this is false, the user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for the train/loss op in order to update the batch
norm moving average parameters.
Returns:
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config.
@@ -209,6 +228,8 @@
Raises:
ValueError: On invalid feature extractor type.
"""
if inplace_batchnorm_update:
raise ValueError('inplace batchnorm updates not supported.')
feature_type = feature_extractor_config.type
first_stage_features_stride = (
feature_extractor_config.first_stage_features_stride)
@@ -238,6 +259,7 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
Returns:
FasterRCNNMetaArch based on the config.
Raises:
ValueError: If frcnn_config.type is not recognized (i.e. not registered in
model_class_map).
@@ -246,7 +268,8 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)
feature_extractor = _build_faster_rcnn_feature_extractor(
frcnn_config.feature_extractor, is_training)
frcnn_config.feature_extractor, is_training,
frcnn_config.inplace_batchnorm_update)
number_of_stages = frcnn_config.number_of_stages
first_stage_anchor_generator = anchor_generator_builder.build(
@@ -25,6 +25,7 @@ from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2
from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas
from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
from object_detection.models import ssd_resnet_v1_fpn_feature_extractor as ssd_resnet_v1_fpn
from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor
@@ -297,6 +298,7 @@ class ModelBuilderTest(tf.test.TestCase):
def test_create_ssd_mobilenet_v1_model_from_config(self):
model_text_proto = """
ssd {
inplace_batchnorm_update: true
feature_extractor {
type: 'ssd_mobilenet_v1'
conv_hyperparams {
@@ -519,6 +521,7 @@ class ModelBuilderTest(tf.test.TestCase):
def test_create_faster_rcnn_resnet_v1_models_from_config(self):
model_text_proto = """
faster_rcnn {
inplace_batchnorm_update: true
num_classes: 3
image_resizer {
keep_aspect_ratio_resizer {
@@ -726,6 +729,73 @@ class ModelBuilderTest(tf.test.TestCase):
model._feature_extractor,
frcnn_nas.FasterRCNNNASFeatureExtractor)
def test_create_faster_rcnn_pnas_model_from_config(self):
model_text_proto = """
faster_rcnn {
num_classes: 3
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_pnas'
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
initial_crop_size: 17
maxpool_kernel_size: 1
maxpool_stride: 1
second_stage_box_predictor {
mask_rcnn_box_predictor {
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.01
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = model_builder.build(model_proto, is_training=True)
self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
self.assertIsInstance(
model._feature_extractor,
frcnn_pnas.FasterRCNNPNASFeatureExtractor)
def test_create_faster_rcnn_inception_resnet_v2_model_from_config(self):
model_text_proto = """
faster_rcnn {
@@ -17,6 +17,7 @@
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import errors
from tensorflow.python.framework import ops
from object_detection.core import box_list
from object_detection.core import box_list_ops
@@ -509,9 +510,13 @@ class BoxListOpsTest(tf.test.TestCase):
with self.assertRaises(ValueError):
box_list_ops.sort_by_field(boxes, 'misc')
with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
'Incorrect field size'):
sess.run(box_list_ops.sort_by_field(boxes, 'weights').get())
if ops._USE_C_API:
with self.assertRaises(ValueError):
box_list_ops.sort_by_field(boxes, 'weights')
else:
with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
'Incorrect field size'):
sess.run(box_list_ops.sort_by_field(boxes, 'weights').get())
def test_visualize_boxes_in_image(self):
image = tf.zeros((6, 4, 3))
@@ -2279,7 +2279,11 @@ def resize_image(image,
return new_masks
def reshape_masks_branch():
new_masks = tf.reshape(masks, [0, new_size[0], new_size[1]])
# The shape function will be computed for both branches of the
# condition, regardless of which branch is actually taken. Make sure
# that we don't trigger an assertion in the shape function when trying
# to reshape a non empty tensor into an empty one.
new_masks = tf.reshape(masks, [-1, new_size[0], new_size[1]])
return new_masks
masks = tf.cond(num_instances > 0, resize_masks_branch,
@@ -64,7 +64,7 @@ cd ${SCRATCH_DIR}
# Download the images.
BASE_IMAGE_URL="http://images.cocodataset.org/zips"
# TRAIN_IMAGE_FILE="train2017.zip"
TRAIN_IMAGE_FILE="train2017.zip"
download_and_unzip ${BASE_IMAGE_URL} ${TRAIN_IMAGE_FILE}
TRAIN_IMAGE_DIR="${SCRATCH_DIR}/train2017"
@@ -91,7 +91,7 @@ download_and_unzip ${BASE_IMAGE_INFO_URL} ${IMAGE_INFO_FILE}
TESTDEV_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/image_info_test-dev2017.json"
# # Build TFRecords of the image data.
# Build TFRecords of the image data.
cd "${CURRENT_DIR}"
python object_detection/dataset_tools/create_coco_tf_record.py \
--logtostderr \
@@ -79,7 +79,7 @@ def visualize_detection_results(result_dict,
data corresponding to each image being evaluated. The following keys
are required:
'original_image': a numpy array representing the image with shape
[1, height, width, 3]
[1, height, width, 3] or [1, height, width, 1]
'detection_boxes': a numpy array of shape [N, 4]
'detection_scores': a numpy array of shape [N]
'detection_classes': a numpy array of shape [N]
@@ -133,6 +135,8 @@
category_index = label_map_util.create_category_index(categories)
image = np.squeeze(result_dict[input_fields.original_image], axis=0)
if image.shape[2] == 1:  # If a one-channel image, tile it into RGB.
image = np.tile(image, [1, 1, 3])
detection_boxes = result_dict[detection_fields.detection_boxes]
detection_scores = result_dict[detection_fields.detection_scores]
detection_classes = np.int32((result_dict[
@@ -94,14 +94,24 @@ def _extract_predictions_and_losses(model,
if fields.InputDataFields.groundtruth_group_of in input_dict:
groundtruth[fields.InputDataFields.groundtruth_group_of] = (
input_dict[fields.InputDataFields.groundtruth_group_of])
groundtruth_masks_list = None
if fields.DetectionResultFields.detection_masks in detections:
groundtruth[fields.InputDataFields.groundtruth_instance_masks] = (
input_dict[fields.InputDataFields.groundtruth_instance_masks])
groundtruth_masks_list = [
input_dict[fields.InputDataFields.groundtruth_instance_masks]]
groundtruth_keypoints_list = None
if fields.DetectionResultFields.detection_keypoints in detections:
groundtruth[fields.InputDataFields.groundtruth_keypoints] = (
input_dict[fields.InputDataFields.groundtruth_keypoints])
groundtruth_keypoints_list = [
input_dict[fields.InputDataFields.groundtruth_keypoints]]
label_id_offset = 1
model.provide_groundtruth(
[input_dict[fields.InputDataFields.groundtruth_boxes]],
[tf.one_hot(input_dict[fields.InputDataFields.groundtruth_classes]
- label_id_offset, depth=model.num_classes)])
- label_id_offset, depth=model.num_classes)],
groundtruth_masks_list, groundtruth_keypoints_list)
losses_dict.update(model.loss(prediction_dict, true_image_shapes))
result_dict = eval_util.result_dict_for_single_example(
@@ -205,7 +215,7 @@ def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
except tf.errors.InvalidArgumentError:
logging.info('Skipping image')
counters['skipped'] += 1
return {}
return {}, {}
global_step = tf.train.global_step(sess, tf.train.get_global_step())
if batch_index < eval_config.num_visualizations:
tag = 'image-{}'.format(batch_index)
@@ -19,7 +19,9 @@ In the table below, we list each such pre-trained model including:
aware that these timings depend highly on one's specific hardware
configuration (these timings were performed using an Nvidia
GeForce GTX TITAN X card) and should be treated more as relative timings in
many cases.
many cases. Also note that desktop GPU timing does not always reflect mobile
run time. For example, Mobilenet V2 is faster on mobile devices than Mobilenet
V1, but is slightly slower on desktop GPU.
* detector performance on subset of the COCO validation set or Open Images test split as measured by the dataset-specific mAP measure.
Here, higher is better, and we only report bounding box mAP rounded to the
nearest integer.
@@ -68,6 +70,7 @@ Some remarks on frozen inference graphs:
| Model name | Speed (ms) | COCO mAP[^1] | Outputs |
| ------------ | :--------------: | :--------------: | :-------------: |
| [ssd_mobilenet_v1_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2017_11_17.tar.gz) | 30 | 21 | Boxes |
| [ssd_mobilenet_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz) | 31 | 22 | Boxes |
| [ssd_inception_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2017_11_17.tar.gz) | 42 | 24 | Boxes |
| [faster_rcnn_inception_v2_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz) | 58 | 28 | Boxes |
| [faster_rcnn_resnet50_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2018_01_28.tar.gz) | 89 | 30 | Boxes |
@@ -37,7 +37,7 @@ environment variable below:
export YOUR_GCS_BUCKET=${YOUR_GCS_BUCKET}
```
It is also possible to run locally by following
It is also possible to run locally by following
[the running locally instructions](running_locally.md).
## Installing Tensorflow and the Tensorflow Object Detection API
@@ -58,7 +58,8 @@ def transform_input_data(tensor_dict,
Data transformation functions are applied in the following order.
1. data_augmentation_fn (optional): applied on tensor_dict.
2. model_preprocess_fn: applied only on image tensor in tensor_dict.
3. image_resizer_fn: applied only on instance mask tensor in tensor_dict.
3. image_resizer_fn: applied on original image and instance mask tensor in
tensor_dict.
4. one_hot_encoding: applied to classes tensor in tensor_dict.
5. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
same they can be merged into a single box with an associated k-hot class
@@ -70,10 +71,11 @@ def transform_input_data(tensor_dict,
model_preprocess_fn: model's preprocess function to apply on image tensor.
This function must take in a 4-D float tensor and return a 4-D preprocess
float tensor and a tensor containing the true image shape.
image_resizer_fn: image resizer function to apply on groundtruth instance
masks. This function must take a 4-D float tensor of image and a 4-D
tensor of instances masks and return resized version of these along with
the true shapes.
image_resizer_fn: image resizer function to apply on original image (if
`retain_original_image` is True) and groundtruth instance masks. This
function must take a 3-D float tensor of an image and a 3-D tensor of
instance masks and return a resized version of these along with the true
shapes.
num_classes: number of max classes to one-hot (or k-hot) encode the class
labels.
data_augmentation_fn: (optional) data augmentation function to apply on
@@ -88,17 +90,19 @@ def transform_input_data(tensor_dict,
after applying all the transformations.
"""
if retain_original_image:
tensor_dict[fields.InputDataFields.
original_image] = tensor_dict[fields.InputDataFields.image]
original_image_resized, _ = image_resizer_fn(
tensor_dict[fields.InputDataFields.image])
tensor_dict[fields.InputDataFields.original_image] = tf.cast(
original_image_resized, tf.uint8)
# Apply data augmentation ops.
if data_augmentation_fn is not None:
tensor_dict = data_augmentation_fn(tensor_dict)
# Apply model preprocessing ops and resize instance masks.
image = tf.expand_dims(
tf.to_float(tensor_dict[fields.InputDataFields.image]), axis=0)
preprocessed_resized_image, true_image_shape = model_preprocess_fn(image)
image = tensor_dict[fields.InputDataFields.image]
preprocessed_resized_image, true_image_shape = model_preprocess_fn(
tf.expand_dims(tf.to_float(image), axis=0))
tensor_dict[fields.InputDataFields.image] = tf.squeeze(
preprocessed_resized_image, axis=0)
tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
@@ -156,6 +160,52 @@ def augment_input_data(tensor_dict, data_augmentation_options):
return tensor_dict
def _get_labels_dict(input_dict):
"""Extracts labels dict from input dict."""
required_label_keys = [
fields.InputDataFields.num_groundtruth_boxes,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
fields.InputDataFields.groundtruth_weights
]
labels_dict = {}
for key in required_label_keys:
labels_dict[key] = input_dict[key]
optional_label_keys = [
fields.InputDataFields.groundtruth_keypoints,
fields.InputDataFields.groundtruth_instance_masks,
fields.InputDataFields.groundtruth_area,
fields.InputDataFields.groundtruth_is_crowd,
fields.InputDataFields.groundtruth_difficult
]
for key in optional_label_keys:
if key in input_dict:
labels_dict[key] = input_dict[key]
if fields.InputDataFields.groundtruth_difficult in labels_dict:
labels_dict[fields.InputDataFields.groundtruth_difficult] = tf.cast(
labels_dict[fields.InputDataFields.groundtruth_difficult], tf.int32)
return labels_dict
def _get_features_dict(input_dict):
"""Extracts features dict from input dict."""
hash_from_source_id = tf.string_to_hash_bucket_fast(
input_dict[fields.InputDataFields.source_id], HASH_BINS)
features = {
fields.InputDataFields.image:
input_dict[fields.InputDataFields.image],
HASH_KEY: tf.cast(hash_from_source_id, tf.int32),
fields.InputDataFields.true_image_shape:
input_dict[fields.InputDataFields.true_image_shape]
}
if fields.InputDataFields.original_image in input_dict:
features[fields.InputDataFields.original_image] = input_dict[
fields.InputDataFields.original_image]
return features
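# Illustrative sketch of how the two helpers split an iterator output
# (hypothetical `input_dict` with the keys listed above):
#   features = _get_features_dict(input_dict)  # image, hash, true_image_shape
#   labels = _get_labels_dict(input_dict)      # boxes, classes, weights, ...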
def create_train_input_fn(train_config, train_input_config,
model_config):
"""Creates a train `input` function for `Estimator`.
@@ -184,6 +234,8 @@ def create_train_input_fn(train_config, train_input_config,
features[fields.InputDataFields.true_image_shape] is a [batch_size, 3]
int32 tensor representing the true image shapes, as preprocessed
images could be padded.
features[fields.InputDataFields.original_image] (optional) is a
[batch_size, H, W, C] float32 tensor with original images.
labels: Dictionary of groundtruth tensors.
labels[fields.InputDataFields.num_groundtruth_boxes] is a [batch_size]
int32 tensor indicating the number of groundtruth boxes.
@@ -233,7 +285,8 @@ def create_train_input_fn(train_config, train_input_config,
transform_input_data, model_preprocess_fn=model.preprocess,
image_resizer_fn=image_resizer_fn,
num_classes=config_util.get_number_of_classes(model_config),
data_augmentation_fn=data_augmentation_fn)
data_augmentation_fn=data_augmentation_fn,
retain_original_image=train_config.retain_original_images)
dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
train_input_config,
transform_input_data_fn=transform_data_fn,
@@ -242,35 +295,8 @@ def create_train_input_fn(train_config, train_input_config,
num_classes=config_util.get_number_of_classes(model_config),
spatial_image_shape=config_util.get_spatial_image_size(
image_resizer_config))
tensor_dict = dataset_util.make_initializable_iterator(dataset).get_next()
hash_from_source_id = tf.string_to_hash_bucket_fast(
tensor_dict[fields.InputDataFields.source_id], HASH_BINS)
features = {
fields.InputDataFields.image: tensor_dict[fields.InputDataFields.image],
HASH_KEY: tf.cast(hash_from_source_id, tf.int32),
fields.InputDataFields.true_image_shape: tensor_dict[
fields.InputDataFields.true_image_shape]
}
labels = {
fields.InputDataFields.num_groundtruth_boxes: tensor_dict[
fields.InputDataFields.num_groundtruth_boxes],
fields.InputDataFields.groundtruth_boxes: tensor_dict[
fields.InputDataFields.groundtruth_boxes],
fields.InputDataFields.groundtruth_classes: tensor_dict[
fields.InputDataFields.groundtruth_classes],
fields.InputDataFields.groundtruth_weights: tensor_dict[
fields.InputDataFields.groundtruth_weights]
}
if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
labels[fields.InputDataFields.groundtruth_keypoints] = tensor_dict[
fields.InputDataFields.groundtruth_keypoints]
if fields.InputDataFields.groundtruth_instance_masks in tensor_dict:
labels[fields.InputDataFields.groundtruth_instance_masks] = tensor_dict[
fields.InputDataFields.groundtruth_instance_masks]
return features, labels
input_dict = dataset_util.make_initializable_iterator(dataset).get_next()
return (_get_features_dict(input_dict), _get_labels_dict(input_dict))
return _train_input_fn
@@ -345,7 +371,7 @@ def create_eval_input_fn(eval_config, eval_input_config, model_config):
image_resizer_fn=image_resizer_fn,
num_classes=num_classes,
data_augmentation_fn=None,
retain_original_image=True)
retain_original_image=eval_config.retain_original_images)
dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
eval_input_config,
transform_input_data_fn=transform_data_fn,
@@ -355,36 +381,7 @@ def create_eval_input_fn(eval_config, eval_input_config, model_config):
image_resizer_config))
input_dict = dataset_util.make_initializable_iterator(dataset).get_next()
hash_from_source_id = tf.string_to_hash_bucket_fast(
input_dict[fields.InputDataFields.source_id], HASH_BINS)
features = {
fields.InputDataFields.image:
input_dict[fields.InputDataFields.image],
fields.InputDataFields.original_image:
input_dict[fields.InputDataFields.original_image],
HASH_KEY: tf.cast(hash_from_source_id, tf.int32),
fields.InputDataFields.true_image_shape:
input_dict[fields.InputDataFields.true_image_shape]
}
labels = {
fields.InputDataFields.groundtruth_boxes:
input_dict[fields.InputDataFields.groundtruth_boxes],
fields.InputDataFields.groundtruth_classes:
input_dict[fields.InputDataFields.groundtruth_classes],
fields.InputDataFields.groundtruth_area:
input_dict[fields.InputDataFields.groundtruth_area],
fields.InputDataFields.groundtruth_is_crowd:
input_dict[fields.InputDataFields.groundtruth_is_crowd],
fields.InputDataFields.groundtruth_difficult:
tf.cast(input_dict[fields.InputDataFields.groundtruth_difficult],
tf.int32)
}
if fields.InputDataFields.groundtruth_instance_masks in input_dict:
labels[fields.InputDataFields.groundtruth_instance_masks] = input_dict[
fields.InputDataFields.groundtruth_instance_masks]
return features, labels
return (_get_features_dict(input_dict), _get_labels_dict(input_dict))
return _eval_input_fn
@@ -34,16 +34,12 @@ FLAGS = tf.flags.FLAGS
def _get_configs_for_model(model_name):
"""Returns configurations for model."""
fname = os.path.join(
FLAGS.test_srcdir,
('google3/third_party/tensorflow_models/'
'object_detection/samples/configs/' + model_name + '.config'))
label_map_path = os.path.join(FLAGS.test_srcdir,
('google3/third_party/tensorflow_models/'
'object_detection/data/pet_label_map.pbtxt'))
data_path = os.path.join(FLAGS.test_srcdir,
('google3/third_party/tensorflow_models/'
'object_detection/test_data/pets_examples.record'))
fname = os.path.join(tf.resource_loader.get_data_files_path(),
'samples/configs/' + model_name + '.config')
label_map_path = os.path.join(tf.resource_loader.get_data_files_path(),
'data/pet_label_map.pbtxt')
data_path = os.path.join(tf.resource_loader.get_data_files_path(),
'test_data/pets_examples.record')
configs = config_util.get_configs_from_pipeline_file(fname)
return config_util.merge_external_params_with_configs(
configs,
@@ -462,22 +458,31 @@ class DataTransformationFnTest(tf.test.TestCase):
fields.InputDataFields.groundtruth_classes:
tf.constant(np.array([3, 1], np.int32))
}
def fake_image_resizer_fn(image, masks):
def fake_image_resizer_fn(image, masks=None):
resized_image = tf.image.resize_images(image, [8, 8])
resized_masks = tf.transpose(
tf.image.resize_images(tf.transpose(masks, [1, 2, 0]), [8, 8]),
[2, 0, 1])
return resized_image, resized_masks, tf.shape(resized_image)
results = [resized_image]
if masks is not None:
resized_masks = tf.transpose(
tf.image.resize_images(tf.transpose(masks, [1, 2, 0]), [8, 8]),
[2, 0, 1])
results.append(resized_masks)
results.append(tf.shape(resized_image))
return results
num_classes = 3
input_transformation_fn = functools.partial(
inputs.transform_input_data,
model_preprocess_fn=_fake_model_preprocessor_fn,
image_resizer_fn=fake_image_resizer_fn,
num_classes=num_classes)
num_classes=num_classes,
retain_original_image=True)
with self.test_session() as sess:
transformed_inputs = sess.run(
input_transformation_fn(tensor_dict=tensor_dict))
self.assertAllEqual(transformed_inputs[
fields.InputDataFields.original_image].dtype, tf.uint8)
self.assertAllEqual(transformed_inputs[
fields.InputDataFields.original_image].shape, [8, 8, 3])
self.assertAllEqual(transformed_inputs[
fields.InputDataFields.groundtruth_instance_masks].shape, [2, 8, 8])
@@ -46,7 +46,8 @@ class SSDFeatureExtractor(object):
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
inplace_batchnorm_update=False):
"""Constructor.
Args:
@@ -64,6 +65,10 @@
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch norm moving average
values inplace. When this is false, the train op must add a control
dependency on the tf.GraphKeys.UPDATE_OPS collection in order to update
the batch norm statistics.
"""
self._is_training = is_training
self._depth_multiplier = depth_multiplier
@@ -71,6 +76,7 @@
self._pad_to_multiple = pad_to_multiple
self._conv_hyperparams = conv_hyperparams
self._batch_norm_trainable = batch_norm_trainable
self._inplace_batchnorm_update = inplace_batchnorm_update
self._reuse_weights = reuse_weights
self._use_explicit_padding = use_explicit_padding
self._use_depthwise = use_depthwise
@@ -108,7 +114,29 @@
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
pass
batchnorm_updates_collections = (None if self._inplace_batchnorm_update
else tf.GraphKeys.UPDATE_OPS)
with slim.arg_scope([slim.batch_norm],
updates_collections=batchnorm_updates_collections):
return self._extract_features(preprocessed_inputs)
@abstractmethod
def _extract_features(self, preprocessed_inputs):
"""Extracts features from preprocessed inputs.
This function is responsible for extracting feature maps from preprocessed
images.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
raise NotImplementedError
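# Illustrative sketch of the non-inplace path: when inplace_batchnorm_update
# is False, batch norm statistics are collected in tf.GraphKeys.UPDATE_OPS and
# the train op must depend on them (hypothetical `optimizer` / `total_loss`):
#   update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
#   with tf.control_dependencies(update_ops):
#     train_op = optimizer.minimize(total_loss)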
class SSDMetaArch(model.DetectionModel):
@@ -49,8 +49,8 @@ tf.flags.DEFINE_string('model_dir', None, 'Path to output model directory '
'where event and checkpoint files will be written.')
tf.flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
'file.')
tf.flags.DEFINE_integer('num_train_steps', 500000, 'Number of train steps.')
tf.flags.DEFINE_integer('num_eval_steps', 10000, 'Number of train steps.')
tf.flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
tf.flags.DEFINE_integer('num_eval_steps', None, 'Number of eval steps.')
FLAGS = tf.flags.FLAGS
@@ -225,7 +225,14 @@
labels,
unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
elif mode == tf.estimator.ModeKeys.EVAL:
labels = unstack_batch(labels, unpad_groundtruth_tensors=False)
# When evaluating on training data, it is necessary to check whether the
# groundtruth must be unpadded.
boxes_shape = (
labels[fields.InputDataFields.groundtruth_boxes].get_shape()
.as_list())
unpad_groundtruth_tensors = boxes_shape[1] is not None
labels = unstack_batch(
labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)
if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
@@ -241,7 +248,9 @@
groundtruth_boxes_list=gt_boxes_list,
groundtruth_classes_list=gt_classes_list,
groundtruth_masks_list=gt_masks_list,
groundtruth_keypoints_list=gt_keypoints_list)
groundtruth_keypoints_list=gt_keypoints_list,
groundtruth_weights_list=labels[
fields.InputDataFields.groundtruth_weights])
preprocessed_images = features[fields.InputDataFields.image]
prediction_dict = detection_model.predict(
@@ -250,14 +259,6 @@
prediction_dict, features[fields.InputDataFields.true_image_shape])
if mode == tf.estimator.ModeKeys.TRAIN:
if not train_config.fine_tune_checkpoint_type:
# train_config.from_detection_checkpoint field is deprecated. For
# backward compatibility, sets finetune_checkpoint_type based on
# from_detection_checkpoint.
if train_config.from_detection_checkpoint:
train_config.fine_tune_checkpoint_type = 'detection'
else:
train_config.fine_tune_checkpoint_type = 'classification'
if train_config.fine_tune_checkpoint and hparams.load_pretrained:
if not train_config.fine_tune_checkpoint_type:
# train_config.from_detection_checkpoint field is deprecated. For
@@ -341,17 +342,16 @@
}
eval_metric_ops = None
if mode == tf.estimator.ModeKeys.EVAL:
# Detection summaries during eval.
if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
class_agnostic = (fields.DetectionResultFields.detection_classes
not in detections)
groundtruth = _get_groundtruth_data(detection_model, class_agnostic)
use_original_images = fields.InputDataFields.original_image in features
eval_images = (
original_images = (
features[fields.InputDataFields.original_image] if use_original_images
else features[fields.InputDataFields.image])
eval_dict = eval_util.result_dict_for_single_example(
eval_images[0:1],
original_images[0:1],
features[inputs.HASH_KEY][0],
detections,
groundtruth,
@@ -363,21 +363,26 @@
else:
category_index = label_map_util.create_category_index_from_labelmap(
eval_input_config.label_map_path)
img_summary = None
if not use_tpu and use_original_images:
detection_and_groundtruth = (
vis_utils.draw_side_by_side_evaluation_image(
eval_dict, category_index, max_boxes_to_draw=20,
min_score_thresh=0.2))
tf.summary.image('Detections_Left_Groundtruth_Right',
detection_and_groundtruth)
# Eval metrics on a single image.
eval_metrics = eval_config.metrics_set
if not eval_metrics:
eval_metrics = ['coco_detection_metrics']
eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
eval_metrics, category_index.values(), eval_dict,
include_metrics_per_category=False)
img_summary = tf.summary.image('Detections_Left_Groundtruth_Right',
detection_and_groundtruth)
if mode == tf.estimator.ModeKeys.EVAL:
# Eval metrics on a single example.
eval_metrics = eval_config.metrics_set
if not eval_metrics:
eval_metrics = ['coco_detection_metrics']
eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
eval_metrics, category_index.values(), eval_dict,
include_metrics_per_category=False)
if img_summary is not None:
eval_metric_ops['Detections_Left_Groundtruth_Right'] = (
img_summary, tf.no_op())
if use_tpu:
return tf.contrib.tpu.TPUEstimatorSpec(
@@ -32,20 +32,19 @@ from object_detection.builders import model_builder
from object_detection.core import standard_fields as fields
from object_detection.utils import config_util
FLAGS = tf.flags.FLAGS
MODEL_NAME_FOR_TEST = model_test_util.SSD_INCEPTION_MODEL_NAME
def _get_data_path():
"""Returns an absolute path to TFRecord file."""
return os.path.join(FLAGS.test_srcdir, model_test_util.PATH_BASE, 'test_data',
return os.path.join(tf.resource_loader.get_data_files_path(), 'test_data',
'pets_examples.record')
def _get_labelmap_path():
"""Returns an absolute path to label map file."""
return os.path.join(FLAGS.test_srcdir, model_test_util.PATH_BASE, 'data',
return os.path.join(tf.resource_loader.get_data_files_path(), 'data',
'pet_label_map.pbtxt')
@@ -28,13 +28,12 @@ FLAGS = tf.flags.FLAGS
FASTER_RCNN_MODEL_NAME = 'faster_rcnn_resnet50_pets'
SSD_INCEPTION_MODEL_NAME = 'ssd_inception_v2_pets'
PATH_BASE = 'google3/third_party/tensorflow_models/object_detection/'
def GetPipelineConfigPath(model_name):
"""Returns path to the local pipeline config file."""
return os.path.join(FLAGS.test_srcdir, PATH_BASE, 'samples', 'configs',
model_name + '.config')
return os.path.join(tf.resource_loader.get_data_files_path(), 'samples',
'configs', model_name + '.config')
def InitializeFlags(model_name_for_test):
@@ -53,7 +53,8 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
inplace_batchnorm_update=False):
"""MobileNetV1 Feature Extractor for Embedded-friendly SSD Models.
Args:
@@ -71,6 +72,11 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, the user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for the train/loss op in order to update the batch
norm moving average parameters.
Raises:
ValueError: upon invalid `pad_to_multiple` values.
@@ -82,9 +88,9 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
use_explicit_padding, use_depthwise, inplace_batchnorm_update)
def extract_features(self, preprocessed_inputs):
def _extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
@@ -22,30 +22,6 @@ from nets import mobilenet_v1
slim = tf.contrib.slim
def _batch_norm_arg_scope(list_ops,
use_batch_norm=True,
batch_norm_decay=0.9997,
batch_norm_epsilon=0.001,
batch_norm_scale=False,
train_batch_norm=False):
"""Slim arg scope for Mobilenet V1 batch norm."""
if use_batch_norm:
batch_norm_params = {
'is_training': train_batch_norm,
'scale': batch_norm_scale,
'decay': batch_norm_decay,
'epsilon': batch_norm_epsilon
}
normalizer_fn = slim.batch_norm
else:
normalizer_fn = None
batch_norm_params = None
return slim.arg_scope(list_ops,
normalizer_fn=normalizer_fn,
normalizer_params=batch_norm_params)
class FasterRCNNMobilenetV1FeatureExtractor(
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
"""Faster R-CNN Mobilenet V1 feature extractor implementation."""
@@ -121,18 +97,19 @@ class FasterRCNNMobilenetV1FeatureExtractor(
['image size must at least be 33 in both height and width.'])
with tf.control_dependencies([shape_assert]):
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
batch_norm_scale=True,
train_batch_norm=self._train_batch_norm):
with slim.arg_scope(
mobilenet_v1.mobilenet_v1_arg_scope(
is_training=self._train_batch_norm,
weight_decay=self._weight_decay)):
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
_, activations = mobilenet_v1.mobilenet_v1_base(
preprocessed_inputs,
final_endpoint='Conv2d_13_pointwise',
final_endpoint='Conv2d_11_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
return activations['Conv2d_13_pointwise'], activations
return activations['Conv2d_11_pointwise'], activations
def _extract_box_classifier_features(self, proposal_feature_maps, scope):
"""Extracts second stage box classifier features.
@@ -152,9 +129,10 @@ class FasterRCNNMobilenetV1FeatureExtractor(
depth = lambda d: max(int(d * 1.0), 16)
with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights):
with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
batch_norm_scale=True,
train_batch_norm=self._train_batch_norm):
with slim.arg_scope(
mobilenet_v1.mobilenet_v1_arg_scope(
is_training=self._train_batch_norm,
weight_decay=self._weight_decay)):
with slim.arg_scope(
[slim.conv2d, slim.separable_conv2d], padding='SAME'):
net = slim.separable_conv2d(