.
- """
- return {
- k: tf.compat.v1.saved_model.utils.build_tensor_info(t)
- for k, t in tensor_dict.items()
- }
-
-
-def main(argv):
- if len(argv) > 1:
- raise app.UsageError('Too many command-line arguments.')
-
- export_path = FLAGS.export_path
- if os.path.exists(export_path):
- raise ValueError('Export_path already exists.')
-
- with tf.Graph().as_default() as g, tf.compat.v1.Session(graph=g) as sess:
-
+class _ExtractModule(tf.Module):
+ """Helper module to build and save DELF model."""
+
+ def __init__(self, block3_strides, iou):
+ """Initialization of DELF model.
+
+ Args:
+ block3_strides: bool, whether to add strides to the output of block3.
+ iou: IOU for non-max suppression.
+ """
+ self._stride_factor = 2.0 if block3_strides else 1.0
+ self._iou = iou
# Setup the DELF model for extraction.
- model = delf_model.Delf(block3_strides=FLAGS.block3_strides, name='DELF')
-
- # Initial forward pass to build model.
- images = tf.zeros((1, 321, 321, 3), dtype=tf.float32)
- model(images)
+ self._model = delf_model.Delf(
+ block3_strides=block3_strides, name='DELF')
- stride_factor = 2.0 if FLAGS.block3_strides else 1.0
+ def LoadWeights(self, checkpoint_path):
+ self._model.load_weights(checkpoint_path)
- # Setup the multiscale keypoint extraction.
- input_image = tf.compat.v1.placeholder(
- tf.uint8, shape=(None, None, 3), name='input_image')
- input_abs_thres = tf.compat.v1.placeholder(
- tf.float32, shape=(), name='input_abs_thres')
- input_scales = tf.compat.v1.placeholder(
- tf.float32, shape=[None], name='input_scales')
- input_max_feature_num = tf.compat.v1.placeholder(
- tf.int32, shape=(), name='input_max_feature_num')
+ @tf.function(input_signature=[
+ tf.TensorSpec(shape=[None, None, 3], dtype=tf.uint8, name='input_image'),
+ tf.TensorSpec(shape=[None], dtype=tf.float32, name='input_scales'),
+ tf.TensorSpec(shape=(), dtype=tf.int32, name='input_max_feature_num'),
+ tf.TensorSpec(shape=(), dtype=tf.float32, name='input_abs_thres')
+ ])
+ def ExtractFeatures(self, input_image, input_scales, input_max_feature_num,
+ input_abs_thres):
extracted_features = export_model_utils.ExtractLocalFeatures(
input_image, input_scales, input_max_feature_num, input_abs_thres,
- FLAGS.iou, lambda x: model(x, training=False), stride_factor)
+ self._iou, lambda x: self._model(x, training=False),
+ self._stride_factor)
- # Load the weights.
- checkpoint_path = FLAGS.ckpt_path
- model.load_weights(checkpoint_path)
- print('Checkpoint loaded from ', checkpoint_path)
-
- named_input_tensors = {
- 'input_image': input_image,
- 'input_scales': input_scales,
- 'input_abs_thres': input_abs_thres,
- 'input_max_feature_num': input_max_feature_num,
- }
-
- # Outputs to the exported model.
named_output_tensors = {}
named_output_tensors['boxes'] = tf.identity(
extracted_features[0], name='boxes')
@@ -112,25 +84,27 @@ def main(argv):
extracted_features[2], name='scales')
named_output_tensors['scores'] = tf.identity(
extracted_features[3], name='scores')
+ return named_output_tensors
+
+
+def main(argv):
+ if len(argv) > 1:
+ raise app.UsageError('Too many command-line arguments.')
+
+ export_path = FLAGS.export_path
+ if os.path.exists(export_path):
+ raise ValueError(f'Export_path {export_path} already exists. Please '
+ 'specify a different path or delete the existing one.')
+
+ module = _ExtractModule(FLAGS.block3_strides, FLAGS.iou)
+
+ # Load the weights.
+ checkpoint_path = FLAGS.ckpt_path
+ module.LoadWeights(checkpoint_path)
+ print('Checkpoint loaded from ', checkpoint_path)
- # Export the model.
- signature_def = tf.compat.v1.saved_model.signature_def_utils.build_signature_def(
- inputs=_build_tensor_info(named_input_tensors),
- outputs=_build_tensor_info(named_output_tensors))
-
- print('Exporting trained model to:', export_path)
- builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(export_path)
-
- init_op = None
- builder.add_meta_graph_and_variables(
- sess, [tf.compat.v1.saved_model.tag_constants.SERVING],
- signature_def_map={
- tf.compat.v1.saved_model.signature_constants
- .DEFAULT_SERVING_SIGNATURE_DEF_KEY:
- signature_def
- },
- main_op=init_op)
- builder.save()
+ # Save the module
+ tf.saved_model.save(module, export_path)
if __name__ == '__main__':
diff --git a/research/delf/delf/python/training/model/export_model_utils.py b/research/delf/delf/python/training/model/export_model_utils.py
index f4302aca139802e99d80bfd4e1fc27e353abdfbb..3fc18a3c280de513070f6c09612506db37f9db1a 100644
--- a/research/delf/delf/python/training/model/export_model_utils.py
+++ b/research/delf/delf/python/training/model/export_model_utils.py
@@ -142,20 +142,21 @@ def ExtractLocalFeatures(image, image_scales, max_feature_num, abs_thres, iou,
keep_going = lambda j, b, f, scales, scores: tf.less(j, num_scales)
(_, output_boxes, output_features, output_scales,
- output_scores) = tf.while_loop(
- cond=keep_going,
- body=_ProcessSingleScale,
- loop_vars=[
- i, output_boxes, output_features, output_scales, output_scores
- ],
- shape_invariants=[
- i.get_shape(),
- tf.TensorShape([None, 4]),
- tf.TensorShape([None, feature_depth]),
- tf.TensorShape([None]),
- tf.TensorShape([None])
- ],
- back_prop=False)
+ output_scores) = tf.nest.map_structure(
+ tf.stop_gradient,
+ tf.while_loop(
+ cond=keep_going,
+ body=_ProcessSingleScale,
+ loop_vars=[
+ i, output_boxes, output_features, output_scales, output_scores
+ ],
+ shape_invariants=[
+ i.get_shape(),
+ tf.TensorShape([None, 4]),
+ tf.TensorShape([None, feature_depth]),
+ tf.TensorShape([None]),
+ tf.TensorShape([None])
+ ]))
feature_boxes = box_list.BoxList(output_boxes)
feature_boxes.add_field('features', output_features)
@@ -169,3 +170,109 @@ def ExtractLocalFeatures(image, image_scales, max_feature_num, abs_thres, iou,
return final_boxes.get(), final_boxes.get_field(
'features'), final_boxes.get_field('scales'), tf.expand_dims(
final_boxes.get_field('scores'), 1)
+
+
+def ExtractGlobalFeatures(image,
+ image_scales,
+ model_fn,
+ multi_scale_pool_type='None',
+ normalize_global_descriptor=False):
+ """Extract global features for input image.
+
+ Args:
+ image: image tensor of type tf.uint8 with shape [h, w, channels].
+ image_scales: 1D float tensor which contains float scales used for image
+ pyramid construction.
+ model_fn: model function. Follows the signature:
+ * Args:
+ * `images`: Image tensor which is re-scaled.
+ * Returns:
+ * `global_descriptors`: Global descriptors for input images.
+ multi_scale_pool_type: If set, the global descriptor of each scale is pooled
+ and a 1D global descriptor is returned.
+ normalize_global_descriptor: If True, output global descriptors are
+ L2-normalized.
+
+ Returns:
+ global_descriptors: If `multi_scale_pool_type` is 'None', returns a [S, D]
+ float tensor. S is the number of scales, and D the global descriptor
+ dimensionality. Each D-dimensional entry is a global descriptor, which may
+ be L2-normalized depending on `normalize_global_descriptor`. If
+ `multi_scale_pool_type` is not 'None', returns a [D] float tensor with the
+ pooled global descriptor.
+
+ """
+ original_image_shape_float = tf.gather(
+ tf.dtypes.cast(tf.shape(image), tf.float32), [0, 1])
+
+ image_tensor = gld.NormalizeImages(
+ image, pixel_value_offset=128.0, pixel_value_scale=128.0)
+ image_tensor = tf.expand_dims(image_tensor, 0, name='image/expand_dims')
+
+ def _ProcessSingleScale(scale_index, global_descriptors=None):
+ """Resizes the image and runs feature extraction.
+
+ This function will be passed into tf.while_loop() and be called
+ repeatedly. We get the current scale by image_scales[scale_index], and
+ run image resizing / feature extraction. In the end, we concat the
+ previous global descriptors with current descriptor as the output.
+
+ Args:
+ scale_index: A valid index in image_scales.
+ global_descriptors: Global descriptor tensor with the shape of [S, D]. If
+ None, no previous global descriptors are used, and the output will be of
+ shape [1, D].
+
+ Returns:
+ scale_index: The next scale index for processing.
+ global_descriptors: A concatenated global descriptor tensor with the shape
+ of [S+1, D].
+ """
+ scale = tf.gather(image_scales, scale_index)
+ new_image_size = tf.dtypes.cast(
+ tf.round(original_image_shape_float * scale), tf.int32)
+ resized_image = tf.image.resize(image_tensor, new_image_size)
+
+ global_descriptor = model_fn(resized_image)
+ if global_descriptors is None:
+ global_descriptors = global_descriptor
+ else:
+ global_descriptors = tf.concat([global_descriptors, global_descriptor], 0)
+
+ return scale_index + 1, global_descriptors
+
+ # Process the first scale separately, the following scales will reuse the
+ # graph variables.
+ (_, output_global) = _ProcessSingleScale(0)
+
+ i = tf.constant(1, dtype=tf.int32)
+ num_scales = tf.shape(image_scales)[0]
+ keep_going = lambda j, g: tf.less(j, num_scales)
+
+ (_, output_global) = tf.nest.map_structure(
+ tf.stop_gradient,
+ tf.while_loop(
+ cond=keep_going,
+ body=_ProcessSingleScale,
+ loop_vars=[i, output_global],
+ shape_invariants=[i.get_shape(),
+ tf.TensorShape([None, None])]))
+
+ normalization_axis = 1
+ if multi_scale_pool_type == 'average':
+ output_global = tf.reduce_mean(
+ output_global,
+ axis=0,
+ keepdims=False,
+ name='multi_scale_average_pooling')
+ normalization_axis = 0
+ elif multi_scale_pool_type == 'sum':
+ output_global = tf.reduce_sum(
+ output_global, axis=0, keepdims=False, name='multi_scale_sum_pooling')
+ normalization_axis = 0
+
+ if normalize_global_descriptor:
+ output_global = tf.nn.l2_normalize(
+ output_global, axis=normalization_axis, name='l2_normalization')
+
+ return output_global
diff --git a/research/delf/delf/python/training/model/resnet50.py b/research/delf/delf/python/training/model/resnet50.py
index 1c4d7c2f68dea12d74fcd32a8b52fd1285e92b59..6daaab67419d99ebcefd7b25f89c284bf00832af 100644
--- a/research/delf/delf/python/training/model/resnet50.py
+++ b/research/delf/delf/python/training/model/resnet50.py
@@ -22,9 +22,14 @@ from __future__ import division
from __future__ import print_function
import functools
+import os
+import tempfile
+from absl import logging
+import h5py
import tensorflow as tf
+
layers = tf.keras.layers
@@ -284,8 +289,8 @@ class ResNet50(tf.keras.Model):
else:
self.global_pooling = None
- def call(self, inputs, training=True, intermediates_dict=None):
- """Call the ResNet50 model.
+ def build_call(self, inputs, training=True, intermediates_dict=None):
+ """Building the ResNet50 model.
Args:
inputs: Images to compute features for.
@@ -356,3 +361,79 @@ class ResNet50(tf.keras.Model):
return self.global_pooling(x)
else:
return x
+
+ def call(self, inputs, training=True, intermediates_dict=None):
+ """Call the ResNet50 model.
+
+ Args:
+ inputs: Images to compute features for.
+ training: Whether model is in training phase.
+ intermediates_dict: `None` or dictionary. If not None, accumulate feature
+ maps from intermediate blocks into the dictionary.
+
+ Returns:
+ Tensor with featuremap.
+ """
+ return self.build_call(inputs, training, intermediates_dict)
+
+ def restore_weights(self, filepath):
+ """Load pretrained weights.
+
+ This function loads a .h5 file from the filepath with saved model weights
+ and assigns them to the model.
+
+ Args:
+ filepath: String, path to the .h5 file
+ Raises:
+ ValueError: if the file referenced by `filepath` does not exist.
+ """
+ if not tf.io.gfile.exists(filepath):
+ raise ValueError('Unable to load weights from %s. You must provide a'
+ 'valid file.' % (filepath))
+
+ # Create a local copy of the weights file for h5py to be able to read it.
+ local_filename = os.path.basename(filepath)
+ tmp_filename = os.path.join(tempfile.gettempdir(), local_filename)
+ tf.io.gfile.copy(filepath, tmp_filename, overwrite=True)
+
+ # Load the content of the weights file.
+ f = h5py.File(tmp_filename, mode='r')
+ saved_layer_names = [n.decode('utf8') for n in f.attrs['layer_names']]
+
+ try:
+ # Iterate through all the layers assuming the max `depth` is 2.
+ for layer in self.layers:
+ if hasattr(layer, 'layers'):
+ for inlayer in layer.layers:
+ # Make sure the weights are in the saved model, and that we are in
+ # the innermost layer.
+ if inlayer.name not in saved_layer_names:
+ raise ValueError('Layer %s absent from the pretrained weights.'
+ 'Unable to load its weights.' % (inlayer.name))
+ if hasattr(inlayer, 'layers'):
+ raise ValueError('Layer %s is not a depth 2 layer. Unable to load'
+ 'its weights.' % (inlayer.name))
+ # Assign the weights in the current layer.
+ g = f[inlayer.name]
+ weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
+ weight_values = [g[weight_name] for weight_name in weight_names]
+ print('Setting the weights for layer %s' % (inlayer.name))
+ inlayer.set_weights(weight_values)
+ finally:
+ # Clean up the temporary file.
+ tf.io.gfile.remove(tmp_filename)
+
+ def log_weights(self):
+ """Log backbone weights."""
+ logging.info('Logging backbone weights')
+ logging.info('------------------------')
+ for layer in self.layers:
+ if hasattr(layer, 'layers'):
+ for inlayer in layer.layers:
+ logging.info('Weights for layer: %s, inlayer % s', layer.name,
+ inlayer.name)
+ weights = inlayer.get_weights()
+ logging.info(weights)
+ else:
+ logging.info('Layer %s does not have inner layers.',
+ layer.name)
diff --git a/research/delf/delf/python/training/train.py b/research/delf/delf/python/training/train.py
index dcf61b3f35a8e9f580b7f9f143fbe2281172de04..12b7a5f9cc3282e59c738f74c7fbd4798021c429 100644
--- a/research/delf/delf/python/training/train.py
+++ b/research/delf/delf/python/training/train.py
@@ -43,13 +43,20 @@ flags.DEFINE_string('train_file_pattern', '/tmp/data/train*',
'File pattern of training dataset files.')
flags.DEFINE_string('validation_file_pattern', '/tmp/data/validation*',
'File pattern of validation dataset files.')
+flags.DEFINE_enum(
+ 'dataset_version', 'gld_v1', ['gld_v1', 'gld_v2', 'gld_v2_clean'],
+ 'Google Landmarks dataset version, used to determine the'
+ 'number of classes.')
flags.DEFINE_integer('seed', 0, 'Seed to training dataset.')
-flags.DEFINE_float('initial_lr', 0.001, 'Initial learning rate.')
+flags.DEFINE_float('initial_lr', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('batch_size', 32, 'Global batch size.')
flags.DEFINE_integer('max_iters', 500000, 'Maximum iterations.')
-flags.DEFINE_boolean('block3_strides', False, 'Whether to use block3_strides.')
+flags.DEFINE_boolean('block3_strides', True, 'Whether to use block3_strides.')
flags.DEFINE_boolean('use_augmentation', True,
'Whether to use ImageNet style augmentation.')
+flags.DEFINE_string(
+ 'imagenet_checkpoint', None,
+ 'ImageNet checkpoint for ResNet backbone. If None, no checkpoint is used.')
def _record_accuracy(metric, logits, labels):
@@ -60,6 +67,10 @@ def _record_accuracy(metric, logits, labels):
def _attention_summaries(scores, global_step):
"""Record statistics of the attention score."""
+ tf.summary.image(
+ 'batch_attention',
+ scores / tf.reduce_max(scores + 1e-3),
+ step=global_step)
tf.summary.scalar('attention/max', tf.reduce_max(scores), step=global_step)
tf.summary.scalar('attention/min', tf.reduce_min(scores), step=global_step)
tf.summary.scalar('attention/mean', tf.reduce_mean(scores), step=global_step)
@@ -120,7 +131,7 @@ def main(argv):
max_iters = FLAGS.max_iters
global_batch_size = FLAGS.batch_size
image_size = 321
- num_eval = 1000
+ num_eval_batches = int(50000 / global_batch_size)
report_interval = 100
eval_interval = 1000
save_interval = 20000
@@ -130,15 +141,16 @@ def main(argv):
clip_val = tf.constant(10.0)
if FLAGS.debug:
+ tf.config.run_functions_eagerly(True)
global_batch_size = 4
- max_iters = 4
- num_eval = 1
+ max_iters = 100
+ num_eval_batches = 1
save_interval = 1
report_interval = 1
- # TODO(andrearaujo): Using placeholder, replace with actual value using
- # GoogleLandmarksInfo() from datasets/googlelandmarks.py.
- num_classes = 14951
+ # Determine the number of classes based on the version of the dataset.
+ gld_info = gld.GoogleLandmarksInfo()
+ num_classes = gld_info.num_classes[FLAGS.dataset_version]
# ------------------------------------------------------------
# Create the distributed train/validation sets.
@@ -155,11 +167,12 @@ def main(argv):
augmentation=False,
seed=FLAGS.seed)
- train_iterator = strategy.make_dataset_iterator(train_dataset)
- validation_iterator = strategy.make_dataset_iterator(validation_dataset)
+ train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)
+ validation_dist_dataset = strategy.experimental_distribute_dataset(
+ validation_dataset)
- train_iterator.initialize()
- validation_iterator.initialize()
+ train_iter = iter(train_dist_dataset)
+ validation_iter = iter(validation_dist_dataset)
# Create a checkpoint directory to store the checkpoints.
checkpoint_prefix = os.path.join(FLAGS.logdir, 'delf_tf2-ckpt')
@@ -215,11 +228,14 @@ def main(argv):
labels = tf.clip_by_value(labels, 0, model.num_classes)
global_step = optimizer.iterations
+ tf.summary.image('batch_images', (images + 1.0) / 2.0, step=global_step)
tf.summary.scalar(
'image_range/max', tf.reduce_max(images), step=global_step)
tf.summary.scalar(
'image_range/min', tf.reduce_min(images), step=global_step)
+ # TODO(andrearaujo): we should try to unify the backprop into a single
+ # function, instead of applying once to descriptor then to attention.
def _backprop_loss(tape, loss, weights):
"""Backpropogate losses using clipped gradients.
@@ -340,12 +356,25 @@ def main(argv):
with tf.summary.record_if(
tf.math.equal(0, optimizer.iterations % report_interval)):
+ # TODO(dananghel): try to load pretrained weights at backbone creation.
+ # Load pretrained weights for ResNet50 trained on ImageNet.
+ if FLAGS.imagenet_checkpoint is not None:
+ logging.info('Attempting to load ImageNet pretrained weights.')
+ input_batch = next(train_iter)
+ _, _ = distributed_train_step(input_batch)
+ model.backbone.restore_weights(FLAGS.imagenet_checkpoint)
+ logging.info('Done.')
+ else:
+ logging.info('Skip loading ImageNet pretrained weights.')
+ if FLAGS.debug:
+ model.backbone.log_weights()
+
global_step_value = optimizer.iterations.numpy()
while global_step_value < max_iters:
# input_batch : images(b, h, w, c), labels(b,).
try:
- input_batch = train_iterator.get_next()
+ input_batch = next(train_iter)
except tf.errors.OutOfRangeError:
# Break if we run out of data in the dataset.
logging.info('Stopping training at global step %d, no more data',
@@ -388,9 +417,9 @@ def main(argv):
# Validate once in {eval_interval*n, n \in N} steps.
if global_step_value % eval_interval == 0:
- for i in range(num_eval):
+ for i in range(num_eval_batches):
try:
- validation_batch = validation_iterator.get_next()
+ validation_batch = next(validation_iter)
desc_validation_result, attn_validation_result = (
distributed_validation_step(validation_batch))
except tf.errors.OutOfRangeError:
@@ -412,13 +441,17 @@ def main(argv):
print(' : attn:', attn_validation_result.numpy())
# Save checkpoint once (each save_interval*n, n \in N) steps.
+ # TODO(andrearaujo): save only in one of the two ways. They are
+ # identical, the only difference is that the manager adds some extra
+ # prefixes and variables (eg, optimizer variables).
if global_step_value % save_interval == 0:
save_path = manager.save()
- logging.info('Saved({global_step_value}) at %s', save_path)
+ logging.info('Saved (%d) at %s', global_step_value, save_path)
file_path = '%s/delf_weights' % FLAGS.logdir
model.save_weights(file_path, save_format='tf')
- logging.info('Saved weights({global_step_value}) at %s', file_path)
+ logging.info('Saved weights (%d) at %s', global_step_value,
+ file_path)
# Reset metrics for next step.
desc_train_accuracy.reset_states()
diff --git a/research/efficient-hrl/agent.py b/research/efficient-hrl/agent.py
index cb02b51fa9eb9d98c83ca863d4cfe8a9c90008ce..0028ddffa0d37a0e80d2c990e6263a3d9b4ab948 100644
--- a/research/efficient-hrl/agent.py
+++ b/research/efficient-hrl/agent.py
@@ -149,7 +149,7 @@ class UvfAgentCore(object):
error = tf.square(actions - pred_actions)
spec_range = (self._action_spec.maximum - self._action_spec.minimum) / 2
- normalized_error = error / tf.constant(spec_range) ** 2
+ normalized_error = tf.cast(error, tf.float64) / tf.constant(spec_range) ** 2
return -normalized_error
@gin.configurable('uvf_add_noise_fn')
diff --git a/research/neural_programmer/README.md b/research/neural_programmer/README.md
index 6101a85b9651fb7ad3de4f66af722c384c95d69a..dcc27f6fb015ec625935a0ea37d814a2ba10d2e3 100644
--- a/research/neural_programmer/README.md
+++ b/research/neural_programmer/README.md
@@ -4,20 +4,23 @@
# Neural Programmer
-Implementation of the Neural Programmer model described in [paper](https://openreview.net/pdf?id=ry2YOrcge)
+Implementation of the Neural Programmer model as described in this [paper](https://openreview.net/pdf?id=ry2YOrcge).
-Download and extract the data from [dropbox](https://www.dropbox.com/s/9tvtcv6lmy51zfw/data.zip?dl=0). Change the ``data_dir FLAG`` to the location of the data.
+Download and extract the data from the [WikiTableQuestions](https://ppasupat.github.io/WikiTableQuestions/) site. The dataset contains
+11321, 2831, and 4344 examples for training, development, and testing respectively. We use their tokenization, number and date pre-processing. Please note that the above paper used the [initial release](https://github.com/ppasupat/WikiTableQuestions/releases/tag/v0.2) for training, development and testing.
+
+Change the `data_dir FLAG` to the location of the data.
### Training
-``python neural_programmer.py``
+Run `python neural_programmer.py`
-The models are written to FLAGS.output_dir
+The models are written to `FLAGS.output_dir`.
### Testing
-``python neural_programmer.py --evaluator_job=True``
+Run `python neural_programmer.py --evaluator_job=True`
-The models are loaded from ``FLAGS.output_dir``. The evaluation is done on development data.
+The models are loaded from `FLAGS.output_dir`. The evaluation is done on development data.
-In case of errors because of encoding, add ``"# -*- coding: utf-8 -*-"`` as the first line in ``wiki_data.py``
+In case of errors because of encoding, add `"# -*- coding: utf-8 -*-"` as the first line in `wiki_data.py`
Maintained by Arvind Neelakantan (arvind2505)
diff --git a/research/object_detection/README.md b/research/object_detection/README.md
index b6dc9ad0ae97caa359b90ef5108de43c6ce71734..c88e88c4703754cf0a59088f8b57f0e29687e4ef 100644
--- a/research/object_detection/README.md
+++ b/research/object_detection/README.md
@@ -2,17 +2,16 @@

# Tensorflow Object Detection API
+
Creating accurate machine learning models capable of localizing and identifying
multiple objects in a single image remains a core challenge in computer vision.
The TensorFlow Object Detection API is an open source framework built on top of
TensorFlow that makes it easy to construct, train and deploy object detection
-models. At Google we’ve certainly found this codebase to be useful for our
-computer vision needs, and we hope that you will as well.
-
-
-
+models. At Google we’ve certainly found this codebase to be useful for our
+computer vision needs, and we hope that you will as well.
+
Contributions to the codebase are welcome and we would love to hear back from
-you if you find this API useful. Finally if you use the Tensorflow Object
+you if you find this API useful. Finally if you use the Tensorflow Object
Detection API for a research publication, please consider citing:
```
@@ -20,8 +19,8 @@ Detection API for a research publication, please consider citing:
Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I, Wojna Z,
Song Y, Guadarrama S, Murphy K, CVPR 2017
```
-\[[link](https://arxiv.org/abs/1611.10012)\]\[[bibtex](
-https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAWUIIlnPZ_L9jxvPwcC49kDlELtaeIyU-&scisf=4&ct=citation&cd=-1&hl=en&scfhb=1)\]
+
+\[[link](https://arxiv.org/abs/1611.10012)\]\[[bibtex](https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAWUIIlnPZ_L9jxvPwcC49kDlELtaeIyU-&scisf=4&ct=citation&cd=-1&hl=en&scfhb=1)\]
@@ -29,63 +28,65 @@ https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.go
## Maintainers
-| Name | GitHub |
-| --- | --- |
-| Jonathan Huang | [jch1](https://github.com/jch1) |
-| Vivek Rathod | [tombstone](https://github.com/tombstone) |
-| Ronny Votel | [ronnyvotel](https://github.com/ronnyvotel) |
-| Derek Chow | [derekjchow](https://github.com/derekjchow) |
-| Chen Sun | [jesu9](https://github.com/jesu9) |
-| Menglong Zhu | [dreamdragon](https://github.com/dreamdragon) |
-| Alireza Fathi | [afathi3](https://github.com/afathi3) |
-| Zhichao Lu | [pkulzc](https://github.com/pkulzc) |
+Name | GitHub
+-------------- | ---------------------------------------------
+Jonathan Huang | [jch1](https://github.com/jch1)
+Vivek Rathod | [tombstone](https://github.com/tombstone)
+Ronny Votel | [ronnyvotel](https://github.com/ronnyvotel)
+Derek Chow | [derekjchow](https://github.com/derekjchow)
+Chen Sun | [jesu9](https://github.com/jesu9)
+Menglong Zhu | [dreamdragon](https://github.com/dreamdragon)
+Alireza Fathi | [afathi3](https://github.com/afathi3)
+Zhichao Lu | [pkulzc](https://github.com/pkulzc)
## Table of contents
Setup:
- * Installation
+* Installation
Quick Start:
- *
+*
Quick Start: Jupyter notebook for off-the-shelf inference
- * Quick Start: Training a pet detector
+* Quick Start: Training a pet detector
Customizing a Pipeline:
- *
+*
Configuring an object detection pipeline
- * Preparing inputs
+* Preparing inputs
Running:
- * Running locally
- * Running on the cloud
+* Running locally
+* Running on the cloud
Extras:
- * Tensorflow detection model zoo
- *
+* Tensorflow detection model zoo
+*
Exporting a trained model for inference
- *
+*
Exporting a trained model for TPU inference
- *
+*
Defining your own model architecture
- *
+*
Bringing in your own dataset
- *
+*
Supported object detection evaluation protocols
- *
+*
Inference and evaluation on the Open Images dataset
- *
+*
Run an instance segmentation model
- *
+*
Run the evaluation for the Open Images Challenge 2018/2019
- *
+*
TPU compatible detection pipelines
- *
+*
Running object detection on mobile devices with TensorFlow Lite
+*
+ Context R-CNN documentation for data preparation, training, and export
## Getting Help
@@ -98,78 +99,107 @@ tensorflow/models GitHub
[issue tracker](https://github.com/tensorflow/models/issues), prefixing the
issue name with "object_detection".
-Please check [FAQ](g3doc/faq.md) for frequently asked questions before
-reporting an issue.
-
+Please check [FAQ](g3doc/faq.md) for frequently asked questions before reporting
+an issue.
## Release information
+### June 17th, 2020
+
+We have released [Context R-CNN](https://arxiv.org/abs/1912.03538), a model that
+uses attention to incorporate information from contextual images (e.g. from
+temporally nearby frames taken by a static camera) in order to improve accuracy.
+Importantly, these contextual images need not be labeled.
+
+* When applied to a challenging wildlife detection dataset ([Snapshot Serengeti](http://lila.science/datasets/snapshot-serengeti)),
+ Context R-CNN with context from up to a month of images outperforms a
+ single-frame baseline by 17.9% mAP, and outperforms S3D (a 3D convolution
+ based baseline) by 11.2% mAP.
+* Context R-CNN leverages temporal context from the unlabeled frames of a
+ novel camera deployment to improve performance at that camera, boosting
+ model generalizability.
+
+Read about Context R-CNN on the Google AI blog [here](https://ai.googleblog.com/2020/06/leveraging-temporal-context-for-object.html).
+
+We have provided code for generating data with associated context
+[here](g3doc/context_rcnn.md), and a sample config for a Context R-CNN
+model [here](samples/configs/context_rcnn_resnet101_snapshot_serengeti_sync.config).
+
+Snapshot Serengeti-trained Faster R-CNN and Context R-CNN models can be found in
+the [model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md#snapshot-serengeti-camera-trap-trained-models).
+
+A colab demonstrating Context R-CNN is provided
+[here](colab_tutorials/context_rcnn_tutorial.ipynb).
+
+Thanks to contributors: Sara Beery, Jonathan Huang, Guanhang Wu, Vivek
+Rathod, Ronny Votel, Zhichao Lu, David Ross, Pietro Perona, Tanya Birch, and
+the Wildlife Insights AI Team.
### May 19th, 2020
-We have released
-[MobileDets](https://arxiv.org/abs/2004.14525),
-a set of high-performance models for mobile CPUs, DSPs and EdgeTPUs.
-* MobileDets outperform MobileNetV3+SSDLite by 1.7 mAP at comparable mobile CPU
-inference latencies. MobileDets also outperform MobileNetV2+SSDLite by 1.9 mAP
-on mobile CPUs, 3.7 mAP on EdgeTPUs and 3.4 mAP on DSPs while running equally
-fast. MobileDets also offer up to 2x speedup over MnasFPN on EdgeTPUs and DSPs.
+We have released [MobileDets](https://arxiv.org/abs/2004.14525), a set of
+high-performance models for mobile CPUs, DSPs and EdgeTPUs.
+
+* MobileDets outperform MobileNetV3+SSDLite by 1.7 mAP at comparable mobile
+ CPU inference latencies. MobileDets also outperform MobileNetV2+SSDLite by
+ 1.9 mAP on mobile CPUs, 3.7 mAP on EdgeTPUs and 3.4 mAP on DSPs while
+ running equally fast. MobileDets also offer up to 2x speedup over MnasFPN on
+ EdgeTPUs and DSPs.
For each of the three hardware platforms we have released model definition,
model checkpoints trained on the COCO14 dataset and converted TFLite models in
fp32 and/or uint8.
-Thanks to contributors: Yunyang Xiong, Hanxiao Liu, Suyog Gupta,
-Berkin Akin, Gabriel Bender, Pieter-Jan Kindermans, Mingxing Tan, Vikas Singh,
-Bo Chen, Quoc Le, Zhichao Lu.
-
+Thanks to contributors: Yunyang Xiong, Hanxiao Liu, Suyog Gupta, Berkin
+Akin, Gabriel Bender, Pieter-Jan Kindermans, Mingxing Tan, Vikas Singh, Bo Chen,
+Quoc Le, Zhichao Lu.
### May 7th, 2020
+
We have released a mobile model with the
[MnasFPN head](https://arxiv.org/abs/1912.01106).
+* MnasFPN with MobileNet-V2 backbone is the most accurate (26.6 mAP at 183ms
+ on Pixel 1) mobile detection model we have released to date. With
+ depth-multiplier, MnasFPN with MobileNet-V2 backbone is 1.8 mAP higher than
+ MobileNet-V3-Large with SSDLite (23.8 mAP vs 22.0 mAP) at similar latency
+ (120ms) on Pixel 1.
-* MnasFPN with MobileNet-V2 backbone is the most accurate (26.6 mAP at 183ms on
-Pixel 1) mobile detection model we have released to date. With depth-multiplier,
-MnasFPN with MobileNet-V2 backbone is 1.8 mAP higher than MobileNet-V3-Large
-with SSDLite (23.8 mAP vs 22.0 mAP) at similar latency (120ms) on Pixel 1.
-
-We have released model definition, model checkpoints trained on
-the COCO14 dataset and a converted TFLite model.
-
-Thanks to contributors: Bo Chen, Golnaz Ghiasi, Hanxiao Liu,
-Tsung-Yi Lin, Dmitry Kalenichenko, Hartwig Adam, Quoc Le, Zhichao Lu,
-Jonathan Huang, Hao Xu.
-
+We have released model definition, model checkpoints trained on the COCO14
+dataset and a converted TFLite model.
+Thanks to contributors: Bo Chen, Golnaz Ghiasi, Hanxiao Liu, Tsung-Yi
+Lin, Dmitry Kalenichenko, Hartwig Adam, Quoc Le, Zhichao Lu, Jonathan Huang, Hao
+Xu.
### Nov 13th, 2019
+
We have released MobileNetEdgeTPU SSDLite model.
-* SSDLite with MobileNetEdgeTPU backbone, which achieves 10% mAP higher than
-MobileNetV2 SSDLite (24.3 mAP vs 22 mAP) on a Google Pixel4 at comparable
-latency (6.6ms vs 6.8ms).
+* SSDLite with MobileNetEdgeTPU backbone, which achieves 10% mAP higher than
+ MobileNetV2 SSDLite (24.3 mAP vs 22 mAP) on a Google Pixel4 at comparable
+ latency (6.6ms vs 6.8ms).
-Along with the model definition, we are also releasing model checkpoints
-trained on the COCO dataset.
+Along with the model definition, we are also releasing model checkpoints trained
+on the COCO dataset.
Thanks to contributors: Yunyang Xiong, Bo Chen, Suyog Gupta, Hanxiao Liu,
Gabriel Bender, Mingxing Tan, Berkin Akin, Zhichao Lu, Quoc Le
### Oct 15th, 2019
+
We have released two MobileNet V3 SSDLite models (presented in
[Searching for MobileNetV3](https://arxiv.org/abs/1905.02244)).
-* SSDLite with MobileNet-V3-Large backbone, which is 27% faster than Mobilenet
-V2 SSDLite (119ms vs 162ms) on a Google Pixel phone CPU at the same mAP.
-* SSDLite with MobileNet-V3-Small backbone, which is 37% faster than MnasNet
-SSDLite reduced with depth-multiplier (43ms vs 68ms) at the same mAP.
+* SSDLite with MobileNet-V3-Large backbone, which is 27% faster than Mobilenet
+ V2 SSDLite (119ms vs 162ms) on a Google Pixel phone CPU at the same mAP.
+* SSDLite with MobileNet-V3-Small backbone, which is 37% faster than MnasNet
+ SSDLite reduced with depth-multiplier (43ms vs 68ms) at the same mAP.
-Along with the model definition, we are also releasing model checkpoints
-trained on the COCO dataset.
+Along with the model definition, we are also releasing model checkpoints trained
+on the COCO dataset.
Thanks to contributors: Bo Chen, Zhichao Lu, Vivek Rathod, Jonathan Huang
-
### July 1st, 2019
We have released an updated set of utils and an updated
@@ -177,28 +207,30 @@ We have released an updated set of utils and an updated
[Open Images Challenge 2019](https://storage.googleapis.com/openimages/web/challenge2019.html)!
The Instance Segmentation metric for
-[Open Images V5](https://storage.googleapis.com/openimages/web/index.html)
-and [Challenge 2019](https://storage.googleapis.com/openimages/web/challenge2019.html)
-is part of this release. Check out [the metric description](https://storage.googleapis.com/openimages/web/evaluation.html#instance_segmentation_eval)
+[Open Images V5](https://storage.googleapis.com/openimages/web/index.html) and
+[Challenge 2019](https://storage.googleapis.com/openimages/web/challenge2019.html)
+is part of this release. Check out
+[the metric description](https://storage.googleapis.com/openimages/web/evaluation.html#instance_segmentation_eval)
on the Open Images website.
Thanks to contributors: Alina Kuznetsova, Rodrigo Benenson
### Feb 11, 2019
-We have released detection models trained on the Open Images Dataset V4
-in our detection model zoo, including
+We have released detection models trained on the Open Images Dataset V4 in our
+detection model zoo, including
-* Faster R-CNN detector with Inception Resnet V2 feature extractor
-* SSD detector with MobileNet V2 feature extractor
-* SSD detector with ResNet 101 FPN feature extractor (aka RetinaNet-101)
+* Faster R-CNN detector with Inception Resnet V2 feature extractor
+* SSD detector with MobileNet V2 feature extractor
+* SSD detector with ResNet 101 FPN feature extractor (aka RetinaNet-101)
Thanks to contributors: Alina Kuznetsova, Yinxiao Li
### Sep 17, 2018
We have released Faster R-CNN detectors with ResNet-50 / ResNet-101 feature
-extractors trained on the [iNaturalist Species Detection Dataset](https://github.com/visipedia/inat_comp/blob/master/2017/README.md#bounding-boxes).
+extractors trained on the
+[iNaturalist Species Detection Dataset](https://github.com/visipedia/inat_comp/blob/master/2017/README.md#bounding-boxes).
The models are trained on the training split of the iNaturalist data for 4M
iterations, they achieve 55% and 58% mean AP@.5 over 2854 classes respectively.
For more details please refer to this [paper](https://arxiv.org/abs/1707.06642).
@@ -210,42 +242,59 @@ For more details please refer to this [paper](https://arxiv.org/abs/1707.06642).
There are many new updates in this release, extending the functionality and
capability of the API:
-* Moving from slim-based training to [Estimator](https://www.tensorflow.org/api_docs/python/tf/estimator/Estimator)-based
-training.
-* Support for [RetinaNet](https://arxiv.org/abs/1708.02002), and a [MobileNet](https://ai.googleblog.com/2017/06/mobilenets-open-source-models-for.html)
-adaptation of RetinaNet.
-* A novel SSD-based architecture called the [Pooling Pyramid Network](https://arxiv.org/abs/1807.03284) (PPN).
-* Releasing several [TPU](https://cloud.google.com/tpu/)-compatible models.
-These can be found in the `samples/configs/` directory with a comment in the
-pipeline configuration files indicating TPU compatibility.
-* Support for quantized training.
-* Updated documentation for new binaries, Cloud training, and [Tensorflow Lite](https://www.tensorflow.org/mobile/tflite/).
-
-See also our [expanded announcement blogpost](https://ai.googleblog.com/2018/07/accelerated-training-and-inference-with.html) and accompanying tutorial at the [TensorFlow blog](https://medium.com/tensorflow/training-and-serving-a-realtime-mobile-object-detector-in-30-minutes-with-cloud-tpus-b78971cf1193).
+* Moving from slim-based training to
+ [Estimator](https://www.tensorflow.org/api_docs/python/tf/estimator/Estimator)-based
+ training.
+* Support for [RetinaNet](https://arxiv.org/abs/1708.02002), and a
+ [MobileNet](https://ai.googleblog.com/2017/06/mobilenets-open-source-models-for.html)
+ adaptation of RetinaNet.
+* A novel SSD-based architecture called the
+ [Pooling Pyramid Network](https://arxiv.org/abs/1807.03284) (PPN).
+* Releasing several [TPU](https://cloud.google.com/tpu/)-compatible models.
+ These can be found in the `samples/configs/` directory with a comment in the
+ pipeline configuration files indicating TPU compatibility.
+* Support for quantized training.
+* Updated documentation for new binaries, Cloud training, and
+ [Tensorflow Lite](https://www.tensorflow.org/mobile/tflite/).
+
+See also our
+[expanded announcement blogpost](https://ai.googleblog.com/2018/07/accelerated-training-and-inference-with.html)
+and accompanying tutorial at the
+[TensorFlow blog](https://medium.com/tensorflow/training-and-serving-a-realtime-mobile-object-detector-in-30-minutes-with-cloud-tpus-b78971cf1193).
Thanks to contributors: Sara Robinson, Aakanksha Chowdhery, Derek Chow,
Pengchong Jin, Jonathan Huang, Vivek Rathod, Zhichao Lu, Ronny Votel
-
### June 25, 2018
-Additional evaluation tools for the [Open Images Challenge 2018](https://storage.googleapis.com/openimages/web/challenge.html) are out.
-Check out our short tutorial on data preparation and running evaluation [here](g3doc/challenge_evaluation.md)!
+Additional evaluation tools for the
+[Open Images Challenge 2018](https://storage.googleapis.com/openimages/web/challenge.html)
+are out. Check out our short tutorial on data preparation and running evaluation
+[here](g3doc/challenge_evaluation.md)!
Thanks to contributors: Alina Kuznetsova
### June 5, 2018
-We have released the implementation of evaluation metrics for both tracks of the [Open Images Challenge 2018](https://storage.googleapis.com/openimages/web/challenge.html) as a part of the Object Detection API - see the [evaluation protocols](g3doc/evaluation_protocols.md) for more details.
-Additionally, we have released a tool for hierarchical labels expansion for the Open Images Challenge: check out [oid_hierarchical_labels_expansion.py](dataset_tools/oid_hierarchical_labels_expansion.py).
+We have released the implementation of evaluation metrics for both tracks of the
+[Open Images Challenge 2018](https://storage.googleapis.com/openimages/web/challenge.html)
+as a part of the Object Detection API - see the
+[evaluation protocols](g3doc/evaluation_protocols.md) for more details.
+Additionally, we have released a tool for hierarchical labels expansion for the
+Open Images Challenge: check out
+[oid_hierarchical_labels_expansion.py](dataset_tools/oid_hierarchical_labels_expansion.py).
-Thanks to contributors: Alina Kuznetsova, Vittorio Ferrari, Jasper Uijlings
+Thanks to contributors: Alina Kuznetsova, Vittorio Ferrari, Jasper
+Uijlings
### April 30, 2018
-We have released a Faster R-CNN detector with ResNet-101 feature extractor trained on [AVA](https://research.google.com/ava/) v2.1.
-Compared with other commonly used object detectors, it changes the action classification loss function to per-class Sigmoid loss to handle boxes with multiple labels.
-The model is trained on the training split of AVA v2.1 for 1.5M iterations, it achieves mean AP of 11.25% over 60 classes on the validation split of AVA v2.1.
+We have released a Faster R-CNN detector with ResNet-101 feature extractor
+trained on [AVA](https://research.google.com/ava/) v2.1. Compared with other
+commonly used object detectors, it changes the action classification loss
+function to per-class Sigmoid loss to handle boxes with multiple labels. The
+model is trained on the training split of AVA v2.1 for 1.5M iterations; it
+achieves mean AP of 11.25% over 60 classes on the validation split of AVA v2.1.
For more details please refer to this [paper](https://arxiv.org/abs/1705.08421).
Thanks to contributors: Chen Sun, David Ross
@@ -255,84 +304,94 @@ For more details please refer to this [paper](https://arxiv.org/abs/1705.08421).
Supercharge your mobile phones with the next generation mobile object detector!
We are adding support for MobileNet V2 with SSDLite presented in
[MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381).
-This model is 35% faster than Mobilenet V1 SSD on a Google Pixel phone CPU (200ms vs. 270ms) at the same accuracy.
-Along with the model definition, we are also releasing a model checkpoint trained on the COCO dataset.
+This model is 35% faster than Mobilenet V1 SSD on a Google Pixel phone CPU
+(200ms vs. 270ms) at the same accuracy. Along with the model definition, we are
+also releasing a model checkpoint trained on the COCO dataset.
-Thanks to contributors: Menglong Zhu, Mark Sandler, Zhichao Lu, Vivek Rathod, Jonathan Huang
+Thanks to contributors: Menglong Zhu, Mark Sandler, Zhichao Lu, Vivek
+Rathod, Jonathan Huang
### February 9, 2018
-We now support instance segmentation!! In this API update we support a number of instance segmentation models similar to those discussed in the [Mask R-CNN paper](https://arxiv.org/abs/1703.06870). For further details refer to
-[our slides](http://presentations.cocodataset.org/Places17-GMRI.pdf) from the 2017 Coco + Places Workshop.
-Refer to the section on [Running an Instance Segmentation Model](g3doc/instance_segmentation.md) for instructions on how to configure a model
-that predicts masks in addition to object bounding boxes.
+We now support instance segmentation!! In this API update we support a number of
+instance segmentation models similar to those discussed in the
+[Mask R-CNN paper](https://arxiv.org/abs/1703.06870). For further details refer
+to [our slides](http://presentations.cocodataset.org/Places17-GMRI.pdf) from the
+2017 Coco + Places Workshop. Refer to the section on
+[Running an Instance Segmentation Model](g3doc/instance_segmentation.md) for
+instructions on how to configure a model that predicts masks in addition to
+object bounding boxes.
-Thanks to contributors: Alireza Fathi, Zhichao Lu, Vivek Rathod, Ronny Votel, Jonathan Huang
+Thanks to contributors: Alireza Fathi, Zhichao Lu, Vivek Rathod, Ronny
+Votel, Jonathan Huang
### November 17, 2017
As a part of the Open Images V3 release we have released:
-* An implementation of the Open Images evaluation metric and the [protocol](g3doc/evaluation_protocols.md#open-images).
-* Additional tools to separate inference of detection and evaluation (see [this tutorial](g3doc/oid_inference_and_evaluation.md)).
-* A new detection model trained on the Open Images V2 data release (see [Open Images model](g3doc/detection_model_zoo.md#open-images-models)).
+* An implementation of the Open Images evaluation metric and the
+ [protocol](g3doc/evaluation_protocols.md#open-images).
+* Additional tools to separate inference of detection and evaluation (see
+ [this tutorial](g3doc/oid_inference_and_evaluation.md)).
+* A new detection model trained on the Open Images V2 data release (see
+ [Open Images model](g3doc/detection_model_zoo.md#open-images-models)).
-See more information on the [Open Images website](https://github.com/openimages/dataset)!
+See more information on the
+[Open Images website](https://github.com/openimages/dataset)!
Thanks to contributors: Stefan Popov, Alina Kuznetsova
### November 6, 2017
We have re-released faster versions of our (pre-trained) models in the
-model zoo. In addition to what
-was available before, we are also adding Faster R-CNN models trained on COCO
-with Inception V2 and Resnet-50 feature extractors, as well as a Faster R-CNN
-with Resnet-101 model trained on the KITTI dataset.
+model zoo. In addition to what was
+available before, we are also adding Faster R-CNN models trained on COCO with
+Inception V2 and Resnet-50 feature extractors, as well as a Faster R-CNN with
+Resnet-101 model trained on the KITTI dataset.
-Thanks to contributors: Jonathan Huang, Vivek Rathod, Derek Chow,
-Tal Remez, Chen Sun.
+Thanks to contributors: Jonathan Huang, Vivek Rathod, Derek Chow, Tal
+Remez, Chen Sun.
### October 31, 2017
-We have released a new state-of-the-art model for object detection using
-the Faster-RCNN with the
-[NASNet-A image featurization](https://arxiv.org/abs/1707.07012). This
-model achieves mAP of 43.1% on the test-dev validation dataset for COCO,
-improving on the best available model in the zoo by 6% in terms
-of absolute mAP.
+We have released a new state-of-the-art model for object detection using the
+Faster-RCNN with the
+[NASNet-A image featurization](https://arxiv.org/abs/1707.07012). This model
+achieves mAP of 43.1% on the test-dev validation dataset for COCO, improving on
+the best available model in the zoo by 6% in terms of absolute mAP.
-Thanks to contributors: Barret Zoph, Vijay Vasudevan, Jonathon Shlens, Quoc Le
+Thanks to contributors: Barret Zoph, Vijay Vasudevan, Jonathon Shlens,
+Quoc Le
### August 11, 2017
-We have released an update to the [Android Detect
-demo](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android)
-which will now run models trained using the Tensorflow Object
-Detection API on an Android device. By default, it currently runs a
-frozen SSD w/Mobilenet detector trained on COCO, but we encourage
-you to try out other detection models!
+We have released an update to the
+[Android Detect demo](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android)
+which will now run models trained using the Tensorflow Object Detection API on
+an Android device. By default, it currently runs a frozen SSD w/Mobilenet
+detector trained on COCO, but we encourage you to try out other detection
+models!
Thanks to contributors: Jonathan Huang, Andrew Harp
-
### June 15, 2017
-In addition to our base Tensorflow detection model definitions, this
-release includes:
-
-* A selection of trainable detection models, including:
- * Single Shot Multibox Detector (SSD) with MobileNet,
- * SSD with Inception V2,
- * Region-Based Fully Convolutional Networks (R-FCN) with Resnet 101,
- * Faster RCNN with Resnet 101,
- * Faster RCNN with Inception Resnet v2
-* Frozen weights (trained on the COCO dataset) for each of the above models to
- be used for out-of-the-box inference purposes.
-* A [Jupyter notebook](object_detection_tutorial.ipynb) for performing
- out-of-the-box inference with one of our released models
-* Convenient [local training](g3doc/running_locally.md) scripts as well as
- distributed training and evaluation pipelines via
- [Google Cloud](g3doc/running_on_cloud.md).
+In addition to our base Tensorflow detection model definitions, this release
+includes:
+
+* A selection of trainable detection models, including:
+ * Single Shot Multibox Detector (SSD) with MobileNet,
+ * SSD with Inception V2,
+ * Region-Based Fully Convolutional Networks (R-FCN) with Resnet 101,
+ * Faster RCNN with Resnet 101,
+ * Faster RCNN with Inception Resnet v2
+* Frozen weights (trained on the COCO dataset) for each of the above models to
+ be used for out-of-the-box inference purposes.
+* A [Jupyter notebook](colab_tutorials/object_detection_tutorial.ipynb) for
+ performing out-of-the-box inference with one of our released models
+* Convenient [local training](g3doc/running_locally.md) scripts as well as
+ distributed training and evaluation pipelines via
+ [Google Cloud](g3doc/running_on_cloud.md).
Thanks to contributors: Jonathan Huang, Vivek Rathod, Derek Chow, Chen
Sun, Menglong Zhu, Matthew Tang, Anoop Korattikara, Alireza Fathi, Ian Fischer,
diff --git a/research/object_detection/builders/box_predictor_builder_test.py b/research/object_detection/builders/box_predictor_builder_test.py
index 72a71b794c3572d12932ef16868d9793687945e4..7154cd2efc06e2c4581e654d718e3519152bc6bb 100644
--- a/research/object_detection/builders/box_predictor_builder_test.py
+++ b/research/object_detection/builders/box_predictor_builder_test.py
@@ -16,6 +16,7 @@
"""Tests for box_predictor_builder."""
+import unittest
import mock
import tensorflow.compat.v1 as tf
@@ -25,8 +26,10 @@ from object_detection.builders import hyperparams_builder
from object_detection.predictors import mask_rcnn_box_predictor
from object_detection.protos import box_predictor_pb2
from object_detection.protos import hyperparams_pb2
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only Tests.')
class ConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
def test_box_predictor_calls_conv_argscope_fn(self):
@@ -161,6 +164,7 @@ class ConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
self.assertFalse(class_head._use_depthwise)
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only Tests.')
class WeightSharedConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
def test_box_predictor_calls_conv_argscope_fn(self):
@@ -357,6 +361,7 @@ class WeightSharedConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only Tests.')
class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
def test_box_predictor_builder_calls_fc_argscope_fn(self):
@@ -537,6 +542,7 @@ class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
._convolve_then_upsample)
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only Tests.')
class RfcnBoxPredictorBuilderTest(tf.test.TestCase):
def test_box_predictor_calls_fc_argscope_fn(self):
diff --git a/research/object_detection/builders/calibration_builder_test.py b/research/object_detection/builders/calibration_builder_test.py
index a077ef4f92765c4599f16f432223504a2bda577c..a81d53a86e65bc400fe38cac8c96867aa1489607 100644
--- a/research/object_detection/builders/calibration_builder_test.py
+++ b/research/object_detection/builders/calibration_builder_test.py
@@ -25,31 +25,34 @@ from six.moves import zip
import tensorflow.compat.v1 as tf
from object_detection.builders import calibration_builder
from object_detection.protos import calibration_pb2
+from object_detection.utils import test_case
-class CalibrationBuilderTest(tf.test.TestCase):
+class CalibrationBuilderTest(test_case.TestCase):
def test_tf_linear_interp1d_map(self):
"""Tests TF linear interpolation mapping to a single number."""
- with self.test_session() as sess:
+ def graph_fn():
tf_x = tf.constant([0., 0.5, 1.])
tf_y = tf.constant([0.5, 0.5, 0.5])
new_x = tf.constant([0., 0.25, 0.5, 0.75, 1.])
tf_map_outputs = calibration_builder._tf_linear_interp1d(
new_x, tf_x, tf_y)
- tf_map_outputs_np = sess.run([tf_map_outputs])
- self.assertAllClose(tf_map_outputs_np, [[0.5, 0.5, 0.5, 0.5, 0.5]])
+ return tf_map_outputs
+ tf_map_outputs_np = self.execute(graph_fn, [])
+ self.assertAllClose(tf_map_outputs_np, [0.5, 0.5, 0.5, 0.5, 0.5])
def test_tf_linear_interp1d_interpolate(self):
"""Tests TF 1d linear interpolation not mapping to a single number."""
- with self.test_session() as sess:
+ def graph_fn():
tf_x = tf.constant([0., 0.5, 1.])
tf_y = tf.constant([0.6, 0.7, 1.0])
new_x = tf.constant([0., 0.25, 0.5, 0.75, 1.])
tf_interpolate_outputs = calibration_builder._tf_linear_interp1d(
new_x, tf_x, tf_y)
- tf_interpolate_outputs_np = sess.run([tf_interpolate_outputs])
- self.assertAllClose(tf_interpolate_outputs_np, [[0.6, 0.65, 0.7, 0.85, 1.]])
+ return tf_interpolate_outputs
+ tf_interpolate_outputs_np = self.execute(graph_fn, [])
+ self.assertAllClose(tf_interpolate_outputs_np, [0.6, 0.65, 0.7, 0.85, 1.])
@staticmethod
def _get_scipy_interp1d(new_x, x, y):
@@ -59,12 +62,13 @@ class CalibrationBuilderTest(tf.test.TestCase):
def _get_tf_interp1d(self, new_x, x, y):
"""Helper performing 1d linear interpolation using Tensorflow."""
- with self.test_session() as sess:
+ def graph_fn():
tf_interp_outputs = calibration_builder._tf_linear_interp1d(
tf.convert_to_tensor(new_x, dtype=tf.float32),
tf.convert_to_tensor(x, dtype=tf.float32),
tf.convert_to_tensor(y, dtype=tf.float32))
- np_tf_interp_outputs = sess.run(tf_interp_outputs)
+ return tf_interp_outputs
+ np_tf_interp_outputs = self.execute(graph_fn, [])
return np_tf_interp_outputs
def test_tf_linear_interp1d_against_scipy_map(self):
@@ -128,8 +132,7 @@ class CalibrationBuilderTest(tf.test.TestCase):
self._add_function_approximation_to_calibration_proto(
calibration_config, class_agnostic_x, class_agnostic_y, class_id=None)
- od_graph = tf.Graph()
- with self.test_session(graph=od_graph) as sess:
+ def graph_fn():
calibration_fn = calibration_builder.build(calibration_config)
# batch_size = 2, num_classes = 2, num_anchors = 2.
class_predictions_with_background = tf.constant(
@@ -140,7 +143,8 @@ class CalibrationBuilderTest(tf.test.TestCase):
# Everything should map to 0.5 if classes are ignored.
calibrated_scores = calibration_fn(class_predictions_with_background)
- calibrated_scores_np = sess.run(calibrated_scores)
+ return calibrated_scores
+ calibrated_scores_np = self.execute(graph_fn, [])
self.assertAllClose(calibrated_scores_np, [[[0.05, 0.1, 0.15],
[0.2, 0.25, 0.0]],
[[0.35, 0.45, 0.55],
@@ -161,8 +165,7 @@ class CalibrationBuilderTest(tf.test.TestCase):
self._add_function_approximation_to_calibration_proto(
calibration_config, class_1_x, class_1_y, class_id=1)
- od_graph = tf.Graph()
- with self.test_session(graph=od_graph) as sess:
+ def graph_fn():
calibration_fn = calibration_builder.build(calibration_config)
# batch_size = 2, num_classes = 2, num_anchors = 2.
class_predictions_with_background = tf.constant(
@@ -170,7 +173,8 @@ class CalibrationBuilderTest(tf.test.TestCase):
[[0.6, 0.4], [0.08, 0.92]]],
dtype=tf.float32)
calibrated_scores = calibration_fn(class_predictions_with_background)
- calibrated_scores_np = sess.run(calibrated_scores)
+ return calibrated_scores
+ calibrated_scores_np = self.execute(graph_fn, [])
self.assertAllClose(calibrated_scores_np, [[[0.5, 0.6], [0.5, 0.3]],
[[0.5, 0.7], [0.5, 0.96]]])
@@ -179,8 +183,7 @@ class CalibrationBuilderTest(tf.test.TestCase):
calibration_config = calibration_pb2.CalibrationConfig()
calibration_config.temperature_scaling_calibration.scaler = 2.0
- od_graph = tf.Graph()
- with self.test_session(graph=od_graph) as sess:
+ def graph_fn():
calibration_fn = calibration_builder.build(calibration_config)
# batch_size = 2, num_classes = 2, num_anchors = 2.
class_predictions_with_background = tf.constant(
@@ -188,7 +191,8 @@ class CalibrationBuilderTest(tf.test.TestCase):
[[0.6, 0.7, 0.8], [0.9, 1.0, 1.0]]],
dtype=tf.float32)
calibrated_scores = calibration_fn(class_predictions_with_background)
- calibrated_scores_np = sess.run(calibrated_scores)
+ return calibrated_scores
+ calibrated_scores_np = self.execute(graph_fn, [])
self.assertAllClose(calibrated_scores_np,
[[[0.05, 0.1, 0.15], [0.2, 0.25, 0.0]],
[[0.3, 0.35, 0.4], [0.45, 0.5, 0.5]]])
@@ -212,8 +216,7 @@ class CalibrationBuilderTest(tf.test.TestCase):
calibration_config = calibration_pb2.CalibrationConfig()
self._add_function_approximation_to_calibration_proto(
calibration_config, class_0_x, class_0_y, class_id=0)
- od_graph = tf.Graph()
- with self.test_session(graph=od_graph) as sess:
+ def graph_fn():
calibration_fn = calibration_builder.build(calibration_config)
# batch_size = 2, num_classes = 2, num_anchors = 2.
class_predictions_with_background = tf.constant(
@@ -221,7 +224,8 @@ class CalibrationBuilderTest(tf.test.TestCase):
[[0.6, 0.4], [0.08, 0.92]]],
dtype=tf.float32)
calibrated_scores = calibration_fn(class_predictions_with_background)
- calibrated_scores_np = sess.run(calibrated_scores)
+ return calibrated_scores
+ calibrated_scores_np = self.execute(graph_fn, [])
self.assertAllClose(calibrated_scores_np, [[[0.5, 0.2], [0.5, 0.1]],
[[0.5, 0.4], [0.5, 0.92]]])
diff --git a/research/object_detection/builders/dataset_builder.py b/research/object_detection/builders/dataset_builder.py
index 772086619a921335fd671a232a9917f51e7f58ce..c1c1ce3ecd17c2625585cd83f080b49c0150151a 100644
--- a/research/object_detection/builders/dataset_builder.py
+++ b/research/object_detection/builders/dataset_builder.py
@@ -29,7 +29,6 @@ from __future__ import print_function
import functools
import tensorflow.compat.v1 as tf
-from tensorflow.contrib import data as tf_data
from object_detection.builders import decoder_builder
from object_detection.protos import input_reader_pb2
@@ -94,7 +93,7 @@ def read_dataset(file_read_func, input_files, config,
filename_dataset = filename_dataset.repeat(config.num_epochs or None)
records_dataset = filename_dataset.apply(
- tf_data.parallel_interleave(
+ tf.data.experimental.parallel_interleave(
file_read_func,
cycle_length=num_readers,
block_length=config.read_block_length,
@@ -153,6 +152,30 @@ def build(input_reader_config, batch_size=None, transform_input_data_fn=None,
if not config.input_path:
raise ValueError('At least one input path must be specified in '
'`input_reader_config`.')
+ def dataset_map_fn(dataset, fn_to_map, batch_size=None,
+ input_reader_config=None):
+ """Handles whether or not to use the legacy map function.
+
+ Args:
+ dataset: A tf.data.Dataset.
+ fn_to_map: The function to be mapped for that dataset.
+ batch_size: Batch size. If batch size is None, no batching is performed.
+ input_reader_config: An input_reader_pb2.InputReader object.
+
+ Returns:
+ A tf.data.Dataset mapped with fn_to_map.
+ """
+ if hasattr(dataset, 'map_with_legacy_function'):
+ if batch_size:
+ num_parallel_calls = batch_size * (
+ input_reader_config.num_parallel_batches)
+ else:
+ num_parallel_calls = input_reader_config.num_parallel_map_calls
+ dataset = dataset.map_with_legacy_function(
+ fn_to_map, num_parallel_calls=num_parallel_calls)
+ else:
+ dataset = dataset.map(fn_to_map, tf.data.experimental.AUTOTUNE)
+ return dataset
shard_fn = shard_function_for_context(input_context)
if input_context is not None:
batch_size = input_context.get_per_replica_batch_size(batch_size)
@@ -163,15 +186,16 @@ def build(input_reader_config, batch_size=None, transform_input_data_fn=None,
dataset = dataset.shard(input_reader_config.sample_1_of_n_examples, 0)
# TODO(rathodv): make batch size a required argument once the old binaries
# are deleted.
- dataset = dataset.map(decoder.decode, tf.data.experimental.AUTOTUNE)
+ dataset = dataset_map_fn(dataset, decoder.decode, batch_size,
+ input_reader_config)
if reduce_to_frame_fn:
- dataset = reduce_to_frame_fn(dataset)
+ dataset = reduce_to_frame_fn(dataset, dataset_map_fn, batch_size,
+ input_reader_config)
if transform_input_data_fn is not None:
- dataset = dataset.map(transform_input_data_fn,
- tf.data.experimental.AUTOTUNE)
+ dataset = dataset_map_fn(dataset, transform_input_data_fn,
+ batch_size, input_reader_config)
if batch_size:
- dataset = dataset.apply(
- tf_data.batch_and_drop_remainder(batch_size))
+ dataset = dataset.batch(batch_size, drop_remainder=True)
dataset = dataset.prefetch(input_reader_config.num_prefetch_batches)
return dataset
diff --git a/research/object_detection/builders/dataset_builder_test.py b/research/object_detection/builders/dataset_builder_test.py
index 741ff3bcf84b5dae608e1e79241921f7e23ee3d1..eb2cdb3ccbd891e5f089281d9b506d636d26d6a9 100644
--- a/research/object_detection/builders/dataset_builder_test.py
+++ b/research/object_detection/builders/dataset_builder_test.py
@@ -197,13 +197,13 @@ class DatasetBuilderTest(test_case.TestCase):
output_dict[fields.InputDataFields.groundtruth_boxes][0][0])
def get_mock_reduce_to_frame_fn(self):
- def mock_reduce_to_frame_fn(dataset):
+ def mock_reduce_to_frame_fn(dataset, dataset_map_fn, batch_size, config):
def get_frame(tensor_dict):
out_tensor_dict = {}
out_tensor_dict[fields.InputDataFields.source_id] = (
tensor_dict[fields.InputDataFields.source_id][0])
return out_tensor_dict
- return dataset.map(get_frame, tf.data.experimental.AUTOTUNE)
+ return dataset_map_fn(dataset, get_frame, batch_size, config)
return mock_reduce_to_frame_fn
def test_build_tf_record_input_reader_sequence_example_train(self):
@@ -390,7 +390,7 @@ class DatasetBuilderTest(test_case.TestCase):
return iter1.get_next(), iter2.get_next()
output_dict1, output_dict2 = self.execute(graph_fn, [])
- self.assertAllEqual(['0'], output_dict1[fields.InputDataFields.source_id])
+ self.assertAllEqual([b'0'], output_dict1[fields.InputDataFields.source_id])
self.assertEqual([b'1'], output_dict2[fields.InputDataFields.source_id])
def test_sample_one_of_n_shards(self):
@@ -537,8 +537,15 @@ class ReadDatasetTest(test_case.TestCase):
def graph_fn():
keys = [1, 0, -1]
dataset = tf.data.Dataset.from_tensor_slices([[1, 2, -1, 5]])
- table = contrib_lookup.HashTable(
- initializer=contrib_lookup.KeyValueTensorInitializer(
+ try:
+ # Dynamically try to load the tf v2 lookup, falling back to contrib
+ lookup = tf.compat.v2.lookup
+ hash_table_class = tf.compat.v2.lookup.StaticHashTable
+ except AttributeError:
+ lookup = contrib_lookup
+ hash_table_class = contrib_lookup.HashTable
+ table = hash_table_class(
+ initializer=lookup.KeyValueTensorInitializer(
keys=keys, values=list(reversed(keys))),
default_value=100)
dataset = dataset.map(table.lookup)
@@ -559,7 +566,7 @@ class ReadDatasetTest(test_case.TestCase):
data = self.execute(graph_fn, [])
# Note that the execute function extracts single outputs if the return
# value is of size 1.
- self.assertAllEqual(
+ self.assertCountEqual(
data, [
1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 1, 10, 2, 20, 3, 30, 4, 40, 5,
50
@@ -577,7 +584,7 @@ class ReadDatasetTest(test_case.TestCase):
data = self.execute(graph_fn, [])
# Note that the execute function extracts single outputs if the return
# value is of size 1.
- self.assertAllEqual(
+ self.assertCountEqual(
data, [
1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 1, 10, 2, 20, 3, 30, 4, 40, 5,
50
@@ -607,12 +614,14 @@ class ReadDatasetTest(test_case.TestCase):
def graph_fn():
return self._get_dataset_next(
[self._shuffle_path_template % '*'], config, batch_size=10)
- expected_non_shuffle_output = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
+ expected_non_shuffle_output1 = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
+ expected_non_shuffle_output2 = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
# Note that the execute function extracts single outputs if the return
# value is of size 1.
data = self.execute(graph_fn, [])
- self.assertAllEqual(data, expected_non_shuffle_output)
+ self.assertTrue(all(data == expected_non_shuffle_output1) or
+ all(data == expected_non_shuffle_output2))
def test_read_dataset_single_epoch(self):
config = input_reader_pb2.InputReader()
diff --git a/research/object_detection/builders/decoder_builder.py b/research/object_detection/builders/decoder_builder.py
index d3cac57d0813c11451b7f92197136d9ea8844af2..59880735cd3fd6be3d4e9c567af615227d0a1fb1 100644
--- a/research/object_detection/builders/decoder_builder.py
+++ b/research/object_detection/builders/decoder_builder.py
@@ -48,7 +48,7 @@ def build(input_reader_config):
if input_reader_config.HasField('label_map_path'):
label_map_proto_file = input_reader_config.label_map_path
input_type = input_reader_config.input_type
- if input_type == input_reader_pb2.InputType.TF_EXAMPLE:
+ if input_type == input_reader_pb2.InputType.Value('TF_EXAMPLE'):
decoder = tf_example_decoder.TfExampleDecoder(
load_instance_masks=input_reader_config.load_instance_masks,
load_multiclass_scores=input_reader_config.load_multiclass_scores,
@@ -60,7 +60,7 @@ def build(input_reader_config):
num_keypoints=input_reader_config.num_keypoints,
expand_hierarchy_labels=input_reader_config.expand_labels_hierarchy)
return decoder
- elif input_type == input_reader_pb2.InputType.TF_SEQUENCE_EXAMPLE:
+ elif input_type == input_reader_pb2.InputType.Value('TF_SEQUENCE_EXAMPLE'):
decoder = tf_sequence_example_decoder.TfSequenceExampleDecoder(
label_map_proto_file=label_map_proto_file,
load_context_features=input_reader_config.load_context_features)
diff --git a/research/object_detection/builders/decoder_builder_test.py b/research/object_detection/builders/decoder_builder_test.py
index 767c108e94ae7840f9d3175e360dd2bcaa99dbbf..d45285fd19f7648ab4d9365b155ba35a2ce0d3ed 100644
--- a/research/object_detection/builders/decoder_builder_test.py
+++ b/research/object_detection/builders/decoder_builder_test.py
@@ -29,6 +29,7 @@ from object_detection.core import standard_fields as fields
from object_detection.dataset_tools import seq_example_util
from object_detection.protos import input_reader_pb2
from object_detection.utils import dataset_util
+from object_detection.utils import test_case
def _get_labelmap_path():
@@ -38,17 +39,20 @@ def _get_labelmap_path():
'pet_label_map.pbtxt')
-class DecoderBuilderTest(tf.test.TestCase):
+class DecoderBuilderTest(test_case.TestCase):
def _make_serialized_tf_example(self, has_additional_channels=False):
- image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
- additional_channels_tensor = np.random.randint(
+ image_tensor_np = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
+ additional_channels_tensor_np = np.random.randint(
255, size=(4, 5, 1)).astype(np.uint8)
flat_mask = (4 * 5) * [1.0]
- with self.test_session():
- encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
- encoded_additional_channels_jpeg = tf.image.encode_jpeg(
- tf.constant(additional_channels_tensor)).eval()
+ def graph_fn(image_tensor):
+ encoded_jpeg = tf.image.encode_jpeg(image_tensor)
+ return encoded_jpeg
+ encoded_jpeg = self.execute_cpu(graph_fn, [image_tensor_np])
+ encoded_additional_channels_jpeg = self.execute_cpu(
+ graph_fn, [additional_channels_tensor_np])
+
features = {
'image/source_id': dataset_util.bytes_feature('0'.encode()),
'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
@@ -71,46 +75,45 @@ class DecoderBuilderTest(tf.test.TestCase):
def _make_random_serialized_jpeg_images(self, num_frames, image_height,
image_width):
- images = tf.cast(tf.random.uniform(
- [num_frames, image_height, image_width, 3],
- maxval=256,
- dtype=tf.int32), dtype=tf.uint8)
- images_list = tf.unstack(images, axis=0)
- encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list]
- with tf.Session() as sess:
- encoded_images = sess.run(encoded_images_list)
- return encoded_images
+ def graph_fn():
+ images = tf.cast(tf.random.uniform(
+ [num_frames, image_height, image_width, 3],
+ maxval=256,
+ dtype=tf.int32), dtype=tf.uint8)
+ images_list = tf.unstack(images, axis=0)
+ encoded_images = [tf.io.encode_jpeg(image) for image in images_list]
+ return encoded_images
+ return self.execute_cpu(graph_fn, [])
def _make_serialized_tf_sequence_example(self):
num_frames = 4
image_height = 20
image_width = 30
image_source_ids = [str(i) for i in range(num_frames)]
- with self.test_session():
- encoded_images = self._make_random_serialized_jpeg_images(
- num_frames, image_height, image_width)
- sequence_example_serialized = seq_example_util.make_sequence_example(
- dataset_name='video_dataset',
- video_id='video',
- encoded_images=encoded_images,
- image_height=image_height,
- image_width=image_width,
- image_source_ids=image_source_ids,
- image_format='JPEG',
- is_annotated=[[1], [1], [1], [1]],
- bboxes=[
- [[]], # Frame 0.
- [[0., 0., 1., 1.]], # Frame 1.
- [[0., 0., 1., 1.],
- [0.1, 0.1, 0.2, 0.2]], # Frame 2.
- [[]], # Frame 3.
- ],
- label_strings=[
- [], # Frame 0.
- ['Abyssinian'], # Frame 1.
- ['Abyssinian', 'american_bulldog'], # Frame 2.
- [], # Frame 3
- ]).SerializeToString()
+ encoded_images = self._make_random_serialized_jpeg_images(
+ num_frames, image_height, image_width)
+ sequence_example_serialized = seq_example_util.make_sequence_example(
+ dataset_name='video_dataset',
+ video_id='video',
+ encoded_images=encoded_images,
+ image_height=image_height,
+ image_width=image_width,
+ image_source_ids=image_source_ids,
+ image_format='JPEG',
+ is_annotated=[[1], [1], [1], [1]],
+ bboxes=[
+ [[]], # Frame 0.
+ [[0., 0., 1., 1.]], # Frame 1.
+ [[0., 0., 1., 1.],
+ [0.1, 0.1, 0.2, 0.2]], # Frame 2.
+ [[]], # Frame 3.
+ ],
+ label_strings=[
+ [], # Frame 0.
+ ['Abyssinian'], # Frame 1.
+ ['Abyssinian', 'american_bulldog'], # Frame 2.
+ [], # Frame 3
+ ]).SerializeToString()
return sequence_example_serialized
def test_build_tf_record_input_reader(self):
@@ -119,21 +122,19 @@ class DecoderBuilderTest(tf.test.TestCase):
text_format.Parse(input_reader_text_proto, input_reader_proto)
decoder = decoder_builder.build(input_reader_proto)
- tensor_dict = decoder.decode(self._make_serialized_tf_example())
-
- with tf.train.MonitoredSession() as sess:
- output_dict = sess.run(tensor_dict)
-
- self.assertNotIn(
- fields.InputDataFields.groundtruth_instance_masks, output_dict)
- self.assertEqual((4, 5, 3), output_dict[fields.InputDataFields.image].shape)
- self.assertAllEqual([2],
- output_dict[fields.InputDataFields.groundtruth_classes])
- self.assertEqual(
- (1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape)
- self.assertAllEqual(
- [0.0, 0.0, 1.0, 1.0],
- output_dict[fields.InputDataFields.groundtruth_boxes][0])
+ serialized_seq_example = self._make_serialized_tf_example()
+ def graph_fn():
+ tensor_dict = decoder.decode(serialized_seq_example)
+ return (tensor_dict[fields.InputDataFields.image],
+ tensor_dict[fields.InputDataFields.groundtruth_classes],
+ tensor_dict[fields.InputDataFields.groundtruth_boxes])
+
+ (image, groundtruth_classes,
+ groundtruth_boxes) = self.execute_cpu(graph_fn, [])
+ self.assertEqual((4, 5, 3), image.shape)
+ self.assertAllEqual([2], groundtruth_classes)
+ self.assertEqual((1, 4), groundtruth_boxes.shape)
+ self.assertAllEqual([0.0, 0.0, 1.0, 1.0], groundtruth_boxes[0])
def test_build_tf_record_input_reader_sequence_example(self):
label_map_path = _get_labelmap_path()
@@ -145,12 +146,16 @@ class DecoderBuilderTest(tf.test.TestCase):
input_reader_proto.label_map_path = label_map_path
text_format.Parse(input_reader_text_proto, input_reader_proto)
- decoder = decoder_builder.build(input_reader_proto)
- tensor_dict = decoder.decode(self._make_serialized_tf_sequence_example())
-
- with tf.train.MonitoredSession() as sess:
- output_dict = sess.run(tensor_dict)
-
+ serialized_seq_example = self._make_serialized_tf_sequence_example()
+ def graph_fn():
+ decoder = decoder_builder.build(input_reader_proto)
+ tensor_dict = decoder.decode(serialized_seq_example)
+ return (tensor_dict[fields.InputDataFields.image],
+ tensor_dict[fields.InputDataFields.groundtruth_classes],
+ tensor_dict[fields.InputDataFields.groundtruth_boxes],
+ tensor_dict[fields.InputDataFields.num_groundtruth_boxes])
+ (actual_image, actual_groundtruth_classes, actual_groundtruth_boxes,
+ actual_num_groundtruth_boxes) = self.execute_cpu(graph_fn, [])
expected_groundtruth_classes = [[-1, -1], [1, -1], [1, 2], [-1, -1]]
expected_groundtruth_boxes = [[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0]],
@@ -158,19 +163,14 @@ class DecoderBuilderTest(tf.test.TestCase):
[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]]
expected_num_groundtruth_boxes = [0, 1, 2, 0]
- self.assertNotIn(
- fields.InputDataFields.groundtruth_instance_masks, output_dict)
# Sequence example images are encoded.
- self.assertEqual((4,), output_dict[fields.InputDataFields.image].shape)
+ self.assertEqual((4,), actual_image.shape)
self.assertAllEqual(expected_groundtruth_classes,
- output_dict[fields.InputDataFields.groundtruth_classes])
- self.assertEqual(
- (4, 2, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape)
+ actual_groundtruth_classes)
self.assertAllClose(expected_groundtruth_boxes,
- output_dict[fields.InputDataFields.groundtruth_boxes])
+ actual_groundtruth_boxes)
self.assertAllClose(
- expected_num_groundtruth_boxes,
- output_dict[fields.InputDataFields.num_groundtruth_boxes])
+ expected_num_groundtruth_boxes, actual_num_groundtruth_boxes)
def test_build_tf_record_input_reader_and_load_instance_masks(self):
input_reader_text_proto = """
@@ -181,14 +181,12 @@ class DecoderBuilderTest(tf.test.TestCase):
text_format.Parse(input_reader_text_proto, input_reader_proto)
decoder = decoder_builder.build(input_reader_proto)
- tensor_dict = decoder.decode(self._make_serialized_tf_example())
-
- with tf.train.MonitoredSession() as sess:
- output_dict = sess.run(tensor_dict)
-
- self.assertAllEqual(
- (1, 4, 5),
- output_dict[fields.InputDataFields.groundtruth_instance_masks].shape)
+ serialized_seq_example = self._make_serialized_tf_example()
+ def graph_fn():
+ tensor_dict = decoder.decode(serialized_seq_example)
+ return tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
+ masks = self.execute_cpu(graph_fn, [])
+ self.assertAllEqual((1, 4, 5), masks.shape)
if __name__ == '__main__':
diff --git a/research/object_detection/builders/graph_rewriter_builder_test.py b/research/object_detection/builders/graph_rewriter_builder_tf1_test.py
similarity index 91%
rename from research/object_detection/builders/graph_rewriter_builder_test.py
rename to research/object_detection/builders/graph_rewriter_builder_tf1_test.py
index 02692ce91089e1f8f6a01fa27267d51ec0c516d1..8af8fe9627bf3041b0499909cc298d1790810753 100644
--- a/research/object_detection/builders/graph_rewriter_builder_test.py
+++ b/research/object_detection/builders/graph_rewriter_builder_tf1_test.py
@@ -13,22 +13,21 @@
# limitations under the License.
# ==============================================================================
"""Tests for graph_rewriter_builder."""
+import unittest
import mock
import tensorflow.compat.v1 as tf
import tf_slim as slim
from object_detection.builders import graph_rewriter_builder
from object_detection.protos import graph_rewriter_pb2
+from object_detection.utils import tf_version
-# pylint: disable=g-import-not-at-top
-try:
- from tensorflow.contrib import quantize as contrib_quantize
-except ImportError:
- # TF 2.0 doesn't ship with contrib.
- pass
-# pylint: enable=g-import-not-at-top
+if tf_version.is_tf1():
+ from tensorflow.contrib import quantize as contrib_quantize # pylint: disable=g-import-not-at-top
+
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class QuantizationBuilderTest(tf.test.TestCase):
def testQuantizationBuilderSetsUpCorrectTrainArguments(self):
diff --git a/research/object_detection/builders/hyperparams_builder.py b/research/object_detection/builders/hyperparams_builder.py
index f34e1112a81bd9dad1c30ba39af6b1a20a252d2c..90aef43ac1bd92fb86dbd730cdb0420858572c18 100644
--- a/research/object_detection/builders/hyperparams_builder.py
+++ b/research/object_detection/builders/hyperparams_builder.py
@@ -64,6 +64,7 @@ class KerasLayerHyperparams(object):
self._batch_norm_params = _build_keras_batch_norm_params(
hyperparams_config.batch_norm)
+ self._force_use_bias = hyperparams_config.force_use_bias
self._activation_fn = _build_activation_fn(hyperparams_config.activation)
# TODO(kaftan): Unclear if these kwargs apply to separable & depthwise conv
# (Those might use depthwise_* instead of kernel_*)
@@ -80,6 +81,13 @@ class KerasLayerHyperparams(object):
def use_batch_norm(self):
return self._batch_norm_params is not None
+ def force_use_bias(self):
+ return self._force_use_bias
+
+ def use_bias(self):
+ return (self._force_use_bias or not
+ (self.use_batch_norm() and self.batch_norm_params()['center']))
+
def batch_norm_params(self, **overrides):
"""Returns a dict containing batchnorm layer construction hyperparameters.
@@ -168,10 +176,7 @@ class KerasLayerHyperparams(object):
new_params['activation'] = None
if include_activation:
new_params['activation'] = self._activation_fn
- if self.use_batch_norm() and self.batch_norm_params()['center']:
- new_params['use_bias'] = False
- else:
- new_params['use_bias'] = True
+ new_params['use_bias'] = self.use_bias()
new_params.update(**overrides)
return new_params
@@ -210,6 +215,10 @@ def build(hyperparams_config, is_training):
raise ValueError('hyperparams_config not of type '
'hyperparams_pb.Hyperparams.')
+ if hyperparams_config.force_use_bias:
+ raise ValueError('Hyperparams force_use_bias only supported by '
+ 'KerasLayerHyperparams.')
+
normalizer_fn = None
batch_norm_params = None
if hyperparams_config.HasField('batch_norm'):
diff --git a/research/object_detection/builders/hyperparams_builder_test.py b/research/object_detection/builders/hyperparams_builder_test.py
index 0f92f7d75343f8423293be0239651820d433fcb1..e48ac23bcb547c9729038b901a9612d3712d69cb 100644
--- a/research/object_detection/builders/hyperparams_builder_test.py
+++ b/research/object_detection/builders/hyperparams_builder_test.py
@@ -16,6 +16,7 @@
"""Tests object_detection.core.hyperparams_builder."""
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
import tf_slim as slim
@@ -24,12 +25,14 @@ from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.core import freezable_batch_norm
from object_detection.protos import hyperparams_pb2
+from object_detection.utils import tf_version
def _get_scope_key(op):
return getattr(op, '_key_op', str(op))
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only tests.')
class HyperparamsBuilderTest(tf.test.TestCase):
def test_default_arg_scope_has_conv2d_op(self):
@@ -149,29 +152,6 @@ class HyperparamsBuilderTest(tf.test.TestCase):
result = sess.run(regularizer(tf.constant(weights)))
self.assertAllClose(np.abs(weights).sum() * 0.5, result)
- def test_return_l1_regularized_weights_keras(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- weight: 0.5
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- keras_config = hyperparams_builder.KerasLayerHyperparams(
- conv_hyperparams_proto)
-
- regularizer = keras_config.params()['kernel_regularizer']
- weights = np.array([1., -1, 4., 2.])
- with self.test_session() as sess:
- result = sess.run(regularizer(tf.constant(weights)))
- self.assertAllClose(np.abs(weights).sum() * 0.5, result)
-
def test_return_l2_regularizer_weights(self):
conv_hyperparams_text_proto = """
regularizer {
@@ -197,30 +177,39 @@ class HyperparamsBuilderTest(tf.test.TestCase):
result = sess.run(regularizer(tf.constant(weights)))
self.assertAllClose(np.power(weights, 2).sum() / 2.0 * 0.42, result)
- def test_return_l2_regularizer_weights_keras(self):
+ def test_return_non_default_batch_norm_params_with_train_during_train(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
- weight: 0.42
}
}
initializer {
truncated_normal_initializer {
}
}
+ batch_norm {
+ decay: 0.7
+ center: false
+ scale: true
+ epsilon: 0.03
+ train: true
+ }
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- keras_config = hyperparams_builder.KerasLayerHyperparams(
- conv_hyperparams_proto)
-
- regularizer = keras_config.params()['kernel_regularizer']
- weights = np.array([1., -1, 4., 2.])
- with self.test_session() as sess:
- result = sess.run(regularizer(tf.constant(weights)))
- self.assertAllClose(np.power(weights, 2).sum() / 2.0 * 0.42, result)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
+ batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
+ self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
+ self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
+ self.assertFalse(batch_norm_params['center'])
+ self.assertTrue(batch_norm_params['scale'])
+ self.assertTrue(batch_norm_params['is_training'])
- def test_return_non_default_batch_norm_params_with_train_during_train(self):
+ def test_return_batch_norm_params_with_notrain_during_eval(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
@@ -241,7 +230,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
+ is_training=False)
scope = scope_fn()
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
@@ -250,10 +239,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
self.assertFalse(batch_norm_params['center'])
self.assertTrue(batch_norm_params['scale'])
- self.assertTrue(batch_norm_params['is_training'])
+ self.assertFalse(batch_norm_params['is_training'])
- def test_return_non_default_batch_norm_params_keras(
- self):
+ def test_return_batch_norm_params_with_notrain_when_train_is_false(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
@@ -268,26 +256,43 @@ class HyperparamsBuilderTest(tf.test.TestCase):
center: false
scale: true
epsilon: 0.03
+ train: false
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- keras_config = hyperparams_builder.KerasLayerHyperparams(
- conv_hyperparams_proto)
-
- self.assertTrue(keras_config.use_batch_norm())
- batch_norm_params = keras_config.batch_norm_params()
- self.assertAlmostEqual(batch_norm_params['momentum'], 0.7)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
+ batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
+ self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
self.assertFalse(batch_norm_params['center'])
self.assertTrue(batch_norm_params['scale'])
+ self.assertFalse(batch_norm_params['is_training'])
- batch_norm_layer = keras_config.build_batch_norm()
- self.assertIsInstance(batch_norm_layer,
- freezable_batch_norm.FreezableBatchNorm)
+ def test_do_not_use_batch_norm_if_default(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ self.assertEqual(conv_scope_arguments['normalizer_fn'], None)
- def test_return_non_default_batch_norm_params_keras_override(
- self):
+ def test_use_none_activation(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
@@ -297,26 +302,57 @@ class HyperparamsBuilderTest(tf.test.TestCase):
truncated_normal_initializer {
}
}
- batch_norm {
- decay: 0.7
- center: false
- scale: true
- epsilon: 0.03
+ activation: NONE
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ self.assertEqual(conv_scope_arguments['activation_fn'], None)
+
+ def test_use_relu_activation(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
}
+ activation: RELU
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- keras_config = hyperparams_builder.KerasLayerHyperparams(
- conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu)
- self.assertTrue(keras_config.use_batch_norm())
- batch_norm_params = keras_config.batch_norm_params(momentum=0.4)
- self.assertAlmostEqual(batch_norm_params['momentum'], 0.4)
- self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
- self.assertFalse(batch_norm_params['center'])
- self.assertTrue(batch_norm_params['scale'])
+ def test_use_relu_6_activation(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ activation: RELU_6
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu6)
- def test_return_batch_norm_params_with_notrain_during_eval(self):
+ def test_use_swish_activation(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
@@ -326,44 +362,89 @@ class HyperparamsBuilderTest(tf.test.TestCase):
truncated_normal_initializer {
}
}
- batch_norm {
- decay: 0.7
- center: false
- scale: true
- epsilon: 0.03
- train: true
+ activation: SWISH
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.swish)
+
+ def _assert_variance_in_range(self, initializer, shape, variance,
+ tol=1e-2):
+ with tf.Graph().as_default() as g:
+ with self.test_session(graph=g) as sess:
+ var = tf.get_variable(
+ name='test',
+ shape=shape,
+ dtype=tf.float32,
+ initializer=initializer)
+ sess.run(tf.global_variables_initializer())
+ values = sess.run(var)
+ self.assertAllClose(np.var(values), variance, tol, tol)
+
+ def test_variance_in_range_with_variance_scaling_initializer_fan_in(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ variance_scaling_initializer {
+ factor: 2.0
+ mode: FAN_IN
+ uniform: false
+ }
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=False)
+ is_training=True)
scope = scope_fn()
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
- batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
- self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
- self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
- self.assertFalse(batch_norm_params['center'])
- self.assertTrue(batch_norm_params['scale'])
- self.assertFalse(batch_norm_params['is_training'])
+ initializer = conv_scope_arguments['weights_initializer']
+ self._assert_variance_in_range(initializer, shape=[100, 40],
+ variance=2. / 100.)
- def test_return_batch_norm_params_with_notrain_when_train_is_false(self):
+ def test_variance_in_range_with_variance_scaling_initializer_fan_out(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
- truncated_normal_initializer {
+ variance_scaling_initializer {
+ factor: 2.0
+ mode: FAN_OUT
+ uniform: false
}
}
- batch_norm {
- decay: 0.7
- center: false
- scale: true
- epsilon: 0.03
- train: false
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ initializer = conv_scope_arguments['weights_initializer']
+ self._assert_variance_in_range(initializer, shape=[100, 40],
+ variance=2. / 40.)
+
+ def test_variance_in_range_with_variance_scaling_initializer_fan_avg(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ variance_scaling_initializer {
+ factor: 2.0
+ mode: FAN_AVG
+ uniform: false
+ }
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
@@ -372,15 +453,35 @@ class HyperparamsBuilderTest(tf.test.TestCase):
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
- batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
- self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
- self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
- self.assertFalse(batch_norm_params['center'])
- self.assertTrue(batch_norm_params['scale'])
- self.assertFalse(batch_norm_params['is_training'])
+ initializer = conv_scope_arguments['weights_initializer']
+ self._assert_variance_in_range(initializer, shape=[100, 40],
+ variance=4. / (100. + 40.))
- def test_do_not_use_batch_norm_if_default(self):
+ def test_variance_in_range_with_variance_scaling_initializer_uniform(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ variance_scaling_initializer {
+ factor: 2.0
+ mode: FAN_IN
+ uniform: true
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ initializer = conv_scope_arguments['weights_initializer']
+ self._assert_variance_in_range(initializer, shape=[100, 40],
+ variance=2. / 100.)
+
+ def test_variance_in_range_with_truncated_normal_initializer(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
@@ -388,6 +489,8 @@ class HyperparamsBuilderTest(tf.test.TestCase):
}
initializer {
truncated_normal_initializer {
+ mean: 0.0
+ stddev: 0.8
}
}
"""
@@ -397,7 +500,149 @@ class HyperparamsBuilderTest(tf.test.TestCase):
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- self.assertEqual(conv_scope_arguments['normalizer_fn'], None)
+ initializer = conv_scope_arguments['weights_initializer']
+ self._assert_variance_in_range(initializer, shape=[100, 40],
+ variance=0.49, tol=1e-1)
+
+ def test_variance_in_range_with_random_normal_initializer(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ random_normal_initializer {
+ mean: 0.0
+ stddev: 0.8
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ initializer = conv_scope_arguments['weights_initializer']
+ self._assert_variance_in_range(initializer, shape=[100, 40],
+ variance=0.64, tol=1e-1)
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only tests.')
+class KerasHyperparamsBuilderTest(tf.test.TestCase):
+
+ def _assert_variance_in_range(self, initializer, shape, variance,
+ tol=1e-2):
+ var = tf.Variable(initializer(shape=shape, dtype=tf.float32))
+ self.assertAllClose(np.var(var.numpy()), variance, tol, tol)
+
+ def test_return_l1_regularized_weights_keras(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l1_regularizer {
+ weight: 0.5
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+
+ regularizer = keras_config.params()['kernel_regularizer']
+ weights = np.array([1., -1, 4., 2.])
+ result = regularizer(tf.constant(weights)).numpy()
+ self.assertAllClose(np.abs(weights).sum() * 0.5, result)
+
+ def test_return_l2_regularizer_weights_keras(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ weight: 0.42
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+
+ regularizer = keras_config.params()['kernel_regularizer']
+ weights = np.array([1., -1, 4., 2.])
+ result = regularizer(tf.constant(weights)).numpy()
+ self.assertAllClose(np.power(weights, 2).sum() / 2.0 * 0.42, result)
+
+ def test_return_non_default_batch_norm_params_keras(
+ self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ batch_norm {
+ decay: 0.7
+ center: false
+ scale: true
+ epsilon: 0.03
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+
+ self.assertTrue(keras_config.use_batch_norm())
+ batch_norm_params = keras_config.batch_norm_params()
+ self.assertAlmostEqual(batch_norm_params['momentum'], 0.7)
+ self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
+ self.assertFalse(batch_norm_params['center'])
+ self.assertTrue(batch_norm_params['scale'])
+
+ batch_norm_layer = keras_config.build_batch_norm()
+ self.assertIsInstance(batch_norm_layer,
+ freezable_batch_norm.FreezableBatchNorm)
+
+ def test_return_non_default_batch_norm_params_keras_override(
+ self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ batch_norm {
+ decay: 0.7
+ center: false
+ scale: true
+ epsilon: 0.03
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+
+ self.assertTrue(keras_config.use_batch_norm())
+ batch_norm_params = keras_config.batch_norm_params(momentum=0.4)
+ self.assertAlmostEqual(batch_norm_params['momentum'], 0.4)
+ self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
+ self.assertFalse(batch_norm_params['center'])
+ self.assertTrue(batch_norm_params['scale'])
def test_do_not_use_batch_norm_if_default_keras(self):
conv_hyperparams_text_proto = """
@@ -422,7 +667,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
self.assertIsInstance(identity_layer,
tf.keras.layers.Lambda)
- def test_use_none_activation(self):
+ def test_do_not_use_bias_if_batch_norm_center_keras(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
@@ -432,17 +677,27 @@ class HyperparamsBuilderTest(tf.test.TestCase):
truncated_normal_initializer {
}
}
- activation: NONE
+ batch_norm {
+ decay: 0.7
+ center: true
+ scale: true
+ epsilon: 0.03
+ train: true
+ }
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- self.assertEqual(conv_scope_arguments['activation_fn'], None)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
- def test_use_none_activation_keras(self):
+ self.assertTrue(keras_config.use_batch_norm())
+ batch_norm_params = keras_config.batch_norm_params()
+ self.assertTrue(batch_norm_params['center'])
+ self.assertTrue(batch_norm_params['scale'])
+ hyperparams = keras_config.params()
+ self.assertFalse(hyperparams['use_bias'])
+
+ def test_force_use_bias_if_batch_norm_center_keras(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
@@ -452,20 +707,28 @@ class HyperparamsBuilderTest(tf.test.TestCase):
truncated_normal_initializer {
}
}
- activation: NONE
+ batch_norm {
+ decay: 0.7
+ center: true
+ scale: true
+ epsilon: 0.03
+ train: true
+ }
+ force_use_bias: true
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
- self.assertEqual(keras_config.params()['activation'], None)
- self.assertEqual(
- keras_config.params(include_activation=True)['activation'], None)
- activation_layer = keras_config.build_activation_layer()
- self.assertIsInstance(activation_layer, tf.keras.layers.Lambda)
- self.assertEqual(activation_layer.function, tf.identity)
- def test_use_relu_activation(self):
+ self.assertTrue(keras_config.use_batch_norm())
+ batch_norm_params = keras_config.batch_norm_params()
+ self.assertTrue(batch_norm_params['center'])
+ self.assertTrue(batch_norm_params['scale'])
+ hyperparams = keras_config.params()
+ self.assertTrue(hyperparams['use_bias'])
+
+ def test_use_none_activation_keras(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
@@ -475,15 +738,18 @@ class HyperparamsBuilderTest(tf.test.TestCase):
truncated_normal_initializer {
}
}
- activation: RELU
+ activation: NONE
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+ self.assertIsNone(keras_config.params()['activation'])
+ self.assertIsNone(
+ keras_config.params(include_activation=True)['activation'])
+ activation_layer = keras_config.build_activation_layer()
+ self.assertIsInstance(activation_layer, tf.keras.layers.Lambda)
+ self.assertEqual(activation_layer.function, tf.identity)
def test_use_relu_activation_keras(self):
conv_hyperparams_text_proto = """
@@ -501,33 +767,13 @@ class HyperparamsBuilderTest(tf.test.TestCase):
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
- self.assertEqual(keras_config.params()['activation'], None)
+ self.assertIsNone(keras_config.params()['activation'])
self.assertEqual(
keras_config.params(include_activation=True)['activation'], tf.nn.relu)
activation_layer = keras_config.build_activation_layer()
self.assertIsInstance(activation_layer, tf.keras.layers.Lambda)
self.assertEqual(activation_layer.function, tf.nn.relu)
- def test_use_relu_6_activation(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- activation: RELU_6
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu6)
-
def test_use_relu_6_activation_keras(self):
conv_hyperparams_text_proto = """
regularizer {
@@ -544,33 +790,13 @@ class HyperparamsBuilderTest(tf.test.TestCase):
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
- self.assertEqual(keras_config.params()['activation'], None)
+ self.assertIsNone(keras_config.params()['activation'])
self.assertEqual(
keras_config.params(include_activation=True)['activation'], tf.nn.relu6)
activation_layer = keras_config.build_activation_layer()
self.assertIsInstance(activation_layer, tf.keras.layers.Lambda)
self.assertEqual(activation_layer.function, tf.nn.relu6)
- def test_use_swish_activation(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- activation: SWISH
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.swish)
-
def test_use_swish_activation_keras(self):
conv_hyperparams_text_proto = """
regularizer {
@@ -587,7 +813,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
- self.assertEqual(keras_config.params()['activation'], None)
+ self.assertIsNone(keras_config.params()['activation'])
self.assertEqual(
keras_config.params(include_activation=True)['activation'], tf.nn.swish)
activation_layer = keras_config.build_activation_layer()
@@ -613,43 +839,6 @@ class HyperparamsBuilderTest(tf.test.TestCase):
new_params = keras_config.params(activation=tf.nn.relu)
self.assertEqual(new_params['activation'], tf.nn.relu)
- def _assert_variance_in_range(self, initializer, shape, variance,
- tol=1e-2):
- with tf.Graph().as_default() as g:
- with self.test_session(graph=g) as sess:
- var = tf.get_variable(
- name='test',
- shape=shape,
- dtype=tf.float32,
- initializer=initializer)
- sess.run(tf.global_variables_initializer())
- values = sess.run(var)
- self.assertAllClose(np.var(values), variance, tol, tol)
-
- def test_variance_in_range_with_variance_scaling_initializer_fan_in(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 2.0
- mode: FAN_IN
- uniform: false
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- initializer = conv_scope_arguments['weights_initializer']
- self._assert_variance_in_range(initializer, shape=[100, 40],
- variance=2. / 100.)
-
def test_variance_in_range_with_variance_scaling_initializer_fan_in_keras(
self):
conv_hyperparams_text_proto = """
@@ -673,30 +862,6 @@ class HyperparamsBuilderTest(tf.test.TestCase):
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=2. / 100.)
- def test_variance_in_range_with_variance_scaling_initializer_fan_out(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 2.0
- mode: FAN_OUT
- uniform: false
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- initializer = conv_scope_arguments['weights_initializer']
- self._assert_variance_in_range(initializer, shape=[100, 40],
- variance=2. / 40.)
-
def test_variance_in_range_with_variance_scaling_initializer_fan_out_keras(
self):
conv_hyperparams_text_proto = """
@@ -720,30 +885,6 @@ class HyperparamsBuilderTest(tf.test.TestCase):
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=2. / 40.)
- def test_variance_in_range_with_variance_scaling_initializer_fan_avg(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 2.0
- mode: FAN_AVG
- uniform: false
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- initializer = conv_scope_arguments['weights_initializer']
- self._assert_variance_in_range(initializer, shape=[100, 40],
- variance=4. / (100. + 40.))
-
def test_variance_in_range_with_variance_scaling_initializer_fan_avg_keras(
self):
conv_hyperparams_text_proto = """
@@ -767,30 +908,6 @@ class HyperparamsBuilderTest(tf.test.TestCase):
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=4. / (100. + 40.))
- def test_variance_in_range_with_variance_scaling_initializer_uniform(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 2.0
- mode: FAN_IN
- uniform: true
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- initializer = conv_scope_arguments['weights_initializer']
- self._assert_variance_in_range(initializer, shape=[100, 40],
- variance=2. / 100.)
-
def test_variance_in_range_with_variance_scaling_initializer_uniform_keras(
self):
conv_hyperparams_text_proto = """
@@ -814,29 +931,6 @@ class HyperparamsBuilderTest(tf.test.TestCase):
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=2. / 100.)
- def test_variance_in_range_with_truncated_normal_initializer(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- mean: 0.0
- stddev: 0.8
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- initializer = conv_scope_arguments['weights_initializer']
- self._assert_variance_in_range(initializer, shape=[100, 40],
- variance=0.49, tol=1e-1)
-
def test_variance_in_range_with_truncated_normal_initializer_keras(self):
conv_hyperparams_text_proto = """
regularizer {
@@ -858,29 +952,6 @@ class HyperparamsBuilderTest(tf.test.TestCase):
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=0.49, tol=1e-1)
- def test_variance_in_range_with_random_normal_initializer(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- random_normal_initializer {
- mean: 0.0
- stddev: 0.8
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- initializer = conv_scope_arguments['weights_initializer']
- self._assert_variance_in_range(initializer, shape=[100, 40],
- variance=0.64, tol=1e-1)
-
def test_variance_in_range_with_random_normal_initializer_keras(self):
conv_hyperparams_text_proto = """
regularizer {
@@ -902,6 +973,5 @@ class HyperparamsBuilderTest(tf.test.TestCase):
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=0.64, tol=1e-1)
-
if __name__ == '__main__':
tf.test.main()
diff --git a/research/object_detection/builders/image_resizer_builder_test.py b/research/object_detection/builders/image_resizer_builder_test.py
index 62ea5dc9b1245589f7b0bf132ec82d1bf8a0c392..dfc456eab1da1ea7952d17be4d14fab9ca8bf9a4 100644
--- a/research/object_detection/builders/image_resizer_builder_test.py
+++ b/research/object_detection/builders/image_resizer_builder_test.py
@@ -18,21 +18,23 @@ import tensorflow.compat.v1 as tf
from google.protobuf import text_format
from object_detection.builders import image_resizer_builder
from object_detection.protos import image_resizer_pb2
+from object_detection.utils import test_case
-class ImageResizerBuilderTest(tf.test.TestCase):
+class ImageResizerBuilderTest(test_case.TestCase):
def _shape_of_resized_random_image_given_text_proto(self, input_shape,
text_proto):
image_resizer_config = image_resizer_pb2.ImageResizer()
text_format.Merge(text_proto, image_resizer_config)
image_resizer_fn = image_resizer_builder.build(image_resizer_config)
- images = tf.cast(
- tf.random_uniform(input_shape, minval=0, maxval=255, dtype=tf.int32),
- dtype=tf.float32)
- resized_images, _ = image_resizer_fn(images)
- with self.test_session() as sess:
- return sess.run(resized_images).shape
+ def graph_fn():
+ images = tf.cast(
+ tf.random_uniform(input_shape, minval=0, maxval=255, dtype=tf.int32),
+ dtype=tf.float32)
+ resized_images, _ = image_resizer_fn(images)
+ return resized_images
+ return self.execute_cpu(graph_fn, []).shape
def test_build_keep_aspect_ratio_resizer_returns_expected_shape(self):
image_resizer_text_proto = """
@@ -125,10 +127,10 @@ class ImageResizerBuilderTest(tf.test.TestCase):
image_resizer_config = image_resizer_pb2.ImageResizer()
text_format.Merge(text_proto, image_resizer_config)
image_resizer_fn = image_resizer_builder.build(image_resizer_config)
- image_placeholder = tf.placeholder(tf.uint8, [1, None, None, 3])
- resized_image, _ = image_resizer_fn(image_placeholder)
- with self.test_session() as sess:
- return sess.run(resized_image, feed_dict={image_placeholder: image})
+ def graph_fn(image):
+ resized_image, _ = image_resizer_fn(image)
+ return resized_image
+ return self.execute_cpu(graph_fn, [image])
def test_fixed_shape_resizer_nearest_neighbor_method(self):
image_resizer_text_proto = """
diff --git a/research/object_detection/builders/input_reader_builder.py b/research/object_detection/builders/input_reader_builder.py
index 0ab9c05b72653dddcec94ca3857928e662cdd9eb..c7755177e70d528984ea425f21fb9afaf11d9eaa 100644
--- a/research/object_detection/builders/input_reader_builder.py
+++ b/research/object_detection/builders/input_reader_builder.py
@@ -29,19 +29,12 @@ from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
+import tf_slim as slim
from object_detection.data_decoders import tf_example_decoder
from object_detection.data_decoders import tf_sequence_example_decoder
from object_detection.protos import input_reader_pb2
-# pylint: disable=g-import-not-at-top
-try:
- import tf_slim as slim
-except ImportError:
- # TF 2.0 doesn't ship with contrib.
- pass
-# pylint: enable=g-import-not-at-top
-
parallel_reader = slim.parallel_reader
@@ -82,14 +75,14 @@ def build(input_reader_config):
if input_reader_config.HasField('label_map_path'):
label_map_proto_file = input_reader_config.label_map_path
input_type = input_reader_config.input_type
- if input_type == input_reader_pb2.InputType.TF_EXAMPLE:
+ if input_type == input_reader_pb2.InputType.Value('TF_EXAMPLE'):
decoder = tf_example_decoder.TfExampleDecoder(
load_instance_masks=input_reader_config.load_instance_masks,
instance_mask_type=input_reader_config.mask_type,
label_map_proto_file=label_map_proto_file,
load_context_features=input_reader_config.load_context_features)
return decoder.decode(string_tensor)
- elif input_type == input_reader_pb2.InputType.TF_SEQUENCE_EXAMPLE:
+ elif input_type == input_reader_pb2.InputType.Value('TF_SEQUENCE_EXAMPLE'):
decoder = tf_sequence_example_decoder.TfSequenceExampleDecoder(
label_map_proto_file=label_map_proto_file,
load_context_features=input_reader_config.load_context_features)
diff --git a/research/object_detection/builders/input_reader_builder_test.py b/research/object_detection/builders/input_reader_builder_tf1_test.py
similarity index 98%
rename from research/object_detection/builders/input_reader_builder_test.py
rename to research/object_detection/builders/input_reader_builder_tf1_test.py
index 14a8eb819304e2f9db655067d0d817a22386f4ba..6049128b03f55501ddcd2a1b3334821800d826a1 100644
--- a/research/object_detection/builders/input_reader_builder_test.py
+++ b/research/object_detection/builders/input_reader_builder_tf1_test.py
@@ -16,6 +16,7 @@
"""Tests for input_reader_builder."""
import os
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
@@ -26,6 +27,7 @@ from object_detection.core import standard_fields as fields
from object_detection.dataset_tools import seq_example_util
from object_detection.protos import input_reader_pb2
from object_detection.utils import dataset_util
+from object_detection.utils import tf_version
def _get_labelmap_path():
@@ -35,6 +37,7 @@ def _get_labelmap_path():
'pet_label_map.pbtxt')
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class InputReaderBuilderTest(tf.test.TestCase):
def create_tf_record(self):
diff --git a/research/object_detection/builders/matcher_builder.py b/research/object_detection/builders/matcher_builder.py
index d334f435372984eb78265d72b2bcdf63c45bde5b..086f74b5c45f81cd555207f0ad593a52a0c0f307 100644
--- a/research/object_detection/builders/matcher_builder.py
+++ b/research/object_detection/builders/matcher_builder.py
@@ -16,8 +16,11 @@
"""A function to build an object detection matcher from configuration."""
from object_detection.matchers import argmax_matcher
-from object_detection.matchers import bipartite_matcher
from object_detection.protos import matcher_pb2
+from object_detection.utils import tf_version
+
+if tf_version.is_tf1():
+ from object_detection.matchers import bipartite_matcher # pylint: disable=g-import-not-at-top
def build(matcher_config):
@@ -48,6 +51,8 @@ def build(matcher_config):
force_match_for_each_row=matcher.force_match_for_each_row,
use_matmul_gather=matcher.use_matmul_gather)
if matcher_config.WhichOneof('matcher_oneof') == 'bipartite_matcher':
+ if tf_version.is_tf2():
+ raise ValueError('bipartite_matcher is not supported in TF 2.X')
matcher = matcher_config.bipartite_matcher
return bipartite_matcher.GreedyBipartiteMatcher(matcher.use_matmul_gather)
raise ValueError('Empty matcher.')
diff --git a/research/object_detection/builders/matcher_builder_test.py b/research/object_detection/builders/matcher_builder_test.py
index 451e1f9cc468fd828c453cad59b74ce67d63d092..cfa55ff94fb7a12dbf78787ffbbf762d1890e3bc 100644
--- a/research/object_detection/builders/matcher_builder_test.py
+++ b/research/object_detection/builders/matcher_builder_test.py
@@ -20,11 +20,15 @@ import tensorflow.compat.v1 as tf
from google.protobuf import text_format
from object_detection.builders import matcher_builder
from object_detection.matchers import argmax_matcher
-from object_detection.matchers import bipartite_matcher
from object_detection.protos import matcher_pb2
+from object_detection.utils import test_case
+from object_detection.utils import tf_version
+if tf_version.is_tf1():
+ from object_detection.matchers import bipartite_matcher # pylint: disable=g-import-not-at-top
-class MatcherBuilderTest(tf.test.TestCase):
+
+class MatcherBuilderTest(test_case.TestCase):
def test_build_arg_max_matcher_with_defaults(self):
matcher_text_proto = """
@@ -34,7 +38,7 @@ class MatcherBuilderTest(tf.test.TestCase):
matcher_proto = matcher_pb2.Matcher()
text_format.Merge(matcher_text_proto, matcher_proto)
matcher_object = matcher_builder.build(matcher_proto)
- self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher))
+ self.assertIsInstance(matcher_object, argmax_matcher.ArgMaxMatcher)
self.assertAlmostEqual(matcher_object._matched_threshold, 0.5)
self.assertAlmostEqual(matcher_object._unmatched_threshold, 0.5)
self.assertTrue(matcher_object._negatives_lower_than_unmatched)
@@ -49,7 +53,7 @@ class MatcherBuilderTest(tf.test.TestCase):
matcher_proto = matcher_pb2.Matcher()
text_format.Merge(matcher_text_proto, matcher_proto)
matcher_object = matcher_builder.build(matcher_proto)
- self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher))
+ self.assertIsInstance(matcher_object, argmax_matcher.ArgMaxMatcher)
self.assertEqual(matcher_object._matched_threshold, None)
self.assertEqual(matcher_object._unmatched_threshold, None)
self.assertTrue(matcher_object._negatives_lower_than_unmatched)
@@ -68,7 +72,7 @@ class MatcherBuilderTest(tf.test.TestCase):
matcher_proto = matcher_pb2.Matcher()
text_format.Merge(matcher_text_proto, matcher_proto)
matcher_object = matcher_builder.build(matcher_proto)
- self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher))
+ self.assertIsInstance(matcher_object, argmax_matcher.ArgMaxMatcher)
self.assertAlmostEqual(matcher_object._matched_threshold, 0.7)
self.assertAlmostEqual(matcher_object._unmatched_threshold, 0.3)
self.assertFalse(matcher_object._negatives_lower_than_unmatched)
@@ -76,6 +80,8 @@ class MatcherBuilderTest(tf.test.TestCase):
self.assertTrue(matcher_object._use_matmul_gather)
def test_build_bipartite_matcher(self):
+ if tf_version.is_tf2():
+ self.skipTest('BipartiteMatcher unsupported in TF 2.X. Skipping.')
matcher_text_proto = """
bipartite_matcher {
}
@@ -83,8 +89,8 @@ class MatcherBuilderTest(tf.test.TestCase):
matcher_proto = matcher_pb2.Matcher()
text_format.Merge(matcher_text_proto, matcher_proto)
matcher_object = matcher_builder.build(matcher_proto)
- self.assertTrue(
- isinstance(matcher_object, bipartite_matcher.GreedyBipartiteMatcher))
+ self.assertIsInstance(matcher_object,
+ bipartite_matcher.GreedyBipartiteMatcher)
def test_raise_error_on_empty_matcher(self):
matcher_text_proto = """
diff --git a/research/object_detection/builders/model_builder.py b/research/object_detection/builders/model_builder.py
index 481cd694bb9916942b472a37427033855155b8d9..b3d41e3fce0adef4a97835380777338784af5995 100644
--- a/research/object_detection/builders/model_builder.py
+++ b/research/object_detection/builders/model_builder.py
@@ -28,6 +28,8 @@ from object_detection.builders import region_similarity_calculator_builder as si
from object_detection.core import balanced_positive_negative_sampler as sampler
from object_detection.core import post_processing
from object_detection.core import target_assigner
+from object_detection.meta_architectures import center_net_meta_arch
+from object_detection.meta_architectures import context_rcnn_meta_arch
from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.meta_architectures import rfcn_meta_arch
from object_detection.meta_architectures import ssd_meta_arch
@@ -47,6 +49,7 @@ from object_detection.utils import tf_version
if tf_version.is_tf2():
from object_detection.models import center_net_hourglass_feature_extractor
from object_detection.models import center_net_resnet_feature_extractor
+ from object_detection.models import center_net_resnet_v1_fpn_feature_extractor
from object_detection.models import faster_rcnn_inception_resnet_v2_keras_feature_extractor as frcnn_inc_res_keras
from object_detection.models import faster_rcnn_resnet_keras_feature_extractor as frcnn_resnet_keras
from object_detection.models import ssd_resnet_v1_fpn_keras_feature_extractor as ssd_resnet_v1_fpn_keras
@@ -79,6 +82,7 @@ if tf_version.is_tf1():
from object_detection.models.ssd_mobiledet_feature_extractor import SSDMobileDetCPUFeatureExtractor
from object_detection.models.ssd_mobiledet_feature_extractor import SSDMobileDetDSPFeatureExtractor
from object_detection.models.ssd_mobiledet_feature_extractor import SSDMobileDetEdgeTPUFeatureExtractor
+ from object_detection.models.ssd_mobiledet_feature_extractor import SSDMobileDetGPUFeatureExtractor
from object_detection.models.ssd_pnasnet_feature_extractor import SSDPNASNetFeatureExtractor
from object_detection.predictors import rfcn_box_predictor
# pylint: enable=g-import-not-at-top
@@ -109,8 +113,12 @@ if tf_version.is_tf2():
}
CENTER_NET_EXTRACTOR_FUNCTION_MAP = {
- 'resnet_v2_101': center_net_resnet_feature_extractor.resnet_v2_101,
'resnet_v2_50': center_net_resnet_feature_extractor.resnet_v2_50,
+ 'resnet_v2_101': center_net_resnet_feature_extractor.resnet_v2_101,
+ 'resnet_v1_50_fpn':
+ center_net_resnet_v1_fpn_feature_extractor.resnet_v1_50_fpn,
+ 'resnet_v1_101_fpn':
+ center_net_resnet_v1_fpn_feature_extractor.resnet_v1_101_fpn,
'hourglass_104': center_net_hourglass_feature_extractor.hourglass_104,
}
@@ -160,9 +168,14 @@ if tf_version.is_tf1():
EmbeddedSSDMobileNetV1FeatureExtractor,
'ssd_pnasnet':
SSDPNASNetFeatureExtractor,
- 'ssd_mobiledet_cpu': SSDMobileDetCPUFeatureExtractor,
- 'ssd_mobiledet_dsp': SSDMobileDetDSPFeatureExtractor,
- 'ssd_mobiledet_edgetpu': SSDMobileDetEdgeTPUFeatureExtractor,
+ 'ssd_mobiledet_cpu':
+ SSDMobileDetCPUFeatureExtractor,
+ 'ssd_mobiledet_dsp':
+ SSDMobileDetDSPFeatureExtractor,
+ 'ssd_mobiledet_edgetpu':
+ SSDMobileDetEdgeTPUFeatureExtractor,
+ 'ssd_mobiledet_gpu':
+ SSDMobileDetGPUFeatureExtractor,
}
FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {
@@ -767,7 +780,9 @@ def keypoint_proto_to_params(kp_config, keypoint_map_dict):
unmatched_keypoint_score=kp_config.unmatched_keypoint_score,
box_scale=kp_config.box_scale,
candidate_search_scale=kp_config.candidate_search_scale,
- candidate_ranking_mode=kp_config.candidate_ranking_mode)
+ candidate_ranking_mode=kp_config.candidate_ranking_mode,
+ offset_peak_radius=kp_config.offset_peak_radius,
+ per_keypoint_offset=kp_config.per_keypoint_offset)
def object_detection_proto_to_params(od_config):
diff --git a/research/object_detection/builders/model_builder_tf1_test.py b/research/object_detection/builders/model_builder_tf1_test.py
index a4d2913f52fcbf551da91596ff1077caf02d94f1..083275ac466250220dd532f52288bab9a5a66daf 100644
--- a/research/object_detection/builders/model_builder_tf1_test.py
+++ b/research/object_detection/builders/model_builder_tf1_test.py
@@ -14,16 +14,19 @@
# limitations under the License.
# ==============================================================================
"""Tests for model_builder under TensorFlow 1.X."""
-
+import unittest
from absl.testing import parameterized
import tensorflow.compat.v1 as tf
from object_detection.builders import model_builder
from object_detection.builders import model_builder_test
+from object_detection.meta_architectures import context_rcnn_meta_arch
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.protos import losses_pb2
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class ModelBuilderTF1Test(model_builder_test.ModelBuilderTest):
def default_ssd_feature_extractor(self):
@@ -39,6 +42,14 @@ class ModelBuilderTF1Test(model_builder_test.ModelBuilderTest):
return model_builder.FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP
+ @parameterized.parameters(True, False)
+ def test_create_context_rcnn_from_config_with_params(self, is_training):
+ model_proto = self.create_default_faster_rcnn_model_proto()
+ model_proto.faster_rcnn.context_config.attention_bottleneck_dimension = 10
+ model_proto.faster_rcnn.context_config.attention_temperature = 0.5
+ model = model_builder.build(model_proto, is_training=is_training)
+ self.assertIsInstance(model, context_rcnn_meta_arch.ContextRCNNMetaArch)
+
if __name__ == '__main__':
tf.test.main()
diff --git a/research/object_detection/builders/model_builder_tf2_test.py b/research/object_detection/builders/model_builder_tf2_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..c2cd237292ab8cb534aa760380fb31e7a68f1e43
--- /dev/null
+++ b/research/object_detection/builders/model_builder_tf2_test.py
@@ -0,0 +1,261 @@
+# Lint as: python2, python3
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for model_builder under TensorFlow 2.X."""
+
+import os
+import unittest
+
+import tensorflow.compat.v1 as tf
+
+from google.protobuf import text_format
+from object_detection.builders import model_builder
+from object_detection.builders import model_builder_test
+from object_detection.core import losses
+from object_detection.models import center_net_resnet_feature_extractor
+from object_detection.protos import center_net_pb2
+from object_detection.protos import model_pb2
+from object_detection.utils import tf_version
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class ModelBuilderTF2Test(model_builder_test.ModelBuilderTest):
+
+ def default_ssd_feature_extractor(self):
+ return 'ssd_resnet50_v1_fpn_keras'
+
+ def default_faster_rcnn_feature_extractor(self):
+ return 'faster_rcnn_resnet101_keras'
+
+ def ssd_feature_extractors(self):
+ return model_builder.SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP
+
+ def faster_rcnn_feature_extractors(self):
+ return model_builder.FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP
+
+ def get_fake_label_map_file_path(self):
+ keypoint_spec_text = """
+ item {
+ name: "/m/01g317"
+ id: 1
+ display_name: "person"
+ keypoints {
+ id: 0
+ label: 'nose'
+ }
+ keypoints {
+ id: 1
+ label: 'left_shoulder'
+ }
+ keypoints {
+ id: 2
+ label: 'right_shoulder'
+ }
+ keypoints {
+ id: 3
+ label: 'hip'
+ }
+ }
+ """
+ keypoint_label_map_path = os.path.join(
+ self.get_temp_dir(), 'keypoint_label_map')
+ with tf.gfile.Open(keypoint_label_map_path, 'wb') as f:
+ f.write(keypoint_spec_text)
+ return keypoint_label_map_path
+
+ def get_fake_keypoint_proto(self):
+ task_proto_txt = """
+ task_name: "human_pose"
+ task_loss_weight: 0.9
+ keypoint_regression_loss_weight: 1.0
+ keypoint_heatmap_loss_weight: 0.1
+ keypoint_offset_loss_weight: 0.5
+ heatmap_bias_init: 2.14
+ keypoint_class_name: "/m/01g317"
+ loss {
+ classification_loss {
+ penalty_reduced_logistic_focal_loss {
+ alpha: 3.0
+ beta: 4.0
+ }
+ }
+ localization_loss {
+ l1_localization_loss {
+ }
+ }
+ }
+ keypoint_label_to_std {
+ key: "nose"
+ value: 0.3
+ }
+ keypoint_label_to_std {
+ key: "hip"
+ value: 0.0
+ }
+ keypoint_candidate_score_threshold: 0.3
+ num_candidates_per_keypoint: 12
+ peak_max_pool_kernel_size: 5
+ unmatched_keypoint_score: 0.05
+ box_scale: 1.7
+ candidate_search_scale: 0.2
+ candidate_ranking_mode: "score_distance_ratio"
+ offset_peak_radius: 3
+ per_keypoint_offset: true
+ """
+ config = text_format.Merge(task_proto_txt,
+ center_net_pb2.CenterNet.KeypointEstimation())
+ return config
+
+ def get_fake_object_center_proto(self):
+ proto_txt = """
+ object_center_loss_weight: 0.5
+ heatmap_bias_init: 3.14
+ min_box_overlap_iou: 0.2
+ max_box_predictions: 15
+ classification_loss {
+ penalty_reduced_logistic_focal_loss {
+ alpha: 3.0
+ beta: 4.0
+ }
+ }
+ """
+ return text_format.Merge(proto_txt,
+ center_net_pb2.CenterNet.ObjectCenterParams())
+
+ def get_fake_object_detection_proto(self):
+ proto_txt = """
+ task_loss_weight: 0.5
+ offset_loss_weight: 0.1
+ scale_loss_weight: 0.2
+ localization_loss {
+ l1_localization_loss {
+ }
+ }
+ """
+ return text_format.Merge(proto_txt,
+ center_net_pb2.CenterNet.ObjectDetection())
+
+ def get_fake_mask_proto(self):
+ proto_txt = """
+ task_loss_weight: 0.7
+ classification_loss {
+ weighted_softmax {}
+ }
+ mask_height: 8
+ mask_width: 8
+ score_threshold: 0.7
+ heatmap_bias_init: -2.0
+ """
+ return text_format.Merge(proto_txt,
+ center_net_pb2.CenterNet.MaskEstimation())
+
+ def test_create_center_net_model(self):
+ """Test building a CenterNet model from proto txt.
+
+ Assembles a DetectionModel config from an inline text proto plus the fake
+ object center, detection, keypoint and mask protos, builds the model with
+ is_training=True, and checks that the configured values show up on the
+ built model's parameter objects.
+ """
+ proto_txt = """
+ center_net {
+ num_classes: 10
+ feature_extractor {
+ type: "resnet_v2_101"
+ channel_stds: [4, 5, 6]
+ bgr_ordering: true
+ }
+ image_resizer {
+ keep_aspect_ratio_resizer {
+ min_dimension: 512
+ max_dimension: 512
+ pad_to_max_dimension: true
+ }
+ }
+ }
+ """
+ # Set up the configuration proto.
+ config = text_format.Merge(proto_txt, model_pb2.DetectionModel())
+ config.center_net.object_center_params.CopyFrom(
+ self.get_fake_object_center_proto())
+ config.center_net.object_detection_task.CopyFrom(
+ self.get_fake_object_detection_proto())
+ # keypoint_estimation_task is appended to (not CopyFrom'd), i.e. it is
+ # treated as a repeated field here.
+ config.center_net.keypoint_estimation_task.append(
+ self.get_fake_keypoint_proto())
+ config.center_net.keypoint_label_map_path = (
+ self.get_fake_label_map_file_path())
+ config.center_net.mask_estimation_task.CopyFrom(
+ self.get_fake_mask_proto())
+
+ # Build the model from the configuration.
+ model = model_builder.build(config, is_training=True)
+
+ # Check object center related parameters.
+ self.assertEqual(model._num_classes, 10)
+ self.assertIsInstance(model._center_params.classification_loss,
+ losses.PenaltyReducedLogisticFocalLoss)
+ self.assertEqual(model._center_params.classification_loss._alpha, 3.0)
+ self.assertEqual(model._center_params.classification_loss._beta, 4.0)
+ self.assertAlmostEqual(model._center_params.min_box_overlap_iou, 0.2)
+ self.assertAlmostEqual(
+ model._center_params.heatmap_bias_init, 3.14, places=4)
+ self.assertEqual(model._center_params.max_box_predictions, 15)
+
+ # Check object detection related parameters.
+ self.assertAlmostEqual(model._od_params.offset_loss_weight, 0.1)
+ self.assertAlmostEqual(model._od_params.scale_loss_weight, 0.2)
+ self.assertAlmostEqual(model._od_params.task_loss_weight, 0.5)
+ self.assertIsInstance(model._od_params.localization_loss,
+ losses.L1LocalizationLoss)
+
+ # Check keypoint estimation related parameters.
+ # The params are looked up by the task_name set in the fake keypoint proto.
+ kp_params = model._kp_params_dict['human_pose']
+ self.assertAlmostEqual(kp_params.task_loss_weight, 0.9)
+ self.assertAlmostEqual(kp_params.keypoint_regression_loss_weight, 1.0)
+ self.assertAlmostEqual(kp_params.keypoint_offset_loss_weight, 0.5)
+ self.assertAlmostEqual(kp_params.heatmap_bias_init, 2.14, places=4)
+ self.assertEqual(kp_params.classification_loss._alpha, 3.0)
+ # NOTE(review): indices and labels presumably come from the label map file
+ # referenced by keypoint_label_map_path -- confirm against
+ # get_fake_label_map_file_path.
+ self.assertEqual(kp_params.keypoint_indices, [0, 1, 2, 3])
+ self.assertEqual(kp_params.keypoint_labels,
+ ['nose', 'left_shoulder', 'right_shoulder', 'hip'])
+ # Only 'nose' (0.3) and 'hip' (0.0) have explicit keypoint_label_to_std
+ # entries in the fake keypoint proto; the two shoulder labels are expected
+ # to come back as 1.0.
+ self.assertAllClose(kp_params.keypoint_std_dev, [0.3, 1.0, 1.0, 0.0])
+ self.assertEqual(kp_params.classification_loss._beta, 4.0)
+ self.assertIsInstance(kp_params.localization_loss,
+ losses.L1LocalizationLoss)
+ self.assertAlmostEqual(kp_params.keypoint_candidate_score_threshold, 0.3)
+ self.assertEqual(kp_params.num_candidates_per_keypoint, 12)
+ self.assertEqual(kp_params.peak_max_pool_kernel_size, 5)
+ self.assertAlmostEqual(kp_params.unmatched_keypoint_score, 0.05)
+ self.assertAlmostEqual(kp_params.box_scale, 1.7)
+ self.assertAlmostEqual(kp_params.candidate_search_scale, 0.2)
+ self.assertEqual(kp_params.candidate_ranking_mode, 'score_distance_ratio')
+ self.assertEqual(kp_params.offset_peak_radius, 3)
+ self.assertEqual(kp_params.per_keypoint_offset, True)
+
+ # Check mask related parameters.
+ self.assertAlmostEqual(model._mask_params.task_loss_weight, 0.7)
+ self.assertIsInstance(model._mask_params.classification_loss,
+ losses.WeightedSoftmaxClassificationLoss)
+ self.assertEqual(model._mask_params.mask_height, 8)
+ self.assertEqual(model._mask_params.mask_width, 8)
+ self.assertAlmostEqual(model._mask_params.score_threshold, 0.7)
+ self.assertAlmostEqual(
+ model._mask_params.heatmap_bias_init, -2.0, places=4)
+
+ # Check feature extractor parameters.
+ self.assertIsInstance(
+ model._feature_extractor,
+ center_net_resnet_feature_extractor.CenterNetResnetFeatureExtractor)
+ # channel_means is not set in the feature_extractor proto above, so the
+ # test expects all-zero means; channel_stds and bgr_ordering are set.
+ self.assertAllClose(model._feature_extractor._channel_means, [0, 0, 0])
+ self.assertAllClose(model._feature_extractor._channel_stds, [4, 5, 6])
+ self.assertTrue(model._feature_extractor._bgr_ordering)
+
+
+if __name__ == '__main__':
+ # Hand control to the TensorFlow test runner when executed as a script.
+ tf.test.main()
diff --git a/research/object_detection/builders/optimizer_builder.py b/research/object_detection/builders/optimizer_builder.py
index 548b5cdcf2cc15f53379377df9bc195c1283f9fd..d602bad1292e222b5cbc532a873299dd918ef011 100644
--- a/research/object_detection/builders/optimizer_builder.py
+++ b/research/object_detection/builders/optimizer_builder.py
@@ -17,10 +17,13 @@
import tensorflow.compat.v1 as tf
-
-from tensorflow.contrib import opt as tf_opt
from object_detection.utils import learning_schedules
+# TF 2.0 doesn't ship with contrib, so this import is optional. Catch only
+# ImportError (which also covers ModuleNotFoundError) so that unrelated
+# failures and KeyboardInterrupt/SystemExit are not silently swallowed.
+# NOTE(review): tf_opt stays undefined when the import fails; callers that
+# need it are presumably TF1-only -- confirm.
+try:
+  from tensorflow.contrib import opt as tf_opt  # pylint: disable=g-import-not-at-top
+except ImportError:
+  pass
+
def build_optimizers_tf_v1(optimizer_config, global_step=None):
"""Create a TF v1 compatible optimizer based on config.
diff --git a/research/object_detection/builders/optimizer_builder_tf1_test.py b/research/object_detection/builders/optimizer_builder_tf1_test.py
index 9a6d1e404f94181ebf68826f1708ba43949eda9b..350ecb84b11b3fbd87e584a5d8d23ae877089078 100644
--- a/research/object_detection/builders/optimizer_builder_tf1_test.py
+++ b/research/object_detection/builders/optimizer_builder_tf1_test.py
@@ -20,6 +20,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import unittest
import six
import tensorflow.compat.v1 as tf
@@ -27,16 +28,15 @@ from google.protobuf import text_format
from object_detection.builders import optimizer_builder
from object_detection.protos import optimizer_pb2
+from object_detection.utils import tf_version
# pylint: disable=g-import-not-at-top
-try:
+if tf_version.is_tf1():
from tensorflow.contrib import opt as contrib_opt
-except ImportError:
- # TF 2.0 doesn't ship with contrib.
- pass
# pylint: enable=g-import-not-at-top
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class LearningRateBuilderTest(tf.test.TestCase):
def testBuildConstantLearningRate(self):
@@ -118,6 +118,7 @@ class LearningRateBuilderTest(tf.test.TestCase):
optimizer_builder._create_learning_rate(learning_rate_proto)
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class OptimizerBuilderTest(tf.test.TestCase):
def testBuildRMSPropOptimizer(self):
diff --git a/research/object_detection/builders/optimizer_builder_tf2_test.py b/research/object_detection/builders/optimizer_builder_tf2_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c555f9a0f4c22b7c27955c92eaa3655c8fae5c6
--- /dev/null
+++ b/research/object_detection/builders/optimizer_builder_tf2_test.py
@@ -0,0 +1,104 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for optimizer_builder."""
+import unittest
+import tensorflow.compat.v1 as tf
+
+from google.protobuf import text_format
+
+from object_detection.builders import optimizer_builder
+from object_detection.protos import optimizer_pb2
+from object_detection.utils import tf_version
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class OptimizerBuilderV2Test(tf.test.TestCase):
+ """Test building optimizers in V2 mode.
+
+ Each test parses a text-format Optimizer proto and passes it to
+ optimizer_builder.build. The whole class is skipped under TF1.
+ """
+
+ def testBuildRMSPropOptimizer(self):
+ """An rms_prop_optimizer config should build a Keras RMSprop optimizer."""
+ optimizer_text_proto = """
+ rms_prop_optimizer: {
+ learning_rate: {
+ exponential_decay_learning_rate {
+ initial_learning_rate: 0.004
+ decay_steps: 800720
+ decay_factor: 0.95
+ }
+ }
+ momentum_optimizer_value: 0.9
+ decay: 0.9
+ epsilon: 1.0
+ }
+ use_moving_average: false
+ """
+ optimizer_proto = optimizer_pb2.Optimizer()
+ text_format.Merge(optimizer_text_proto, optimizer_proto)
+ # build returns a pair; only the optimizer (first element) is checked here.
+ optimizer, _ = optimizer_builder.build(optimizer_proto)
+ self.assertIsInstance(optimizer, tf.keras.optimizers.RMSprop)
+
+ def testBuildMomentumOptimizer(self):
+ """A momentum_optimizer config should build a Keras SGD optimizer."""
+ optimizer_text_proto = """
+ momentum_optimizer: {
+ learning_rate: {
+ constant_learning_rate {
+ learning_rate: 0.001
+ }
+ }
+ momentum_optimizer_value: 0.99
+ }
+ use_moving_average: false
+ """
+ optimizer_proto = optimizer_pb2.Optimizer()
+ text_format.Merge(optimizer_text_proto, optimizer_proto)
+ optimizer, _ = optimizer_builder.build(optimizer_proto)
+ self.assertIsInstance(optimizer, tf.keras.optimizers.SGD)
+
+ def testBuildAdamOptimizer(self):
+ """An adam_optimizer config should build a Keras Adam optimizer."""
+ optimizer_text_proto = """
+ adam_optimizer: {
+ learning_rate: {
+ constant_learning_rate {
+ learning_rate: 0.002
+ }
+ }
+ }
+ use_moving_average: false
+ """
+ optimizer_proto = optimizer_pb2.Optimizer()
+ text_format.Merge(optimizer_text_proto, optimizer_proto)
+ optimizer, _ = optimizer_builder.build(optimizer_proto)
+ self.assertIsInstance(optimizer, tf.keras.optimizers.Adam)
+
+ def testMovingAverageOptimizerUnsupported(self):
+ """Setting use_moving_average to True should make build raise ValueError."""
+ optimizer_text_proto = """
+ adam_optimizer: {
+ learning_rate: {
+ constant_learning_rate {
+ learning_rate: 0.002
+ }
+ }
+ }
+ use_moving_average: True
+ """
+ optimizer_proto = optimizer_pb2.Optimizer()
+ text_format.Merge(optimizer_text_proto, optimizer_proto)
+ with self.assertRaises(ValueError):
+ optimizer_builder.build(optimizer_proto)
+
+
+if __name__ == '__main__':
+ # `tf` here is tensorflow.compat.v1, so V2 behavior is switched on
+ # explicitly before the test runner starts.
+ tf.enable_v2_behavior()
+ tf.test.main()
diff --git a/research/object_detection/builders/post_processing_builder_test.py b/research/object_detection/builders/post_processing_builder_test.py
index d163aa8f2701df742e27aaa7225bf358f671e8e7..b7383c92f99637ebf660d40a6074c65b03abd3c5 100644
--- a/research/object_detection/builders/post_processing_builder_test.py
+++ b/research/object_detection/builders/post_processing_builder_test.py
@@ -19,9 +19,10 @@ import tensorflow.compat.v1 as tf
from google.protobuf import text_format
from object_detection.builders import post_processing_builder
from object_detection.protos import post_processing_pb2
+from object_detection.utils import test_case
-class PostProcessingBuilderTest(tf.test.TestCase):
+class PostProcessingBuilderTest(test_case.TestCase):
def test_build_non_max_suppressor_with_correct_parameters(self):
post_processing_text_proto = """
@@ -77,13 +78,12 @@ class PostProcessingBuilderTest(tf.test.TestCase):
_, score_converter = post_processing_builder.build(
post_processing_config)
self.assertEqual(score_converter.__name__, 'identity_with_logit_scale')
-
- inputs = tf.constant([1, 1], tf.float32)
- outputs = score_converter(inputs)
- with self.test_session() as sess:
- converted_scores = sess.run(outputs)
- expected_converted_scores = sess.run(inputs)
- self.assertAllClose(converted_scores, expected_converted_scores)
+ def graph_fn():
+ inputs = tf.constant([1, 1], tf.float32)
+ outputs = score_converter(inputs)
+ return outputs
+ converted_scores = self.execute_cpu(graph_fn, [])
+ self.assertAllClose(converted_scores, [1, 1])
def test_build_identity_score_converter_with_logit_scale(self):
post_processing_text_proto = """
@@ -95,12 +95,12 @@ class PostProcessingBuilderTest(tf.test.TestCase):
_, score_converter = post_processing_builder.build(post_processing_config)
self.assertEqual(score_converter.__name__, 'identity_with_logit_scale')
- inputs = tf.constant([1, 1], tf.float32)
- outputs = score_converter(inputs)
- with self.test_session() as sess:
- converted_scores = sess.run(outputs)
- expected_converted_scores = sess.run(tf.constant([.5, .5], tf.float32))
- self.assertAllClose(converted_scores, expected_converted_scores)
+ def graph_fn():
+ inputs = tf.constant([1, 1], tf.float32)
+ outputs = score_converter(inputs)
+ return outputs
+ converted_scores = self.execute_cpu(graph_fn, [])
+ self.assertAllClose(converted_scores, [.5, .5])
def test_build_sigmoid_score_converter(self):
post_processing_text_proto = """
@@ -153,12 +153,12 @@ class PostProcessingBuilderTest(tf.test.TestCase):
self.assertEqual(calibrated_score_conversion_fn.__name__,
'calibrate_with_function_approximation')
- input_scores = tf.constant([1, 1], tf.float32)
- outputs = calibrated_score_conversion_fn(input_scores)
- with self.test_session() as sess:
- calibrated_scores = sess.run(outputs)
- expected_calibrated_scores = sess.run(tf.constant([0.5, 0.5], tf.float32))
- self.assertAllClose(calibrated_scores, expected_calibrated_scores)
+ def graph_fn():
+ input_scores = tf.constant([1, 1], tf.float32)
+ outputs = calibrated_score_conversion_fn(input_scores)
+ return outputs
+ calibrated_scores = self.execute_cpu(graph_fn, [])
+ self.assertAllClose(calibrated_scores, [0.5, 0.5])
def test_build_temperature_scaling_calibrator(self):
post_processing_text_proto = """
@@ -174,12 +174,12 @@ class PostProcessingBuilderTest(tf.test.TestCase):
self.assertEqual(calibrated_score_conversion_fn.__name__,
'calibrate_with_temperature_scaling_calibration')
- input_scores = tf.constant([1, 1], tf.float32)
- outputs = calibrated_score_conversion_fn(input_scores)
- with self.test_session() as sess:
- calibrated_scores = sess.run(outputs)
- expected_calibrated_scores = sess.run(tf.constant([0.5, 0.5], tf.float32))
- self.assertAllClose(calibrated_scores, expected_calibrated_scores)
+ def graph_fn():
+ input_scores = tf.constant([1, 1], tf.float32)
+ outputs = calibrated_score_conversion_fn(input_scores)
+ return outputs
+ calibrated_scores = self.execute_cpu(graph_fn, [])
+ self.assertAllClose(calibrated_scores, [0.5, 0.5])
if __name__ == '__main__':
tf.test.main()
diff --git a/research/object_detection/builders/preprocessor_builder.py b/research/object_detection/builders/preprocessor_builder.py
index aa6a6bc963f8635827aa8776252889d6c5f8d0e9..fe59039b10e47740614e58f913de2d80c6bdce0a 100644
--- a/research/object_detection/builders/preprocessor_builder.py
+++ b/research/object_detection/builders/preprocessor_builder.py
@@ -151,6 +151,7 @@ def build(preprocessor_step_config):
{
'keypoint_flip_permutation': tuple(
config.keypoint_flip_permutation) or None,
+ 'probability': config.probability or None,
})
if step_type == 'random_vertical_flip':
@@ -159,10 +160,17 @@ def build(preprocessor_step_config):
{
'keypoint_flip_permutation': tuple(
config.keypoint_flip_permutation) or None,
+ 'probability': config.probability or None,
})
if step_type == 'random_rotation90':
- return (preprocessor.random_rotation90, {})
+ config = preprocessor_step_config.random_rotation90
+ return (preprocessor.random_rotation90,
+ {
+ 'keypoint_rot_permutation': tuple(
+ config.keypoint_rot_permutation) or None,
+ 'probability': config.probability or None,
+ })
if step_type == 'random_crop_image':
config = preprocessor_step_config.random_crop_image
diff --git a/research/object_detection/builders/preprocessor_builder_test.py b/research/object_detection/builders/preprocessor_builder_test.py
index 4c283238c59695dace4d769b0a0fe0941a6a027c..9e90344d0478229fa95355b53ecfa5f876325936 100644
--- a/research/object_detection/builders/preprocessor_builder_test.py
+++ b/research/object_detection/builders/preprocessor_builder_test.py
@@ -65,13 +65,15 @@ class PreprocessorBuilderTest(tf.test.TestCase):
keypoint_flip_permutation: 3
keypoint_flip_permutation: 5
keypoint_flip_permutation: 4
+ probability: 0.5
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_horizontal_flip)
- self.assertEqual(args, {'keypoint_flip_permutation': (1, 0, 2, 3, 5, 4)})
+ self.assertEqual(args, {'keypoint_flip_permutation': (1, 0, 2, 3, 5, 4),
+ 'probability': 0.5})
def test_build_random_vertical_flip(self):
preprocessor_text_proto = """
@@ -82,23 +84,32 @@ class PreprocessorBuilderTest(tf.test.TestCase):
keypoint_flip_permutation: 3
keypoint_flip_permutation: 5
keypoint_flip_permutation: 4
+ probability: 0.5
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_vertical_flip)
- self.assertEqual(args, {'keypoint_flip_permutation': (1, 0, 2, 3, 5, 4)})
+ self.assertEqual(args, {'keypoint_flip_permutation': (1, 0, 2, 3, 5, 4),
+ 'probability': 0.5})
def test_build_random_rotation90(self):
preprocessor_text_proto = """
- random_rotation90 {}
+ random_rotation90 {
+ keypoint_rot_permutation: 3
+ keypoint_rot_permutation: 0
+ keypoint_rot_permutation: 1
+ keypoint_rot_permutation: 2
+ probability: 0.5
+ }
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_rotation90)
- self.assertEqual(args, {})
+ self.assertEqual(args, {'keypoint_rot_permutation': (3, 0, 1, 2),
+ 'probability': 0.5})
def test_build_random_pixel_value_scale(self):
preprocessor_text_proto = """
diff --git a/research/object_detection/colab_tutorials/context_rcnn_tutorial.ipynb b/research/object_detection/colab_tutorials/context_rcnn_tutorial.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..b735cfbcea0e2c5b7e7c44e706e68a59d98b68ec
--- /dev/null
+++ b/research/object_detection/colab_tutorials/context_rcnn_tutorial.ipynb
@@ -0,0 +1,1500 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "name": "context_rcnn_tutorial.ipynb",
+ "provenance": [],
+ "collapsed_sections": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "jZc1kMel3sZP",
+ "colab_type": "text"
+ },
+ "source": [
+ "# Context R-CNN Demo\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "XuHWvdag3_b9",
+ "colab_type": "text"
+ },
+ "source": [
+ " This notebook will walk you step by step through the process of using a pre-trained model to build up a contextual memory bank for a set of images, and then detect objects in those images+context using [Context R-CNN](https://arxiv.org/abs/1912.03538)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "u0e-OOtn4hQ8",
+ "colab_type": "text"
+ },
+ "source": [
+ "# Setup"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "w-UrhxBw4iLA",
+ "colab_type": "text"
+ },
+ "source": [
+ "Important: If you're running on a local machine, be sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md). This notebook includes only what's necessary to run in Colab."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "SAqMxS4V4lqS",
+ "colab_type": "text"
+ },
+ "source": [
+ "### Install"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "BPkovrxF4o8n",
+ "colab_type": "code",
+ "outputId": "e1b8debc-ab73-4b3e-9e44-c86446c7cda1",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 785
+ }
+ },
+ "source": [
+ "!pip install -U --pre tensorflow==\"2.*\"\n",
+ "!pip install tf_slim"
+ ],
+ "execution_count": 1,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Requirement already up-to-date: tensorflow==2.* in /usr/local/lib/python3.6/dist-packages (2.2.0)\n",
+ "Requirement already satisfied, skipping upgrade: scipy==1.4.1; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.*) (1.4.1)\n",
+ "Requirement already satisfied, skipping upgrade: protobuf>=3.8.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.*) (3.10.0)\n",
+ "Requirement already satisfied, skipping upgrade: h5py<2.11.0,>=2.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.*) (2.10.0)\n",
+ "Requirement already satisfied, skipping upgrade: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.*) (3.2.1)\n",
+ "Requirement already satisfied, skipping upgrade: numpy<2.0,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.*) (1.18.5)\n",
+ "Requirement already satisfied, skipping upgrade: wheel>=0.26; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.*) (0.34.2)\n",
+ "Requirement already satisfied, skipping upgrade: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.*) (0.9.0)\n",
+ "Requirement already satisfied, skipping upgrade: tensorflow-estimator<2.3.0,>=2.2.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.*) (2.2.0)\n",
+ "Requirement already satisfied, skipping upgrade: google-pasta>=0.1.8 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.*) (0.2.0)\n",
+ "Requirement already satisfied, skipping upgrade: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.*) (1.29.0)\n",
+ "Requirement already satisfied, skipping upgrade: tensorboard<2.3.0,>=2.2.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.*) (2.2.2)\n",
+ "Requirement already satisfied, skipping upgrade: gast==0.3.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.*) (0.3.3)\n",
+ "Requirement already satisfied, skipping upgrade: astunparse==1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.*) (1.6.3)\n",
+ "Requirement already satisfied, skipping upgrade: keras-preprocessing>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.*) (1.1.2)\n",
+ "Requirement already satisfied, skipping upgrade: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.*) (1.1.0)\n",
+ "Requirement already satisfied, skipping upgrade: six>=1.12.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.*) (1.12.0)\n",
+ "Requirement already satisfied, skipping upgrade: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.*) (1.12.1)\n",
+ "Requirement already satisfied, skipping upgrade: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.8.0->tensorflow==2.*) (47.1.1)\n",
+ "Requirement already satisfied, skipping upgrade: google-auth<2,>=1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow==2.*) (1.7.2)\n",
+ "Requirement already satisfied, skipping upgrade: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow==2.*) (1.0.1)\n",
+ "Requirement already satisfied, skipping upgrade: requests<3,>=2.21.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow==2.*) (2.23.0)\n",
+ "Requirement already satisfied, skipping upgrade: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow==2.*) (0.4.1)\n",
+ "Requirement already satisfied, skipping upgrade: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow==2.*) (3.2.2)\n",
+ "Requirement already satisfied, skipping upgrade: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow==2.*) (1.6.0.post3)\n",
+ "Requirement already satisfied, skipping upgrade: cachetools<3.2,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.3.0,>=2.2.0->tensorflow==2.*) (3.1.1)\n",
+ "Requirement already satisfied, skipping upgrade: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.3.0,>=2.2.0->tensorflow==2.*) (4.0)\n",
+ "Requirement already satisfied, skipping upgrade: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.3.0,>=2.2.0->tensorflow==2.*) (0.2.8)\n",
+ "Requirement already satisfied, skipping upgrade: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.3.0,>=2.2.0->tensorflow==2.*) (2.9)\n",
+ "Requirement already satisfied, skipping upgrade: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.3.0,>=2.2.0->tensorflow==2.*) (3.0.4)\n",
+ "Requirement already satisfied, skipping upgrade: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.3.0,>=2.2.0->tensorflow==2.*) (2020.4.5.1)\n",
+ "Requirement already satisfied, skipping upgrade: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.3.0,>=2.2.0->tensorflow==2.*) (1.24.3)\n",
+ "Requirement already satisfied, skipping upgrade: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.3.0,>=2.2.0->tensorflow==2.*) (1.3.0)\n",
+ "Requirement already satisfied, skipping upgrade: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from markdown>=2.6.8->tensorboard<2.3.0,>=2.2.0->tensorflow==2.*) (1.6.0)\n",
+ "Requirement already satisfied, skipping upgrade: pyasn1>=0.1.3 in /usr/local/lib/python3.6/dist-packages (from rsa<4.1,>=3.1.4->google-auth<2,>=1.6.3->tensorboard<2.3.0,>=2.2.0->tensorflow==2.*) (0.4.8)\n",
+ "Requirement already satisfied, skipping upgrade: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.3.0,>=2.2.0->tensorflow==2.*) (3.1.0)\n",
+ "Requirement already satisfied, skipping upgrade: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->markdown>=2.6.8->tensorboard<2.3.0,>=2.2.0->tensorflow==2.*) (3.1.0)\n",
+ "Collecting tf_slim\n",
+ "\u001b[?25l Downloading https://files.pythonhosted.org/packages/02/97/b0f4a64df018ca018cc035d44f2ef08f91e2e8aa67271f6f19633a015ff7/tf_slim-1.1.0-py2.py3-none-any.whl (352kB)\n",
+ "\u001b[K |████████████████████████████████| 358kB 2.8MB/s \n",
+ "\u001b[?25hRequirement already satisfied: absl-py>=0.2.2 in /usr/local/lib/python3.6/dist-packages (from tf_slim) (0.9.0)\n",
+ "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from absl-py>=0.2.2->tf_slim) (1.12.0)\n",
+ "Installing collected packages: tf-slim\n",
+ "Successfully installed tf-slim-1.1.0\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "zpKF8a2x4tec",
+ "colab_type": "text"
+ },
+ "source": [
+ "Make sure you have `pycocotools` installed"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "owcrp0AW4uCg",
+ "colab_type": "code",
+ "outputId": "001148a8-b0a8-43a1-f6df-225d86d90b8f",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ }
+ },
+ "source": [
+ "!pip install pycocotools"
+ ],
+ "execution_count": 2,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: pycocotools in /usr/local/lib/python3.6/dist-packages (2.0.0)\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "wHFSRVaO4wuq",
+ "colab_type": "text"
+ },
+ "source": [
+ "Get `tensorflow/models` or `cd` to parent directory of the repository."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "E0ZuGKoi4wTn",
+ "colab_type": "code",
+ "outputId": "2b5d93cb-3548-4347-9b76-ce12bea44a56",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 136
+ }
+ },
+ "source": [
+ "import os\n",
+ "import pathlib\n",
+ "\n",
+ "\n",
+ "if \"models\" in pathlib.Path.cwd().parts:\n",
+ " while \"models\" in pathlib.Path.cwd().parts:\n",
+ " os.chdir('..')\n",
+ "elif not pathlib.Path('models').exists():\n",
+ " !git clone --depth 1 https://github.com/tensorflow/models"
+ ],
+ "execution_count": 3,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Cloning into 'models'...\n",
+ "remote: Enumerating objects: 2694, done.\u001b[K\n",
+ "remote: Counting objects: 100% (2694/2694), done.\u001b[K\n",
+ "remote: Compressing objects: 100% (2370/2370), done.\u001b[K\n",
+ "remote: Total 2694 (delta 520), reused 1332 (delta 290), pack-reused 0\u001b[K\n",
+ "Receiving objects: 100% (2694/2694), 34.10 MiB | 29.32 MiB/s, done.\n",
+ "Resolving deltas: 100% (520/520), done.\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "GkqRm-WY47MR",
+ "colab_type": "text"
+ },
+ "source": [
+ "Compile protobufs and install the object_detection package"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "62Dn1_YU45O2",
+ "colab_type": "code",
+ "outputId": "439166dd-6202-4ff9-897d-100a35ae5af5",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 54
+ }
+ },
+ "source": [
+ "%%bash\n",
+ "cd models/research/\n",
+ "protoc object_detection/protos/*.proto --python_out=."
+ ],
+ "execution_count": 4,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "object_detection/protos/input_reader.proto: warning: Import object_detection/protos/image_resizer.proto but not used.\n"
+ ],
+ "name": "stderr"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "83kNiD-24-ZB",
+ "colab_type": "code",
+ "outputId": "aa148939-7dcc-4fbd-ea48-41236523712c",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 343
+ }
+ },
+ "source": [
+ "%%bash \n",
+ "cd models/research\n",
+ "pip install ."
+ ],
+ "execution_count": 5,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Processing /content/models/research\n",
+ "Requirement already satisfied: Pillow>=1.0 in /usr/local/lib/python3.6/dist-packages (from object-detection==0.1) (7.0.0)\n",
+ "Requirement already satisfied: Matplotlib>=2.1 in /usr/local/lib/python3.6/dist-packages (from object-detection==0.1) (3.2.1)\n",
+ "Requirement already satisfied: Cython>=0.28.1 in /usr/local/lib/python3.6/dist-packages (from object-detection==0.1) (0.29.19)\n",
+ "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from Matplotlib>=2.1->object-detection==0.1) (0.10.0)\n",
+ "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from Matplotlib>=2.1->object-detection==0.1) (2.4.7)\n",
+ "Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.6/dist-packages (from Matplotlib>=2.1->object-detection==0.1) (1.18.5)\n",
+ "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from Matplotlib>=2.1->object-detection==0.1) (2.8.1)\n",
+ "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from Matplotlib>=2.1->object-detection==0.1) (1.2.0)\n",
+ "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from cycler>=0.10->Matplotlib>=2.1->object-detection==0.1) (1.12.0)\n",
+ "Building wheels for collected packages: object-detection\n",
+ " Building wheel for object-detection (setup.py): started\n",
+ " Building wheel for object-detection (setup.py): finished with status 'done'\n",
+ " Created wheel for object-detection: filename=object_detection-0.1-cp36-none-any.whl size=1141324 sha256=1dff68de415a4ccc3af0e20b8f409a73d147d79720a713dcdc30f9bc8d4ab3a2\n",
+ " Stored in directory: /tmp/pip-ephem-wheel-cache-rlyj8yrw/wheels/94/49/4b/39b051683087a22ef7e80ec52152a27249d1a644ccf4e442ea\n",
+ "Successfully built object-detection\n",
+ "Installing collected packages: object-detection\n",
+ "Successfully installed object-detection-0.1\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "LBdjK2G5ywuc"
+ },
+ "source": [
+ "### Imports"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab_type": "code",
+ "id": "hV4P5gyTWKMI",
+ "colab": {}
+ },
+ "source": [
+ "import numpy as np\n",
+ "import os\n",
+ "import six\n",
+ "import six.moves.urllib as urllib\n",
+ "import sys\n",
+ "import tarfile\n",
+ "import tensorflow as tf\n",
+ "import zipfile\n",
+ "import pathlib\n",
+ "import json\n",
+ "import datetime\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "from collections import defaultdict\n",
+ "from io import StringIO\n",
+ "from matplotlib import pyplot as plt\n",
+ "from PIL import Image\n",
+ "from IPython.display import display"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "r5FNuiRPWKMN"
+ },
+ "source": [
+ "Import the object detection module."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab_type": "code",
+ "id": "4-IMl4b6BdGO",
+ "colab": {}
+ },
+ "source": [
+ "from object_detection.utils import ops as utils_ops\n",
+ "from object_detection.utils import label_map_util\n",
+ "from object_detection.utils import visualization_utils as vis_utils"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "RYPCiag2iz_q"
+ },
+ "source": [
+ "Patches:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab_type": "code",
+ "id": "mF-YlMl8c_bM",
+ "colab": {}
+ },
+ "source": [
+ "# patch tf1 into `utils.ops`\n",
+ "utils_ops.tf = tf.compat.v1\n",
+ "\n",
+ "# Patch the location of gfile\n",
+ "tf.gfile = tf.io.gfile"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "cfn_tRFOWKMO"
+ },
+ "source": [
+ "# Model preparation "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "7ai8pLZZWKMS"
+ },
+ "source": [
+ "## Loader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab_type": "code",
+ "id": "zm8xp-0eoItE",
+ "colab": {}
+ },
+ "source": [
+ "def load_model(model_name):\n",
+ " base_url = 'http://download.tensorflow.org/models/object_detection/'\n",
+ " model_file = model_name + '.tar.gz'\n",
+ " model_dir = tf.keras.utils.get_file(\n",
+ " fname=model_name,\n",
+ " origin=base_url + model_file,\n",
+ " untar=True)\n",
+ "\n",
+ " model_dir = pathlib.Path(model_dir)/\"saved_model\"\n",
+ " model = tf.saved_model.load(str(model_dir))\n",
+ " model = model.signatures['serving_default']\n",
+ "\n",
+ " return model"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "_1MVVTcLWKMW"
+ },
+ "source": [
+ "## Loading label map\n",
+ "Label maps map indices to category names, so that when our convolutional network predicts `5`, we know that this corresponds to `zebra`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab_type": "code",
+ "id": "hDbpHkiWWKMX",
+ "colab": {}
+ },
+ "source": [
+ "# List of the strings that are used to add the correct label for each box.\n",
+ "PATH_TO_LABELS = 'models/research/object_detection/data/snapshot_serengeti_label_map.pbtxt'\n",
+ "category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=False)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "oVU3U_J6IJVb"
+ },
+ "source": [
+ "We will test on a context group of images from one month at one camera from the Snapshot Serengeti val split defined on [LILA.science](http://lila.science/datasets/snapshot-serengeti), which was not seen during model training:\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab_type": "code",
+ "id": "jG-zn5ykWKMd",
+ "outputId": "c7bbbb2f-0f6e-4380-fd92-c88c088bd766",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 85
+ }
+ },
+ "source": [
+ "# If you want to test the code with your images, just add path to the images to\n",
+ "# the TEST_IMAGE_PATHS.\n",
+ "PATH_TO_TEST_IMAGES_DIR = pathlib.Path('models/research/object_detection/test_images/snapshot_serengeti')\n",
+ "TEST_IMAGE_PATHS = sorted(list(PATH_TO_TEST_IMAGES_DIR.glob(\"*.jpeg\")))\n",
+ "TEST_IMAGE_PATHS"
+ ],
+ "execution_count": 11,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[PosixPath('models/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0038.jpeg'),\n",
+ " PosixPath('models/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0039.jpeg'),\n",
+ " PosixPath('models/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0040.jpeg'),\n",
+ " PosixPath('models/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0041.jpeg')]"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 11
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "oBcQzptnQ-x6",
+ "colab_type": "text"
+ },
+ "source": [
+ "Load the metadata for each image"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "ZLLINOHcQ-An",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "test_data_json = 'models/research/object_detection/test_images/snapshot_serengeti/context_rcnn_demo_metadata.json'\n",
+ "with open(test_data_json, 'r') as f:\n",
+ " test_metadata = json.load(f)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "BgGTPHhkOAel",
+ "colab_type": "code",
+ "outputId": "1421a32a-c208-498f-931f-1bfeb25d6488",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 105
+ }
+ },
+ "source": [
+ "image_id_to_datetime = {im['id']:im['date_captured'] for im in test_metadata['images']}\n",
+ "image_path_to_id = {im['file_name']: im['id'] \n",
+ " for im in test_metadata['images']}\n",
+ "image_path_to_id"
+ ],
+ "execution_count": 13,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "{'models/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0038.jpeg': 'S1/E03/E03_R3/S1_E03_R3_PICT0038',\n",
+ " 'models/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0039.jpeg': 'S1/E03/E03_R3/S1_E03_R3_PICT0039',\n",
+ " 'models/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0040.jpeg': 'S1/E03/E03_R3/S1_E03_R3_PICT0040',\n",
+ " 'models/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0041.jpeg': 'S1/E03/E03_R3/S1_E03_R3_PICT0041'}"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 13
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "H0_1AGhrWKMc"
+ },
+ "source": [
+ "# Generate Context Features for each image"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "kt3_pPQOj7ii",
+ "colab_type": "code",
+ "outputId": "fc72e978-f576-43f4-bcf1-3eb49fef5726",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 88
+ }
+ },
+ "source": [
+ "faster_rcnn_model_name = 'faster_rcnn_resnet101_snapshot_serengeti_2020_06_10'\n",
+ "faster_rcnn_model = load_model(faster_rcnn_model_name)"
+ ],
+ "execution_count": 14,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Downloading data from http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_snapshot_serengeti_2020_06_10.tar.gz\n",
+ "588832768/588829839 [==============================] - 3s 0us/step\n",
+ "INFO:tensorflow:Saver not created because there are no variables in the graph to restore\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "k6Clkv_mBo_U",
+ "colab_type": "text"
+ },
+ "source": [
+ "Check the model's input signature; it expects a batch of 3-color images of type uint8."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "H1qNlFESBsTR",
+ "colab_type": "code",
+ "outputId": "9b8b84e0-d7a8-4ec9-d6e0-22d574cb6209",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ }
+ },
+ "source": [
+ "faster_rcnn_model.inputs"
+ ],
+ "execution_count": 15,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 15
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "eYS8KpRCBtBH",
+ "colab_type": "text"
+ },
+ "source": [
+ "And it returns several outputs. Note that this model has been exported with an additional output, 'detection_features', which will be used to build the contextual memory bank."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "5M-1yxgfkmQl",
+ "colab_type": "code",
+ "outputId": "1da98c3b-79c5-4d19-d64c-3e9dbadc97c0",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 153
+ }
+ },
+ "source": [
+ "faster_rcnn_model.output_dtypes"
+ ],
+ "execution_count": 16,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "{'detection_boxes': tf.float32,\n",
+ " 'detection_classes': tf.float32,\n",
+ " 'detection_features': tf.float32,\n",
+ " 'detection_multiclass_scores': tf.float32,\n",
+ " 'detection_scores': tf.float32,\n",
+ " 'num_detections': tf.float32,\n",
+ " 'raw_detection_boxes': tf.float32,\n",
+ " 'raw_detection_scores': tf.float32}"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 16
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "zVjNFFNIDCst",
+ "colab_type": "code",
+ "outputId": "edb46db0-05fb-4952-bc88-db09d7811b01",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 153
+ }
+ },
+ "source": [
+ "faster_rcnn_model.output_shapes"
+ ],
+ "execution_count": 17,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "{'detection_boxes': TensorShape([None, 300, 4]),\n",
+ " 'detection_classes': TensorShape([None, 300]),\n",
+ " 'detection_features': TensorShape([None, None, None, None, None]),\n",
+ " 'detection_multiclass_scores': TensorShape([None, 300, 49]),\n",
+ " 'detection_scores': TensorShape([None, 300]),\n",
+ " 'num_detections': TensorShape([None]),\n",
+ " 'raw_detection_boxes': TensorShape([None, 300, 4]),\n",
+ " 'raw_detection_scores': TensorShape([None, 300, 49])}"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 17
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "JP5qZ7sXJpwG"
+ },
+ "source": [
+ "Add a wrapper function to call the model and clean up the outputs:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab_type": "code",
+ "id": "ajmR_exWyN76",
+ "colab": {}
+ },
+ "source": [
+ "def run_inference_for_single_image(model, image):\n",
+ " '''Run single image through tensorflow object detection saved_model.\n",
+ "\n",
+ " This function runs a saved_model on a (single) provided image and returns\n",
+ " inference results in numpy arrays.\n",
+ "\n",
+ " Args:\n",
+ " model: tensorflow saved_model. This model can be obtained using \n",
+ " export_inference_graph.py.\n",
+ " image: uint8 numpy array with shape (img_height, img_width, 3)\n",
+ "\n",
+ " Returns:\n",
+ " output_dict: a dictionary holding the following entries:\n",
+ " `num_detections`: an integer\n",
+ " `detection_boxes`: a numpy (float32) array of shape [N, 4]\n",
+ " `detection_classes`: a numpy (int64) array of shape [N]\n",
+ " `detection_scores`: a numpy (float32) array of shape [N]\n",
+ " `detection_features`: a numpy (float32) array of shape [N, 7, 7, 2048]\n",
+ " '''\n",
+ " image = np.asarray(image)\n",
+ " # The input needs to be a tensor, convert it using `tf.convert_to_tensor`.\n",
+ " input_tensor = tf.convert_to_tensor(image)\n",
+ " # The model expects a batch of images, so add an axis with `tf.newaxis`.\n",
+ " input_tensor = input_tensor[tf.newaxis,...]\n",
+ "\n",
+ " # Run inference\n",
+ " output_dict = model(input_tensor)\n",
+ " # All outputs are batched tensors.\n",
+ " # Convert to numpy arrays, and take index [0] to remove the batch dimension.\n",
+ " # We're only interested in the first num_detections.\n",
+ " num_dets = output_dict.pop('num_detections')\n",
+ " num_detections = int(num_dets)\n",
+ " for key,value in output_dict.items():\n",
+ " output_dict[key] = value[0, :num_detections].numpy() \n",
+ " output_dict['num_detections'] = num_detections\n",
+ "\n",
+ " # detection_classes should be ints.\n",
+ " output_dict['detection_classes'] = output_dict['detection_classes'].astype(\n",
+ " np.int64)\n",
+ " return output_dict"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "un5SXxIxMaaV",
+ "colab_type": "text"
+ },
+ "source": [
+ "Functions for embedding context features"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "qvtvAZFDMoTM",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "def embed_date_captured(date_captured):\n",
+ " \"\"\"Encodes the datetime of the image.\n",
+ "\n",
+ " Takes a datetime object and encodes it into a normalized embedding of shape \n",
+ " [5], using hard-coded normalization factors for year, month, day, hour,\n",
+ " minute.\n",
+ "\n",
+ " Args:\n",
+ " date_captured: A datetime object.\n",
+ "\n",
+ " Returns:\n",
+ " A numpy float64 embedding of shape [5].\n",
+ " \"\"\"\n",
+ " embedded_date_captured = []\n",
+ " month_max = 12.0\n",
+ " day_max = 31.0\n",
+ " hour_max = 24.0\n",
+ " minute_max = 60.0\n",
+ " min_year = 1990.0\n",
+ " max_year = 2030.0\n",
+ "\n",
+ " year = (date_captured.year-min_year)/float(max_year-min_year)\n",
+ " embedded_date_captured.append(year)\n",
+ "\n",
+ " month = (date_captured.month-1)/month_max\n",
+ " embedded_date_captured.append(month)\n",
+ "\n",
+ " day = (date_captured.day-1)/day_max\n",
+ " embedded_date_captured.append(day)\n",
+ "\n",
+ " hour = date_captured.hour/hour_max\n",
+ " embedded_date_captured.append(hour)\n",
+ "\n",
+ " minute = date_captured.minute/minute_max\n",
+ " embedded_date_captured.append(minute)\n",
+ "\n",
+ " return np.asarray(embedded_date_captured)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "xN8k5daOOA7b",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "def embed_position_and_size(box):\n",
+ " \"\"\"Encodes the bounding box of the object of interest.\n",
+ "\n",
+ " Takes a bounding box and encodes it into a normalized embedding of shape \n",
+ " [4] - the center point (x,y) and width and height of the box.\n",
+ "\n",
+ " Args:\n",
+ " box: A bounding box, formatted as [ymin, xmin, ymax, xmax].\n",
+ "\n",
+ " Returns:\n",
+ " A numpy float32 embedding of shape [4].\n",
+ " \"\"\"\n",
+ " ymin = box[0]\n",
+ " xmin = box[1]\n",
+ " ymax = box[2]\n",
+ " xmax = box[3]\n",
+ " w = xmax - xmin\n",
+ " h = ymax - ymin\n",
+ " x = xmin + w / 2.0\n",
+ " y = ymin + h / 2.0\n",
+ " return np.asarray([x, y, w, h])"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "lJe2qy8HPc6Z",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "def get_context_feature_embedding(date_captured, detection_boxes,\n",
+ " detection_features, detection_scores):\n",
+ " \"\"\"Extracts representative feature embedding for a given input image.\n",
+ "\n",
+ " Takes outputs of a detection model and focuses on the highest-confidence\n",
+ " detected object. Starts with detection_features and uses average pooling to\n",
+ " remove the spatial dimensions, then appends an embedding of the box position\n",
+ " and size, and an embedding of the date and time the image was captured,\n",
+ " returning a one-dimensional representation of the object.\n",
+ "\n",
+ " Args:\n",
+ " date_captured: A datetime string of format '%Y-%m-%d %H:%M:%S'.\n",
+ " detection_features: A numpy (float32) array of shape [N, 7, 7, 2048].\n",
+ " detection_boxes: A numpy (float32) array of shape [N, 4].\n",
+ " detection_scores: A numpy (float32) array of shape [N].\n",
+ "\n",
+ " Returns:\n",
+ " A tuple of a numpy embedding of shape [1, 2057] and its detection score.\n",
+ " \"\"\"\n",
+ " date_captured = datetime.datetime.strptime(date_captured,'%Y-%m-%d %H:%M:%S')\n",
+ " temporal_embedding = embed_date_captured(date_captured)\n",
+ " embedding = detection_features[0]\n",
+ " pooled_embedding = np.mean(np.mean(embedding, axis=1), axis=0)\n",
+ " box = detection_boxes[0]\n",
+ " position_embedding = embed_position_and_size(box)\n",
+ " bb_embedding = np.concatenate((pooled_embedding, position_embedding))\n",
+ " embedding = np.expand_dims(np.concatenate((bb_embedding,temporal_embedding)),\n",
+ " axis=0)\n",
+ " score = detection_scores[0]\n",
+ " return embedding, score"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "z1wq0LVyMRR_"
+ },
+ "source": [
+ "Run it on each test image and use the output detection features and metadata to build up a context feature bank:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab_type": "code",
+ "id": "DWh_1zz6aqxs",
+ "colab": {}
+ },
+ "source": [
+ "def run_inference(model, image_path, date_captured, resize_image=True):\n",
+ " \"\"\"Runs inference over a single input image and extracts contextual features.\n",
+ "\n",
+ " Args:\n",
+ " model: A tensorflow saved_model object.\n",
+ " image_path: Absolute path to the input image.\n",
+ " date_captured: A datetime string of format '%Y-%m-%d %H:%M:%S'.\n",
+ " resize_image: Whether to resize the input image before running inference.\n",
+ "\n",
+ " Returns:\n",
+ " context_feature: A numpy array of shape [1, 2057].\n",
+ " score: A numpy float32 object score for the embedded object.\n",
+ " output_dict: The saved_model output dictionary for the image.\n",
+ " \"\"\"\n",
+ " with open(image_path,'rb') as f:\n",
+ " image = Image.open(f)\n",
+ " if resize_image:\n",
+ " image.thumbnail((640,640),Image.ANTIALIAS)\n",
+ " image_np = np.array(image)\n",
+ "\n",
+ " # Actual detection.\n",
+ " output_dict = run_inference_for_single_image(model, image_np)\n",
+ "\n",
+ " context_feature, score = get_context_feature_embedding(\n",
+ " date_captured, output_dict['detection_boxes'],\n",
+ " output_dict['detection_features'], output_dict['detection_scores'])\n",
+ " return context_feature, score, output_dict"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab_type": "code",
+ "id": "3a5wMHN8WKMh",
+ "colab": {}
+ },
+ "source": [
+ "context_features = []\n",
+ "scores = []\n",
+ "faster_rcnn_results = {}\n",
+ "for image_path in TEST_IMAGE_PATHS:\n",
+ " image_id = image_path_to_id[str(image_path)]\n",
+ " date_captured = image_id_to_datetime[image_id]\n",
+ " context_feature, score, results = run_inference(\n",
+ " faster_rcnn_model, image_path, date_captured)\n",
+ " faster_rcnn_results[image_id] = results\n",
+ " context_features.append(context_feature)\n",
+ " scores.append(score)\n",
+ "\n",
+ "# Concatenate all extracted context embeddings into a contextual memory bank.\n",
+ "context_features_matrix = np.concatenate(context_features, axis=0)\n"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "DsspMPX3Cssg"
+ },
+ "source": [
+ "## Run Detection With Context"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "f7aOtOlebK7h"
+ },
+ "source": [
+ "Load a Context R-CNN object detection model:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab_type": "code",
+ "id": "1XNT0wxybKR6",
+ "outputId": "cc5b0677-cf16-46c2-9ae5-32681725f856",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 88
+ }
+ },
+ "source": [
+ "context_rcnn_model_name = 'context_rcnn_resnet101_snapshot_serengeti_2020_06_10'\n",
+ "context_rcnn_model = load_model(context_rcnn_model_name)\n"
+ ],
+ "execution_count": 24,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Downloading data from http://download.tensorflow.org/models/object_detection/context_rcnn_resnet101_snapshot_serengeti_2020_06_10.tar.gz\n",
+ "724664320/724658931 [==============================] - 3s 0us/step\n",
+ "INFO:tensorflow:Saver not created because there are no variables in the graph to restore\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "G6IGGtGqBH6y",
+ "colab_type": "text"
+ },
+ "source": [
+ "We need to define the expected context padding size for the\n",
+ "model; this must match the definition in the model config (max_num_context_features)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "4oh9XNLBjkTL",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "context_padding_size = 2000"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "yN1AYfAEJIGp"
+ },
+ "source": [
+ "Check the model's input signature; it expects a batch of 3-color images of type uint8, plus context_features padded to the maximum context feature size for this model (2000) and valid_context_size to represent the number of non-padded context features: "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab_type": "code",
+ "id": "CK4cnry6wsHY",
+ "outputId": "d77af014-769f-4e20-b4ac-bfdd40502128",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 68
+ }
+ },
+ "source": [
+ "context_rcnn_model.inputs"
+ ],
+ "execution_count": 26,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[,\n",
+ " ,\n",
+ " ]"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 26
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "Q8u3BjpMJXZF"
+ },
+ "source": [
+ "And returns several outputs:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab_type": "code",
+ "id": "oLSZpfaYwuSk",
+ "outputId": "63a3903f-529b-41f9-b742-9b81c4c5e096",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 136
+ }
+ },
+ "source": [
+ "context_rcnn_model.output_dtypes"
+ ],
+ "execution_count": 27,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "{'detection_boxes': tf.float32,\n",
+ " 'detection_classes': tf.float32,\n",
+ " 'detection_multiclass_scores': tf.float32,\n",
+ " 'detection_scores': tf.float32,\n",
+ " 'num_detections': tf.float32,\n",
+ " 'raw_detection_boxes': tf.float32,\n",
+ " 'raw_detection_scores': tf.float32}"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 27
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab_type": "code",
+ "id": "FZyKUJeuxvpT",
+ "outputId": "d2feeaba-2bb2-4779-a96a-94a8a0aff362",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 136
+ }
+ },
+ "source": [
+ "context_rcnn_model.output_shapes"
+ ],
+ "execution_count": 28,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "{'detection_boxes': TensorShape([1, 300, 4]),\n",
+ " 'detection_classes': TensorShape([1, 300]),\n",
+ " 'detection_multiclass_scores': TensorShape([1, 300, 49]),\n",
+ " 'detection_scores': TensorShape([1, 300]),\n",
+ " 'num_detections': TensorShape([1]),\n",
+ " 'raw_detection_boxes': TensorShape([1, 300, 4]),\n",
+ " 'raw_detection_scores': TensorShape([1, 300, 49])}"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 28
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab_type": "code",
+ "id": "CzkVv_n2MxKC",
+ "colab": {}
+ },
+ "source": [
+ "def run_context_rcnn_inference_for_single_image(\n",
+ " model, image, context_features, context_padding_size):\n",
+ " '''Run single image through a Context R-CNN saved_model.\n",
+ "\n",
+ " This function runs a saved_model on a (single) provided image and provided \n",
+ " contextual features and returns inference results in numpy arrays.\n",
+ "\n",
+ " Args:\n",
+ " model: tensorflow Context R-CNN saved_model. This model can be obtained\n",
+ " using export_inference_graph.py and setting side_input fields. \n",
+ " Example export call - \n",
+ " python export_inference_graph.py \\\n",
+ " --input_type image_tensor \\\n",
+ " --pipeline_config_path /path/to/context_rcnn_model.config \\\n",
+ " --trained_checkpoint_prefix /path/to/context_rcnn_model.ckpt \\\n",
+ " --output_directory /path/to/output_dir \\\n",
+ " --use_side_inputs True \\\n",
+ " --side_input_shapes 1,2000,2057/1 \\\n",
+ " --side_input_names context_features,valid_context_size \\\n",
+ " --side_input_types float,int \\\n",
+ " --input_shape 1,-1,-1,3\n",
+ "\n",
+ " image: uint8 numpy array with shape (img_height, img_width, 3)\n",
+ " context_features: A numpy float32 contextual memory bank of shape \n",
+ " [num_context_examples, 2057]\n",
+ " context_padding_size: The amount of expected padding in the contextual\n",
+ " memory bank, defined in the Context R-CNN config as \n",
+ " max_num_context_features.\n",
+ "\n",
+ " Returns:\n",
+ " output_dict: a dictionary holding the following entries:\n",
+ " `num_detections`: an integer\n",
+ " `detection_boxes`: a numpy (float32) array of shape [N, 4]\n",
+ " `detection_classes`: a numpy (int64) array of shape [N]\n",
+ " `detection_scores`: a numpy (float32) array of shape [N]\n",
+ " '''\n",
+ " image = np.asarray(image)\n",
+ " # The input image needs to be a tensor, convert it using \n",
+ " # `tf.convert_to_tensor`.\n",
+ " image_tensor = tf.convert_to_tensor(\n",
+ " image, name='image_tensor')[tf.newaxis,...]\n",
+ "\n",
+ " context_features = np.asarray(context_features)\n",
+ " valid_context_size = context_features.shape[0]\n",
+ " valid_context_size_tensor = tf.convert_to_tensor(\n",
+ " valid_context_size, name='valid_context_size')[tf.newaxis,...]\n",
+ " padded_context_features = np.pad(\n",
+ " context_features,\n",
+ " ((0,context_padding_size-valid_context_size),(0,0)), mode='constant')\n",
+ " padded_context_features_tensor = tf.convert_to_tensor(\n",
+ " padded_context_features,\n",
+ " name='context_features',\n",
+ " dtype=tf.float32)[tf.newaxis,...]\n",
+ "\n",
+ " # Run inference\n",
+ " output_dict = model(\n",
+ " inputs=image_tensor,\n",
+ " context_features=padded_context_features_tensor,\n",
+ " valid_context_size=valid_context_size_tensor)\n",
+ " # All outputs are batched tensors.\n",
+ " # Convert to numpy arrays, and take index [0] to remove the batch dimension.\n",
+ " # We're only interested in the first num_detections.\n",
+ " num_dets = output_dict.pop('num_detections')\n",
+ " num_detections = int(num_dets)\n",
+ " for key,value in output_dict.items():\n",
+ " output_dict[key] = value[0, :num_detections].numpy() \n",
+ " output_dict['num_detections'] = num_detections\n",
+ "\n",
+ " # detection_classes should be ints.\n",
+ " output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)\n",
+ " return output_dict"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "0FqVkR3Agc6U",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "def show_context_rcnn_inference(\n",
+ " model, image_path, context_features, faster_rcnn_output_dict,\n",
+ " context_padding_size, resize_image=True):\n",
+ " \"\"\"Runs inference over a single input image and visualizes Faster R-CNN vs. \n",
+ " Context R-CNN results.\n",
+ "\n",
+ " Args:\n",
+ " model: A tensorflow saved_model object.\n",
+ " image_path: Absolute path to the input image.\n",
+ " context_features: A numpy float32 contextual memory bank of shape \n",
+ " [num_context_examples, 2057]\n",
+ " faster_rcnn_output_dict: The output_dict corresponding to this input image\n",
+ " from the single-frame Faster R-CNN model, which was previously used to\n",
+ " build the memory bank.\n",
+ " context_padding_size: The amount of expected padding in the contextual\n",
+ " memory bank, defined in the Context R-CNN config as \n",
+ " max_num_context_features.\n",
+ " resize_image: Whether to resize the input image before running inference.\n",
+ "\n",
+ " Returns:\n",
+ " context_rcnn_image_np: Numpy image array showing Context R-CNN Results.\n",
+ " faster_rcnn_image_np: Numpy image array showing Faster R-CNN Results.\n",
+ " \"\"\"\n",
+ "\n",
+ " # the array based representation of the image will be used later in order to prepare the\n",
+ " # result image with boxes and labels on it.\n",
+ " with open(image_path,'rb') as f:\n",
+ " image = Image.open(f)\n",
+ " if resize_image:\n",
+ " image.thumbnail((640,640),Image.ANTIALIAS)\n",
+ " image_np = np.array(image)\n",
+ " image.thumbnail((400,400),Image.ANTIALIAS)\n",
+ " context_rcnn_image_np = np.array(image)\n",
+ " \n",
+ " faster_rcnn_image_np = np.copy(context_rcnn_image_np)\n",
+ "\n",
+ " # Actual detection.\n",
+ " output_dict = run_context_rcnn_inference_for_single_image(\n",
+ " model, image_np, context_features, context_padding_size)\n",
+ "\n",
+ " # Visualization of the results of a context_rcnn detection.\n",
+ " vis_utils.visualize_boxes_and_labels_on_image_array(\n",
+ " context_rcnn_image_np,\n",
+ " output_dict['detection_boxes'],\n",
+ " output_dict['detection_classes'],\n",
+ " output_dict['detection_scores'],\n",
+ " category_index,\n",
+ " use_normalized_coordinates=True,\n",
+ " line_thickness=2)\n",
+ " \n",
+ " # Visualization of the results of a faster_rcnn detection.\n",
+ " vis_utils.visualize_boxes_and_labels_on_image_array(\n",
+ " faster_rcnn_image_np,\n",
+ " faster_rcnn_output_dict['detection_boxes'],\n",
+ " faster_rcnn_output_dict['detection_classes'],\n",
+ " faster_rcnn_output_dict['detection_scores'],\n",
+ " category_index,\n",
+ " use_normalized_coordinates=True,\n",
+ " line_thickness=2)\n",
+ " return context_rcnn_image_np, faster_rcnn_image_np"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3cYa2B8uAYx0",
+ "colab_type": "text"
+ },
+ "source": [
+ "Define Matplotlib parameters for pretty visualizations"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "9F8okR1uAQ0T",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "%matplotlib inline\n",
+ "plt.rcParams['axes.grid'] = False\n",
+ "plt.rcParams['xtick.labelsize'] = False\n",
+ "plt.rcParams['ytick.labelsize'] = False\n",
+ "plt.rcParams['xtick.top'] = False\n",
+ "plt.rcParams['xtick.bottom'] = False\n",
+ "plt.rcParams['ytick.left'] = False\n",
+ "plt.rcParams['ytick.right'] = False\n",
+ "plt.rcParams['figure.figsize'] = [15,10]"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "YGj7nXXQAaQ7",
+ "colab_type": "text"
+ },
+ "source": [
+ "Run Context R-CNN inference and compare results to Faster R-CNN"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab_type": "code",
+ "id": "vQ2Sj2VIOZLA",
+ "outputId": "1c043894-09e5-4c9f-a99d-ae21d6e72d0c",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ }
+ },
+ "source": [
+ "for image_path in TEST_IMAGE_PATHS:\n",
+ " image_id = image_path_to_id[str(image_path)]\n",
+ " faster_rcnn_output_dict = faster_rcnn_results[image_id]\n",
+ " context_rcnn_image, faster_rcnn_image = show_context_rcnn_inference(\n",
+ " context_rcnn_model, image_path, context_features_matrix,\n",
+ " faster_rcnn_output_dict, context_padding_size)\n",
+ " plt.subplot(1,2,1)\n",
+ " plt.imshow(faster_rcnn_image)\n",
+ " plt.title('Faster R-CNN')\n",
+ " plt.subplot(1,2,2)\n",
+ " plt.imshow(context_rcnn_image)\n",
+ " plt.title('Context R-CNN')\n",
+ " plt.show()"
+ ],
+ "execution_count": 32,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": [],
+ "needs_background": "light"
+ }
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": [],
+ "needs_background": "light"
+ }
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": [],
+ "needs_background": "light"
+ }
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": [],
+ "needs_background": "light"
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "lMombPr0GF9a",
+ "colab_type": "text"
+ },
+ "source": [
+ "The images used in this demo are from the [Snapshot Serengeti dataset](http://lila.science/datasets/snapshot-serengeti), and released under the [Community Data License Agreement (permissive variant)](https://cdla.io/permissive-1-0/)."
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/research/object_detection/object_detection_tutorial.ipynb b/research/object_detection/colab_tutorials/object_detection_tutorial.ipynb
similarity index 98%
rename from research/object_detection/object_detection_tutorial.ipynb
rename to research/object_detection/colab_tutorials/object_detection_tutorial.ipynb
index c83b67ede32938c40596e2cc2fced0ab1ae952bb..9063f2cd33aa8fffe160b138b3a3ec69c0d3abdb 100644
--- a/research/object_detection/object_detection_tutorial.ipynb
+++ b/research/object_detection/colab_tutorials/object_detection_tutorial.ipynb
@@ -10,11 +10,11 @@
"# Object Detection API Demo\n",
"\n",
"\u003ctable align=\"left\"\u003e\u003ctd\u003e\n",
- " \u003ca target=\"_blank\" href=\"https://colab.sandbox.google.com/github/tensorflow/models/blob/master/research/object_detection/object_detection_tutorial.ipynb\"\u003e\n",
+ " \u003ca target=\"_blank\" href=\"https://colab.sandbox.google.com/github/tensorflow/models/blob/master/research/object_detection/colab_tutorials/object_detection_tutorial.ipynb\"\u003e\n",
" \u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\n",
" \u003c/a\u003e\n",
"\u003c/td\u003e\u003ctd\u003e\n",
- " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/models/blob/master/research/object_detection/object_detection_tutorial.ipynb\"\u003e\n",
+ " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/models/blob/master/research/object_detection/colab_tutorials/object_detection_tutorial.ipynb\"\u003e\n",
" \u003cimg width=32px src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n",
"\u003c/td\u003e\u003c/table\u003e"
]
diff --git a/research/object_detection/core/batch_multiclass_nms_test.py b/research/object_detection/core/batch_multiclass_nms_test.py
index d99116a4bf086107e08fec70c26089d6fa9c2cbf..06f17103b2b6bd7df5d449a270f0bddfd3514249 100644
--- a/research/object_detection/core/batch_multiclass_nms_test.py
+++ b/research/object_detection/core/batch_multiclass_nms_test.py
@@ -27,21 +27,20 @@ from object_detection.utils import test_case
class BatchMulticlassNonMaxSuppressionTest(test_case.TestCase,
parameterized.TestCase):
- @parameterized.named_parameters(('', False), ('_use_static_shapes', True))
- def test_batch_multiclass_nms_with_batch_size_1(self, use_static_shapes):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
+ def test_batch_multiclass_nms_with_batch_size_1(self):
+ boxes = np.array([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+ [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+ [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11], [0, 10, 1, 11]],
+ [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101], [0, 100, 1, 101]],
+ [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+ np.float32)
+ scores = np.array([[[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0],
+ [.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]]], np.float32)
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
@@ -52,56 +51,51 @@ class BatchMulticlassNonMaxSuppressionTest(test_case.TestCase,
[0, 100, 1, 101]]]
exp_nms_scores = [[.95, .9, .85, .3]]
exp_nms_classes = [[0, 0, 1, 0]]
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields,
- num_detections) = post_processing.batch_multiclass_non_max_suppression(
- boxes,
- scores,
- score_thresh,
- iou_thresh,
- max_size_per_class=max_output_size,
- max_total_size=max_output_size,
- use_static_shapes=use_static_shapes)
-
- self.assertIsNone(nmsed_masks)
- self.assertIsNone(nmsed_additional_fields)
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections])
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertEqual(num_detections, [4])
+ def graph_fn(boxes, scores):
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ nmsed_additional_fields, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size,
+ max_total_size=max_output_size)
+ self.assertIsNone(nmsed_masks)
+ self.assertIsNone(nmsed_additional_fields)
+ return (nmsed_boxes, nmsed_scores, nmsed_classes, num_detections)
+
+ (nmsed_boxes, nmsed_scores, nmsed_classes,
+ num_detections) = self.execute_cpu(graph_fn, [boxes, scores])
+ self.assertAllClose(nmsed_boxes, exp_nms_corners)
+ self.assertAllClose(nmsed_scores, exp_nms_scores)
+ self.assertAllClose(nmsed_classes, exp_nms_classes)
+ self.assertEqual(num_detections, [4])
def test_batch_iou_with_negative_data(self):
- boxes = tf.constant([[[0, -0.01, 0.1, 1.1], [0, 0.2, 0.2, 5.0],
- [0, -0.01, 0.1, 1.], [-1, -1, -1, -1]]], tf.float32)
- iou = post_processing.batch_iou(boxes, boxes)
+ def graph_fn():
+ boxes = tf.constant([[[0, -0.01, 0.1, 1.1], [0, 0.2, 0.2, 5.0],
+ [0, -0.01, 0.1, 1.], [-1, -1, -1, -1]]], tf.float32)
+ iou = post_processing.batch_iou(boxes, boxes)
+ return iou
+ iou = self.execute_cpu(graph_fn, [])
expected_iou = [[[0.99999994, 0.0917431, 0.9099099, -1.],
[0.0917431, 1., 0.08154944, -1.],
[0.9099099, 0.08154944, 1., -1.], [-1., -1., -1., -1.]]]
- with self.test_session() as sess:
- iou = sess.run(iou)
- self.assertAllClose(iou, expected_iou)
+ self.assertAllClose(iou, expected_iou)
@parameterized.parameters(False, True)
def test_batch_multiclass_nms_with_batch_size_2(self, use_dynamic_map_fn):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
+ boxes = np.array([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+ [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+ [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11], [0, 10, 1, 11]]],
+ [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101], [0, 100, 1, 101]],
+ [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+ np.float32)
+ scores = np.array([[[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0]],
+ [[.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]]], np.float32)
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
@@ -118,49 +112,48 @@ class BatchMulticlassNonMaxSuppressionTest(test_case.TestCase,
[.85, .5, .3, 0]])
exp_nms_classes = np.array([[0, 0, 0, 0],
[1, 0, 0, 0]])
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- use_dynamic_map_fn=use_dynamic_map_fn)
-
- self.assertIsNone(nmsed_masks)
- self.assertIsNone(nmsed_additional_fields)
- # Check static shapes
- self.assertAllEqual(nmsed_boxes.shape.as_list(),
- exp_nms_corners.shape)
- self.assertAllEqual(nmsed_scores.shape.as_list(),
- exp_nms_scores.shape)
- self.assertAllEqual(nmsed_classes.shape.as_list(),
- exp_nms_classes.shape)
- self.assertEqual(num_detections.shape.as_list(), [2])
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections])
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertAllClose(num_detections, [2, 3])
+ def graph_fn(boxes, scores):
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ nmsed_additional_fields, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size,
+ max_total_size=max_output_size,
+ use_dynamic_map_fn=use_dynamic_map_fn)
+ self.assertIsNone(nmsed_masks)
+ self.assertIsNone(nmsed_additional_fields)
+ # Check static shapes
+ self.assertAllEqual(nmsed_boxes.shape.as_list(),
+ exp_nms_corners.shape)
+ self.assertAllEqual(nmsed_scores.shape.as_list(),
+ exp_nms_scores.shape)
+ self.assertAllEqual(nmsed_classes.shape.as_list(),
+ exp_nms_classes.shape)
+ self.assertEqual(num_detections.shape.as_list(), [2])
+ return (nmsed_boxes, nmsed_scores, nmsed_classes, num_detections)
+
+ (nmsed_boxes, nmsed_scores, nmsed_classes,
+ num_detections) = self.execute_cpu(graph_fn, [boxes, scores])
+ self.assertAllClose(nmsed_boxes, exp_nms_corners)
+ self.assertAllClose(nmsed_scores, exp_nms_scores)
+ self.assertAllClose(nmsed_classes, exp_nms_classes)
+ self.assertAllClose(num_detections, [2, 3])
def test_batch_multiclass_nms_with_per_batch_clip_window(self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- clip_window = tf.constant([0., 0., 200., 200.])
+ boxes = np.array([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+ [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+ [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11], [0, 10, 1, 11]]],
+ [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101], [0, 100, 1, 101]],
+ [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+ np.float32)
+ scores = np.array([[[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0]],
+ [[.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]]], np.float32)
+ clip_window = np.array([0., 0., 200., 200.], np.float32)
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
@@ -177,50 +170,48 @@ class BatchMulticlassNonMaxSuppressionTest(test_case.TestCase,
[.5, .3, 0, 0]])
exp_nms_classes = np.array([[0, 0, 0, 0],
[0, 0, 0, 0]])
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- clip_window=clip_window)
-
- self.assertIsNone(nmsed_masks)
- self.assertIsNone(nmsed_additional_fields)
- # Check static shapes
- self.assertAllEqual(nmsed_boxes.shape.as_list(),
- exp_nms_corners.shape)
- self.assertAllEqual(nmsed_scores.shape.as_list(),
- exp_nms_scores.shape)
- self.assertAllEqual(nmsed_classes.shape.as_list(),
- exp_nms_classes.shape)
- self.assertEqual(num_detections.shape.as_list(), [2])
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections])
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertAllClose(num_detections, [2, 2])
+ def graph_fn(boxes, scores, clip_window):
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ nmsed_additional_fields, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size, max_total_size=max_output_size,
+ clip_window=clip_window)
+ self.assertIsNone(nmsed_masks)
+ self.assertIsNone(nmsed_additional_fields)
+ # Check static shapes
+ self.assertAllEqual(nmsed_boxes.shape.as_list(),
+ exp_nms_corners.shape)
+ self.assertAllEqual(nmsed_scores.shape.as_list(),
+ exp_nms_scores.shape)
+ self.assertAllEqual(nmsed_classes.shape.as_list(),
+ exp_nms_classes.shape)
+ self.assertEqual(num_detections.shape.as_list(), [2])
+ return nmsed_boxes, nmsed_scores, nmsed_classes, num_detections
+
+ (nmsed_boxes, nmsed_scores, nmsed_classes,
+ num_detections) = self.execute_cpu(graph_fn, [boxes, scores, clip_window])
+ self.assertAllClose(nmsed_boxes, exp_nms_corners)
+ self.assertAllClose(nmsed_scores, exp_nms_scores)
+ self.assertAllClose(nmsed_classes, exp_nms_classes)
+ self.assertAllClose(num_detections, [2, 2])
def test_batch_multiclass_nms_with_per_image_clip_window(self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- clip_window = tf.constant([[0., 0., 5., 5.],
- [0., 0., 200., 200.]])
+ boxes = np.array([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+ [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+ [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11], [0, 10, 1, 11]]],
+ [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101], [0, 100, 1, 101]],
+ [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+ np.float32)
+ scores = np.array([[[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0]],
+ [[.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]]], np.float32)
+ clip_window = np.array([[0., 0., 5., 5.],
+ [0., 0., 200., 200.]], np.float32)
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
@@ -238,56 +229,55 @@ class BatchMulticlassNonMaxSuppressionTest(test_case.TestCase,
exp_nms_classes = np.array([[0, 0, 0, 0],
[0, 0, 0, 0]])
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- clip_window=clip_window)
-
- self.assertIsNone(nmsed_masks)
- self.assertIsNone(nmsed_additional_fields)
- # Check static shapes
- self.assertAllEqual(nmsed_boxes.shape.as_list(),
- exp_nms_corners.shape)
- self.assertAllEqual(nmsed_scores.shape.as_list(),
- exp_nms_scores.shape)
- self.assertAllEqual(nmsed_classes.shape.as_list(),
- exp_nms_classes.shape)
- self.assertEqual(num_detections.shape.as_list(), [2])
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections])
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertAllClose(num_detections, [1, 2])
+ def graph_fn(boxes, scores, clip_window):
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ nmsed_additional_fields, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size, max_total_size=max_output_size,
+ clip_window=clip_window)
+ self.assertIsNone(nmsed_masks)
+ self.assertIsNone(nmsed_additional_fields)
+ # Check static shapes
+ self.assertAllEqual(nmsed_boxes.shape.as_list(),
+ exp_nms_corners.shape)
+ self.assertAllEqual(nmsed_scores.shape.as_list(),
+ exp_nms_scores.shape)
+ self.assertAllEqual(nmsed_classes.shape.as_list(),
+ exp_nms_classes.shape)
+ self.assertEqual(num_detections.shape.as_list(), [2])
+ return nmsed_boxes, nmsed_scores, nmsed_classes, num_detections
+
+ (nmsed_boxes, nmsed_scores, nmsed_classes,
+ num_detections) = self.execute_cpu(graph_fn, [boxes, scores, clip_window])
+ self.assertAllClose(nmsed_boxes, exp_nms_corners)
+ self.assertAllClose(nmsed_scores, exp_nms_scores)
+ self.assertAllClose(nmsed_classes, exp_nms_classes)
+ self.assertAllClose(num_detections, [1, 2])
def test_batch_multiclass_nms_with_masks(self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
- [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
- [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
- [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
- [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
- [[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
- [[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
- [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]],
- tf.float32)
+ boxes = np.array([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+ [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+ [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11], [0, 10, 1, 11]]],
+ [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101], [0, 100, 1, 101]],
+ [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+ np.float32)
+ scores = np.array([[[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0]],
+ [[.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]]], np.float32)
+ masks = np.array([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
+ [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
+ [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
+ [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
+ [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
+ [[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
+ [[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
+ [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]],
+ np.float32)
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
@@ -313,61 +303,58 @@ class BatchMulticlassNonMaxSuppressionTest(test_case.TestCase,
[[10, 11], [12, 13]],
[[0, 0], [0, 0]]]])
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- masks=masks)
-
- self.assertIsNone(nmsed_additional_fields)
- # Check static shapes
- self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape)
- self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape)
- self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape)
- self.assertAllEqual(nmsed_masks.shape.as_list(), exp_nms_masks.shape)
- self.assertEqual(num_detections.shape.as_list(), [2])
-
- with self.test_session() as sess:
+ def graph_fn(boxes, scores, masks):
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- nmsed_masks, num_detections])
+ nmsed_additional_fields, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size, max_total_size=max_output_size,
+ masks=masks)
+ self.assertIsNone(nmsed_additional_fields)
+ # Check static shapes
+ self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape)
+ self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape)
+ self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape)
+ self.assertAllEqual(nmsed_masks.shape.as_list(), exp_nms_masks.shape)
+ self.assertEqual(num_detections.shape.as_list(), [2])
+ return (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ num_detections)
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertAllClose(num_detections, [2, 3])
- self.assertAllClose(nmsed_masks, exp_nms_masks)
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ num_detections) = self.execute_cpu(graph_fn, [boxes, scores, masks])
+ self.assertAllClose(nmsed_boxes, exp_nms_corners)
+ self.assertAllClose(nmsed_scores, exp_nms_scores)
+ self.assertAllClose(nmsed_classes, exp_nms_classes)
+ self.assertAllClose(num_detections, [2, 3])
+ self.assertAllClose(nmsed_masks, exp_nms_masks)
def test_batch_multiclass_nms_with_additional_fields(self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- additional_fields = {
- 'keypoints': tf.constant(
- [[[[6, 7], [8, 9]],
- [[0, 1], [2, 3]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]],
- [[[13, 14], [15, 16]],
- [[8, 9], [10, 11]],
- [[10, 11], [12, 13]],
- [[0, 0], [0, 0]]]],
- tf.float32)
- }
- additional_fields['size'] = tf.constant(
+ boxes = np.array([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+ [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+ [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11], [0, 10, 1, 11]]],
+ [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101], [0, 100, 1, 101]],
+ [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+ np.float32)
+ scores = np.array([[[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0]],
+ [[.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]]], np.float32)
+ keypoints = np.array(
+ [[[[6, 7], [8, 9]],
+ [[0, 1], [2, 3]],
+ [[0, 0], [0, 0]],
+ [[0, 0], [0, 0]]],
+ [[[13, 14], [15, 16]],
+ [[8, 9], [10, 11]],
+ [[10, 11], [12, 13]],
+ [[0, 0], [0, 0]]]],
+ np.float32)
+ size = np.array(
[[[[6], [8]], [[0], [2]], [[0], [0]], [[0], [0]]],
- [[[13], [15]], [[8], [10]], [[10], [12]], [[0], [0]]]], tf.float32)
+ [[[13], [15]], [[8], [10]], [[10], [12]], [[0], [0]]]], np.float32)
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
@@ -399,43 +386,43 @@ class BatchMulticlassNonMaxSuppressionTest(test_case.TestCase,
[[[10], [12]], [[13], [15]],
[[8], [10]], [[0], [0]]]])
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- additional_fields=additional_fields)
-
- self.assertIsNone(nmsed_masks)
- # Check static shapes
- self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape)
- self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape)
- self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape)
- self.assertEqual(len(nmsed_additional_fields),
- len(exp_nms_additional_fields))
- for key in exp_nms_additional_fields:
- self.assertAllEqual(nmsed_additional_fields[key].shape.as_list(),
- exp_nms_additional_fields[key].shape)
- self.assertEqual(num_detections.shape.as_list(), [2])
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- nmsed_additional_fields, num_detections])
-
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
+ def graph_fn(boxes, scores, keypoints, size):
+ additional_fields = {'keypoints': keypoints, 'size': size}
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ nmsed_additional_fields, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size, max_total_size=max_output_size,
+ additional_fields=additional_fields)
+ self.assertIsNone(nmsed_masks)
+ # Check static shapes
+ self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape)
+ self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape)
+ self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape)
+ self.assertEqual(len(nmsed_additional_fields),
+ len(exp_nms_additional_fields))
for key in exp_nms_additional_fields:
- self.assertAllClose(nmsed_additional_fields[key],
- exp_nms_additional_fields[key])
- self.assertAllClose(num_detections, [2, 3])
-
- def test_batch_multiclass_nms_with_dynamic_batch_size(self):
- boxes_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2, 4))
- scores_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2))
- masks_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2, 2, 2))
+ self.assertAllEqual(nmsed_additional_fields[key].shape.as_list(),
+ exp_nms_additional_fields[key].shape)
+ self.assertEqual(num_detections.shape.as_list(), [2])
+ return (nmsed_boxes, nmsed_scores, nmsed_classes,
+ nmsed_additional_fields['keypoints'],
+ nmsed_additional_fields['size'],
+ num_detections)
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_keypoints, nmsed_size,
+ num_detections) = self.execute_cpu(graph_fn, [boxes, scores, keypoints,
+ size])
+
+ self.assertAllClose(nmsed_boxes, exp_nms_corners)
+ self.assertAllClose(nmsed_scores, exp_nms_scores)
+ self.assertAllClose(nmsed_classes, exp_nms_classes)
+ self.assertAllClose(nmsed_keypoints,
+ exp_nms_additional_fields['keypoints'])
+ self.assertAllClose(nmsed_size,
+ exp_nms_additional_fields['size'])
+ self.assertAllClose(num_detections, [2, 3])
+ def test_batch_multiclass_nms_with_masks_and_num_valid_boxes(self):
boxes = np.array([[[[0, 0, 1, 1], [0, 0, 4, 5]],
[[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
[[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
@@ -443,11 +430,12 @@ class BatchMulticlassNonMaxSuppressionTest(test_case.TestCase,
[[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
[[0, 100, 1, 101], [0, 100, 1, 101]],
[[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]])
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+ np.float32)
scores = np.array([[[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0]],
[[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
+ [.01, .85], [.01, .5]]], np.float32)
masks = np.array([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
[[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
[[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
@@ -455,84 +443,9 @@ class BatchMulticlassNonMaxSuppressionTest(test_case.TestCase,
[[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
[[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
[[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
- [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = np.array([[[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 0, 0, 0],
- [0, 0, 0, 0]],
- [[0, 999, 2, 1004],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101],
- [0, 0, 0, 0]]])
- exp_nms_scores = np.array([[.95, .9, 0, 0],
- [.85, .5, .3, 0]])
- exp_nms_classes = np.array([[0, 0, 0, 0],
- [1, 0, 0, 0]])
- exp_nms_masks = np.array([[[[6, 7], [8, 9]],
- [[0, 1], [2, 3]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]],
- [[[13, 14], [15, 16]],
- [[8, 9], [10, 11]],
- [[10, 11], [12, 13]],
- [[0, 0], [0, 0]]]])
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes_placeholder, scores_placeholder, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- masks=masks_placeholder)
-
- self.assertIsNone(nmsed_additional_fields)
- # Check static shapes
- self.assertAllEqual(nmsed_boxes.shape.as_list(), [None, 4, 4])
- self.assertAllEqual(nmsed_scores.shape.as_list(), [None, 4])
- self.assertAllEqual(nmsed_classes.shape.as_list(), [None, 4])
- self.assertAllEqual(nmsed_masks.shape.as_list(), [None, 4, 2, 2])
- self.assertEqual(num_detections.shape.as_list(), [None])
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- nmsed_masks, num_detections],
- feed_dict={boxes_placeholder: boxes,
- scores_placeholder: scores,
- masks_placeholder: masks})
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertAllClose(num_detections, [2, 3])
- self.assertAllClose(nmsed_masks, exp_nms_masks)
-
- def test_batch_multiclass_nms_with_masks_and_num_valid_boxes(self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
- [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
- [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
- [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
- [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
- [[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
- [[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
- [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]],
- tf.float32)
- num_valid_boxes = tf.constant([1, 1], tf.int32)
+ [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]],
+ np.float32)
+ num_valid_boxes = np.array([1, 1], np.int32)
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
@@ -558,58 +471,56 @@ class BatchMulticlassNonMaxSuppressionTest(test_case.TestCase,
[[0, 0], [0, 0]],
[[0, 0], [0, 0]]]]
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- num_valid_boxes=num_valid_boxes, masks=masks)
-
- self.assertIsNone(nmsed_additional_fields)
-
- with self.test_session() as sess:
+ def graph_fn(boxes, scores, masks, num_valid_boxes):
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- nmsed_masks, num_detections])
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertAllClose(num_detections, [1, 1])
- self.assertAllClose(nmsed_masks, exp_nms_masks)
+ nmsed_additional_fields, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size, max_total_size=max_output_size,
+ masks=masks, num_valid_boxes=num_valid_boxes)
+ self.assertIsNone(nmsed_additional_fields)
+ return (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ num_detections)
+
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ num_detections) = self.execute_cpu(graph_fn, [boxes, scores, masks,
+ num_valid_boxes])
+ self.assertAllClose(nmsed_boxes, exp_nms_corners)
+ self.assertAllClose(nmsed_scores, exp_nms_scores)
+ self.assertAllClose(nmsed_classes, exp_nms_classes)
+ self.assertAllClose(num_detections, [1, 1])
+ self.assertAllClose(nmsed_masks, exp_nms_masks)
def test_batch_multiclass_nms_with_additional_fields_and_num_valid_boxes(
self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- additional_fields = {
- 'keypoints': tf.constant(
- [[[[6, 7], [8, 9]],
- [[0, 1], [2, 3]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]],
- [[[13, 14], [15, 16]],
- [[8, 9], [10, 11]],
- [[10, 11], [12, 13]],
- [[0, 0], [0, 0]]]],
- tf.float32)
- }
-
- additional_fields['size'] = tf.constant(
+ boxes = np.array([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+ [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+ [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11], [0, 10, 1, 11]]],
+ [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101], [0, 100, 1, 101]],
+ [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+ np.float32)
+ scores = np.array([[[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0]],
+ [[.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]]], np.float32)
+ keypoints = np.array(
+ [[[[6, 7], [8, 9]],
+ [[0, 1], [2, 3]],
+ [[0, 0], [0, 0]],
+ [[0, 0], [0, 0]]],
+ [[[13, 14], [15, 16]],
+ [[8, 9], [10, 11]],
+ [[10, 11], [12, 13]],
+ [[0, 0], [0, 0]]]],
+ np.float32)
+ size = np.array(
[[[[7], [9]], [[1], [3]], [[0], [0]], [[0], [0]]],
- [[[14], [16]], [[9], [11]], [[11], [13]], [[0], [0]]]], tf.float32)
+ [[[14], [16]], [[9], [11]], [[11], [13]], [[0], [0]]]], np.float32)
- num_valid_boxes = tf.constant([1, 1], tf.int32)
+ num_valid_boxes = np.array([1, 1], np.int32)
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
@@ -641,45 +552,48 @@ class BatchMulticlassNonMaxSuppressionTest(test_case.TestCase,
[[0], [0]], [[0], [0]]],
[[[14], [16]], [[0], [0]],
[[0], [0]], [[0], [0]]]])
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- num_valid_boxes=num_valid_boxes,
- additional_fields=additional_fields)
-
- self.assertIsNone(nmsed_masks)
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- nmsed_additional_fields, num_detections])
-
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- for key in exp_nms_additional_fields:
- self.assertAllClose(nmsed_additional_fields[key],
- exp_nms_additional_fields[key])
- self.assertAllClose(num_detections, [1, 1])
+ def graph_fn(boxes, scores, keypoints, size, num_valid_boxes):
+ additional_fields = {'keypoints': keypoints, 'size': size}
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ nmsed_additional_fields, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size, max_total_size=max_output_size,
+ num_valid_boxes=num_valid_boxes,
+ additional_fields=additional_fields)
+ self.assertIsNone(nmsed_masks)
+ return (nmsed_boxes, nmsed_scores, nmsed_classes,
+ nmsed_additional_fields['keypoints'],
+ nmsed_additional_fields['size'], num_detections)
+
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_keypoints, nmsed_size,
+ num_detections) = self.execute_cpu(graph_fn, [boxes, scores, keypoints,
+ size, num_valid_boxes])
+
+ self.assertAllClose(nmsed_boxes, exp_nms_corners)
+ self.assertAllClose(nmsed_scores, exp_nms_scores)
+ self.assertAllClose(nmsed_classes, exp_nms_classes)
+ self.assertAllClose(nmsed_keypoints,
+ exp_nms_additional_fields['keypoints'])
+ self.assertAllClose(nmsed_size,
+ exp_nms_additional_fields['size'])
+ self.assertAllClose(num_detections, [1, 1])
def test_combined_nms_with_batch_size_2(self):
"""Test use_combined_nms."""
- boxes = tf.constant([[[[0, 0, 0.1, 0.1], [0, 0, 0.1, 0.1]],
- [[0, 0.01, 1, 0.11], [0, 0.6, 0.1, 0.7]],
- [[0, -0.01, 0.1, 0.09], [0, -0.1, 0.1, 0.09]],
- [[0, 0.11, 0.1, 0.2], [0, 0.11, 0.1, 0.2]]],
- [[[0, 0, 0.2, 0.2], [0, 0, 0.2, 0.2]],
- [[0, 0.02, 0.2, 0.22], [0, 0.02, 0.2, 0.22]],
- [[0, -0.02, 0.2, 0.19], [0, -0.02, 0.2, 0.19]],
- [[0, 0.21, 0.2, 0.3], [0, 0.21, 0.2, 0.3]]]],
- tf.float32)
- scores = tf.constant([[[.1, 0.9], [.75, 0.8],
- [.6, 0.3], [0.95, 0.1]],
- [[.1, 0.9], [.75, 0.8],
- [.6, .3], [.95, .1]]])
+ boxes = np.array([[[[0, 0, 0.1, 0.1], [0, 0, 0.1, 0.1]],
+ [[0, 0.01, 1, 0.11], [0, 0.6, 0.1, 0.7]],
+ [[0, -0.01, 0.1, 0.09], [0, -0.1, 0.1, 0.09]],
+ [[0, 0.11, 0.1, 0.2], [0, 0.11, 0.1, 0.2]]],
+ [[[0, 0, 0.2, 0.2], [0, 0, 0.2, 0.2]],
+ [[0, 0.02, 0.2, 0.22], [0, 0.02, 0.2, 0.22]],
+ [[0, -0.02, 0.2, 0.19], [0, -0.02, 0.2, 0.19]],
+ [[0, 0.21, 0.2, 0.3], [0, 0.21, 0.2, 0.3]]]],
+ np.float32)
+ scores = np.array([[[.1, 0.9], [.75, 0.8],
+ [.6, 0.3], [0.95, 0.1]],
+ [[.1, 0.9], [.75, 0.8],
+ [.6, .3], [.95, .1]]], np.float32)
score_thresh = 0.1
iou_thresh = .5
max_output_size = 3
@@ -695,27 +609,78 @@ class BatchMulticlassNonMaxSuppressionTest(test_case.TestCase,
exp_nms_classes = np.array([[0, 1, 1],
[0, 1, 0]])
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- use_static_shapes=True,
- use_combined_nms=True)
-
- self.assertIsNone(nmsed_masks)
- self.assertIsNone(nmsed_additional_fields)
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections])
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertListEqual(num_detections.tolist(), [3, 3])
-
- # TODO(bhattad): Remove conditional after CMLE moves to TF 1.9
+ def graph_fn(boxes, scores):
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ nmsed_additional_fields, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size, max_total_size=max_output_size,
+ use_static_shapes=True,
+ use_combined_nms=True)
+ self.assertIsNone(nmsed_masks)
+ self.assertIsNone(nmsed_additional_fields)
+ return (nmsed_boxes, nmsed_scores, nmsed_classes, num_detections)
+
+ (nmsed_boxes, nmsed_scores, nmsed_classes,
+ num_detections) = self.execute_cpu(graph_fn, [boxes, scores])
+ self.assertAllClose(nmsed_boxes, exp_nms_corners)
+ self.assertAllClose(nmsed_scores, exp_nms_scores)
+ self.assertAllClose(nmsed_classes, exp_nms_classes)
+ self.assertListEqual(num_detections.tolist(), [3, 3])
+
+ def test_batch_multiclass_nms_with_use_static_shapes(self):
+ boxes = np.array([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+ [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+ [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11], [0, 10, 1, 11]]],
+ [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101], [0, 100, 1, 101]],
+ [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+ np.float32)
+ scores = np.array([[[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0]],
+ [[.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]]],
+ np.float32)
+ clip_window = np.array([[0., 0., 5., 5.],
+ [0., 0., 200., 200.]],
+ np.float32)
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_output_size = 4
+
+ exp_nms_corners = np.array([[[0, 0, 1, 1],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0]],
+ [[0, 10.1, 1, 11.1],
+ [0, 100, 1, 101],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0]]])
+ exp_nms_scores = np.array([[.9, 0., 0., 0.],
+ [.5, .3, 0, 0]])
+ exp_nms_classes = np.array([[0, 0, 0, 0],
+ [0, 0, 0, 0]])
+
+ def graph_fn(boxes, scores, clip_window):
+ (nmsed_boxes, nmsed_scores, nmsed_classes, _, _, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size, clip_window=clip_window,
+ use_static_shapes=True)
+ return nmsed_boxes, nmsed_scores, nmsed_classes, num_detections
+
+ (nmsed_boxes, nmsed_scores, nmsed_classes,
+ num_detections) = self.execute(graph_fn, [boxes, scores, clip_window])
+ for i in range(len(num_detections)):
+ self.assertAllClose(nmsed_boxes[i, 0:num_detections[i]],
+ exp_nms_corners[i, 0:num_detections[i]])
+ self.assertAllClose(nmsed_scores[i, 0:num_detections[i]],
+ exp_nms_scores[i, 0:num_detections[i]])
+ self.assertAllClose(nmsed_classes[i, 0:num_detections[i]],
+ exp_nms_classes[i, 0:num_detections[i]])
+ self.assertAllClose(num_detections, [1, 2])
if __name__ == '__main__':
tf.test.main()
diff --git a/research/object_detection/core/batcher.py b/research/object_detection/core/batcher.py
index 832e22420d0cd5d3081805ff4cf0f7ae05c20f9e..26832e30efa43a15436070e8676b1d020712a794 100644
--- a/research/object_detection/core/batcher.py
+++ b/research/object_detection/core/batcher.py
@@ -24,10 +24,6 @@ from six.moves import range
import tensorflow.compat.v1 as tf
from object_detection.core import prefetcher
-from object_detection.utils import tf_version
-
-if not tf_version.is_tf1():
- raise ValueError('`batcher.py` is only supported in Tensorflow 1.X')
rt_shape_str = '_runtime_shapes'
diff --git a/research/object_detection/core/batcher_tf1_test.py b/research/object_detection/core/batcher_tf1_test.py
index 8f443a942c2af67650de67d2d0583df07e6e9e0e..1688b87cdf08bc29ddb2413776757066047c80da 100644
--- a/research/object_detection/core/batcher_tf1_test.py
+++ b/research/object_detection/core/batcher_tf1_test.py
@@ -19,14 +19,17 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import unittest
import numpy as np
from six.moves import range
import tensorflow.compat.v1 as tf
import tf_slim as slim
from object_detection.core import batcher
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class BatcherTest(tf.test.TestCase):
def test_batch_and_unpad_2d_tensors_of_different_sizes_in_1st_dimension(self):
diff --git a/research/object_detection/core/freezable_batch_norm_test.py b/research/object_detection/core/freezable_batch_norm_tf2_test.py
similarity index 98%
rename from research/object_detection/core/freezable_batch_norm_test.py
rename to research/object_detection/core/freezable_batch_norm_tf2_test.py
index 8379a38398414e89c611f2247209fb087fa9be31..4cc42ae3ef7da9b3412d2f461d7f9db62420e603 100644
--- a/research/object_detection/core/freezable_batch_norm_test.py
+++ b/research/object_detection/core/freezable_batch_norm_tf2_test.py
@@ -17,15 +17,17 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-
+import unittest
import numpy as np
from six.moves import zip
import tensorflow.compat.v1 as tf
from object_detection.core import freezable_batch_norm
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class FreezableBatchNormTest(tf.test.TestCase):
"""Tests for FreezableBatchNorm operations."""
diff --git a/research/object_detection/core/keypoint_ops.py b/research/object_detection/core/keypoint_ops.py
index e321783d986b3c330300f347158c261a7e3f94a6..1b0c4ccfed42aae492550331e870173c624f0316 100644
--- a/research/object_detection/core/keypoint_ops.py
+++ b/research/object_detection/core/keypoint_ops.py
@@ -217,7 +217,7 @@ def to_absolute_coordinates(keypoints, height, width,
return scale(keypoints, height, width)
-def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
+def flip_horizontal(keypoints, flip_point, flip_permutation=None, scope=None):
"""Flips the keypoints horizontally around the flip_point.
This operation flips the x coordinate for each keypoint around the flip_point
@@ -227,13 +227,14 @@ def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
flip_point: (float) scalar tensor representing the x coordinate to flip the
keypoints around.
- flip_permutation: rank 1 int32 tensor containing the keypoint flip
- permutation. This specifies the mapping from original keypoint indices
- to the flipped keypoint indices. This is used primarily for keypoints
- that are not reflection invariant. E.g. Suppose there are 3 keypoints
- representing ['head', 'right_eye', 'left_eye'], then a logical choice for
- flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
- and 'right_eye' after a horizontal flip.
+ flip_permutation: integer list or rank 1 int32 tensor containing the
+ keypoint flip permutation. This specifies the mapping from original
+ keypoint indices to the flipped keypoint indices. This is used primarily
+ for keypoints that are not reflection invariant. E.g. Suppose there are 3
+ keypoints representing ['head', 'right_eye', 'left_eye'], then a logical
+ choice for flip_permutation might be [0, 2, 1] since we want to swap the
+ 'left_eye' and 'right_eye' after a horizontal flip.
+ Defaults to None; None or an empty list keeps the original order.
scope: name scope.
Returns:
@@ -241,7 +242,8 @@ def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
"""
with tf.name_scope(scope, 'FlipHorizontal'):
keypoints = tf.transpose(keypoints, [1, 0, 2])
- keypoints = tf.gather(keypoints, flip_permutation)
+ if flip_permutation:
+ keypoints = tf.gather(keypoints, flip_permutation)
v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
u = flip_point * 2.0 - u
new_keypoints = tf.concat([v, u], 2)
@@ -249,7 +251,7 @@ def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
return new_keypoints
-def flip_vertical(keypoints, flip_point, flip_permutation, scope=None):
+def flip_vertical(keypoints, flip_point, flip_permutation=None, scope=None):
"""Flips the keypoints vertically around the flip_point.
This operation flips the y coordinate for each keypoint around the flip_point
@@ -259,13 +261,14 @@ def flip_vertical(keypoints, flip_point, flip_permutation, scope=None):
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
flip_point: (float) scalar tensor representing the y coordinate to flip the
keypoints around.
- flip_permutation: rank 1 int32 tensor containing the keypoint flip
- permutation. This specifies the mapping from original keypoint indices
- to the flipped keypoint indices. This is used primarily for keypoints
- that are not reflection invariant. E.g. Suppose there are 3 keypoints
- representing ['head', 'right_eye', 'left_eye'], then a logical choice for
- flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
- and 'right_eye' after a horizontal flip.
+ flip_permutation: integer list or rank 1 int32 tensor containing the
+ keypoint flip permutation. This specifies the mapping from original
+ keypoint indices to the flipped keypoint indices. This is used primarily
+ for keypoints that are not reflection invariant. E.g. Suppose there are 3
+ keypoints representing ['head', 'right_eye', 'left_eye'], then a logical
+ choice for flip_permutation might be [0, 2, 1] since we want to swap the
+ 'left_eye' and 'right_eye' after a horizontal flip.
+ Defaults to None; None or an empty list keeps the original order.
scope: name scope.
Returns:
@@ -273,7 +276,8 @@ def flip_vertical(keypoints, flip_point, flip_permutation, scope=None):
"""
with tf.name_scope(scope, 'FlipVertical'):
keypoints = tf.transpose(keypoints, [1, 0, 2])
- keypoints = tf.gather(keypoints, flip_permutation)
+ if flip_permutation:
+ keypoints = tf.gather(keypoints, flip_permutation)
v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
v = flip_point * 2.0 - v
new_keypoints = tf.concat([v, u], 2)
@@ -281,18 +285,24 @@ def flip_vertical(keypoints, flip_point, flip_permutation, scope=None):
return new_keypoints
-def rot90(keypoints, scope=None):
+def rot90(keypoints, rotation_permutation=None, scope=None):
"""Rotates the keypoints counter-clockwise by 90 degrees.
Args:
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+ rotation_permutation: integer list or rank 1 int32 tensor containing the
+ keypoint rotation permutation. This specifies the mapping from original
+ keypoint indices to the rotated keypoint indices. This is used primarily
+ for keypoints that are not rotation invariant.
+ Defaults to None; None or an empty list keeps the original order.
scope: name scope.
-
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
with tf.name_scope(scope, 'Rot90'):
keypoints = tf.transpose(keypoints, [1, 0, 2])
+ if rotation_permutation:
+ keypoints = tf.gather(keypoints, rotation_permutation)
v, u = tf.split(value=keypoints[:, :, ::-1], num_or_size_splits=2, axis=2)
v = 1.0 - v
new_keypoints = tf.concat([v, u], 2)
diff --git a/research/object_detection/core/keypoint_ops_test.py b/research/object_detection/core/keypoint_ops_test.py
index 695e8fa1c6efcac8900577cd4657393b01d6d8d1..bbdcf01940dcaf96da283bd6bcf73e91b633f0ee 100644
--- a/research/object_detection/core/keypoint_ops_test.py
+++ b/research/object_detection/core/keypoint_ops_test.py
@@ -180,6 +180,21 @@ class KeypointOpsTest(test_case.TestCase):
[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
[[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]
])
+ expected_keypoints = tf.constant([
+ [[0.1, 0.9], [0.2, 0.8], [0.3, 0.7]],
+ [[0.4, 0.6], [0.5, 0.5], [0.6, 0.4]],
+ ])
+ output = keypoint_ops.flip_horizontal(keypoints, 0.5)
+ return output, expected_keypoints
+
+ output, expected_keypoints = self.execute(graph_fn, [])
+ self.assertAllClose(output, expected_keypoints)
+
+ def test_flip_horizontal_permutation(self):
+
+ def graph_fn():
+ keypoints = tf.constant([[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+ [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]])
flip_permutation = [0, 2, 1]
expected_keypoints = tf.constant([
@@ -197,6 +212,22 @@ class KeypointOpsTest(test_case.TestCase):
[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
[[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]
])
+
+ expected_keypoints = tf.constant([
+ [[0.9, 0.1], [0.8, 0.2], [0.7, 0.3]],
+ [[0.6, 0.4], [0.5, 0.5], [0.4, 0.6]],
+ ])
+ output = keypoint_ops.flip_vertical(keypoints, 0.5)
+ return output, expected_keypoints
+
+ output, expected_keypoints = self.execute(graph_fn, [])
+ self.assertAllClose(output, expected_keypoints)
+
+ def test_flip_vertical_permutation(self):
+
+ def graph_fn():
+ keypoints = tf.constant([[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+ [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]])
flip_permutation = [0, 2, 1]
expected_keypoints = tf.constant([
@@ -223,6 +254,23 @@ class KeypointOpsTest(test_case.TestCase):
output, expected_keypoints = self.execute(graph_fn, [])
self.assertAllClose(output, expected_keypoints)
+ def test_rot90_permutation(self):
+
+ def graph_fn():
+ keypoints = tf.constant([[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+ [[0.4, 0.6], [0.5, 0.6], [0.6, 0.7]]])
+ rot_permutation = [0, 2, 1]
+ expected_keypoints = tf.constant([
+ [[0.9, 0.1], [0.7, 0.3], [0.8, 0.2]],
+ [[0.4, 0.4], [0.3, 0.6], [0.4, 0.5]],
+ ])
+ output = keypoint_ops.rot90(keypoints,
+ rotation_permutation=rot_permutation)
+ return output, expected_keypoints
+
+ output, expected_keypoints = self.execute(graph_fn, [])
+ self.assertAllClose(output, expected_keypoints)
+
def test_keypoint_weights_from_visibilities(self):
def graph_fn():
keypoint_visibilities = tf.constant([
diff --git a/research/object_detection/core/losses.py b/research/object_detection/core/losses.py
index 07e7dd3ff4c6d69d32ac11a305551fb716262eb2..c4d499e7e6c4ed5da803c48ff3d8908e713a3c2e 100644
--- a/research/object_detection/core/losses.py
+++ b/research/object_detection/core/losses.py
@@ -681,3 +681,95 @@ class HardExampleMiner(object):
num_positives, num_negatives)
+class PenaltyReducedLogisticFocalLoss(Loss):
+ """Penalty-reduced pixelwise logistic regression with focal loss.
+
+ The loss is defined in Equation (1) of the Objects as Points[1] paper.
+ Although the loss is defined per-pixel in the output space, this class
+ assumes that each pixel is an anchor to be compatible with the base class.
+
+ [1]: https://arxiv.org/abs/1904.07850
+ """
+
+ def __init__(self, alpha=2.0, beta=4.0, sigmoid_clip_value=1e-4):
+ """Constructor.
+
+ Args:
+ alpha: Focussing parameter of the focal loss. Increasing this will
+ decrease the loss contribution of the well classified examples.
+ beta: The local penalty reduction factor. Increasing this will decrease
+ the contribution of loss due to negative pixels near the keypoint.
+ sigmoid_clip_value: The sigmoid operation used internally will be clipped
+ between [sigmoid_clip_value, 1 - sigmoid_clip_value].
+ """
+ self._alpha = alpha
+ self._beta = beta
+ self._sigmoid_clip_value = sigmoid_clip_value
+ super(PenaltyReducedLogisticFocalLoss, self).__init__()
+
+ def _compute_loss(self, prediction_tensor, target_tensor, weights):
+ """Compute loss function.
+
+ In all input tensors, `num_anchors` is the total number of pixels in the
+ output space.
+
+ Args:
+ prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+ num_classes] representing the predicted unscaled logits for each class.
+ The function will compute sigmoid on this tensor internally.
+ target_tensor: A float tensor of shape [batch_size, num_anchors,
+ num_classes] representing a tensor with the 'splatted' keypoints,
+ possibly using a gaussian kernel. This function assumes that
+ the target is bounded between [0, 1].
+ weights: a float tensor of shape, either [batch_size, num_anchors,
+ num_classes] or [batch_size, num_anchors, 1]. If the shape is
+ [batch_size, num_anchors, 1], all the classes are equally weighted.
+
+
+ Returns:
+ loss: a float tensor of shape [batch_size, num_anchors, num_classes]
+ representing the value of the loss function.
+ """
+
+ is_present_tensor = tf.math.equal(target_tensor, 1.0)
+ prediction_tensor = tf.clip_by_value(tf.sigmoid(prediction_tensor),
+ self._sigmoid_clip_value,
+ 1 - self._sigmoid_clip_value)
+
+ positive_loss = (tf.math.pow((1 - prediction_tensor), self._alpha)*
+ tf.math.log(prediction_tensor))
+ negative_loss = (tf.math.pow((1 - target_tensor), self._beta)*
+ tf.math.pow(prediction_tensor, self._alpha)*
+ tf.math.log(1 - prediction_tensor))
+
+ loss = -tf.where(is_present_tensor, positive_loss, negative_loss)
+ return loss * weights
+
+
+class L1LocalizationLoss(Loss):
+ """L1 loss or absolute difference.
+
+ When used in a per-pixel manner, each pixel should be given as an anchor.
+ """
+
+ def _compute_loss(self, prediction_tensor, target_tensor, weights):
+ """Compute loss function.
+
+ Args:
+ prediction_tensor: A float tensor of shape [batch_size, num_anchors]
+ representing the (encoded) predicted locations of objects.
+ target_tensor: A float tensor of shape [batch_size, num_anchors]
+ representing the regression targets
+ weights: a float tensor of shape [batch_size, num_anchors]
+
+ Returns:
+ loss: a float tensor of shape [batch_size, num_anchors]
+ representing the value of the loss function.
+ """
+ return tf.losses.absolute_difference(
+ target_tensor,
+ prediction_tensor,
+ weights=weights,
+ loss_collection=None,
+ reduction=tf.losses.Reduction.NONE
+ )
diff --git a/research/object_detection/core/model.py b/research/object_detection/core/model.py
index 0430b37b5c31c6e1ce9604898aaa8e73319400f8..437ed08e1f7e1ddcc053085010ea7be5378e7be9 100644
--- a/research/object_detection/core/model.py
+++ b/research/object_detection/core/model.py
@@ -391,7 +391,9 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
pass
@abc.abstractmethod
- def restore_map(self, fine_tune_checkpoint_type='detection'):
+ def restore_map(self,
+ fine_tune_checkpoint_type='detection',
+ load_all_detection_checkpoint_vars=False):
"""Returns a map of variables to load from a foreign checkpoint.
Returns a map of variable names to load from a checkpoint to variables in
@@ -407,6 +409,9 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
+ load_all_detection_checkpoint_vars: whether to load all variables (when
+ `fine_tune_checkpoint_type` is `detection`). If False, only variables
+ within the feature extractor scope are included. Default False.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
@@ -414,6 +419,36 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
"""
pass
+ @abc.abstractmethod
+ def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
+ """Returns a map of Trackable objects to load from a foreign checkpoint.
+
+ Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
+ or Checkpoint). This enables the model to initialize based on weights from
+ another task. For example, the feature extractor variables from a
+ classification model can be used to bootstrap training of an object
+ detector. When loading from an object detection model, the checkpoint model
+ should have the same parameters as this detection model with exception of
+ the num_classes parameter.
+
+ Note that this function is intended to be used to restore Keras-based
+ models when running Tensorflow 2, whereas restore_map (above) is intended
+ to be used to restore Slim-based models when running Tensorflow 1.x.
+
+ TODO(jonathanhuang,rathodv): Check tf_version and raise unimplemented
+ error for both restore_map and restore_from_objects depending on version.
+
+ Args:
+ fine_tune_checkpoint_type: whether to restore from a full detection
+ checkpoint (with compatible variable names) or to restore from a
+ classification checkpoint for initialization prior to training.
+ Valid values: `detection`, `classification`. Default 'detection'.
+
+ Returns:
+ A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
+ """
+ pass
+
@abc.abstractmethod
def updates(self):
"""Returns a list of update operators for this model.
diff --git a/research/object_detection/core/model_test.py b/research/object_detection/core/model_test.py
index 2bb1ab343a6634ffc8df9f71378e83371921da7a..fcc36c03d4a77a78193975766b5e96b37a32b075 100644
--- a/research/object_detection/core/model_test.py
+++ b/research/object_detection/core/model_test.py
@@ -57,6 +57,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self):
return {}
+ def restore_from_objects(self, fine_tune_checkpoint_type):
+ pass
+
def regularization_losses(self):
return []
diff --git a/research/object_detection/core/prefetcher.py b/research/object_detection/core/prefetcher.py
index f88fbbd39258b0cc79b5ff2fb6bbad8f4373abdf..31e93eae80e25abde3166a56d212645ed4f17a5a 100644
--- a/research/object_detection/core/prefetcher.py
+++ b/research/object_detection/core/prefetcher.py
@@ -16,10 +16,6 @@
"""Provides functions to prefetch tensors to feed into models."""
import tensorflow.compat.v1 as tf
-from object_detection.utils import tf_version
-if not tf_version.is_tf1():
- raise ValueError('`prefetcher.py` is only supported in Tensorflow 1.X')
-
def prefetch(tensor_dict, capacity):
"""Creates a prefetch queue for tensors.
diff --git a/research/object_detection/core/prefetcher_tf1_test.py b/research/object_detection/core/prefetcher_tf1_test.py
index 3c827d8000e5d74a05c37a637aa6f7013e3e1cee..95e9155e5e38c762cee915389f55f0cc69334ae9 100644
--- a/research/object_detection/core/prefetcher_tf1_test.py
+++ b/research/object_detection/core/prefetcher_tf1_test.py
@@ -18,16 +18,16 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import unittest
from six.moves import range
import tensorflow.compat.v1 as tf
-
-# pylint: disable=g-bad-import-order,
-from object_detection.core import prefetcher
import tf_slim as slim
-# pylint: disable=g-bad-import-order
+from object_detection.core import prefetcher
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class PrefetcherTest(tf.test.TestCase):
"""Test class for prefetcher."""
diff --git a/research/object_detection/core/preprocessor.py b/research/object_detection/core/preprocessor.py
index 8b8fdff5e5446f0739396eafd10b4b5d39bd14b5..a1e7ed0288692d8ad8aeb852ebef00462c3a91cd 100644
--- a/research/object_detection/core/preprocessor.py
+++ b/research/object_detection/core/preprocessor.py
@@ -569,12 +569,11 @@ def random_horizontal_flip(image,
keypoints=None,
keypoint_visibilities=None,
keypoint_flip_permutation=None,
+ probability=0.5,
seed=None,
preprocess_vars_cache=None):
"""Randomly flips the image and detections horizontally.
- The probability of flipping the image is 50%.
-
Args:
image: rank 3 float32 tensor with shape [height, width, channels].
boxes: (optional) rank 2 float32 tensor with shape [N, 4]
@@ -592,6 +591,7 @@ def random_horizontal_flip(image,
[num_instances, num_keypoints].
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation.
+ probability: the probability of performing this augmentation.
seed: random seed
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
@@ -636,7 +636,7 @@ def random_horizontal_flip(image,
generator_func,
preprocessor_cache.PreprocessorCache.HORIZONTAL_FLIP,
preprocess_vars_cache)
- do_a_flip_random = tf.greater(do_a_flip_random, 0.5)
+ do_a_flip_random = tf.less(do_a_flip_random, probability)
# flip image
image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
@@ -682,6 +682,7 @@ def random_vertical_flip(image,
masks=None,
keypoints=None,
keypoint_flip_permutation=None,
+ probability=0.5,
seed=None,
preprocess_vars_cache=None):
"""Randomly flips the image and detections vertically.
@@ -703,6 +704,7 @@ def random_vertical_flip(image,
normalized coordinates.
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation.
+ probability: the probability of performing this augmentation.
seed: random seed
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
@@ -743,7 +745,7 @@ def random_vertical_flip(image,
do_a_flip_random = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.VERTICAL_FLIP,
preprocess_vars_cache)
- do_a_flip_random = tf.greater(do_a_flip_random, 0.5)
+ do_a_flip_random = tf.less(do_a_flip_random, probability)
# flip image
image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
@@ -777,6 +779,8 @@ def random_rotation90(image,
boxes=None,
masks=None,
keypoints=None,
+ keypoint_rot_permutation=None,
+ probability=0.5,
seed=None,
preprocess_vars_cache=None):
"""Randomly rotates the image and detections 90 degrees counter-clockwise.
@@ -799,6 +803,9 @@ def random_rotation90(image,
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
+ keypoint_rot_permutation: rank 1 int32 tensor containing the keypoint
+ rotation permutation.
+ probability: the probability of performing this augmentation.
seed: random seed
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
@@ -833,7 +840,7 @@ def random_rotation90(image,
do_a_rot90_random = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.ROTATION90,
preprocess_vars_cache)
- do_a_rot90_random = tf.greater(do_a_rot90_random, 0.5)
+ do_a_rot90_random = tf.less(do_a_rot90_random, probability)
# flip image
image = tf.cond(do_a_rot90_random, lambda: _rot90_image(image),
@@ -856,7 +863,7 @@ def random_rotation90(image,
if keypoints is not None:
keypoints = tf.cond(
do_a_rot90_random,
- lambda: keypoint_ops.rot90(keypoints),
+ lambda: keypoint_ops.rot90(keypoints, keypoint_rot_permutation),
lambda: keypoints)
result.append(keypoints)
diff --git a/research/object_detection/core/preprocessor_test.py b/research/object_detection/core/preprocessor_test.py
index a535ce207aae27afafc33308704f3c28c22b5619..5ebfe9eefe1a172e02f747ca81918612efe4792c 100644
--- a/research/object_detection/core/preprocessor_test.py
+++ b/research/object_detection/core/preprocessor_test.py
@@ -19,6 +19,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import unittest
from absl.testing import parameterized
import numpy as np
import six
@@ -30,11 +31,12 @@ from object_detection.core import preprocessor
from object_detection.core import preprocessor_cache
from object_detection.core import standard_fields as fields
from object_detection.utils import test_case
+from object_detection.utils import tf_version
if six.PY2:
import mock # pylint: disable=g-import-not-at-top
else:
- from unittest import mock # pylint: disable=g-import-not-at-top
+ mock = unittest.mock # pylint: disable=g-import-not-at-top
class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
@@ -118,7 +120,10 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
return tf.constant(keypoints, dtype=tf.float32)
def createKeypointFlipPermutation(self):
- return np.array([0, 2, 1], dtype=np.int32)
+ return [0, 2, 1]
+
+ def createKeypointRotPermutation(self):
+ return [0, 2, 1]
def createTestLabels(self):
labels = tf.constant([1, 2], dtype=tf.int32)
@@ -910,19 +915,22 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
test_keypoints=True)
def testRunRandomRotation90WithMaskAndKeypoints(self):
- preprocess_options = [(preprocessor.random_rotation90, {})]
image_height = 3
image_width = 3
images = tf.random_uniform([1, image_height, image_width, 3])
boxes = self.createTestBoxes()
masks = self.createTestMasks()
keypoints, _ = self.createTestKeypoints()
+ keypoint_rot_permutation = self.createKeypointRotPermutation()
tensor_dict = {
fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_instance_masks: masks,
fields.InputDataFields.groundtruth_keypoints: keypoints
}
+ preprocess_options = [(preprocessor.random_rotation90, {
+ 'keypoint_rot_permutation': keypoint_rot_permutation
+ })]
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_instance_masks=True, include_keypoints=True)
tensor_dict = preprocessor.preprocess(
@@ -2819,6 +2827,7 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
self.assertAllEqual(images_shape, patched_images_shape)
self.assertAllEqual(images, patched_images)
+ @unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
def testAutoAugmentImage(self):
def graph_fn():
preprocessing_options = []
diff --git a/research/object_detection/core/standard_fields.py b/research/object_detection/core/standard_fields.py
index df995b4a429ec4e587d83cf8a94fb8c223ad4dca..fcfb97ae875e9cf97c195a8a998543879f202c57 100644
--- a/research/object_detection/core/standard_fields.py
+++ b/research/object_detection/core/standard_fields.py
@@ -66,6 +66,11 @@ class InputDataFields(object):
groundtruth_keypoint_weights: groundtruth weight factor for keypoints.
groundtruth_label_weights: groundtruth label weights.
groundtruth_weights: groundtruth weight factor for bounding boxes.
+ groundtruth_dp_num_points: The number of DensePose sampled points for each
+ instance.
+ groundtruth_dp_part_ids: Part indices for DensePose points.
+ groundtruth_dp_surface_coords: Image locations and UV coordinates for
+ DensePose points.
num_groundtruth_boxes: number of groundtruth boxes.
is_annotated: whether an image has been labeled or not.
true_image_shapes: true shapes of images in the resized images, as resized
@@ -108,6 +113,9 @@ class InputDataFields(object):
groundtruth_keypoint_weights = 'groundtruth_keypoint_weights'
groundtruth_label_weights = 'groundtruth_label_weights'
groundtruth_weights = 'groundtruth_weights'
+ groundtruth_dp_num_points = 'groundtruth_dp_num_points'
+ groundtruth_dp_part_ids = 'groundtruth_dp_part_ids'
+ groundtruth_dp_surface_coords = 'groundtruth_dp_surface_coords'
num_groundtruth_boxes = 'num_groundtruth_boxes'
is_annotated = 'is_annotated'
true_image_shape = 'true_image_shape'
diff --git a/research/object_detection/core/target_assigner.py b/research/object_detection/core/target_assigner.py
index 3d5453bf25007666340ba131ebb08b847ca8ba55..fd9020ebeac12c2610449afcbdd1f29dd3237f85 100644
--- a/research/object_detection/core/target_assigner.py
+++ b/research/object_detection/core/target_assigner.py
@@ -50,10 +50,12 @@ from object_detection.core import matcher as mat
from object_detection.core import region_similarity_calculator as sim_calc
from object_detection.core import standard_fields as fields
from object_detection.matchers import argmax_matcher
-from object_detection.matchers import bipartite_matcher
from object_detection.utils import shape_utils
from object_detection.utils import target_assigner_utils as ta_utils
+from object_detection.utils import tf_version
+if tf_version.is_tf1():
+ from object_detection.matchers import bipartite_matcher # pylint: disable=g-import-not-at-top
ResizeMethod = tf2.image.ResizeMethod
@@ -398,6 +400,8 @@ def create_target_assigner(reference, stage=None,
ValueError: if combination reference+stage is invalid.
"""
if reference == 'Multibox' and stage == 'proposal':
+ if tf_version.is_tf2():
+ raise ValueError('GreedyBipartiteMatcher is not supported in TF 2.X.')
similarity_calc = sim_calc.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder_instance = mean_stddev_box_coder.MeanStddevBoxCoder()
@@ -713,3 +717,943 @@ def batch_assign_confidences(target_assigner,
batch_reg_weights, batch_match)
+def _smallest_positive_root(a, b, c):
+ """Returns the smallest positive root of a quadratic equation."""
+
+ discriminant = tf.sqrt(b ** 2 - 4 * a * c)
+
+ # TODO(vighneshb) We are currently using the slightly incorrect
+ # CenterNet implementation. The commented lines implement the fixed version
+ # in https://github.com/princeton-vl/CornerNet. Change the implementation
+ # after verifying it has no negative impact.
+ # root1 = (-b - discriminant) / (2 * a)
+ # root2 = (-b + discriminant) / (2 * a)
+
+ # return tf.where(tf.less(root1, 0), root2, root1)
+
+ return (-b + discriminant) / (2.0)
+
+
+def max_distance_for_overlap(height, width, min_iou):
+ """Computes how far apart bbox corners can lie while maintaining the iou.
+
+ Given a bounding box size, this function returns a lower bound on how far
+ apart the corners of another box can lie while still maintaining the given
+ IoU. The implementation is based on the `gaussian_radius` function in the
+ Objects as Points github repo: https://github.com/xingyizhou/CenterNet
+
+ Args:
+ height: A 1-D float Tensor representing height of the ground truth boxes.
+ width: A 1-D float Tensor representing width of the ground truth boxes.
+ min_iou: A float representing the minimum IoU desired.
+
+ Returns:
+ distance: A 1-D Tensor of distances, of the same length as the input
+ height and width tensors.
+ """
+
+ # Given that the detected box is displaced at a distance `d`, the exact
+ # IoU value will depend on the angle at which each corner is displaced.
+ # We simplify our computation by assuming that each corner is displaced by
+ # a distance `d` in both x and y direction. This gives us a lower IoU than
+ # what is actually realizable and ensures that any box with corners less
+ # than `d` distance apart will always have an IoU greater than or equal
+ # to `min_iou`
+
+ # The following 3 cases can be worked on geometrically and come down to
+ # solving a quadratic inequality. In each case, to ensure `min_iou` we use
+ # the smallest positive root of the equation.
+
+ # Case where detected box is offset from ground truth and no box completely
+ # contains the other.
+
+ distance_detection_offset = _smallest_positive_root(
+ a=1, b=-(height + width),
+ c=width * height * ((1 - min_iou) / (1 + min_iou))
+ )
+
+ # Case where detection is smaller than ground truth and completely contained
+ # in it.
+ distance_detection_in_gt = _smallest_positive_root(
+ a=4, b=-2 * (height + width),
+ c=(1 - min_iou) * width * height
+ )
+
+ # Case where ground truth is smaller than detection and completely contained
+ # in it.
+ distance_gt_in_detection = _smallest_positive_root(
+ a=4 * min_iou, b=(2 * min_iou) * (width + height),
+ c=(min_iou - 1) * width * height
+ )
+
+ return tf.reduce_min([distance_detection_offset,
+ distance_gt_in_detection,
+ distance_detection_in_gt], axis=0)
+
+
+def get_batch_predictions_from_indices(batch_predictions, indices):
+ """Gets the values of predictions in a batch at the given indices.
+
+ The indices are expected to come from the offset targets generation functions
+ in this library. The returned value is intended to be used inside a loss
+ function.
+
+ Args:
+ batch_predictions: A tensor of shape [batch_size, height, width, 2] for
+ single class offsets and [batch_size, height, width, class, 2] for
+ multiple classes offsets (e.g. keypoint joint offsets) representing the
+ (height, width) or (y_offset, x_offset) predictions over a batch.
+ indices: A tensor of shape [num_instances, 3] for single class offset and
+ [num_instances, 4] for multiple classes offsets representing the indices
+ in the batch to be penalized in a loss function
+
+ Returns:
+ values: A tensor of shape [num_instances, 2] holding the predicted values
+ at the given indices.
+ """
+ return tf.gather_nd(batch_predictions, indices)
+
+
+def _compute_std_dev_from_box_size(boxes_height, boxes_width, min_overlap):
+ """Computes the standard deviation of the Gaussian kernel from box size.
+
+ Args:
+ boxes_height: A 1D tensor with shape [num_instances] representing the height
+ of each box.
+ boxes_width: A 1D tensor with shape [num_instances] representing the width
+ of each box.
+ min_overlap: The minimum IOU overlap that boxes need to have to not be
+ penalized.
+
+ Returns:
+ A 1D tensor with shape [num_instances] representing the computed Gaussian
+ sigma for each box.
+ """
+ # We are dividing by 3 so that points closer than the computed
+ # distance have a >99% CDF.
+ sigma = max_distance_for_overlap(boxes_height, boxes_width, min_overlap)
+ sigma = (2 * tf.math.maximum(tf.math.floor(sigma), 0.0) + 1) / 6.0
+ return sigma
+
+
+class CenterNetCenterHeatmapTargetAssigner(object):
+ """Wrapper to compute the object center heatmap."""
+
+ def __init__(self, stride, min_overlap=0.7):
+ """Initializes the target assigner.
+
+ Args:
+ stride: int, the stride of the network in output pixels.
+ min_overlap: The minimum IOU overlap that boxes need to have to not be
+ penalized.
+ """
+
+ self._stride = stride
+ self._min_overlap = min_overlap
+
+ def assign_center_targets_from_boxes(self,
+ height,
+ width,
+ gt_boxes_list,
+ gt_classes_list,
+ gt_weights_list=None):
+ """Computes the object center heatmap target.
+
+ Args:
+ height: int, height of input to the model. This is used to
+ determine the height of the output.
+ width: int, width of the input to the model. This is used to
+ determine the width of the output.
+ gt_boxes_list: A list of float tensors with shape [num_boxes, 4]
+ representing the groundtruth detection bounding boxes for each sample in
+ the batch. The box coordinates are expected in normalized coordinates.
+ gt_classes_list: A list of float tensors with shape [num_boxes,
+ num_classes] representing the one-hot encoded class labels for each box
+ in the gt_boxes_list.
+ gt_weights_list: A list of float tensors with shape [num_boxes]
+ representing the weight of each groundtruth detection box.
+
+ Returns:
+ heatmap: A Tensor of size [batch_size, output_height, output_width,
+ num_classes] representing the per class center heatmap. output_height
+ and output_width are computed by dividing the input height and width by
+ the stride specified during initialization.
+ """
+
+ out_height = tf.cast(height // self._stride, tf.float32)
+ out_width = tf.cast(width // self._stride, tf.float32)
+ # Compute the yx-grid to be used to generate the heatmap. Each returned
+ # tensor has shape of [out_height, out_width]
+ (y_grid, x_grid) = ta_utils.image_shape_to_grids(out_height, out_width)
+
+ heatmaps = []
+ if gt_weights_list is None:
+ gt_weights_list = [None] * len(gt_boxes_list)
+ # TODO(vighneshb) Replace the for loop with a batch version.
+ for boxes, class_targets, weights in zip(gt_boxes_list, gt_classes_list,
+ gt_weights_list):
+ boxes = box_list.BoxList(boxes)
+ # Convert the box coordinates to absolute output image dimension space.
+ boxes = box_list_ops.to_absolute_coordinates(boxes,
+ height // self._stride,
+ width // self._stride)
+ # Get the box center coordinates. Each returned tensor has the shape of
+ # [num_instances]
+ (y_center, x_center, boxes_height,
+ boxes_width) = boxes.get_center_coordinates_and_sizes()
+
+ # Compute the sigma from box size. The tensor shape: [num_instances].
+ sigma = _compute_std_dev_from_box_size(boxes_height, boxes_width,
+ self._min_overlap)
+ # Apply the Gaussian kernel to the center coordinates. Returned heatmap
+ # has shape of [out_height, out_width, num_classes]
+ heatmap = ta_utils.coordinates_to_heatmap(
+ y_grid=y_grid,
+ x_grid=x_grid,
+ y_coordinates=y_center,
+ x_coordinates=x_center,
+ sigma=sigma,
+ channel_onehot=class_targets,
+ channel_weights=weights)
+ heatmaps.append(heatmap)
+
+ # Return the stacked heatmaps over the batch.
+ return tf.stack(heatmaps, axis=0)
+
+
+class CenterNetBoxTargetAssigner(object):
+ """Wrapper to compute target tensors for the object detection task.
+
+ This class has methods that take as input a batch of ground truth tensors
+ (in the form of a list) and return the targets required to train the object
+ detection task.
+ """
+
+ def __init__(self, stride):
+ """Initializes the target assigner.
+
+ Args:
+ stride: int, the stride of the network in output pixels.
+ """
+
+ self._stride = stride
+
+ def assign_size_and_offset_targets(self,
+ height,
+ width,
+ gt_boxes_list,
+ gt_weights_list=None):
+ """Returns the box height/width and center offset targets and their indices.
+
+ The returned values are expected to be used with predicted tensors
+ of size (batch_size, height//self._stride, width//self._stride, 2). The
+ predicted values at the relevant indices can be retrieved with the
+ get_batch_predictions_from_indices function.
+
+ Args:
+ height: int, height of input to the model. This is used to determine the
+ height of the output.
+ width: int, width of the input to the model. This is used to determine the
+ width of the output.
+ gt_boxes_list: A list of float tensors with shape [num_boxes, 4]
+ representing the groundtruth detection bounding boxes for each sample in
+ the batch. The coordinates are expected in normalized coordinates.
+ gt_weights_list: A list of tensors with shape [num_boxes] corresponding to
+ the weight of each groundtruth detection box.
+
+ Returns:
+ batch_indices: an integer tensor of shape [num_boxes, 3] holding the
+ indices inside the predicted tensor which should be penalized. The
+ first column indicates the index along the batch dimension and the
+ second and third columns indicate the index along the y and x
+ dimensions respectively.
+ batch_box_height_width: a float tensor of shape [num_boxes, 2] holding
+ expected height and width of each box in the output space.
+ batch_offsets: a float tensor of shape [num_boxes, 2] holding the
+ expected y and x offset of each box in the output space.
+ batch_weights: a float tensor of shape [num_boxes] indicating the
+ weight of each prediction.
+ """
+
+ if gt_weights_list is None:
+ gt_weights_list = [None] * len(gt_boxes_list)
+
+ batch_indices = []
+ batch_box_height_width = []
+ batch_weights = []
+ batch_offsets = []
+
+ for i, (boxes, weights) in enumerate(zip(gt_boxes_list, gt_weights_list)):
+ boxes = box_list.BoxList(boxes)
+ boxes = box_list_ops.to_absolute_coordinates(boxes,
+ height // self._stride,
+ width // self._stride)
+ # Get the box center coordinates. Each returned tensor has the shape of
+ # [num_boxes]
+ (y_center, x_center, boxes_height,
+ boxes_width) = boxes.get_center_coordinates_and_sizes()
+ num_boxes = tf.shape(x_center)
+
+ # Compute the offsets and indices of the box centers. Shape:
+ # offsets: [num_boxes, 2]
+ # indices: [num_boxes, 2]
+ (offsets, indices) = ta_utils.compute_floor_offsets_with_indices(
+ y_source=y_center, x_source=x_center)
+
+ # Assign ones if weights are not provided.
+ if weights is None:
+ weights = tf.ones(num_boxes, dtype=tf.float32)
+
+ # Shape of [num_boxes, 1] integer tensor filled with current batch index.
+ batch_index = i * tf.ones_like(indices[:, 0:1], dtype=tf.int32)
+ batch_indices.append(tf.concat([batch_index, indices], axis=1))
+ batch_box_height_width.append(
+ tf.stack([boxes_height, boxes_width], axis=1))
+ batch_weights.append(weights)
+ batch_offsets.append(offsets)
+
+ batch_indices = tf.concat(batch_indices, axis=0)
+ batch_box_height_width = tf.concat(batch_box_height_width, axis=0)
+ batch_weights = tf.concat(batch_weights, axis=0)
+ batch_offsets = tf.concat(batch_offsets, axis=0)
+ return (batch_indices, batch_box_height_width, batch_offsets, batch_weights)
+
+
+# TODO(yuhuic): Update this class to handle the instance/keypoint weights.
+# Currently those weights are used as "mask" to indicate whether an
+# instance/keypoint should be considered or not (expecting only either 0 or 1
+# value). In reality, the weights can be any value and this class should handle
+# those values properly.
+class CenterNetKeypointTargetAssigner(object):
+ """Wrapper to compute target tensors for the CenterNet keypoint estimation.
+
+ This class has methods that take as input a batch of groundtruth tensors
+ (in the form of a list) and returns the targets required to train the
+ CenterNet model for keypoint estimation. Specifically, the class methods
+ expect the groundtruth in the following formats (consistent with the
+ standard Object Detection API). Note that usually the groundtruth tensors are
+ packed with a list which represents the batch dimension:
+
+ gt_classes_list: [Required] a list of 2D tf.float32 one-hot
+ (or k-hot) tensors of shape [num_instances, num_classes] containing the
+ class targets with the 0th index assumed to map to the first non-background
+ class.
+ gt_keypoints_list: [Required] a list of 3D tf.float32 tensors of
+ shape [num_instances, num_total_keypoints, 2] containing keypoint
+ coordinates. Note that the "num_total_keypoints" should be the sum of the
+ num_keypoints over all possible keypoint types, e.g. human pose, face.
+ For example, if a dataset contains both 17 human pose keypoints and 5 face
+ keypoints, then num_total_keypoints = 17 + 5 = 22.
+ If an instance contains only a subset of keypoints (e.g. human pose keypoints
+ but not face keypoints), the face keypoints will be filled with zeros.
+ Also note that keypoints are assumed to be provided in normalized
+ coordinates and missing keypoints should be encoded as NaN.
+ gt_keypoints_weights_list: [Optional] a list of 2D tf.float32 tensors of shape
+ [num_instances, num_total_keypoints] representing the weights of each
+ keypoints. If not provided, then all not NaN keypoints will be equally
+ weighted.
+ gt_boxes_list: [Optional] a list of 2D tf.float32 tensors of shape
+ [num_instances, 4] containing coordinates of the groundtruth boxes.
+ Groundtruth boxes are provided in [y_min, x_min, y_max, x_max] format and
+ assumed to be normalized and clipped relative to the image window with
+ y_min <= y_max and x_min <= x_max.
+ Note that the boxes are only used to compute the center targets but are not
+ considered as required output of the keypoint task. If the boxes were not
+ provided, the center targets will be inferred from the keypoints
+ [not implemented yet].
+ gt_weights_list: [Optional] A list of 1D tf.float32 tensors of shape
+ [num_instances] containing weights for groundtruth boxes. Only useful when
+ gt_boxes_list is also provided.
+ """
+
+ def __init__(self,
+ stride,
+ class_id,
+ keypoint_indices,
+ keypoint_std_dev=None,
+ per_keypoint_offset=False,
+ peak_radius=0):
+ """Initializes a CenterNet keypoints target assigner.
+
+ Args:
+ stride: int, the stride of the network in output pixels.
+ class_id: int, the ID of the class (0-indexed) that contains the target
+ keypoints to consider in this task. For example, if the task is human
+ pose estimation, the class id should correspond to the "human" class.
+ keypoint_indices: A list of integers representing the indices of the
+ keypoints to be considered in this task. This is used to retrieve the
+ subset of the keypoints from gt_keypoints that should be considered in
+ this task.
+ keypoint_std_dev: A list of floats represent the standard deviation of the
+ Gaussian kernel used to generate the keypoint heatmap (in the unit of
+ output pixels). It is to provide the flexibility of using different
+ sizes of Gaussian kernel for each keypoint type. If not provided, then
+ all standard deviation will be the same as the default value (10.0 in
+ the output pixel space). If provided, the length of keypoint_std_dev
+ needs to be the same as the length of keypoint_indices, indicating the
+ standard deviation of each keypoint type.
+ per_keypoint_offset: boolean, indicating whether to assign offset for
+ each keypoint channel. If set False, the output offset target will have
+ the shape [batch_size, out_height, out_width, 2]. If set True, the
+ output offset target will have the shape [batch_size, out_height,
+ out_width, 2 * num_keypoints].
+ peak_radius: int, the radius (in the unit of output pixel) around heatmap
+ peak to assign the offset targets.
+ """
+
+ self._stride = stride
+ self._class_id = class_id
+ self._keypoint_indices = keypoint_indices
+ self._per_keypoint_offset = per_keypoint_offset
+ self._peak_radius = peak_radius
+ if keypoint_std_dev is None:
+ self._keypoint_std_dev = ([_DEFAULT_KEYPOINT_OFFSET_STD_DEV] *
+ len(keypoint_indices))
+ else:
+ assert len(keypoint_indices) == len(keypoint_std_dev)
+ self._keypoint_std_dev = keypoint_std_dev
+
+ def _preprocess_keypoints_and_weights(self, out_height, out_width, keypoints,
+ class_onehot, class_weights,
+ keypoint_weights):
+ """Preprocesses the keypoints and the corresponding keypoint weights.
+
+ This function performs several common steps to preprocess the keypoints and
+ keypoint weights features, including:
+ 1) Select the subset of keypoints based on the keypoint indices, fill the
+ keypoint NaN values with zeros and convert to absolute coordinates.
+ 2) Generate the weights of the keypoint using the following information:
+ a. The class of the instance.
+ b. The NaN value of the keypoint coordinates.
+ c. The provided keypoint weights.
+
+ Args:
+ out_height: An integer or an integer tensor indicating the output height
+ of the model.
+ out_width: An integer or an integer tensor indicating the output width of
+ the model.
+ keypoints: A float tensor of shape [num_instances, num_total_keypoints, 2]
+ representing the original keypoint groundtruth coordinates.
+ class_onehot: A float tensor of shape [num_instances, num_classes]
+ containing the class targets with the 0th index assumed to map to the
+ first non-background class.
+ class_weights: A float tensor of shape [num_instances] containing weights
+ for groundtruth instances.
+ keypoint_weights: A float tensor of shape
+ [num_instances, num_total_keypoints] representing the weights of each
+ keypoints.
+
+ Returns:
+ A tuple of two tensors:
+ keypoint_absolute: A float tensor of shape
+ [num_instances, num_keypoints, 2] which is the selected and updated
+ keypoint coordinates.
+ keypoint_weights: A float tensor of shape [num_instances, num_keypoints]
+ representing the updated weight of each keypoint.
+ """
+ # Select the targets keypoints by their type ids and generate the mask
+ # of valid elements.
+ valid_mask, keypoints = ta_utils.get_valid_keypoint_mask_for_class(
+ keypoint_coordinates=keypoints,
+ class_id=self._class_id,
+ class_onehot=class_onehot,
+ class_weights=class_weights,
+ keypoint_indices=self._keypoint_indices)
+ # Keypoint coordinates in absolute coordinate system.
+ # The shape of the tensors: [num_instances, num_keypoints, 2].
+ keypoints_absolute = keypoint_ops.to_absolute_coordinates(
+ keypoints, out_height, out_width)
+ # Assign default weights for the keypoints.
+ if keypoint_weights is None:
+ keypoint_weights = tf.ones_like(keypoints[:, :, 0])
+ else:
+ keypoint_weights = tf.gather(
+ keypoint_weights, indices=self._keypoint_indices, axis=1)
+ keypoint_weights = keypoint_weights * valid_mask
+ return keypoints_absolute, keypoint_weights
+
+ def assign_keypoint_heatmap_targets(self,
+ height,
+ width,
+ gt_keypoints_list,
+ gt_classes_list,
+ gt_keypoints_weights_list=None,
+ gt_weights_list=None,
+ gt_boxes_list=None):
+ """Returns the keypoint heatmap targets for the CenterNet model.
+
+ Args:
+ height: int, height of input to the CenterNet model. This is used to
+ determine the height of the output.
+ width: int, width of the input to the CenterNet model. This is used to
+ determine the width of the output.
+ gt_keypoints_list: A list of float tensors with shape [num_instances,
+ num_total_keypoints, 2]. See class-level description for more detail.
+ gt_classes_list: A list of float tensors with shape [num_instances,
+ num_classes]. See class-level description for more detail.
+ gt_keypoints_weights_list: A list of tensors with shape [num_instances,
+ num_total_keypoints] corresponding to the weight of each keypoint.
+ gt_weights_list: A list of float tensors with shape [num_instances]. See
+ class-level description for more detail.
+ gt_boxes_list: A list of float tensors with shape [num_instances, 4]. See
+ class-level description for more detail. If provided, the keypoint
+ standard deviations will be scaled based on the box sizes.
+
+ Returns:
+ heatmap: A float tensor of shape [batch_size, output_height, output_width,
+ num_keypoints] representing the per keypoint type center heatmap.
+ output_height and output_width are computed by dividing the input height
+ and width by the stride specified during initialization. Note that the
+ "num_keypoints" is defined by the length of keypoint_indices, which is
+ not necessarily equal to "num_total_keypoints".
+ num_instances_batch: A 2D int tensor of shape
+ [batch_size, num_keypoints] representing number of instances for each
+ keypoint type.
+ valid_mask: A float tensor with shape [batch_size, output_height,
+ output_width] where all values within the regions of the blackout boxes
+ are 0.0 and 1.0 elsewhere.
+ """
+ out_width = tf.cast(width // self._stride, tf.float32)
+ out_height = tf.cast(height // self._stride, tf.float32)
+ # Compute the yx-grid to be used to generate the heatmap. Each returned
+ # tensor has shape of [out_height, out_width]
+ y_grid, x_grid = ta_utils.image_shape_to_grids(out_height, out_width)
+
+ if gt_keypoints_weights_list is None:
+ gt_keypoints_weights_list = [None] * len(gt_keypoints_list)
+ if gt_weights_list is None:
+ gt_weights_list = [None] * len(gt_classes_list)
+ if gt_boxes_list is None:
+ gt_boxes_list = [None] * len(gt_keypoints_list)
+
+ heatmaps = []
+ num_instances_list = []
+ valid_mask_list = []
+ for keypoints, classes, kp_weights, weights, boxes in zip(
+ gt_keypoints_list, gt_classes_list, gt_keypoints_weights_list,
+ gt_weights_list, gt_boxes_list):
+ keypoints_absolute, kp_weights = self._preprocess_keypoints_and_weights(
+ out_height=out_height,
+ out_width=out_width,
+ keypoints=keypoints,
+ class_onehot=classes,
+ class_weights=weights,
+ keypoint_weights=kp_weights)
+ num_instances, num_keypoints, _ = (
+ shape_utils.combined_static_and_dynamic_shape(keypoints_absolute))
+
+ # A tensor of shape [num_instances, num_keypoints] with
+ # each element representing the type dimension for each corresponding
+ # keypoint:
+ # [[0, 1, ..., k-1],
+ # [0, 1, ..., k-1],
+ # :
+ # [0, 1, ..., k-1]]
+ keypoint_types = tf.tile(
+ input=tf.expand_dims(tf.range(num_keypoints), axis=0),
+ multiples=[num_instances, 1])
+
+ # A tensor of shape [num_instances, num_keypoints] with
+ # each element representing the sigma of the Gaussian kernel for each
+ # keypoint.
+ keypoint_std_dev = tf.tile(
+ input=tf.expand_dims(tf.constant(self._keypoint_std_dev), axis=0),
+ multiples=[num_instances, 1])
+
+ # If boxes is not None, then scale the standard deviation based on the
+ # size of the object bounding boxes similar to object center heatmap.
+ if boxes is not None:
+ boxes = box_list.BoxList(boxes)
+ # Convert the box coordinates to absolute output image dimension space.
+ boxes = box_list_ops.to_absolute_coordinates(boxes,
+ height // self._stride,
+ width // self._stride)
+ # Get the box height and width. Each returned tensor has the shape
+ # of [num_instances]
+ (_, _, boxes_height,
+ boxes_width) = boxes.get_center_coordinates_and_sizes()
+
+ # Compute the sigma from box size. The tensor shape: [num_instances].
+ sigma = _compute_std_dev_from_box_size(boxes_height, boxes_width, 0.7)
+ keypoint_std_dev = keypoint_std_dev * tf.stack(
+ [sigma] * num_keypoints, axis=1)
+
+ # Generate the valid region mask to ignore regions with target class but
+ # no corresponding keypoints.
+ # Shape: [num_instances].
+ blackout = tf.logical_and(classes[:, self._class_id] > 0,
+ tf.reduce_max(kp_weights, axis=1) < 1e-3)
+ valid_mask = ta_utils.blackout_pixel_weights_by_box_regions(
+ out_height, out_width, boxes.get(), blackout)
+ valid_mask_list.append(valid_mask)
+
+ # Apply the Gaussian kernel to the keypoint coordinates. Returned heatmap
+ # has shape of [out_height, out_width, num_keypoints].
+ heatmap = ta_utils.coordinates_to_heatmap(
+ y_grid=y_grid,
+ x_grid=x_grid,
+ y_coordinates=tf.keras.backend.flatten(keypoints_absolute[:, :, 0]),
+ x_coordinates=tf.keras.backend.flatten(keypoints_absolute[:, :, 1]),
+ sigma=tf.keras.backend.flatten(keypoint_std_dev),
+ channel_onehot=tf.one_hot(
+ tf.keras.backend.flatten(keypoint_types), depth=num_keypoints),
+ channel_weights=tf.keras.backend.flatten(kp_weights))
+ num_instances_list.append(
+ tf.cast(tf.reduce_sum(kp_weights, axis=0), dtype=tf.int32))
+ heatmaps.append(heatmap)
+ return (tf.stack(heatmaps, axis=0), tf.stack(num_instances_list, axis=0),
+ tf.stack(valid_mask_list, axis=0))
+
+ def _get_keypoint_types(self, num_instances, num_keypoints, num_neighbors):
+ """Gets keypoint type index tensor.
+
+ The function prepares the tensor of keypoint indices with shape
+ [num_instances, num_keypoints, num_neighbors]. Each element represents the
+ keypoint type index for each corresponding keypoint and tiled along the 3rd
+ axis:
+ [[0, 1, ..., num_keypoints - 1],
+ [0, 1, ..., num_keypoints - 1],
+ :
+ [0, 1, ..., num_keypoints - 1]]
+
+ Args:
+ num_instances: int, the number of instances, used to define the 1st
+ dimension.
+ num_keypoints: int, the number of keypoint types, used to define the 2nd
+ dimension.
+ num_neighbors: int, the number of neighborhood pixels to consider for each
+ keypoint, used to define the 3rd dimension.
+
+ Returns:
+ An integer tensor of shape [num_instances, num_keypoints, num_neighbors].
+ """
+ # Shape [1, num_keypoints, 1]: the type indices 0..num_keypoints - 1 laid out
+ # along the middle axis, with singleton axes added on both sides.
+ keypoint_types = tf.range(num_keypoints)[tf.newaxis, :, tf.newaxis]
+ # Repeat along the instance (1st) and neighbor (3rd) axes only.
+ tiled_keypoint_types = tf.tile(keypoint_types,
+ multiples=[num_instances, 1, num_neighbors])
+ return tiled_keypoint_types
+
+ def assign_keypoints_offset_targets(self,
+ height,
+ width,
+ gt_keypoints_list,
+ gt_classes_list,
+ gt_keypoints_weights_list=None,
+ gt_weights_list=None):
+ """Returns the offsets and indices of the keypoints for location refinement.
+
+ The returned values are used to refine the location of each keypoint in the
+ heatmap. The predicted values at the relevant indices can be retrieved with
+ the get_batch_predictions_from_indices function.
+
+ Args:
+ height: int, height of input to the CenterNet model. This is used to
+ determine the height of the output.
+ width: int, width of the input to the CenterNet model. This is used to
+ determine the width of the output.
+ gt_keypoints_list: A list of tensors with shape [num_instances,
+ num_total_keypoints, 2] (the unit tests pass a trailing coordinate
+ dimension of size 2). See class-level description for more detail.
+ gt_classes_list: A list of tensors with shape [num_instances,
+ num_classes]. See class-level description for more detail.
+ gt_keypoints_weights_list: A list of tensors with shape [num_instances,
+ num_total_keypoints] corresponding to the weight of each keypoint. If
+ None, a None entry is forwarded for every image.
+ gt_weights_list: A list of float tensors with shape [num_instances]. See
+ class-level description for more detail. If None, a None entry is
+ forwarded for every image.
+
+ Returns:
+ batch_indices: an integer tensor of shape [num_total_instances, 3] (or
+ [num_total_instances, 4] if 'per_keypoint_offset' is set True) holding
+ the indices inside the predicted tensor which should be penalized. The
+ first column indicates the index along the batch dimension and the
+ second and third columns indicate the index along the y and x
+ dimensions respectively. The fourth column corresponds to the channel
+ dimension (if 'per_keypoint_offset' is set True).
+ batch_offsets: a float tensor of shape [num_total_instances, 2] holding
+ the expected y and x offset of each keypoint in the output space.
+ batch_weights: a float tensor of shape [num_total_instances] indicating
+ the weight of each prediction.
+ Note that num_total_instances = batch_size * num_instances *
+ num_keypoints * num_neighbors
+ """
+
+ batch_indices = []
+ batch_offsets = []
+ batch_weights = []
+
+ # Substitute per-image None placeholders so the zip() below still yields one
+ # tuple per image when the optional weight lists are omitted.
+ if gt_keypoints_weights_list is None:
+ gt_keypoints_weights_list = [None] * len(gt_keypoints_list)
+ if gt_weights_list is None:
+ gt_weights_list = [None] * len(gt_classes_list)
+ for i, (keypoints, classes, kp_weights, weights) in enumerate(
+ zip(gt_keypoints_list, gt_classes_list, gt_keypoints_weights_list,
+ gt_weights_list)):
+ # The output resolution is the input resolution divided by the stride.
+ keypoints_absolute, kp_weights = self._preprocess_keypoints_and_weights(
+ out_height=height // self._stride,
+ out_width=width // self._stride,
+ keypoints=keypoints,
+ class_onehot=classes,
+ class_weights=weights,
+ keypoint_weights=kp_weights)
+ num_instances, num_keypoints, _ = (
+ shape_utils.combined_static_and_dynamic_shape(keypoints_absolute))
+
+ # [num_instances * num_keypoints]
+ y_source = tf.keras.backend.flatten(keypoints_absolute[:, :, 0])
+ x_source = tf.keras.backend.flatten(keypoints_absolute[:, :, 1])
+
+ # All keypoint coordinates and their neighbors:
+ # [num_instance * num_keypoints, num_neighbors]
+ (y_source_neighbors, x_source_neighbors,
+ valid_sources) = ta_utils.get_surrounding_grids(height // self._stride,
+ width // self._stride,
+ y_source, x_source,
+ self._peak_radius)
+ _, num_neighbors = shape_utils.combined_static_and_dynamic_shape(
+ y_source_neighbors)
+
+ # Update the valid keypoint weights.
+ # [num_instance * num_keypoints, num_neighbors]
+ # The flattened keypoint weights are repeated once per neighbor so they
+ # line up element-wise with valid_sources.
+ # NOTE(review): `[...] * num_neighbors` is Python list repetition, so this
+ # presumably relies on num_neighbors being a static int - confirm.
+ valid_keypoints = tf.cast(
+ valid_sources, dtype=tf.float32) * tf.stack(
+ [tf.keras.backend.flatten(kp_weights)] * num_neighbors, axis=-1)
+
+ # Compute the offsets and indices of the keypoints, using the neighboring
+ # grid locations as sources and the keypoint itself as target. Shape:
+ # offsets: [num_instances * num_keypoints, num_neighbors, 2]
+ # indices: [num_instances * num_keypoints, num_neighbors, 2]
+ offsets, indices = ta_utils.compute_floor_offsets_with_indices(
+ y_source=y_source_neighbors,
+ x_source=x_source_neighbors,
+ y_target=y_source,
+ x_target=x_source)
+ # Reshape to:
+ # offsets: [num_instances * num_keypoints * num_neighbors, 2]
+ # indices: [num_instances * num_keypoints * num_neighbors, 2]
+ offsets = tf.reshape(offsets, [-1, 2])
+ indices = tf.reshape(indices, [-1, 2])
+
+ # Prepare the batch indices to be prepended.
+ # One column filled with the image index `i`, one row per target.
+ batch_index = tf.fill(
+ [num_instances * num_keypoints * num_neighbors, 1], i)
+ if self._per_keypoint_offset:
+ # Append the keypoint type as a fourth (channel) column.
+ tiled_keypoint_types = self._get_keypoint_types(
+ num_instances, num_keypoints, num_neighbors)
+ batch_indices.append(
+ tf.concat([batch_index, indices,
+ tf.reshape(tiled_keypoint_types, [-1, 1])], axis=1))
+ else:
+ batch_indices.append(tf.concat([batch_index, indices], axis=1))
+ batch_offsets.append(offsets)
+ batch_weights.append(tf.keras.backend.flatten(valid_keypoints))
+
+ # Concatenate the tensors in the batch in the first dimension:
+ # shape: [batch_size * num_instances * num_keypoints * num_neighbors, 3] or
+ # [batch_size * num_instances * num_keypoints * num_neighbors, 4] if
+ # 'per_keypoint_offset' is set to True.
+ batch_indices = tf.concat(batch_indices, axis=0)
+ # shape: [batch_size * num_instances * num_keypoints * num_neighbors]
+ batch_weights = tf.concat(batch_weights, axis=0)
+ # shape: [batch_size * num_instances * num_keypoints * num_neighbors, 2]
+ batch_offsets = tf.concat(batch_offsets, axis=0)
+ return (batch_indices, batch_offsets, batch_weights)
+
+ def assign_joint_regression_targets(self,
+ height,
+ width,
+ gt_keypoints_list,
+ gt_classes_list,
+ gt_boxes_list=None,
+ gt_keypoints_weights_list=None,
+ gt_weights_list=None):
+ """Returns the joint regression from center grid to keypoints.
+
+ The joint regression is used as the grouping cue from the estimated
+ keypoints to instance center. The offsets are the vectors from the floored
+ object center coordinates to the keypoint coordinates.
+
+ Args:
+ height: int, height of input to the CenterNet model. This is used to
+ determine the height of the output.
+ width: int, width of the input to the CenterNet model. This is used to
+ determine the width of the output.
+ gt_keypoints_list: A list of float tensors with shape [num_instances,
+ num_total_keypoints, 2] (the unit tests pass a trailing coordinate
+ dimension of size 2). See class-level description for more detail.
+ gt_classes_list: A list of float tensors with shape [num_instances,
+ num_classes]. See class-level description for more detail.
+ gt_boxes_list: A list of float tensors with shape [num_instances, 4]. See
+ class-level description for more detail. If provided, then the center
+ targets will be computed based on the center of the boxes. Although the
+ default is None, a NotImplementedError is currently raised when no
+ boxes are available for an image.
+ gt_keypoints_weights_list: A list of float tensors with shape
+ [num_instances, num_total_keypoints] representing the weight of each
+ keypoint.
+ gt_weights_list: A list of float tensors with shape [num_instances]. See
+ class-level description for more detail.
+
+ Returns:
+ batch_indices: an integer tensor of shape [num_total_instances, 4] holding
+ the indices inside the predicted tensor which should be penalized. The
+ first column indicates the index along the batch dimension and the
+ second and third columns indicate the index along the y and x
+ dimensions respectively, the last dimension refers to the keypoint type
+ dimension.
+ batch_offsets: a float tensor of shape [num_total_instances, 2] holding
+ the expected y and x offset of each keypoint in the output space.
+ batch_weights: a float tensor of shape [num_total_instances] indicating
+ the weight of each prediction.
+ Note that num_total_instances = batch_size * num_instances *
+ num_keypoints * num_neighbors
+
+ Raises:
+ NotImplementedError: currently the object center coordinates need to be
+ computed from groundtruth bounding boxes. The functionality of
+ generating the object center coordinates from keypoints is not
+ implemented yet.
+ """
+
+ batch_indices = []
+ batch_offsets = []
+ batch_weights = []
+ batch_size = len(gt_keypoints_list)
+ # Substitute per-image None placeholders so the zip() below still yields one
+ # tuple per image when the optional lists are omitted.
+ if gt_keypoints_weights_list is None:
+ gt_keypoints_weights_list = [None] * batch_size
+ if gt_boxes_list is None:
+ gt_boxes_list = [None] * batch_size
+ if gt_weights_list is None:
+ gt_weights_list = [None] * len(gt_classes_list)
+ for i, (keypoints, classes, boxes, kp_weights, weights) in enumerate(
+ zip(gt_keypoints_list, gt_classes_list,
+ gt_boxes_list, gt_keypoints_weights_list, gt_weights_list)):
+ # The output resolution is the input resolution divided by the stride.
+ keypoints_absolute, kp_weights = self._preprocess_keypoints_and_weights(
+ out_height=height // self._stride,
+ out_width=width // self._stride,
+ keypoints=keypoints,
+ class_onehot=classes,
+ class_weights=weights,
+ keypoint_weights=kp_weights)
+ num_instances, num_keypoints, _ = (
+ shape_utils.combined_static_and_dynamic_shape(keypoints_absolute))
+
+ # If boxes are provided, compute the joint center from it.
+ if boxes is not None:
+ # Compute joint center from boxes.
+ boxes = box_list.BoxList(boxes)
+ boxes = box_list_ops.to_absolute_coordinates(boxes,
+ height // self._stride,
+ width // self._stride)
+ y_center, x_center, _, _ = boxes.get_center_coordinates_and_sizes()
+ else:
+ # TODO(yuhuic): Add the logic to generate object centers from keypoints.
+ raise NotImplementedError((
+ 'The functionality of generating object centers from keypoints is'
+ ' not implemented yet. Please provide groundtruth bounding boxes.'
+ ))
+
+ # Tile the yx center coordinates to be the same shape as keypoints.
+ y_center_tiled = tf.tile(
+ tf.reshape(y_center, shape=[num_instances, 1]),
+ multiples=[1, num_keypoints])
+ x_center_tiled = tf.tile(
+ tf.reshape(x_center, shape=[num_instances, 1]),
+ multiples=[1, num_keypoints])
+ # [num_instance * num_keypoints, num_neighbors]
+ (y_source_neighbors, x_source_neighbors,
+ valid_sources) = ta_utils.get_surrounding_grids(
+ height // self._stride, width // self._stride,
+ tf.keras.backend.flatten(y_center_tiled),
+ tf.keras.backend.flatten(x_center_tiled), self._peak_radius)
+
+ _, num_neighbors = shape_utils.combined_static_and_dynamic_shape(
+ y_source_neighbors)
+ # The flattened keypoint weights are repeated once per neighbor so they
+ # line up element-wise with valid_sources.
+ # NOTE(review): `[...] * num_neighbors` is Python list repetition, so this
+ # presumably relies on num_neighbors being a static int - confirm.
+ valid_keypoints = tf.cast(
+ valid_sources, dtype=tf.float32) * tf.stack(
+ [tf.keras.backend.flatten(kp_weights)] * num_neighbors, axis=-1)
+
+ # Compute the offsets and indices, using the grid locations around the
+ # object center as sources and the keypoints as targets. Shape
+ # (presumably, given the per-neighbor sources and the reshape below):
+ # offsets: [num_instances * num_keypoints, num_neighbors, 2]
+ # indices: [num_instances * num_keypoints, num_neighbors, 2]
+ (offsets, indices) = ta_utils.compute_floor_offsets_with_indices(
+ y_source=y_source_neighbors,
+ x_source=x_source_neighbors,
+ y_target=tf.keras.backend.flatten(keypoints_absolute[:, :, 0]),
+ x_target=tf.keras.backend.flatten(keypoints_absolute[:, :, 1]))
+ # Reshape to:
+ # offsets: [num_instances * num_keypoints * num_neighbors, 2]
+ # indices: [num_instances * num_keypoints * num_neighbors, 2]
+ offsets = tf.reshape(offsets, [-1, 2])
+ indices = tf.reshape(indices, [-1, 2])
+
+ # keypoint type tensor: [num_instances, num_keypoints, num_neighbors].
+ tiled_keypoint_types = self._get_keypoint_types(
+ num_instances, num_keypoints, num_neighbors)
+
+ # One column filled with the image index `i`, one row per target.
+ batch_index = tf.fill(
+ [num_instances * num_keypoints * num_neighbors, 1], i)
+ # Columns: [batch index, y index, x index, keypoint type].
+ batch_indices.append(
+ tf.concat([batch_index, indices,
+ tf.reshape(tiled_keypoint_types, [-1, 1])], axis=1))
+ batch_offsets.append(offsets)
+ batch_weights.append(tf.keras.backend.flatten(valid_keypoints))
+
+ # Concatenate the tensors in the batch in the first dimension:
+ # shape: [batch_size * num_instances * num_keypoints * num_neighbors, 4]
+ batch_indices = tf.concat(batch_indices, axis=0)
+ # shape: [batch_size * num_instances * num_keypoints * num_neighbors]
+ batch_weights = tf.concat(batch_weights, axis=0)
+ # shape: [batch_size * num_instances * num_keypoints * num_neighbors, 2]
+ batch_offsets = tf.concat(batch_offsets, axis=0)
+ return (batch_indices, batch_offsets, batch_weights)
+
+
+class CenterNetMaskTargetAssigner(object):
+ """Wrapper to compute targets for segmentation masks."""
+
+ def __init__(self, stride):
+ """Initializes the target assigner.
+
+ Args:
+ stride: int, the factor by which the input mask height and width are
+ integer-divided to obtain the output resolution.
+ """
+ self._stride = stride
+
+ def assign_segmentation_targets(
+ self, gt_masks_list, gt_classes_list,
+ mask_resize_method=ResizeMethod.BILINEAR):
+ """Computes the segmentation targets.
+
+ This utility produces a semantic segmentation mask for each class, starting
+ with whole image instance segmentation masks. Effectively, each per-class
+ segmentation target is the union of all masks from that class.
+
+ Args:
+ gt_masks_list: A list of float tensors with shape [num_boxes,
+ input_height, input_width] with values in {0, 1} representing instance
+ masks for each object.
+ gt_classes_list: A list of float tensors with shape [num_boxes,
+ num_classes] representing the one-hot encoded class labels for each box
+ in the gt_boxes_list.
+ mask_resize_method: A `tf.compat.v2.image.ResizeMethod`. The method to use
+ when resizing masks from input resolution to output resolution.
+
+ Returns:
+ segmentation_targets: An int32 tensor of size [batch_size, output_height,
+ output_width, num_classes] representing the class of each location in
+ the output space.
+ NOTE(review): no cast is applied in this method; the result is the
+ max over the product of the resized masks and the class tensors, so
+ the dtype presumably follows those inputs rather than int32 - confirm.
+ """
+ # TODO(ronnyvotel): Handle groundtruth weights.
+ # The class count and the input resolution are read from the first list
+ # element only and reused for every image in the batch.
+ _, num_classes = shape_utils.combined_static_and_dynamic_shape(
+ gt_classes_list[0])
+
+ _, input_height, input_width = (
+ shape_utils.combined_static_and_dynamic_shape(gt_masks_list[0]))
+ output_height = input_height // self._stride
+ output_width = input_width // self._stride
+
+ segmentation_targets_list = []
+ for gt_masks, gt_classes in zip(gt_masks_list, gt_classes_list):
+ # Resize segmentation masks to conform to output dimensions. Use TF2
+ # image resize because TF1's version is buggy:
+ # https://yaqs.corp.google.com/eng/q/4970450458378240
+ # A trailing channel axis is added because the masks are rank 3.
+ gt_masks = tf2.image.resize(
+ gt_masks[:, :, :, tf.newaxis],
+ size=(output_height, output_width),
+ method=mask_resize_method)
+ # Reshape to [num_boxes, 1, 1, num_classes] so the product below
+ # broadcasts each resized mask across the class axis.
+ gt_classes_reshaped = tf.reshape(gt_classes, [-1, 1, 1, num_classes])
+ # Shape: [h, w, num_classes].
+ # Reducing with max over the box axis merges all instances of a class.
+ segmentations_for_image = tf.reduce_max(
+ gt_masks * gt_classes_reshaped, axis=0)
+ segmentation_targets_list.append(segmentations_for_image)
+
+ # Stack the per-image targets along a new leading batch axis.
+ segmentation_target = tf.stack(segmentation_targets_list, axis=0)
+ return segmentation_target
diff --git a/research/object_detection/core/target_assigner_test.py b/research/object_detection/core/target_assigner_test.py
index fb0a63bd1342b8826da56654d647656f736964ae..5a0ca43e558beac27076bce02ae8f62d6624d6f7 100644
--- a/research/object_detection/core/target_assigner_test.py
+++ b/research/object_detection/core/target_assigner_test.py
@@ -24,9 +24,9 @@ from object_detection.core import region_similarity_calculator
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner as targetassigner
from object_detection.matchers import argmax_matcher
-from object_detection.matchers import bipartite_matcher
from object_detection.utils import np_box_ops
from object_detection.utils import test_case
+from object_detection.utils import tf_version
class TargetAssignerTest(test_case.TestCase):
@@ -439,7 +439,7 @@ class TargetAssignerTest(test_case.TestCase):
def test_raises_error_on_incompatible_groundtruth_boxes_and_labels(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
+ matcher = argmax_matcher.ArgMaxMatcher(0.5)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_class_label = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
@@ -469,7 +469,7 @@ class TargetAssignerTest(test_case.TestCase):
def test_raises_error_on_invalid_groundtruth_labels(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
+ matcher = argmax_matcher.ArgMaxMatcher(0.5)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=1.0)
unmatched_class_label = tf.constant([[0, 0], [0, 0], [0, 0]], tf.float32)
target_assigner = targetassigner.TargetAssigner(
@@ -1191,7 +1191,7 @@ class BatchTargetAssignConfidencesTest(test_case.TestCase):
])
-class CreateTargetAssignerTest(tf.test.TestCase):
+class CreateTargetAssignerTest(test_case.TestCase):
def test_create_target_assigner(self):
"""Tests that named constructor gives working target assigners.
@@ -1202,9 +1202,10 @@ class CreateTargetAssignerTest(tf.test.TestCase):
groundtruth = box_list.BoxList(tf.constant(corners))
priors = box_list.BoxList(tf.constant(corners))
- multibox_ta = (targetassigner
- .create_target_assigner('Multibox', stage='proposal'))
- multibox_ta.assign(priors, groundtruth)
+ if tf_version.is_tf1():
+ multibox_ta = (targetassigner
+ .create_target_assigner('Multibox', stage='proposal'))
+ multibox_ta.assign(priors, groundtruth)
# No tests on output, as that may vary arbitrarily as new target assigners
# are added. As long as it is constructed correctly and runs without errors,
# tests on the individual assigners cover correctness of the assignments.
@@ -1229,6 +1230,681 @@ class CreateTargetAssignerTest(tf.test.TestCase):
stage='invalid_stage')
+def _array_argmax(array):
+ """Returns the multi-dimensional index of the maximum element of `array`."""
+ # np.argmax gives an index into the flattened array; unravel_index converts
+ # it back to a tuple of per-axis indices.
+ return np.unravel_index(np.argmax(array), array.shape)
+
+
+class CenterNetCenterHeatmapTargetAssignerTest(test_case.TestCase):
+ """Tests for CenterNetCenterHeatmapTargetAssigner.assign_center_targets_from_boxes."""
+
+ def setUp(self):
+ super(CenterNetCenterHeatmapTargetAssignerTest, self).setUp()
+
+ # Box fixtures in normalized coordinates. The expected peak locations in
+ # test_center_location imply a [ymin, xmin, ymax, xmax] ordering.
+ self._box_center = [0.0, 0.0, 1.0, 1.0]
+ self._box_center_small = [0.25, 0.25, 0.75, 0.75]
+ self._box_lower_left = [0.5, 0.0, 1.0, 0.5]
+ self._box_center_offset = [0.1, 0.05, 1.0, 1.0]
+ self._box_odd_coordinates = [0.1625, 0.2125, 0.5625, 0.9625]
+
+ def test_center_location(self):
+ """Test that the centers are at the correct location."""
+ def graph_fn():
+ box_batch = [tf.constant([self._box_center, self._box_lower_left])]
+ classes = [
+ tf.one_hot([0, 1], depth=4),
+ ]
+ assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(4)
+ targets = assigner.assign_center_targets_from_boxes(80, 80, box_batch,
+ classes)
+ return targets
+ targets = self.execute(graph_fn, [])
+ # Each class channel must peak, with value 1.0, at its box center.
+ self.assertEqual((10, 10), _array_argmax(targets[0, :, :, 0]))
+ self.assertAlmostEqual(1.0, targets[0, 10, 10, 0])
+ self.assertEqual((15, 5), _array_argmax(targets[0, :, :, 1]))
+ self.assertAlmostEqual(1.0, targets[0, 15, 5, 1])
+
+ def test_center_batch_shape(self):
+ """Test that the shape of the target for a batch is correct."""
+ def graph_fn():
+ box_batch = [
+ tf.constant([self._box_center, self._box_lower_left]),
+ tf.constant([self._box_center]),
+ tf.constant([self._box_center_small]),
+ ]
+ classes = [
+ tf.one_hot([0, 1], depth=4),
+ tf.one_hot([2], depth=4),
+ tf.one_hot([3], depth=4),
+ ]
+ assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(4)
+ targets = assigner.assign_center_targets_from_boxes(80, 80, box_batch,
+ classes)
+ return targets
+ targets = self.execute(graph_fn, [])
+ # 3 images, an 80x80 input reduced to 20x20, and 4 classes.
+ self.assertEqual((3, 20, 20, 4), targets.shape)
+
+ def test_center_overlap_maximum(self):
+ """Test that when boxes overlap, we are computing the maximum."""
+ def graph_fn():
+ box_batch = [
+ tf.constant([
+ self._box_center, self._box_center_offset, self._box_center,
+ self._box_center_offset
+ ])
+ ]
+ # Class 0 receives both boxes; classes 1 and 2 receive one each.
+ classes = [
+ tf.one_hot([0, 0, 1, 2], depth=4),
+ ]
+
+ assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(4)
+ targets = assigner.assign_center_targets_from_boxes(80, 80, box_batch,
+ classes)
+ return targets
+ targets = self.execute(graph_fn, [])
+ class0_targets = targets[0, :, :, 0]
+ class1_targets = targets[0, :, :, 1]
+ class2_targets = targets[0, :, :, 2]
+ # The combined channel must equal the element-wise max of the two
+ # single-box channels.
+ np.testing.assert_allclose(class0_targets,
+ np.maximum(class1_targets, class2_targets))
+
+ def test_size_blur(self):
+ """Test that the heatmap of a larger box is more blurred."""
+ def graph_fn():
+ box_batch = [tf.constant([self._box_center, self._box_center_small])]
+
+ classes = [
+ tf.one_hot([0, 1], depth=4),
+ ]
+ assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(4)
+ targets = assigner.assign_center_targets_from_boxes(80, 80, box_batch,
+ classes)
+ return targets
+ targets = self.execute(graph_fn, [])
+ # "More blurred" is measured as more non-zero heatmap entries.
+ self.assertGreater(
+ np.count_nonzero(targets[:, :, :, 0]),
+ np.count_nonzero(targets[:, :, :, 1]))
+
+ def test_weights(self):
+ """Test that the weights correctly ignore ground truth."""
+ # First pass: no weights are given, so every box contributes a peak.
+ def graph1_fn():
+ box_batch = [
+ tf.constant([self._box_center, self._box_lower_left]),
+ tf.constant([self._box_center]),
+ tf.constant([self._box_center_small]),
+ ]
+ classes = [
+ tf.one_hot([0, 1], depth=4),
+ tf.one_hot([2], depth=4),
+ tf.one_hot([3], depth=4),
+ ]
+ assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(4)
+ targets = assigner.assign_center_targets_from_boxes(80, 80, box_batch,
+ classes)
+ return targets
+
+ targets = self.execute(graph1_fn, [])
+ self.assertAlmostEqual(1.0, targets[0, :, :, 0].max())
+ self.assertAlmostEqual(1.0, targets[0, :, :, 1].max())
+ self.assertAlmostEqual(1.0, targets[1, :, :, 2].max())
+ self.assertAlmostEqual(1.0, targets[2, :, :, 3].max())
+ self.assertAlmostEqual(0.0, targets[0, :, :, [2, 3]].max())
+ self.assertAlmostEqual(0.0, targets[1, :, :, [0, 1, 3]].max())
+ self.assertAlmostEqual(0.0, targets[2, :, :, :3].max())
+
+ # Second pass: the first box of the first image gets weight 0, so channel 0
+ # of image 0 is expected to be empty as well.
+ def graph2_fn():
+ weights = [
+ tf.constant([0., 1.]),
+ tf.constant([1.]),
+ tf.constant([1.]),
+ ]
+ box_batch = [
+ tf.constant([self._box_center, self._box_lower_left]),
+ tf.constant([self._box_center]),
+ tf.constant([self._box_center_small]),
+ ]
+ classes = [
+ tf.one_hot([0, 1], depth=4),
+ tf.one_hot([2], depth=4),
+ tf.one_hot([3], depth=4),
+ ]
+ assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(4)
+ targets = assigner.assign_center_targets_from_boxes(80, 80, box_batch,
+ classes,
+ weights)
+ return targets
+ targets = self.execute(graph2_fn, [])
+ self.assertAlmostEqual(1.0, targets[0, :, :, 1].max())
+ self.assertAlmostEqual(1.0, targets[1, :, :, 2].max())
+ self.assertAlmostEqual(1.0, targets[2, :, :, 3].max())
+ self.assertAlmostEqual(0.0, targets[0, :, :, [0, 2, 3]].max())
+ self.assertAlmostEqual(0.0, targets[1, :, :, [0, 1, 3]].max())
+ self.assertAlmostEqual(0.0, targets[2, :, :, :3].max())
+
+ def test_low_overlap(self):
+ """Test that a larger min_overlap never yields larger heatmap values."""
+ # The same single box is assigned with min_overlap 0.1, 0.6 and 0.99.
+ def graph1_fn():
+ box_batch = [tf.constant([self._box_center])]
+ classes = [
+ tf.one_hot([0], depth=2),
+ ]
+ assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(
+ 4, min_overlap=0.1)
+ targets_low_overlap = assigner.assign_center_targets_from_boxes(
+ 80, 80, box_batch, classes)
+ return targets_low_overlap
+ targets_low_overlap = self.execute(graph1_fn, [])
+ self.assertLess(1, np.count_nonzero(targets_low_overlap))
+
+ def graph2_fn():
+ box_batch = [tf.constant([self._box_center])]
+ classes = [
+ tf.one_hot([0], depth=2),
+ ]
+ assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(
+ 4, min_overlap=0.6)
+ targets_medium_overlap = assigner.assign_center_targets_from_boxes(
+ 80, 80, box_batch, classes)
+ return targets_medium_overlap
+ targets_medium_overlap = self.execute(graph2_fn, [])
+ self.assertLess(1, np.count_nonzero(targets_medium_overlap))
+
+ def graph3_fn():
+ box_batch = [tf.constant([self._box_center])]
+ classes = [
+ tf.one_hot([0], depth=2),
+ ]
+ assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(
+ 4, min_overlap=0.99)
+ targets_high_overlap = assigner.assign_center_targets_from_boxes(
+ 80, 80, box_batch, classes)
+ return targets_high_overlap
+
+ targets_high_overlap = self.execute(graph3_fn, [])
+ # Element-wise ordering: low overlap >= medium overlap >= high overlap.
+ self.assertTrue(np.all(targets_low_overlap >= targets_medium_overlap))
+ self.assertTrue(np.all(targets_medium_overlap >= targets_high_overlap))
+
+ def test_empty_box_list(self):
+ """Test that an empty box list gives an all 0 heatmap."""
+ def graph_fn():
+ # Zero boxes, with matching zero-row class tensor.
+ box_batch = [
+ tf.zeros((0, 4), dtype=tf.float32),
+ ]
+
+ classes = [
+ tf.zeros((0, 5), dtype=tf.float32),
+ ]
+
+ assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner(
+ 4, min_overlap=0.1)
+ targets = assigner.assign_center_targets_from_boxes(
+ 80, 80, box_batch, classes)
+ return targets
+ targets = self.execute(graph_fn, [])
+ np.testing.assert_allclose(targets, 0.)
+
+
+class CenterNetBoxTargetAssignerTest(test_case.TestCase):
+ """Tests for CenterNetBoxTargetAssigner and related module-level helpers."""
+
+ def setUp(self):
+ super(CenterNetBoxTargetAssignerTest, self).setUp()
+ # Box fixtures in normalized coordinates; the expected indices below imply
+ # a [ymin, xmin, ymax, xmax] ordering.
+ self._box_center = [0.0, 0.0, 1.0, 1.0]
+ self._box_center_small = [0.25, 0.25, 0.75, 0.75]
+ self._box_lower_left = [0.5, 0.0, 1.0, 0.5]
+ self._box_center_offset = [0.1, 0.05, 1.0, 1.0]
+ self._box_odd_coordinates = [0.1625, 0.2125, 0.5625, 0.9625]
+
+ def test_max_distance_for_overlap(self):
+ """Test that the distance ensures the IoU with random boxes."""
+
+ # TODO(vighneshb) remove this after the `_smallest_positive_root`
+ # function is fixed.
+ # NOTE(review): the first two adjacent string literals below concatenate
+ # to "...version ofthe `max_distance_for_overlap`..." (missing space).
+ # While the skip is in place, the rest of this test is not executed.
+ self.skipTest(('Skipping test because we are using an incorrect version of'
+ 'the `max_distance_for_overlap` function to reproduce'
+ ' results.'))
+
+ # Fixed seed so the sampled box sizes and IoU thresholds are reproducible.
+ rng = np.random.RandomState(0)
+ n_samples = 100
+
+ width = rng.uniform(1, 100, size=n_samples)
+ height = rng.uniform(1, 100, size=n_samples)
+ min_iou = rng.uniform(0.1, 1.0, size=n_samples)
+
+ def graph_fn():
+ max_dist = targetassigner.max_distance_for_overlap(height, width, min_iou)
+ return max_dist
+ max_dist = self.execute(graph_fn, [])
+ # Reference boxes anchored at the origin.
+ xmin1 = np.zeros(n_samples)
+ ymin1 = np.zeros(n_samples)
+ xmax1 = np.zeros(n_samples) + width
+ ymax1 = np.zeros(n_samples) + height
+
+ # Second set of boxes: each corner coordinate is displaced by max_dist
+ # scaled by the cosine/sine of a randomly drawn angle.
+ xmin2 = max_dist * np.cos(rng.uniform(0, 2 * np.pi))
+ ymin2 = max_dist * np.sin(rng.uniform(0, 2 * np.pi))
+ xmax2 = width + max_dist * np.cos(rng.uniform(0, 2 * np.pi))
+ ymax2 = height + max_dist * np.sin(rng.uniform(0, 2 * np.pi))
+
+ boxes1 = np.vstack([ymin1, xmin1, ymax1, xmax1]).T
+ boxes2 = np.vstack([ymin2, xmin2, ymax2, xmax2]).T
+
+ # Only the diagonal is needed: the IoU of each box with its own displaced
+ # counterpart.
+ iou = np.diag(np_box_ops.iou(boxes1, boxes2))
+
+ self.assertTrue(np.all(iou >= min_iou))
+
+ def test_max_distance_for_overlap_centernet(self):
+ """Test the version of the function used in the CenterNet paper."""
+
+ def graph_fn():
+ distance = targetassigner.max_distance_for_overlap(10, 5, 0.5)
+ return distance
+ distance = self.execute(graph_fn, [])
+ self.assertAlmostEqual(2.807764064, distance)
+
+ def test_assign_size_and_offset_targets(self):
+ """Test the assign_size_and_offset_targets function."""
+ def graph_fn():
+ box_batch = [
+ tf.constant([self._box_center, self._box_lower_left]),
+ tf.constant([self._box_center_offset]),
+ tf.constant([self._box_center_small, self._box_odd_coordinates]),
+ ]
+
+ assigner = targetassigner.CenterNetBoxTargetAssigner(4)
+ indices, hw, yx_offset, weights = assigner.assign_size_and_offset_targets(
+ 80, 80, box_batch)
+ return indices, hw, yx_offset, weights
+ indices, hw, yx_offset, weights = self.execute(graph_fn, [])
+ # 2 + 1 + 2 = 5 boxes across the three images, one output row per box.
+ self.assertEqual(indices.shape, (5, 3))
+ self.assertEqual(hw.shape, (5, 2))
+ self.assertEqual(yx_offset.shape, (5, 2))
+ self.assertEqual(weights.shape, (5,))
+ # Each index row is [image index, y, x].
+ np.testing.assert_array_equal(
+ indices,
+ [[0, 10, 10], [0, 15, 5], [1, 11, 10], [2, 10, 10], [2, 7, 11]])
+ np.testing.assert_array_equal(
+ hw, [[20, 20], [10, 10], [18, 19], [10, 10], [8, 15]])
+ np.testing.assert_array_equal(
+ yx_offset, [[0, 0], [0, 0], [0, 0.5], [0, 0], [0.25, 0.75]])
+ # No weights were passed, so every box is expected to have weight 1.
+ np.testing.assert_array_equal(weights, 1)
+
+ def test_assign_size_and_offset_targets_weights(self):
+ """Test the assign_size_and_offset_targets function with box weights."""
+ def graph_fn():
+ box_batch = [
+ tf.constant([self._box_center, self._box_lower_left]),
+ tf.constant([self._box_lower_left, self._box_center_small]),
+ tf.constant([self._box_center_small, self._box_odd_coordinates]),
+ ]
+
+ cn_assigner = targetassigner.CenterNetBoxTargetAssigner(4)
+ weights_batch = [
+ tf.constant([0.0, 1.0]),
+ tf.constant([1.0, 1.0]),
+ tf.constant([0.0, 0.0])
+ ]
+ indices, hw, yx_offset, weights = cn_assigner.assign_size_and_offset_targets(
+ 80, 80, box_batch, weights_batch)
+ return indices, hw, yx_offset, weights
+ indices, hw, yx_offset, weights = self.execute(graph_fn, [])
+ # 2 + 2 + 2 = 6 boxes; zero-weight boxes still produce a row.
+ self.assertEqual(indices.shape, (6, 3))
+ self.assertEqual(hw.shape, (6, 2))
+ self.assertEqual(yx_offset.shape, (6, 2))
+ self.assertEqual(weights.shape, (6,))
+ np.testing.assert_array_equal(indices,
+ [[0, 10, 10], [0, 15, 5], [1, 15, 5],
+ [1, 10, 10], [2, 10, 10], [2, 7, 11]])
+ np.testing.assert_array_equal(
+ hw, [[20, 20], [10, 10], [10, 10], [10, 10], [10, 10], [8, 15]])
+ np.testing.assert_array_equal(
+ yx_offset, [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0.25, 0.75]])
+ # The returned weights are the per-image weights concatenated in order.
+ np.testing.assert_array_equal(weights, [0, 1, 1, 1, 0, 0])
+
+ def test_get_batch_predictions_from_indices(self):
+ """Test the get_batch_predictions_from_indices function.
+
+ This test verifies that the indices returned by
+ assign_size_and_offset_targets function work as expected with a predicted
+ tensor.
+
+ """
+ def graph_fn():
+ box_batch = [
+ tf.constant([self._box_center, self._box_lower_left]),
+ tf.constant([self._box_center_small, self._box_odd_coordinates]),
+ ]
+
+ # Fill the prediction map with a sentinel (-1000) and place distinct
+ # values only where the four boxes are expected to be looked up.
+ pred_array = np.ones((2, 40, 20, 2), dtype=np.int32) * -1000
+ pred_array[0, 20, 10] = [1, 2]
+ pred_array[0, 30, 5] = [3, 4]
+ pred_array[1, 20, 10] = [5, 6]
+ pred_array[1, 14, 11] = [7, 8]
+
+ pred_tensor = tf.constant(pred_array)
+
+ cn_assigner = targetassigner.CenterNetBoxTargetAssigner(4)
+ # A 160x80 input with the assigner built with 4 matches the 40x20
+ # prediction map above.
+ indices, _, _, _ = cn_assigner.assign_size_and_offset_targets(
+ 160, 80, box_batch)
+
+ preds = targetassigner.get_batch_predictions_from_indices(
+ pred_tensor, indices)
+ return preds
+ preds = self.execute(graph_fn, [])
+ # Any sentinel value in the result would mean a wrong index was gathered.
+ np.testing.assert_array_equal(preds, [[1, 2], [3, 4], [5, 6], [7, 8]])
+
+
+class CenterNetKeypointTargetAssignerTest(test_case.TestCase):
+
+ def test_keypoint_heatmap_targets(self):
+ """Test the heatmap, instance count and valid mask for keypoint targets."""
+ def graph_fn():
+ gt_classes_list = [
+ tf.one_hot([0, 1, 0, 1], depth=4),
+ ]
+ # 4 instances x 5 keypoints; NaN marks a missing keypoint. The same value
+ # is used for both coordinates of a keypoint (see the concat below).
+ coordinates = tf.expand_dims(
+ tf.constant(
+ np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
+ [float('nan'), 0.7, float('nan'), 0.9, 1.0],
+ [0.4, 0.1, 0.4, 0.2, 0.1],
+ [float('nan'), 0.1, 0.5, 0.7, 0.6]]),
+ dtype=tf.float32),
+ axis=2)
+ gt_keypoints_list = [tf.concat([coordinates, coordinates], axis=2)]
+ gt_boxes_list = [
+ tf.constant(
+ np.array([[0.0, 0.0, 0.3, 0.3],
+ [0.0, 0.0, 0.5, 0.5],
+ [0.0, 0.0, 0.5, 0.5],
+ [0.0, 0.0, 1.0, 1.0]]),
+ dtype=tf.float32)
+ ]
+
+ cn_assigner = targetassigner.CenterNetKeypointTargetAssigner(
+ stride=4,
+ class_id=1,
+ keypoint_indices=[0, 2])
+ (targets, num_instances_batch,
+ valid_mask) = cn_assigner.assign_keypoint_heatmap_targets(
+ 120,
+ 80,
+ gt_keypoints_list,
+ gt_classes_list,
+ gt_boxes_list=gt_boxes_list)
+ return targets, num_instances_batch, valid_mask
+
+ targets, num_instances_batch, valid_mask = self.execute(graph_fn, [])
+ # keypoint (0.5, 0.5) is selected. The peak is expected to appear at the
+ # center of the image.
+ self.assertEqual((15, 10), _array_argmax(targets[0, :, :, 1]))
+ self.assertAlmostEqual(1.0, targets[0, 15, 10, 1])
+ # No peak for the first class since NaN is selected.
+ self.assertAlmostEqual(0.0, targets[0, 15, 10, 0])
+ # Verify the output heatmap shape.
+ self.assertAllEqual([1, 30, 20, 2], targets.shape)
+ # Verify the number of instances is correct.
+ np.testing.assert_array_almost_equal([[0, 1]],
+ num_instances_batch)
+ # When calling the function, we specify the class id to be 1 (the instances
+ # at index 1 and 3) and the keypoint indices to be [0, 2], meaning that the
+ # instance at index 1 is the target class with no valid keypoints in it. As
+ # a result, the region of that instance's bounding box should be blacked
+ # out (0.0, 0.0, 0.5, 0.5), transferring to (0, 0, 15, 10) in absolute
+ # output space.
+ self.assertAlmostEqual(np.sum(valid_mask[:, 0:16, 0:11]), 0.0)
+ # All other values are 1.0 so the sum is: 30 * 20 - 16 * 11 = 424.
+ self.assertAlmostEqual(np.sum(valid_mask), 424.0)
+
+ def test_assign_keypoints_offset_targets(self):
+ """Test offset targets with the default radius and shared offsets."""
+ def graph_fn():
+ gt_classes_list = [
+ tf.one_hot([0, 1, 0, 1], depth=4),
+ ]
+ # 4 instances x 5 keypoints; NaN marks a missing keypoint. The same value
+ # is used for both coordinates of a keypoint (see the concat below).
+ coordinates = tf.expand_dims(
+ tf.constant(
+ np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
+ [float('nan'), 0.7, float('nan'), 0.9, 0.4],
+ [0.4, 0.1, 0.4, 0.2, 0.0],
+ [float('nan'), 0.0, 0.12, 0.7, 0.4]]),
+ dtype=tf.float32),
+ axis=2)
+ gt_keypoints_list = [tf.concat([coordinates, coordinates], axis=2)]
+
+ cn_assigner = targetassigner.CenterNetKeypointTargetAssigner(
+ stride=4,
+ class_id=1,
+ keypoint_indices=[0, 2])
+ (indices, offsets, weights) = cn_assigner.assign_keypoints_offset_targets(
+ height=120,
+ width=80,
+ gt_keypoints_list=gt_keypoints_list,
+ gt_classes_list=gt_classes_list)
+ # Returned as (indices, weights, offsets), i.e. reordered relative to the
+ # assigner's (indices, offsets, weights); the unpacking below matches.
+ return indices, weights, offsets
+ indices, weights, offsets = self.execute(graph_fn, [])
+ # 4 instances x 2 selected keypoints = 8 targets.
+ # Only the last element has positive weight.
+ np.testing.assert_array_almost_equal(
+ [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], weights)
+ # Validate the last element's indices and offsets.
+ # Keypoint 0.12 on the 30x20 output is (3.6, 2.4): floor (3, 2), remainder
+ # (0.6, 0.4).
+ np.testing.assert_array_equal([0, 3, 2], indices[7, :])
+ np.testing.assert_array_almost_equal([0.6, 0.4], offsets[7, :])
+
+ def test_assign_keypoints_offset_targets_radius(self):
+ def graph_fn():
+ gt_classes_list = [
+ tf.one_hot([0, 1, 0, 1], depth=4),
+ ]
+ coordinates = tf.expand_dims(
+ tf.constant(
+ np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
+ [float('nan'), 0.7, float('nan'), 0.9, 0.4],
+ [0.4, 0.1, 0.4, 0.2, 0.0],
+ [float('nan'), 0.0, 0.12, 0.7, 0.4]]),
+ dtype=tf.float32),
+ axis=2)
+ gt_keypoints_list = [tf.concat([coordinates, coordinates], axis=2)]
+
+ cn_assigner = targetassigner.CenterNetKeypointTargetAssigner(
+ stride=4,
+ class_id=1,
+ keypoint_indices=[0, 2],
+ peak_radius=1,
+ per_keypoint_offset=True)
+ (indices, offsets, weights) = cn_assigner.assign_keypoints_offset_targets(
+ height=120,
+ width=80,
+ gt_keypoints_list=gt_keypoints_list,
+ gt_classes_list=gt_classes_list)
+ return indices, weights, offsets
+ indices, weights, offsets = self.execute(graph_fn, [])
+
+ # There are total 8 * 5 (neighbors) = 40 targets.
+ self.assertAllEqual(indices.shape, [40, 4])
+ self.assertAllEqual(offsets.shape, [40, 2])
+ self.assertAllEqual(weights.shape, [40])
+ # Only the last 5 elements (radius 1 generates 5 valid points) have
+ # positive weight.
+ np.testing.assert_array_almost_equal([
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0
+ ], weights)
+ # Validate the last element's (with neighbors) indices and offsets.
+ np.testing.assert_array_equal([0, 2, 2, 1], indices[35, :])
+ np.testing.assert_array_equal([0, 3, 1, 1], indices[36, :])
+ np.testing.assert_array_equal([0, 3, 2, 1], indices[37, :])
+ np.testing.assert_array_equal([0, 3, 3, 1], indices[38, :])
+ np.testing.assert_array_equal([0, 4, 2, 1], indices[39, :])
+ np.testing.assert_array_almost_equal([1.6, 0.4], offsets[35, :])
+ np.testing.assert_array_almost_equal([0.6, 1.4], offsets[36, :])
+ np.testing.assert_array_almost_equal([0.6, 0.4], offsets[37, :])
+ np.testing.assert_array_almost_equal([0.6, -0.6], offsets[38, :])
+ np.testing.assert_array_almost_equal([-0.4, 0.4], offsets[39, :])
+
+ def test_assign_joint_regression_targets(self):
+ def graph_fn():
+ gt_boxes_list = [
+ tf.constant(
+ np.array([[0.0, 0.0, 0.0, 0.0],
+ [0.0, 0.0, 0.0, 0.0],
+ [0.0, 0.0, 0.0, 0.0],
+ [0.0, 0.0, 1.0, 1.0]]),
+ dtype=tf.float32)
+ ]
+ gt_classes_list = [
+ tf.one_hot([0, 1, 0, 1], depth=4),
+ ]
+ coordinates = tf.expand_dims(
+ tf.constant(
+ np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
+ [float('nan'), 0.7, float('nan'), 0.9, 0.4],
+ [0.4, 0.1, 0.4, 0.2, 0.0],
+ [float('nan'), 0.0, 0.12, 0.7, 0.4]]),
+ dtype=tf.float32),
+ axis=2)
+ gt_keypoints_list = [tf.concat([coordinates, coordinates], axis=2)]
+
+ cn_assigner = targetassigner.CenterNetKeypointTargetAssigner(
+ stride=4,
+ class_id=1,
+ keypoint_indices=[0, 2])
+ (indices, offsets, weights) = cn_assigner.assign_joint_regression_targets(
+ height=120,
+ width=80,
+ gt_keypoints_list=gt_keypoints_list,
+ gt_classes_list=gt_classes_list,
+ gt_boxes_list=gt_boxes_list)
+ return indices, offsets, weights
+ indices, offsets, weights = self.execute(graph_fn, [])
+ np.testing.assert_array_almost_equal(
+ [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], weights)
+ np.testing.assert_array_equal([0, 15, 10, 1], indices[7, :])
+ np.testing.assert_array_almost_equal([-11.4, -7.6], offsets[7, :])
+
+ def test_assign_joint_regression_targets_radius(self):
+ def graph_fn():
+ gt_boxes_list = [
+ tf.constant(
+ np.array([[0.0, 0.0, 0.0, 0.0],
+ [0.0, 0.0, 0.0, 0.0],
+ [0.0, 0.0, 0.0, 0.0],
+ [0.0, 0.0, 1.0, 1.0]]),
+ dtype=tf.float32)
+ ]
+ gt_classes_list = [
+ tf.one_hot([0, 1, 0, 1], depth=4),
+ ]
+ coordinates = tf.expand_dims(
+ tf.constant(
+ np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
+ [float('nan'), 0.7, float('nan'), 0.9, 0.4],
+ [0.4, 0.1, 0.4, 0.2, 0.0],
+ [float('nan'), 0.0, 0.12, 0.7, 0.4]]),
+ dtype=tf.float32),
+ axis=2)
+ gt_keypoints_list = [tf.concat([coordinates, coordinates], axis=2)]
+
+ cn_assigner = targetassigner.CenterNetKeypointTargetAssigner(
+ stride=4,
+ class_id=1,
+ keypoint_indices=[0, 2],
+ peak_radius=1)
+ (indices, offsets, weights) = cn_assigner.assign_joint_regression_targets(
+ height=120,
+ width=80,
+ gt_keypoints_list=gt_keypoints_list,
+ gt_classes_list=gt_classes_list,
+ gt_boxes_list=gt_boxes_list)
+ return indices, offsets, weights
+ indices, offsets, weights = self.execute(graph_fn, [])
+
+ # There are total 8 * 5 (neighbors) = 40 targets.
+ self.assertAllEqual(indices.shape, [40, 4])
+ self.assertAllEqual(offsets.shape, [40, 2])
+ self.assertAllEqual(weights.shape, [40])
+ # Only the last 5 elements (radius 1 generates 5 valid points) have
+ # positive weight.
+ np.testing.assert_array_almost_equal([
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0
+ ], weights)
+ # Test the values of the indices and offsets of the last 5 elements.
+ np.testing.assert_array_equal([0, 14, 10, 1], indices[35, :])
+ np.testing.assert_array_equal([0, 15, 9, 1], indices[36, :])
+ np.testing.assert_array_equal([0, 15, 10, 1], indices[37, :])
+ np.testing.assert_array_equal([0, 15, 11, 1], indices[38, :])
+ np.testing.assert_array_equal([0, 16, 10, 1], indices[39, :])
+ np.testing.assert_array_almost_equal([-10.4, -7.6], offsets[35, :])
+ np.testing.assert_array_almost_equal([-11.4, -6.6], offsets[36, :])
+ np.testing.assert_array_almost_equal([-11.4, -7.6], offsets[37, :])
+ np.testing.assert_array_almost_equal([-11.4, -8.6], offsets[38, :])
+ np.testing.assert_array_almost_equal([-12.4, -7.6], offsets[39, :])
+
+
+class CenterNetMaskTargetAssignerTest(test_case.TestCase):
+
+ def test_assign_segmentation_targets(self):
+ def graph_fn():
+ gt_masks_list = [
+ # Example 0.
+ tf.constant([
+ [
+ [1., 0., 0., 0.],
+ [1., 1., 0., 0.],
+ [0., 0., 0., 0.],
+ [0., 0., 0., 0.],
+ ],
+ [
+ [0., 0., 0., 0.],
+ [0., 0., 0., 1.],
+ [0., 0., 0., 0.],
+ [0., 0., 0., 0.],
+ ],
+ [
+ [1., 1., 0., 0.],
+ [1., 1., 0., 0.],
+ [0., 0., 1., 1.],
+ [0., 0., 1., 1.],
+ ]
+ ], dtype=tf.float32),
+ # Example 1.
+ tf.constant([
+ [
+ [1., 1., 0., 1.],
+ [1., 1., 1., 1.],
+ [0., 0., 1., 1.],
+ [0., 0., 0., 1.],
+ ],
+ [
+ [0., 0., 0., 0.],
+ [0., 0., 0., 0.],
+ [1., 1., 0., 0.],
+ [1., 1., 0., 0.],
+ ],
+ ], dtype=tf.float32),
+ ]
+ gt_classes_list = [
+ # Example 0.
+ tf.constant([[1., 0., 0.],
+ [0., 1., 0.],
+ [1., 0., 0.]], dtype=tf.float32),
+ # Example 1.
+ tf.constant([[0., 1., 0.],
+ [0., 1., 0.]], dtype=tf.float32)
+ ]
+ cn_assigner = targetassigner.CenterNetMaskTargetAssigner(stride=2)
+ segmentation_target = cn_assigner.assign_segmentation_targets(
+ gt_masks_list=gt_masks_list,
+ gt_classes_list=gt_classes_list,
+ mask_resize_method=targetassigner.ResizeMethod.NEAREST_NEIGHBOR)
+ return segmentation_target
+ segmentation_target = self.execute(graph_fn, [])
+
+ expected_seg_target = np.array([
+ # Example 0 [[class 0, class 1], [background, class 0]]
+ [[[1, 0, 0], [0, 1, 0]],
+ [[0, 0, 0], [1, 0, 0]]],
+ # Example 1 [[class 1, class 1], [class 1, class 1]]
+ [[[0, 1, 0], [0, 1, 0]],
+ [[0, 1, 0], [0, 1, 0]]],
+ ], dtype=np.float32)
+ np.testing.assert_array_almost_equal(
+ expected_seg_target, segmentation_target)
+
if __name__ == '__main__':
tf.enable_v2_behavior()
diff --git a/research/object_detection/data_decoders/tf_example_decoder.py b/research/object_detection/data_decoders/tf_example_decoder.py
index bd1fa2c771ec61a0ebc438392a966c08aff6faad..04cc4db59988161345c5cacd2e6f513b2707b0a1 100644
--- a/research/object_detection/data_decoders/tf_example_decoder.py
+++ b/research/object_detection/data_decoders/tf_example_decoder.py
@@ -30,6 +30,7 @@ from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields
from object_detection.protos import input_reader_pb2
from object_detection.utils import label_map_util
+from object_detection.utils import shape_utils
# pylint: disable=g-import-not-at-top
try:
@@ -170,7 +171,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
num_additional_channels=0,
load_multiclass_scores=False,
load_context_features=False,
- expand_hierarchy_labels=False):
+ expand_hierarchy_labels=False,
+ load_dense_pose=False):
"""Constructor sets keys_to_features and items_to_handlers.
Args:
@@ -201,6 +203,7 @@ class TfExampleDecoder(data_decoder.DataDecoder):
account the provided hierarchy in the label_map_proto_file. For positive
classes, the labels are extended to ancestor. For negative classes,
the labels are expanded to descendants.
+ load_dense_pose: Whether to load DensePose annotations.
Raises:
ValueError: If `instance_mask_type` option is not one of
@@ -371,6 +374,34 @@ class TfExampleDecoder(data_decoder.DataDecoder):
self._decode_png_instance_masks))
else:
raise ValueError('Did not recognize the `instance_mask_type` option.')
+ if load_dense_pose:
+ self.keys_to_features['image/object/densepose/num'] = (
+ tf.VarLenFeature(tf.int64))
+ self.keys_to_features['image/object/densepose/part_index'] = (
+ tf.VarLenFeature(tf.int64))
+ self.keys_to_features['image/object/densepose/x'] = (
+ tf.VarLenFeature(tf.float32))
+ self.keys_to_features['image/object/densepose/y'] = (
+ tf.VarLenFeature(tf.float32))
+ self.keys_to_features['image/object/densepose/u'] = (
+ tf.VarLenFeature(tf.float32))
+ self.keys_to_features['image/object/densepose/v'] = (
+ tf.VarLenFeature(tf.float32))
+ self.items_to_handlers[
+ fields.InputDataFields.groundtruth_dp_num_points] = (
+ slim_example_decoder.Tensor('image/object/densepose/num'))
+ self.items_to_handlers[fields.InputDataFields.groundtruth_dp_part_ids] = (
+ slim_example_decoder.ItemHandlerCallback(
+ ['image/object/densepose/part_index',
+ 'image/object/densepose/num'], self._dense_pose_part_indices))
+ self.items_to_handlers[
+ fields.InputDataFields.groundtruth_dp_surface_coords] = (
+ slim_example_decoder.ItemHandlerCallback(
+ ['image/object/densepose/x', 'image/object/densepose/y',
+ 'image/object/densepose/u', 'image/object/densepose/v',
+ 'image/object/densepose/num'],
+ self._dense_pose_surface_coordinates))
+
if label_map_proto_file:
# If the label_map_proto is provided, try to use it in conjunction with
# the class text, and fall back to a materialized ID.
@@ -547,6 +578,14 @@ class TfExampleDecoder(data_decoder.DataDecoder):
group_of = fields.InputDataFields.groundtruth_group_of
tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool)
+ if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
+ tensor_dict[fields.InputDataFields.groundtruth_dp_num_points] = tf.cast(
+ tensor_dict[fields.InputDataFields.groundtruth_dp_num_points],
+ dtype=tf.int32)
+ tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast(
+ tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids],
+ dtype=tf.int32)
+
return tensor_dict
def _reshape_keypoints(self, keys_to_tensors):
@@ -697,6 +736,97 @@ class TfExampleDecoder(data_decoder.DataDecoder):
lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32),
lambda: tf.zeros(tf.cast(tf.stack([0, height, width]), dtype=tf.int32)))
+ def _dense_pose_part_indices(self, keys_to_tensors):
+ """Creates a tensor that contains part indices for each DensePose point.
+
+ Args:
+ keys_to_tensors: a dictionary from keys to tensors.
+
+ Returns:
+ A 2-D int32 tensor of shape [num_instances, num_points] where each element
+ contains the DensePose part index (0-23). The value `num_points`
+ corresponds to the maximum number of sampled points across all instances
+ in the image. Note that instances with fewer sampled points will be padded
+ with zeros in the last dimension.
+ """
+ num_points_per_instances = keys_to_tensors['image/object/densepose/num']
+ part_index = keys_to_tensors['image/object/densepose/part_index']
+ if isinstance(num_points_per_instances, tf.SparseTensor):
+ num_points_per_instances = tf.sparse_tensor_to_dense(
+ num_points_per_instances)
+ if isinstance(part_index, tf.SparseTensor):
+ part_index = tf.sparse_tensor_to_dense(part_index)
+ part_index = tf.cast(part_index, dtype=tf.int32)
+ max_points_per_instance = tf.cast(
+ tf.math.reduce_max(num_points_per_instances), dtype=tf.int32)
+ num_points_cumulative = tf.concat([
+ [0], tf.math.cumsum(num_points_per_instances)], axis=0)
+
+ def pad_parts_tensor(instance_ind):
+ points_range_start = num_points_cumulative[instance_ind]
+ points_range_end = num_points_cumulative[instance_ind + 1]
+ part_inds = part_index[points_range_start:points_range_end]
+ return shape_utils.pad_or_clip_nd(part_inds,
+ output_shape=[max_points_per_instance])
+
+ return tf.map_fn(pad_parts_tensor,
+ tf.range(tf.size(num_points_per_instances)),
+ dtype=tf.int32)
+
+ def _dense_pose_surface_coordinates(self, keys_to_tensors):
+ """Creates a tensor that contains surface coords for each DensePose point.
+
+ Args:
+ keys_to_tensors: a dictionary from keys to tensors.
+
+ Returns:
+ A 3-D float32 tensor of shape [num_instances, num_points, 4] where each
+ point contains (y, x, v, u) data for each sampled DensePose point. The
+ (y, x) coordinate has normalized image locations for the point, and (v, u)
+ contains the surface coordinate (also normalized) for the part. The value
+ `num_points` corresponds to the maximum number of sampled points across
+ all instances in the image. Note that instances with fewer sampled points
+ will be padded with zeros in dim=1.
+ """
+ num_points_per_instances = keys_to_tensors['image/object/densepose/num']
+ dp_y = keys_to_tensors['image/object/densepose/y']
+ dp_x = keys_to_tensors['image/object/densepose/x']
+ dp_v = keys_to_tensors['image/object/densepose/v']
+ dp_u = keys_to_tensors['image/object/densepose/u']
+ if isinstance(num_points_per_instances, tf.SparseTensor):
+ num_points_per_instances = tf.sparse_tensor_to_dense(
+ num_points_per_instances)
+ if isinstance(dp_y, tf.SparseTensor):
+ dp_y = tf.sparse_tensor_to_dense(dp_y)
+ if isinstance(dp_x, tf.SparseTensor):
+ dp_x = tf.sparse_tensor_to_dense(dp_x)
+ if isinstance(dp_v, tf.SparseTensor):
+ dp_v = tf.sparse_tensor_to_dense(dp_v)
+ if isinstance(dp_u, tf.SparseTensor):
+ dp_u = tf.sparse_tensor_to_dense(dp_u)
+ max_points_per_instance = tf.cast(
+ tf.math.reduce_max(num_points_per_instances), dtype=tf.int32)
+ num_points_cumulative = tf.concat([
+ [0], tf.math.cumsum(num_points_per_instances)], axis=0)
+
+ def pad_surface_coordinates_tensor(instance_ind):
+ """Pads DensePose surface coordinates for each instance."""
+ points_range_start = num_points_cumulative[instance_ind]
+ points_range_end = num_points_cumulative[instance_ind + 1]
+ y = dp_y[points_range_start:points_range_end]
+ x = dp_x[points_range_start:points_range_end]
+ v = dp_v[points_range_start:points_range_end]
+ u = dp_u[points_range_start:points_range_end]
+ # Create [num_points_i, 4] tensor, where num_points_i is the number of
+ # sampled points for instance i.
+ unpadded_tensor = tf.stack([y, x, v, u], axis=1)
+ return shape_utils.pad_or_clip_nd(
+ unpadded_tensor, output_shape=[max_points_per_instance, 4])
+
+ return tf.map_fn(pad_surface_coordinates_tensor,
+ tf.range(tf.size(num_points_per_instances)),
+ dtype=tf.float32)
+
def _expand_image_label_hierarchy(self, image_classes, image_confidences):
"""Expand image level labels according to the hierarchy.
diff --git a/research/object_detection/data_decoders/tf_example_decoder_test.py b/research/object_detection/data_decoders/tf_example_decoder_test.py
index 9cbed32fc05f3d6b2c9e3233633627412482e0f5..81ed9258e650d7534bd9e3ae76aa574bc2a06b61 100644
--- a/research/object_detection/data_decoders/tf_example_decoder_test.py
+++ b/research/object_detection/data_decoders/tf_example_decoder_test.py
@@ -1096,8 +1096,8 @@ class TfExampleDecoderTest(test_case.TestCase):
return example_decoder.decode(tf.convert_to_tensor(example))
tensor_dict = self.execute_cpu(graph_fn, [])
- self.assertTrue(
- fields.InputDataFields.groundtruth_instance_masks not in tensor_dict)
+ self.assertNotIn(fields.InputDataFields.groundtruth_instance_masks,
+ tensor_dict)
def testDecodeImageLabels(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
@@ -1116,8 +1116,7 @@ class TfExampleDecoderTest(test_case.TestCase):
return example_decoder.decode(tf.convert_to_tensor(example))
tensor_dict = self.execute_cpu(graph_fn_1, [])
- self.assertTrue(
- fields.InputDataFields.groundtruth_image_classes in tensor_dict)
+ self.assertIn(fields.InputDataFields.groundtruth_image_classes, tensor_dict)
self.assertAllEqual(
tensor_dict[fields.InputDataFields.groundtruth_image_classes],
np.array([1, 2]))
@@ -1152,8 +1151,7 @@ class TfExampleDecoderTest(test_case.TestCase):
return example_decoder.decode(tf.convert_to_tensor(example))
tensor_dict = self.execute_cpu(graph_fn_2, [])
- self.assertTrue(
- fields.InputDataFields.groundtruth_image_classes in tensor_dict)
+ self.assertIn(fields.InputDataFields.groundtruth_image_classes, tensor_dict)
self.assertAllEqual(
tensor_dict[fields.InputDataFields.groundtruth_image_classes],
np.array([1, 3]))
@@ -1345,6 +1343,93 @@ class TfExampleDecoderTest(test_case.TestCase):
expected_image_confidence,
tensor_dict[fields.InputDataFields.groundtruth_image_confidences])
+ def testDecodeDensePose(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg, _ = self._create_encoded_and_decoded_data(
+ image_tensor, 'jpeg')
+ bbox_ymins = [0.0, 4.0, 2.0]
+ bbox_xmins = [1.0, 5.0, 8.0]
+ bbox_ymaxs = [2.0, 6.0, 1.0]
+ bbox_xmaxs = [3.0, 7.0, 3.3]
+ densepose_num = [0, 4, 2]
+ densepose_part_index = [2, 2, 3, 4, 2, 9]
+ densepose_x = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
+ densepose_y = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4]
+ densepose_u = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06]
+ densepose_v = [0.99, 0.98, 0.97, 0.96, 0.95, 0.94]
+
+ def graph_fn():
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature(six.b('jpeg')),
+ 'image/object/bbox/ymin':
+ dataset_util.float_list_feature(bbox_ymins),
+ 'image/object/bbox/xmin':
+ dataset_util.float_list_feature(bbox_xmins),
+ 'image/object/bbox/ymax':
+ dataset_util.float_list_feature(bbox_ymaxs),
+ 'image/object/bbox/xmax':
+ dataset_util.float_list_feature(bbox_xmaxs),
+ 'image/object/densepose/num':
+ dataset_util.int64_list_feature(densepose_num),
+ 'image/object/densepose/part_index':
+ dataset_util.int64_list_feature(densepose_part_index),
+ 'image/object/densepose/x':
+ dataset_util.float_list_feature(densepose_x),
+ 'image/object/densepose/y':
+ dataset_util.float_list_feature(densepose_y),
+ 'image/object/densepose/u':
+ dataset_util.float_list_feature(densepose_u),
+ 'image/object/densepose/v':
+ dataset_util.float_list_feature(densepose_v),
+
+ })).SerializeToString()
+
+ example_decoder = tf_example_decoder.TfExampleDecoder(
+ load_dense_pose=True)
+ output = example_decoder.decode(tf.convert_to_tensor(example))
+ dp_num_points = output[fields.InputDataFields.groundtruth_dp_num_points]
+ dp_part_ids = output[fields.InputDataFields.groundtruth_dp_part_ids]
+ dp_surface_coords = output[
+ fields.InputDataFields.groundtruth_dp_surface_coords]
+ return dp_num_points, dp_part_ids, dp_surface_coords
+
+ dp_num_points, dp_part_ids, dp_surface_coords = self.execute_cpu(
+ graph_fn, [])
+
+ expected_dp_num_points = [0, 4, 2]
+ expected_dp_part_ids = [
+ [0, 0, 0, 0],
+ [2, 2, 3, 4],
+ [2, 9, 0, 0]
+ ]
+ expected_dp_surface_coords = np.array(
+ [
+ # Instance 0 (no points).
+ [[0., 0., 0., 0.],
+ [0., 0., 0., 0.],
+ [0., 0., 0., 0.],
+ [0., 0., 0., 0.]],
+ # Instance 1 (4 points).
+ [[0.9, 0.1, 0.99, 0.01],
+ [0.8, 0.2, 0.98, 0.02],
+ [0.7, 0.3, 0.97, 0.03],
+ [0.6, 0.4, 0.96, 0.04]],
+ # Instance 2 (2 points).
+ [[0.5, 0.5, 0.95, 0.05],
+ [0.4, 0.6, 0.94, 0.06],
+ [0., 0., 0., 0.],
+ [0., 0., 0., 0.]],
+ ], dtype=np.float32)
+
+ self.assertAllEqual(dp_num_points, expected_dp_num_points)
+ self.assertAllEqual(dp_part_ids, expected_dp_part_ids)
+ self.assertAllClose(dp_surface_coords, expected_dp_surface_coords)
+
if __name__ == '__main__':
tf.test.main()
diff --git a/official/r1/__init__.py b/research/object_detection/dataset_tools/context_rcnn/__init__.py
similarity index 100%
rename from official/r1/__init__.py
rename to research/object_detection/dataset_tools/context_rcnn/__init__.py
diff --git a/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples.py b/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c05387980e193f9cb40a767944357d80379384c
--- /dev/null
+++ b/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples.py
@@ -0,0 +1,845 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""A Beam job to add contextual memory banks to tf.Examples.
+
+This tool groups images containing bounding boxes and embedded context features
+by a key, either `image/location` or `image/seq_id`, and time horizon,
+then uses these groups to build up a contextual memory bank from the embedded
+context features from each image in the group and adds that context to the
+output tf.Examples for each image in the group.
+
+Steps to generate a dataset with context from one with bounding boxes and
+embedded context features:
+1. Use object_detection/export_inference_graph.py to get a `saved_model` for
+ inference. The input node must accept a tf.Example proto.
+2. Run this tool with `saved_model` from step 1 and a TFRecord of tf.Example
+ protos containing images, bounding boxes, and embedded context features.
+ The context features can be added to tf.Examples using
+ generate_embedding_data.py.
+
+Example Usage:
+--------------
+python add_context_to_examples.py \
+ --input_tfrecord path/to/input_tfrecords* \
+ --output_tfrecord path/to/output_tfrecords \
+ --sequence_key image/location \
+ --time_horizon month
+
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import copy
+import datetime
+import io
+import itertools
+import json
+import os
+
+from absl import app
+from absl import flags
+import apache_beam as beam
+import numpy as np
+import PIL.Image
+import six
+import tensorflow as tf
+
+from apache_beam import runners
+
+
+flags.DEFINE_string('input_tfrecord', None, 'TFRecord containing images in '
+                    'tf.Example format for object detection, with bounding '
+                    'boxes and contextual feature embeddings.')
+flags.DEFINE_string('output_tfrecord', None,
+                    'TFRecord containing images in tf.Example format, with '
+                    'added contextual memory banks.')
+flags.DEFINE_string('sequence_key', None, 'Key to use when grouping sequences: '
+                    'so far supports `image/seq_id` and `image/location`.')
+flags.DEFINE_string('time_horizon', None, 'What time horizon to use when '
+                    'splitting the data, if any. Options are: `year`, `month`,'
+                    ' `week`, `day`, `hour`, `minute`, `None`.')
+flags.DEFINE_integer('subsample_context_features_rate', 0, 'Whether to '
+                     'subsample the context_features, and if so how many to '
+                     'sample. If the rate is set to X, it will sample context '
+                     'from 1 out of every X images. Default is sampling from '
+                     'every image, which is X=0.')
+flags.DEFINE_boolean('reduce_image_size', True, 'downsamples images to '
+                     'have longest side max_image_dimension, maintaining aspect'
+                     ' ratio')
+flags.DEFINE_integer('max_image_dimension', 1024, 'sets max image dimension')
+flags.DEFINE_boolean('add_context_features', True, 'adds a memory bank of '
+                     'embeddings to each clip')
+flags.DEFINE_boolean('sorted_image_ids', True, 'whether the image source_ids '
+                     'are sortable to deal with date_captured tie-breaks')
+flags.DEFINE_string('image_ids_to_keep', 'All', 'path to .json list of image '
+                    'ids to keep, used for ground truth eval creation')
+flags.DEFINE_boolean('keep_context_features_image_id_list', False, 'Whether or '
+                     'not to keep a list of the image_ids corresponding to the '
+                     'memory bank')
+flags.DEFINE_boolean('keep_only_positives', False, 'Whether or not to '
+                     'keep only positive boxes based on score')
+flags.DEFINE_boolean('keep_only_positives_gt', False, 'Whether or not to '
+                     'keep only positive boxes based on gt class')
+flags.DEFINE_float('context_features_score_threshold', 0.7, 'What score '
+                   'threshold to use for boxes in context_features')
+flags.DEFINE_integer('max_num_elements_in_context_features', 2000, 'Sets max '
+                     'num elements per memory bank')
+flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
+flags.DEFINE_string('output_type', 'tf_sequence_example', 'Output type, one of '
+                    '`tf_example`, `tf_sequence_example`')
+flags.DEFINE_integer('max_clip_length', None, 'Max length for sequence '
+                     'example outputs.')
+
+FLAGS = flags.FLAGS
+
+DEFAULT_FEATURE_LENGTH = 2057
+
+
+class ReKeyDataFn(beam.DoFn):
+  """Re-keys tfrecords by sequence_key.
+
+  This Beam DoFn re-keys the tfrecords by a user-defined sequence_key
+  """
+
+  def __init__(self, sequence_key, time_horizon,
+               reduce_image_size, max_image_dimension):
+    """Initialization function.
+
+    Args:
+      sequence_key: A feature name to use as a key for grouping sequences.
+        Must point to a key of type bytes_list
+      time_horizon: What length of time to use to partition the data when
+        building the memory banks. Options: `year`, `month`, `week`, `day`,
+        `hour`, `minute`, None
+      reduce_image_size: Whether to reduce the sizes of the stored images.
+      max_image_dimension: maximum dimension of reduced images
+
+    Raises:
+      ValueError: If `time_horizon` is neither None nor a supported option.
+    """
+    self._sequence_key = sequence_key
+    if time_horizon is None or time_horizon in {'year', 'month', 'week', 'day',
+                                                'hour', 'minute'}:
+      self._time_horizon = time_horizon
+    else:
+      raise ValueError('Time horizon not supported.')
+    self._reduce_image_size = reduce_image_size
+    self._max_image_dimension = max_image_dimension
+    self._session = None
+    self._num_examples_processed = beam.metrics.Metrics.counter(
+        'data_rekey', 'num_tf_examples_processed')
+    self._num_images_resized = beam.metrics.Metrics.counter(
+        'data_rekey', 'num_images_resized')
+    self._num_images_read = beam.metrics.Metrics.counter(
+        'data_rekey', 'num_images_read')
+    # Incremented once per parsed record, before any optional resizing.
+    self._num_images_found = beam.metrics.Metrics.counter(
+        'data_rekey', 'num_images_found')
+    self._num_got_shape = beam.metrics.Metrics.counter(
+        'data_rekey', 'num_images_got_shape')
+    self._num_images_found_size = beam.metrics.Metrics.counter(
+        'data_rekey', 'num_images_found_size')
+    self._num_examples_cleared = beam.metrics.Metrics.counter(
+        'data_rekey', 'num_examples_cleared')
+    self._num_examples_updated = beam.metrics.Metrics.counter(
+        'data_rekey', 'num_examples_updated')
+
+  def process(self, tfrecord_entry):
+    """Re-keys one serialized tf.Example; see `_rekey_examples`."""
+    return self._rekey_examples(tfrecord_entry)
+
+  def _largest_size_at_most(self, height, width, largest_side):
+    """Computes new shape with the largest side equal to `largest_side`.
+
+    Args:
+      height: an int indicating the current height.
+      width: an int indicating the current width.
+      largest_side: A python integer indicating the size of
+        the largest side after resize.
+    Returns:
+      new_height: an int indicating the new height.
+      new_width: an int indicating the new width.
+    """
+
+    x_scale = float(largest_side) / float(width)
+    y_scale = float(largest_side) / float(height)
+    scale = min(x_scale, y_scale)
+
+    new_width = int(width * scale)
+    new_height = int(height * scale)
+
+    return new_height, new_width
+
+  def _resize_image(self, input_example):
+    """Resizes the image within input_example and updates the height and width.
+
+    Args:
+      input_example: A tf.Example that we want to update to contain a resized
+        image.
+    Returns:
+      input_example: Updated tf.Example.
+    """
+
+    original_image = copy.deepcopy(
+        input_example.features.feature['image/encoded'].bytes_list.value[0])
+    self._num_images_read.inc(1)
+
+    height = copy.deepcopy(
+        input_example.features.feature['image/height'].int64_list.value[0])
+
+    width = copy.deepcopy(
+        input_example.features.feature['image/width'].int64_list.value[0])
+
+    self._num_got_shape.inc(1)
+
+    new_height, new_width = self._largest_size_at_most(
+        height, width, self._max_image_dimension)
+
+    self._num_images_found_size.inc(1)
+
+    encoded_jpg_io = io.BytesIO(original_image)
+    image = PIL.Image.open(encoded_jpg_io)
+    resized_image = image.resize((new_width, new_height))
+
+    with io.BytesIO() as output:
+      resized_image.save(output, format='JPEG')
+      encoded_resized_image = output.getvalue()
+
+    self._num_images_resized.inc(1)
+
+    del input_example.features.feature['image/encoded'].bytes_list.value[:]
+    del input_example.features.feature['image/height'].int64_list.value[:]
+    del input_example.features.feature['image/width'].int64_list.value[:]
+
+    self._num_examples_cleared.inc(1)
+
+    input_example.features.feature['image/encoded'].bytes_list.value.extend(
+        [encoded_resized_image])
+    input_example.features.feature['image/height'].int64_list.value.extend(
+        [new_height])
+    input_example.features.feature['image/width'].int64_list.value.extend(
+        [new_width])
+    self._num_examples_updated.inc(1)
+
+    return input_example
+
+  def _rekey_examples(self, tfrecord_entry):
+    """Parses a serialized tf.Example and re-keys it.
+
+    Args:
+      tfrecord_entry: A serialized tf.Example proto.
+    Returns:
+      A one-element list `[(new_key, input_example)]`. `new_key` is the
+      `sequence_key` feature value followed by one `/`-prefixed date field
+      per level of `time_horizon` (nothing is appended when it is unset).
+    """
+    serialized_example = copy.deepcopy(tfrecord_entry)
+
+    input_example = tf.train.Example.FromString(serialized_example)
+
+    self._num_images_found.inc(1)
+
+    if self._reduce_image_size:
+      # _resize_image increments the `num_images_resized` counter itself.
+      input_example = self._resize_image(input_example)
+
+    new_key = input_example.features.feature[
+        self._sequence_key].bytes_list.value[0]
+
+    if self._time_horizon:
+      date = datetime.datetime.strptime(
+          six.ensure_str(input_example.features.feature[
+              'image/date_captured'].bytes_list.value[0]), '%Y-%m-%d %H:%M:%S')
+      week = np.floor(float(date.day) / float(7))
+      # Date fields appended to the key, coarsest first, for each horizon.
+      key_parts = {
+          'year': (date.year,),
+          'month': (date.year, date.month),
+          'week': (date.year, date.month, week),
+          'day': (date.year, date.month, date.day),
+          'hour': (date.year, date.month, date.day, date.hour),
+          'minute': (date.year, date.month, date.day, date.hour, date.minute),
+      }.get(self._time_horizon, ())
+      new_key = new_key + six.ensure_binary(
+          ''.join('/' + str(part) for part in key_parts))
+
+    self._num_examples_processed.inc(1)
+
+    return [(new_key, input_example)]
+
+
+class SortGroupedDataFn(beam.DoFn):
+  """Sorts data within a keyed group and splits oversized groups.
+
+  This Beam DoFn sorts the grouped list of image examples (by frame number,
+  image id or capture date, depending on the sequence key). Groups holding
+  more examples than fit in one memory bank are split into consecutive chunks.
+  """
+
+  def __init__(self, sequence_key, sorted_image_ids,
+               max_num_elements_in_context_features):
+    """Initialization function.
+
+    Args:
+      sequence_key: A feature name to use as a key for grouping sequences.
+        Must point to a key of type bytes_list
+      sorted_image_ids: Whether the image ids are sortable to use as sorting
+        tie-breakers
+      max_num_elements_in_context_features: The maximum number of elements
+        allowed in the memory bank
+    """
+    self._session = None
+    self._num_examples_processed = beam.metrics.Metrics.counter(
+        'sort_group', 'num_groups_sorted')
+    self._too_many_elements = beam.metrics.Metrics.counter(
+        'sort_group', 'too_many_elements')
+    self._split_elements = beam.metrics.Metrics.counter(
+        'sort_group', 'split_elements')
+    self._sequence_key = six.ensure_binary(sequence_key)
+    self._sorted_image_ids = sorted_image_ids
+    self._max_num_elements_in_context_features = (
+        max_num_elements_in_context_features)
+
+  def process(self, grouped_entry):
+    """Sorts one (key, examples) group; see `_sort_image_examples`."""
+    return self._sort_image_examples(grouped_entry)
+
+  def _sort_image_examples(self, grouped_entry):
+    """Sorts a group of examples and splits it if it is too large.
+
+    Args:
+      grouped_entry: A (key, iterable of tf.train.Example) tuple.
+
+    Returns:
+      A list of (key, sorted example list) tuples. A group that fits in the
+      memory bank is returned as a single entry under its original key. A
+      larger group is cut into consecutive chunks of at most
+      `max_num_elements_in_context_features` examples, keyed `<key>_0`,
+      `<key>_1`, ... so that no example is dropped.
+
+    Raises:
+      ValueError: If the sequence key has no known sort order, or if a group
+        has to be split while the configured maximum is not positive.
+    """
+    key, example_collection = grouped_entry
+    example_list = list(example_collection)
+
+    def get_frame_num(example):
+      return example.features.feature['image/seq_frame_num'].int64_list.value[0]
+
+    def get_date_captured(example):
+      return datetime.datetime.strptime(
+          six.ensure_str(
+              example.features.feature[
+                  'image/date_captured'].bytes_list.value[0]),
+          '%Y-%m-%d %H:%M:%S')
+
+    def get_image_id(example):
+      return example.features.feature['image/source_id'].bytes_list.value[0]
+
+    if self._sequence_key == six.ensure_binary('image/seq_id'):
+      sorting_fn = get_frame_num
+    elif self._sequence_key == six.ensure_binary('image/location'):
+      if self._sorted_image_ids:
+        sorting_fn = get_image_id
+      else:
+        sorting_fn = get_date_captured
+    else:
+      # Previously this fell through and failed later with an unbound local.
+      raise ValueError(
+          'No sort order is defined for sequence key %r; expected '
+          '"image/seq_id" or "image/location".' % (self._sequence_key,))
+
+    sorted_example_list = sorted(example_list, key=sorting_fn)
+
+    self._num_examples_processed.inc(1)
+
+    max_elements = self._max_num_elements_in_context_features
+    if len(sorted_example_list) <= max_elements:
+      return [(key, sorted_example_list)]
+
+    if max_elements < 1:
+      raise ValueError(
+          'max_num_elements_in_context_features must be positive to split a '
+          'group, got %r.' % (max_elements,))
+
+    self._too_many_elements.inc(1)
+    output_list = []
+    chunk_starts = range(0, len(sorted_example_list), max_elements)
+    for count, start in enumerate(chunk_starts):
+      self._split_elements.inc(1)
+      new_key = key + six.ensure_binary('_' + str(count))
+      # Advance through the list chunk by chunk; the final, possibly shorter,
+      # chunk is emitted as well.
+      output_list.append(
+          (new_key, sorted_example_list[start:start + max_elements]))
+
+    return output_list
+
+
+def get_sliding_window(example_list, max_clip_length, stride_length):
+  """Yields a sliding window over data from example_list.
+
+  The window has width max_clip_length (n) and moves by stride_length (m):
+    s -> (s[0],...,s[n-1]), (s[m],...,s[m+n-1]), ...
+  Windows that would run past the end of the list are truncated.
+
+  Args:
+    example_list: A list of examples.
+    max_clip_length: The maximum length of each clip.
+    stride_length: The stride between each clip.
+
+  Yields:
+    Clips of at most max_clip_length examples. A list shorter than
+    max_clip_length is yielded unchanged as the only clip; otherwise every
+    clip is a tuple.
+  """
+  total = len(example_list)
+
+  # Too short to slide over: hand back the input itself.
+  if total < max_clip_length:
+    yield example_list
+    return
+
+  for step in range(total):
+    begin = step * stride_length
+    if begin >= total:
+      continue
+    end = min(begin + max_clip_length, total)
+    yield tuple(itertools.islice(example_list, begin, end))
+
+
+class GenerateContextFn(beam.DoFn):
+  """Generates context data for camera trap images.
+
+  This Beam DoFn builds up contextual memory banks from groups of images and
+  stores them in the output tf.Example or tf.SequenceExample for each image.
+  """
+
+  # Per-frame feature lists of an output SequenceExample, in creation order.
+  _SEQUENCE_FEATURE_LISTS = (
+      'image/encoded', 'image/timestamp', 'image/context_features_idx',
+      'image/date_captured', 'image/unix_time', 'image/location',
+      'image/source_id', 'region/bbox/xmin', 'region/bbox/xmax',
+      'region/bbox/ymin', 'region/bbox/ymax', 'region/label/index',
+      'region/label/string', 'region/is_annotated')
+
+  # (destination feature list, source tf.Example feature, value list kind) for
+  # every per-frame feature that is copied over unchanged.
+  _COPIED_FRAME_FEATURES = (
+      ('image/encoded', 'image/encoded', 'bytes_list'),
+      ('image/context_features_idx', 'context_features_idx', 'int64_list'),
+      ('image/date_captured', 'image/date_captured', 'bytes_list'),
+      ('image/unix_time', 'image/unix_time', 'float_list'),
+      ('image/location', 'image/location', 'bytes_list'),
+      ('region/bbox/xmin', 'image/object/bbox/xmin', 'float_list'),
+      ('region/bbox/xmax', 'image/object/bbox/xmax', 'float_list'),
+      ('region/bbox/ymin', 'image/object/bbox/ymin', 'float_list'),
+      ('region/bbox/ymax', 'image/object/bbox/ymax', 'float_list'),
+      ('region/label/index', 'image/object/class/label', 'int64_list'),
+      ('region/label/string', 'image/object/class/text', 'bytes_list'),
+  )
+
+  def __init__(self, sequence_key, add_context_features, image_ids_to_keep,
+               keep_context_features_image_id_list=False,
+               subsample_context_features_rate=0,
+               keep_only_positives=False,
+               context_features_score_threshold=0.7,
+               keep_only_positives_gt=False,
+               max_num_elements_in_context_features=5000,
+               pad_context_features=False,
+               output_type='tf_example', max_clip_length=None):
+    """Initialization function.
+
+    Args:
+      sequence_key: A feature name to use as a key for grouping sequences.
+      add_context_features: Whether to keep and store the contextual memory
+        bank.
+      image_ids_to_keep: Either the string 'All' (keep every image) or the
+        path of a JSON file holding the image ids to save, to use to build
+        data subsets for evaluation.
+      keep_context_features_image_id_list: Whether to save an ordered list of
+        the ids of the images in the contextual memory bank.
+      subsample_context_features_rate: What rate to subsample images for the
+        contextual memory bank.
+      keep_only_positives: Whether to only keep high scoring
+        (>context_features_score_threshold) features in the contextual memory
+        bank.
+      context_features_score_threshold: What threshold to use for keeping
+        features.
+      keep_only_positives_gt: Whether to only keep features from images that
+        contain objects based on the ground truth (for training).
+      max_num_elements_in_context_features: the maximum number of elements in
+        the memory bank
+      pad_context_features: Whether to pad the list of memory bank image ids
+        to max_num_elements_in_context_features entries.
+      output_type: What type of output, tf_example or tf_sequence_example
+      max_clip_length: The maximum length of a sequence example, before
+        splitting into multiple
+    """
+    self._session = None
+    self._num_examples_processed = beam.metrics.Metrics.counter(
+        'sequence_data_generation', 'num_seq_examples_processed')
+    self._num_keys_processed = beam.metrics.Metrics.counter(
+        'sequence_data_generation', 'num_keys_processed')
+    self._sequence_key = sequence_key
+    self._add_context_features = add_context_features
+    self._pad_context_features = pad_context_features
+    self._output_type = output_type
+    self._max_clip_length = max_clip_length
+    if six.ensure_str(image_ids_to_keep) == 'All':
+      self._image_ids_to_keep = None
+    else:
+      with tf.io.gfile.GFile(image_ids_to_keep) as f:
+        self._image_ids_to_keep = json.load(f)
+    self._keep_context_features_image_id_list = (
+        keep_context_features_image_id_list)
+    self._subsample_context_features_rate = subsample_context_features_rate
+    self._keep_only_positives = keep_only_positives
+    self._keep_only_positives_gt = keep_only_positives_gt
+    self._context_features_score_threshold = context_features_score_threshold
+    self._max_num_elements_in_context_features = (
+        max_num_elements_in_context_features)
+
+    self._images_kept = beam.metrics.Metrics.counter(
+        'sequence_data_generation', 'images_kept')
+    self._images_loaded = beam.metrics.Metrics.counter(
+        'sequence_data_generation', 'images_loaded')
+
+  def process(self, grouped_entry):
+    """Adds context to a deep copy of one (key, examples) group."""
+    return self._add_context_to_example(copy.deepcopy(grouped_entry))
+
+  def _is_excluded_from_memory_bank(self, idx, example):
+    """Returns True if `example` must not contribute to the memory bank."""
+    features = example.features.feature
+    rate = self._subsample_context_features_rate
+    if rate > 0 and (idx % rate) != 0:
+      return True
+    if self._keep_only_positives and (
+        features['image/embedding_score'].float_list.value[0] <
+        self._context_features_score_threshold):
+      return True
+    if self._keep_only_positives_gt and (
+        len(features['image/object/bbox/xmin'].float_list.value) < 1):
+      return True
+    return False
+
+  def _build_context_features(self, example_list):
+    """Builds the memory bank for one group of examples.
+
+    Every example gets a 'context_features_idx' value appended in place: its
+    position in the memory bank, or max_num_elements_in_context_features + 1
+    if it was left out.
+
+    Args:
+      example_list: The tf.train.Example protos of one group, in order.
+
+    Returns:
+      A (context_features, feature_length, context_features_image_id_list)
+      tuple: the concatenated embeddings of the kept examples, the length of
+      one embedding, and the source ids of the kept examples.
+    """
+    context_features = []
+    context_features_image_id_list = []
+    count = 0
+
+    for idx, example in enumerate(example_list):
+      features = example.features.feature
+      if self._is_excluded_from_memory_bank(idx, example):
+        features['context_features_idx'].int64_list.value.append(
+            self._max_num_elements_in_context_features + 1)
+        continue
+
+      context_features.extend(features['image/embedding'].float_list.value)
+      features['context_features_idx'].int64_list.value.append(count)
+      count += 1
+      context_features_image_id_list.append(
+          features['image/source_id'].bytes_list.value[0])
+
+    feature_length = DEFAULT_FEATURE_LENGTH
+
+    # If the example_list is not empty and image/embedding_length is in the
+    # feature dict, feature_length will be assigned to that. Otherwise, it will
+    # be kept as default.
+    if example_list and (
+        'image/embedding_length' in example_list[0].features.feature):
+      feature_length = example_list[0].features.feature[
+          'image/embedding_length'].int64_list.value[0]
+
+    if self._pad_context_features:
+      while len(context_features_image_id_list) < (
+          self._max_num_elements_in_context_features):
+        # Must be bytes: the list is written into a bytes_list feature.
+        context_features_image_id_list.append(b'')
+
+    return context_features, feature_length, context_features_image_id_list
+
+  def _filter_examples_to_keep(self, example_list):
+    """Returns the examples whose source id is listed in image_ids_to_keep."""
+    kept_examples = []
+    for example in example_list:
+      im_id = example.features.feature['image/source_id'].bytes_list.value[0]
+      self._images_loaded.inc(1)
+      if six.ensure_str(im_id) in self._image_ids_to_keep:
+        self._images_kept.inc(1)
+        kept_examples.append(example)
+    return kept_examples
+
+  def _build_sequence_example(self, key, clip_num, clip_list, context_features,
+                              feature_length, context_features_image_id_list):
+    """Builds the tf.train.SequenceExample of one clip.
+
+    Args:
+      key: The grouping key of the clip's examples.
+      clip_num: Index of the clip within its group; part of the media id.
+      clip_list: The non-empty sequence of tf.train.Example frames.
+      context_features: Flat list of memory bank embedding values.
+      feature_length: Length of one embedding in `context_features`.
+      context_features_image_id_list: Source ids of the memory bank images.
+
+    Returns:
+      The populated tf.train.SequenceExample.
+    """
+    seq_example = tf.train.SequenceExample()
+    context = seq_example.context.feature
+
+    video_id = six.ensure_str(key) + '_' + str(clip_num)
+    context['clip/media_id'].bytes_list.value.append(video_id.encode('utf8'))
+    context['clip/frames'].int64_list.value.append(len(clip_list))
+    context['clip/start/timestamp'].int64_list.value.append(0)
+    context['clip/end/timestamp'].int64_list.value.append(len(clip_list))
+    context['image/format'].bytes_list.value.append(six.ensure_binary('JPG'))
+    context['image/channels'].int64_list.value.append(3)
+
+    # Image size is taken from the first frame of the clip.
+    first_frame = clip_list[0].features.feature
+    context['image/height'].int64_list.value.append(
+        first_frame['image/height'].int64_list.value[0])
+    context['image/width'].int64_list.value.append(
+        first_frame['image/width'].int64_list.value[0])
+
+    # Only written when a memory bank was actually built, mirroring the
+    # tf_example output.
+    if self._add_context_features:
+      context['image/context_feature_length'].int64_list.value.append(
+          feature_length)
+      context['image/context_features'].float_list.value.extend(
+          context_features)
+    if self._keep_context_features_image_id_list:
+      context['image/context_features_image_id_list'].bytes_list.value.extend(
+          context_features_image_id_list)
+
+    feature_lists = seq_example.feature_lists.feature_list
+    # Looking a name up creates its (initially empty) feature list.
+    frame_lists = {
+        name: feature_lists[name] for name in self._SEQUENCE_FEATURE_LISTS}
+
+    for idx, example in enumerate(clip_list):
+      source = example.features.feature
+
+      for dst_name, src_name, kind in self._COPIED_FRAME_FEATURES:
+        frame_feature = frame_lists[dst_name].feature.add()
+        getattr(frame_feature, kind).value.extend(
+            getattr(source[src_name], kind).value)
+
+      frame_lists['image/source_id'].feature.add().bytes_list.value.append(
+          source['image/source_id'].bytes_list.value[0])
+      # Timestamp is currently order in the list.
+      frame_lists['image/timestamp'].feature.add().int64_list.value.append(idx)
+      frame_lists['region/is_annotated'].feature.add().int64_list.value.append(
+          1)
+
+    return seq_example
+
+  def _add_context_to_example(self, grouped_entry):
+    """Attaches the group's memory bank to its examples.
+
+    Args:
+      grouped_entry: A (key, iterable of tf.train.Example) tuple.
+
+    Returns:
+      A list of output protos: one tf.train.Example per kept input example for
+      output type 'tf_example', or one tf.train.SequenceExample per clip for
+      'tf_sequence_example'. Empty if no example of the group is kept or the
+      output type is unknown.
+    """
+    key, example_collection = grouped_entry
+    list_of_examples = []
+
+    example_list = list(example_collection)
+
+    if self._add_context_features:
+      context_features, feature_length, context_features_image_id_list = (
+          self._build_context_features(example_list))
+    else:
+      # Defined on every path so that neither output branch reads an unbound
+      # local.
+      context_features = []
+      feature_length = DEFAULT_FEATURE_LENGTH
+      context_features_image_id_list = []
+
+    if self._image_ids_to_keep is not None:
+      example_list = self._filter_examples_to_keep(example_list)
+      if not example_list:
+        return []
+
+    if self._output_type == 'tf_sequence_example':
+      if self._max_clip_length is not None:
+        # For now, no overlap
+        clips = get_sliding_window(
+            example_list, self._max_clip_length, self._max_clip_length)
+      else:
+        clips = [example_list]
+
+      for clip_num, clip_list in enumerate(clips):
+        seq_example = self._build_sequence_example(
+            key, clip_num, clip_list, context_features, feature_length,
+            context_features_image_id_list)
+        self._num_examples_processed.inc(1)
+        list_of_examples.append(seq_example)
+
+    elif self._output_type == 'tf_example':
+
+      for example in example_list:
+        features = example.features.feature
+
+        if self._add_context_features:
+          features['image/context_features'].float_list.value.extend(
+              context_features)
+          features['image/context_feature_length'].int64_list.value.append(
+              feature_length)
+
+        if self._keep_context_features_image_id_list:
+          features[
+              'image/context_features_image_id_list'].bytes_list.value.extend(
+                  context_features_image_id_list)
+
+        self._num_examples_processed.inc(1)
+        list_of_examples.append(example)
+
+    return list_of_examples
+
+
+def construct_pipeline(input_tfrecord,
+                       output_tfrecord,
+                       sequence_key,
+                       time_horizon=None,
+                       subsample_context_features_rate=0,
+                       reduce_image_size=True,
+                       max_image_dimension=1024,
+                       add_context_features=True,
+                       sorted_image_ids=True,
+                       image_ids_to_keep='All',
+                       keep_context_features_image_id_list=False,
+                       keep_only_positives=False,
+                       context_features_score_threshold=0.7,
+                       keep_only_positives_gt=False,
+                       max_num_elements_in_context_features=5000,
+                       num_shards=0,
+                       output_type='tf_example',
+                       max_clip_length=None):
+  """Returns a beam pipeline that adds context features to examples.
+
+  The pipeline reads serialized examples, re-keys and groups them by sequence,
+  sorts each group, attaches the contextual memory bank and writes the result.
+
+  Args:
+    input_tfrecord: A TFRecord of tf.train.Example protos containing images.
+    output_tfrecord: Path prefix of the output TFRecord, which holds
+      tf.train.Example or tf.train.SequenceExample protos depending on
+      output_type.
+    sequence_key: A feature name to use as a key for grouping sequences.
+    time_horizon: What length of time to use to partition the data when building
+      the memory banks. Options: `year`, `month`, `week`, `day`, `hour`,
+      `minute`, None.
+    subsample_context_features_rate: What rate to subsample images for the
+      contextual memory bank.
+    reduce_image_size: Whether to reduce the size of the stored images.
+    max_image_dimension: The maximum image dimension to use for resizing.
+    add_context_features: Whether to keep and store the contextual memory bank.
+    sorted_image_ids: Whether the image ids are sortable, and can be used as
+      datetime tie-breakers when building memory banks.
+    image_ids_to_keep: Either 'All' or the path of a JSON file of image ids to
+      save, to use to build data subsets for evaluation.
+    keep_context_features_image_id_list: Whether to save an ordered list of the
+      ids of the images in the contextual memory bank.
+    keep_only_positives: Whether to only keep high scoring
+      (>context_features_score_threshold) features in the contextual memory
+      bank.
+    context_features_score_threshold: What threshold to use for keeping
+      features.
+    keep_only_positives_gt: Whether to only keep features from images that
+      contain objects based on the ground truth (for training).
+    max_num_elements_in_context_features: the maximum number of elements in the
+      memory bank
+    num_shards: The number of output shards.
+    output_type: What type of output, tf_example or tf_sequence_example
+    max_clip_length: The maximum length of a sequence example, before
+      splitting into multiple
+
+  Returns:
+    A function taking the pipeline root and attaching all stages to it. It
+    raises ValueError when called with an unsupported output_type.
+  """
+  def pipeline(root):
+    proto_by_output_type = {
+        'tf_example': tf.train.Example,
+        'tf_sequence_example': tf.train.SequenceExample,
+    }
+    if output_type not in proto_by_output_type:
+      raise ValueError('Unsupported output type.')
+    coder = beam.coders.ProtoCoder(proto_by_output_type[output_type])
+
+    examples = (
+        root
+        | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
+            input_tfrecord, coder=beam.coders.BytesCoder())
+        | 'RekeyExamples' >> beam.ParDo(
+            ReKeyDataFn(sequence_key, time_horizon,
+                        reduce_image_size, max_image_dimension))
+        | 'GroupBySequenceKey' >> beam.GroupByKey()
+        | 'ReshuffleGroups' >> beam.Reshuffle()
+        | 'OrderByFrameNumber' >> beam.ParDo(
+            SortGroupedDataFn(sequence_key, sorted_image_ids,
+                              max_num_elements_in_context_features))
+        | 'ReshuffleSortedGroups' >> beam.Reshuffle()
+        | 'AddContextToExamples' >> beam.ParDo(
+            GenerateContextFn(
+                sequence_key, add_context_features, image_ids_to_keep,
+                keep_context_features_image_id_list=(
+                    keep_context_features_image_id_list),
+                subsample_context_features_rate=(
+                    subsample_context_features_rate),
+                keep_only_positives=keep_only_positives,
+                keep_only_positives_gt=keep_only_positives_gt,
+                context_features_score_threshold=(
+                    context_features_score_threshold),
+                max_num_elements_in_context_features=(
+                    max_num_elements_in_context_features),
+                output_type=output_type,
+                max_clip_length=max_clip_length))
+        | 'ReshuffleExamples' >> beam.Reshuffle())
+
+    _ = examples | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
+        output_tfrecord,
+        num_shards=num_shards,
+        coder=coder)
+  return pipeline
+
+
+def main(_):
+  """Runs the Beam pipeline that builds context features.
+
+  Creates the output directory if needed, then runs the pipeline configured
+  from the command-line flags on a DirectRunner.
+
+  Args:
+    _: unused
+  """
+  # must create before flags are used
+  runner = runners.DirectRunner()
+
+  tf.io.gfile.makedirs(os.path.dirname(FLAGS.output_tfrecord))
+
+  pipeline = construct_pipeline(
+      input_tfrecord=FLAGS.input_tfrecord,
+      output_tfrecord=FLAGS.output_tfrecord,
+      sequence_key=FLAGS.sequence_key,
+      time_horizon=FLAGS.time_horizon,
+      subsample_context_features_rate=FLAGS.subsample_context_features_rate,
+      reduce_image_size=FLAGS.reduce_image_size,
+      max_image_dimension=FLAGS.max_image_dimension,
+      add_context_features=FLAGS.add_context_features,
+      sorted_image_ids=FLAGS.sorted_image_ids,
+      image_ids_to_keep=FLAGS.image_ids_to_keep,
+      keep_context_features_image_id_list=(
+          FLAGS.keep_context_features_image_id_list),
+      keep_only_positives=FLAGS.keep_only_positives,
+      context_features_score_threshold=FLAGS.context_features_score_threshold,
+      keep_only_positives_gt=FLAGS.keep_only_positives_gt,
+      max_num_elements_in_context_features=(
+          FLAGS.max_num_elements_in_context_features),
+      num_shards=FLAGS.num_shards,
+      output_type=FLAGS.output_type,
+      max_clip_length=FLAGS.max_clip_length)
+  runner.run(pipeline)
+
+
+if __name__ == '__main__':
+  # Refuse to start unless both TFRecord paths were given on the command line.
+  required_flags = ['input_tfrecord', 'output_tfrecord']
+  flags.mark_flags_as_required(required_flags)
+  app.run(main)
diff --git a/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf1_test.py b/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf1_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f10fa7764965f63fdc74ed39861aefa0fc266d5
--- /dev/null
+++ b/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf1_test.py
@@ -0,0 +1,384 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for add_context_to_examples."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import contextlib
+import datetime
+import os
+import tempfile
+import unittest
+import numpy as np
+import six
+import tensorflow.compat.v1 as tf
+
+from object_detection.dataset_tools.context_rcnn import add_context_to_examples
+from object_detection.utils import tf_version
+from apache_beam import runners
+
+
+@contextlib.contextmanager
+def InMemoryTFRecord(entries):
+  """Writes `entries` to a temporary TFRecord file and yields its path.
+
+  Args:
+    entries: An iterable of serialized records to write.
+
+  Yields:
+    The name of the temporary TFRecord file. The file is deleted when the
+    context exits, whether or not the body raised.
+  """
+  temp = tempfile.NamedTemporaryFile(delete=False)
+  filename = temp.name
+  # Only the name is needed. Release the handle right away so it is not leaked
+  # and so the file can be reopened by name on every platform.
+  temp.close()
+  try:
+    with tf.python_io.TFRecordWriter(filename) as writer:
+      for value in entries:
+        writer.write(value)
+    yield filename
+  finally:
+    os.unlink(filename)
+
+
+def BytesFeature(value):
+  """Returns a tf.train.Feature holding the single bytes `value`."""
+  bytes_list = tf.train.BytesList(value=[value])
+  return tf.train.Feature(bytes_list=bytes_list)
+
+
+def BytesListFeature(value):
+  """Returns a tf.train.Feature holding the bytes values in `value`."""
+  bytes_list = tf.train.BytesList(value=value)
+  return tf.train.Feature(bytes_list=bytes_list)
+
+
+def Int64Feature(value):
+  """Returns a tf.train.Feature holding the single integer `value`."""
+  int64_list = tf.train.Int64List(value=[value])
+  return tf.train.Feature(int64_list=int64_list)
+
+
+def Int64ListFeature(value):
+  """Returns a tf.train.Feature holding the integer values in `value`."""
+  int64_list = tf.train.Int64List(value=value)
+  return tf.train.Feature(int64_list=int64_list)
+
+
+def FloatListFeature(value):
+  """Returns a tf.train.Feature holding the float values in `value`."""
+  float_list = tf.train.FloatList(value=value)
+  return tf.train.Feature(float_list=float_list)
+
+
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
+class GenerateContextDataTest(tf.test.TestCase):
+  """Tests for the DoFns and the pipeline of add_context_to_examples."""
+
+  def _create_first_tf_example(self):
+    """Returns the serialized frame 0 of sequence '01', with two boxes."""
+    with self.test_session():
+      jpeg_bytes = tf.image.encode_jpeg(
+          tf.constant(np.ones((4, 4, 3)).astype(np.uint8))).eval()
+
+    hyena = six.ensure_binary('hyena')
+    captured = str(datetime.datetime(2020, 1, 1, 1, 0, 0))
+    feature_map = {
+        'image/encoded': BytesFeature(jpeg_bytes),
+        'image/source_id': BytesFeature(six.ensure_binary('image_id_1')),
+        'image/height': Int64Feature(4),
+        'image/width': Int64Feature(4),
+        'image/object/class/label': Int64ListFeature([5, 5]),
+        'image/object/class/text': BytesListFeature([hyena, hyena]),
+        'image/object/bbox/xmin': FloatListFeature([0.0, 0.1]),
+        'image/object/bbox/xmax': FloatListFeature([0.2, 0.3]),
+        'image/object/bbox/ymin': FloatListFeature([0.4, 0.5]),
+        'image/object/bbox/ymax': FloatListFeature([0.6, 0.7]),
+        'image/seq_id': BytesFeature(six.ensure_binary('01')),
+        'image/seq_num_frames': Int64Feature(2),
+        'image/seq_frame_num': Int64Feature(0),
+        'image/date_captured': BytesFeature(six.ensure_binary(captured)),
+        'image/embedding': FloatListFeature([0.1, 0.2, 0.3]),
+        'image/embedding_score': FloatListFeature([0.9]),
+        'image/embedding_length': Int64Feature(3),
+    }
+    features = tf.train.Features(feature=feature_map)
+    return tf.train.Example(features=features).SerializeToString()
+
+  def _create_second_tf_example(self):
+    """Returns the serialized frame 1 of sequence '01', with one box."""
+    with self.test_session():
+      jpeg_bytes = tf.image.encode_jpeg(
+          tf.constant(np.ones((4, 4, 3)).astype(np.uint8))).eval()
+
+    captured = str(datetime.datetime(2020, 1, 1, 1, 1, 0))
+    feature_map = {
+        'image/encoded': BytesFeature(jpeg_bytes),
+        'image/source_id': BytesFeature(six.ensure_binary('image_id_2')),
+        'image/height': Int64Feature(4),
+        'image/width': Int64Feature(4),
+        'image/object/class/label': Int64ListFeature([5]),
+        'image/object/class/text': BytesListFeature(
+            [six.ensure_binary('hyena')]),
+        'image/object/bbox/xmin': FloatListFeature([0.0]),
+        'image/object/bbox/xmax': FloatListFeature([0.1]),
+        'image/object/bbox/ymin': FloatListFeature([0.2]),
+        'image/object/bbox/ymax': FloatListFeature([0.3]),
+        'image/seq_id': BytesFeature(six.ensure_binary('01')),
+        'image/seq_num_frames': Int64Feature(2),
+        'image/seq_frame_num': Int64Feature(1),
+        'image/date_captured': BytesFeature(six.ensure_binary(captured)),
+        'image/embedding': FloatListFeature([0.4, 0.5, 0.6]),
+        'image/embedding_score': FloatListFeature([0.9]),
+        'image/embedding_length': Int64Feature(3),
+    }
+    features = tf.train.Features(feature=feature_map)
+    return tf.train.Example(features=features).SerializeToString()
+
+  def assert_expected_examples(self, tf_example_list):
+    """Checks source ids and context features of the two output examples."""
+    found_ids = {
+        tf_example.features.feature['image/source_id'].bytes_list.value[0]
+        for tf_example in tf_example_list}
+    self.assertAllEqual(
+        found_ids,
+        {six.ensure_binary('image_id_1'), six.ensure_binary('image_id_2')})
+    for position in (0, 1):
+      self.assertAllClose(
+          tf_example_list[position].features.feature[
+              'image/context_features'].float_list.value,
+          [0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
+
+  def assert_expected_sequence_example(self, tf_sequence_example_list):
+    """Checks the first sequence example against the two input frames."""
+    tf_sequence_example = tf_sequence_example_list[0]
+    num_frames = 2
+    context = tf_sequence_example.context.feature
+
+    self.assertAllEqual(
+        context['clip/media_id'].bytes_list.value[0],
+        six.ensure_binary('01_0'))
+    self.assertAllClose(
+        context['image/context_features'].float_list.value,
+        [0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
+
+    seq_feature_dict = tf_sequence_example.feature_lists.feature_list
+
+    self.assertLen(seq_feature_dict['image/encoded'].feature[:], num_frames)
+    actual_timestamps = [
+        feature.int64_list.value[0]
+        for feature in seq_feature_dict['image/timestamp'].feature]
+    self.assertAllEqual([0, 1], actual_timestamps)
+
+    hyena = six.ensure_binary('hyena')
+    # Per frame: expected (box feature, values) pairs, then expected labels.
+    expected_frames = [
+        ((('region/bbox/ymin', [0.4, 0.5]),
+          ('region/bbox/xmin', [0.0, 0.1]),
+          ('region/bbox/ymax', [0.6, 0.7]),
+          ('region/bbox/xmax', [0.2, 0.3])),
+         [hyena, hyena]),
+        ((('region/bbox/ymin', [0.2]),
+          ('region/bbox/xmin', [0.0]),
+          ('region/bbox/ymax', [0.3]),
+          ('region/bbox/xmax', [0.1])),
+         [hyena]),
+    ]
+    for frame_idx, (expected_boxes, expected_labels) in enumerate(
+        expected_frames):
+      for box_name, expected_values in expected_boxes:
+        self.assertAllClose(
+            expected_values,
+            seq_feature_dict[box_name].feature[frame_idx].float_list.value[:])
+      self.assertAllEqual(
+          expected_labels,
+          seq_feature_dict['region/label/string'].feature[
+              frame_idx].bytes_list.value[:])
+
+  def assert_expected_key(self, key):
+    """Checks that `key` is the sequence id of the test examples."""
+    self.assertAllEqual(key, b'01')
+
+  def assert_sorted(self, example_collection):
+    """Checks that frame numbers never decrease along the collection."""
+    previous_frame_num = 0
+    for example in list(example_collection):
+      frame_num = example.features.feature[
+          'image/seq_frame_num'].int64_list.value[0]
+      self.assertGreaterEqual(frame_num, previous_frame_num)
+      previous_frame_num = frame_num
+
+  def assert_context(self, example_collection):
+    """Checks that every example carries the full two-frame memory bank."""
+    for example in list(example_collection):
+      context = example.features.feature[
+          'image/context_features'].float_list.value
+      self.assertAllClose([0.1, 0.2, 0.3, 0.4, 0.5, 0.6], context)
+
+  def assert_resized(self, example):
+    """Checks that the example's recorded image size is 2x2."""
+    features = example.features.feature
+    width = features['image/width'].int64_list.value[0]
+    self.assertAllEqual(width, 2)
+    height = features['image/height'].int64_list.value[0]
+    self.assertAllEqual(height, 2)
+
+  def assert_size(self, example):
+    """Checks that the example's recorded image size is still 4x4."""
+    features = example.features.feature
+    width = features['image/width'].int64_list.value[0]
+    self.assertAllEqual(width, 4)
+    height = features['image/height'].int64_list.value[0]
+    self.assertAllEqual(height, 4)
+
+  def _parsed_examples(self):
+    """Returns the first and second test examples as parsed protos."""
+    return [
+        tf.train.Example.FromString(self._create_first_tf_example()),
+        tf.train.Example.FromString(self._create_second_tf_example())]
+
+  def _run_pipeline(self, **pipeline_kwargs):
+    """Runs the pipeline on both test examples; returns the output records."""
+    with InMemoryTFRecord(
+        [self._create_first_tf_example(),
+         self._create_second_tf_example()]) as input_tfrecord:
+      runner = runners.DirectRunner()
+      temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
+      output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
+      pipeline = add_context_to_examples.construct_pipeline(
+          input_tfrecord,
+          output_tfrecord,
+          six.ensure_binary('image/seq_id'),
+          max_num_elements_in_context_features=10,
+          num_shards=1,
+          **pipeline_kwargs)
+      runner.run(pipeline)
+      filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
+      return list(tf.python_io.tf_record_iterator(path=filenames[0]))
+
+  def test_sliding_window(self):
+    windows = add_context_to_examples.get_sliding_window(
+        ['a', 'b', 'c', 'd', 'e', 'f', 'g'], 3, 3)
+    self.assertAllEqual([list(window) for window in windows],
+                        [['a', 'b', 'c'],
+                         ['d', 'e', 'f'],
+                         ['g']])
+
+  def test_rekey_data_fn(self):
+    rekey_fn = add_context_to_examples.ReKeyDataFn(
+        'image/seq_id', None, False, None)
+    output = rekey_fn.process(self._create_first_tf_example())
+
+    rekeyed_key, rekeyed_example = output[0][0], output[0][1]
+    self.assert_expected_key(rekeyed_key)
+    self.assert_size(rekeyed_example)
+
+  def test_rekey_data_fn_w_resize(self):
+    rekey_fn = add_context_to_examples.ReKeyDataFn(
+        'image/seq_id', None, True, 2)
+    output = rekey_fn.process(self._create_first_tf_example())
+
+    rekeyed_key, rekeyed_example = output[0][0], output[0][1]
+    self.assert_expected_key(rekeyed_key)
+    self.assert_resized(rekeyed_example)
+
+  def test_sort_fn(self):
+    sort_fn = add_context_to_examples.SortGroupedDataFn(
+        'image/seq_id', False, 10)
+    # Feed the frames in reverse order so that sorting has work to do.
+    unsorted_examples = [
+        tf.train.Example.FromString(self._create_second_tf_example()),
+        tf.train.Example.FromString(self._create_first_tf_example())]
+    output = sort_fn.process(('dummy_key', unsorted_examples))
+
+    self.assert_sorted(output[0][1])
+
+  def test_add_context_fn(self):
+    context_fn = add_context_to_examples.GenerateContextFn(
+        'image/seq_id', True, 'All')
+    output = context_fn.process(('dummy_key', self._parsed_examples()))
+
+    self.assertEqual(len(output), 2)
+    self.assert_context(output)
+
+  def test_add_context_fn_output_sequence_example(self):
+    context_fn = add_context_to_examples.GenerateContextFn(
+        'image/seq_id', True, 'All', output_type='tf_sequence_example')
+    output = context_fn.process(('01', self._parsed_examples()))
+
+    self.assertEqual(len(output), 1)
+    self.assert_expected_sequence_example(output)
+
+  def test_add_context_fn_output_sequence_example_cliplen(self):
+    context_fn = add_context_to_examples.GenerateContextFn(
+        'image/seq_id', True, 'All',
+        output_type='tf_sequence_example', max_clip_length=1)
+    output = context_fn.process(('01', self._parsed_examples()))
+    self.assertEqual(len(output), 2)
+
+  def test_beam_pipeline(self):
+    actual_output = self._run_pipeline()
+    self.assertEqual(len(actual_output), 2)
+    self.assert_expected_examples(
+        [tf.train.Example.FromString(record) for record in actual_output])
+
+  def test_beam_pipeline_sequence_example(self):
+    actual_output = self._run_pipeline(output_type='tf_sequence_example')
+    self.assertEqual(len(actual_output), 1)
+    self.assert_expected_sequence_example(
+        [tf.train.SequenceExample.FromString(record)
+         for record in actual_output])
+
+# Run the tests of this module when the file is executed as a script.
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_main.py b/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_main.py
new file mode 100644
index 0000000000000000000000000000000000000000..106cf5adb94d8d1017a1834de42ab2096d85c67c
--- /dev/null
+++ b/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_main.py
@@ -0,0 +1,324 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""Beam pipeline to create COCO Camera Traps Object Detection TFRecords.
+
+Please note that this tool creates sharded output files.
+
+This tool assumes the input annotations are in the COCO Camera Traps json
+format, specified here:
+https://github.com/Microsoft/CameraTraps/blob/master/data_management/README.md
+
+Example usage:
+
+ python create_cococameratraps_tfexample_main.py \
+ --alsologtostderr \
+ --output_tfrecord_prefix="/path/to/output/tfrecord/location/prefix" \
+ --image_directory="/path/to/image/folder/" \
+ --input_annotations_file="path/to/annotations.json"
+
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import hashlib
+import io
+import json
+import logging
+import os
+from absl import app
+from absl import flags
+import apache_beam as beam
+import numpy as np
+import PIL.Image
+import tensorflow.compat.v1 as tf
+from apache_beam import runners
+from object_detection.utils import dataset_util
+
+flags.DEFINE_string('image_directory', None, 'Directory where images are '
+ 'stored')
+flags.DEFINE_string('output_tfrecord_prefix', None,
+ 'TFRecord containing images in tf.Example format.')
+flags.DEFINE_string('input_annotations_file', None, 'Path to Coco-CameraTraps'
+ ' style annotations file')
+flags.DEFINE_integer('num_images_per_shard',
+ 200,
+ 'The number of images to be stored in each shard.')
+
+FLAGS = flags.FLAGS
+
+
+class ParseImage(beam.DoFn):
+ """A DoFn that builds tf.Examples from COCO-CameraTraps image entries."""
+
+ def __init__(self, image_directory, images, annotations, categories,
+ keep_bboxes):
+ """Initialization function.
+
+ Args:
+ image_directory: Path to image directory
+ images: list of COCO Camera Traps style image dictionaries
+ annotations: list of COCO Camera Traps style annotation dictionaries
+ categories: list of COCO Camera Traps style category dictionaries
+ keep_bboxes: Whether to keep any bounding boxes that exist in the
+ annotations
+ """
+
+ self._image_directory = image_directory
+ self._image_dict = {im['id']: im for im in images}
+ self._annotation_dict = {im['id']: [] for im in images}
+ self._category_dict = {int(cat['id']): cat for cat in categories}
+ for ann in annotations:
+ self._annotation_dict[ann['image_id']].append(ann)
+ self._images = images
+ self._keep_bboxes = keep_bboxes
+
+ self._num_examples_processed = beam.metrics.Metrics.counter(
+ 'cococameratraps_data_generation', 'num_tf_examples_processed')
+
+ def process(self, image_id):
+ """Builds a tf.Example given an image id.
+
+ Args:
+ image_id: the image id of the associated image
+
+ Returns:
+ List of tf.Examples.
+ """
+
+ image = self._image_dict[image_id]
+ annotations = self._annotation_dict[image_id]
+ image_height = image['height']
+ image_width = image['width']
+ filename = image['file_name']
+ image_id = image['id']
+ image_location_id = image['location']
+
+ image_datetime = str(image['date_captured'])
+
+ image_sequence_id = str(image['seq_id'])
+ image_sequence_num_frames = int(image['seq_num_frames'])
+ image_sequence_frame_num = int(image['frame_num'])
+
+ full_path = os.path.join(self._image_directory, filename)
+
+ try:
+ # Ensure the image exists and is not corrupted
+ with tf.io.gfile.GFile(full_path, 'rb') as fid:
+ encoded_jpg = fid.read()
+ encoded_jpg_io = io.BytesIO(encoded_jpg)
+ image = PIL.Image.open(encoded_jpg_io)
+ # Ensure the image can be read by tf
+ with tf.Graph().as_default():
+ image = tf.image.decode_jpeg(encoded_jpg, channels=3)
+ init_op = tf.initialize_all_tables()
+ with tf.Session() as sess:
+ sess.run(init_op)
+ sess.run(image)
+ except Exception as e: # pylint: disable=broad-except
+ # The image file is missing or corrupt
+ tf.logging.error(str(e))
+ return []
+
+ key = hashlib.sha256(encoded_jpg).hexdigest()
+ feature_dict = {
+ 'image/height':
+ dataset_util.int64_feature(image_height),
+ 'image/width':
+ dataset_util.int64_feature(image_width),
+ 'image/filename':
+ dataset_util.bytes_feature(filename.encode('utf8')),
+ 'image/source_id':
+ dataset_util.bytes_feature(str(image_id).encode('utf8')),
+ 'image/key/sha256':
+ dataset_util.bytes_feature(key.encode('utf8')),
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'.encode('utf8')),
+ 'image/location':
+ dataset_util.bytes_feature(str(image_location_id).encode('utf8')),
+ 'image/seq_num_frames':
+ dataset_util.int64_feature(image_sequence_num_frames),
+ 'image/seq_frame_num':
+ dataset_util.int64_feature(image_sequence_frame_num),
+ 'image/seq_id':
+ dataset_util.bytes_feature(image_sequence_id.encode('utf8')),
+ 'image/date_captured':
+ dataset_util.bytes_feature(image_datetime.encode('utf8'))
+ }
+
+ num_annotations_skipped = 0
+ if annotations:
+ xmin = []
+ xmax = []
+ ymin = []
+ ymax = []
+ category_names = []
+ category_ids = []
+ area = []
+
+ for object_annotations in annotations:
+ if 'bbox' in object_annotations and self._keep_bboxes:
+ (x, y, width, height) = tuple(object_annotations['bbox'])
+ if width <= 0 or height <= 0:
+ num_annotations_skipped += 1
+ continue
+ if x + width > image_width or y + height > image_height:
+ num_annotations_skipped += 1
+ continue
+ xmin.append(float(x) / image_width)
+ xmax.append(float(x + width) / image_width)
+ ymin.append(float(y) / image_height)
+ ymax.append(float(y + height) / image_height)
+ if 'area' in object_annotations:
+ area.append(object_annotations['area'])
+ else:
+ # approximate area using l*w/2
+ area.append(width*height/2.0)
+
+ category_id = int(object_annotations['category_id'])
+ category_ids.append(category_id)
+ category_names.append(
+ self._category_dict[category_id]['name'].encode('utf8'))
+
+ feature_dict.update({
+ 'image/object/bbox/xmin':
+ dataset_util.float_list_feature(xmin),
+ 'image/object/bbox/xmax':
+ dataset_util.float_list_feature(xmax),
+ 'image/object/bbox/ymin':
+ dataset_util.float_list_feature(ymin),
+ 'image/object/bbox/ymax':
+ dataset_util.float_list_feature(ymax),
+ 'image/object/class/text':
+ dataset_util.bytes_list_feature(category_names),
+ 'image/object/class/label':
+ dataset_util.int64_list_feature(category_ids),
+ 'image/object/area':
+ dataset_util.float_list_feature(area),
+ })
+
+ # For classification, add the first category to image/class/label and
+ # image/class/text
+ if not category_ids:
+ feature_dict.update({
+ 'image/class/label':
+ dataset_util.int64_list_feature([0]),
+ 'image/class/text':
+ dataset_util.bytes_list_feature(['empty'.encode('utf8')]),
+ })
+ else:
+ feature_dict.update({
+ 'image/class/label':
+ dataset_util.int64_list_feature([category_ids[0]]),
+ 'image/class/text':
+ dataset_util.bytes_list_feature([category_names[0]]),
+ })
+
+ else:
+ # Add empty class if there are no annotations
+ feature_dict.update({
+ 'image/class/label':
+ dataset_util.int64_list_feature([0]),
+ 'image/class/text':
+ dataset_util.bytes_list_feature(['empty'.encode('utf8')]),
+ })
+
+ example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
+ self._num_examples_processed.inc(1)
+
+ return [(example)]
+
+
+def _load_json_data(data_file):
+ with tf.io.gfile.GFile(data_file, 'r') as fid:
+ data_dict = json.load(fid)
+ return data_dict
+
+
+def create_pipeline(image_directory,
+ input_annotations_file,
+ output_tfrecord_prefix=None,
+ num_images_per_shard=200,
+ keep_bboxes=True):
+ """Creates a beam pipeline for producing a COCO-CameraTraps Image dataset.
+
+ Args:
+ image_directory: Path to image directory
+ input_annotations_file: Path to a coco-cameratraps annotation file
+ output_tfrecord_prefix: Absolute path for tfrecord outputs. Final files will
+ be named {output_tfrecord_prefix}@N.
+ num_images_per_shard: The number of images to store in each shard
+ keep_bboxes: Whether to keep any bounding boxes that exist in the json file
+
+ Returns:
+ A Beam pipeline.
+ """
+
+ logging.info('Reading data from COCO-CameraTraps Dataset.')
+
+ data = _load_json_data(input_annotations_file)
+
+ num_shards = int(np.ceil(float(len(data['images']))/num_images_per_shard))
+
+ def pipeline(root):
+ """Builds beam pipeline."""
+
+ image_examples = (
+ root
+ | ('CreateCollections') >> beam.Create(
+ [im['id'] for im in data['images']])
+ | ('ParseImage') >> beam.ParDo(ParseImage(
+ image_directory, data['images'], data['annotations'],
+ data['categories'], keep_bboxes=keep_bboxes)))
+ _ = (image_examples
+ | ('Reshuffle') >> beam.Reshuffle()
+ | ('WriteTfImageExample') >> beam.io.tfrecordio.WriteToTFRecord(
+ output_tfrecord_prefix,
+ num_shards=num_shards,
+ coder=beam.coders.ProtoCoder(tf.train.Example)))
+
+ return pipeline
+
+
+def main(_):
+ """Runs the Beam pipeline that writes COCO Camera Traps TFRecords.
+
+ Args:
+ _: unused
+ """
+
+ # must create before flags are used
+ runner = runners.DirectRunner()
+
+ dirname = os.path.dirname(FLAGS.output_tfrecord_prefix)
+ tf.io.gfile.makedirs(dirname)
+
+ runner.run(
+ create_pipeline(
+ image_directory=FLAGS.image_directory,
+ input_annotations_file=FLAGS.input_annotations_file,
+ output_tfrecord_prefix=FLAGS.output_tfrecord_prefix,
+ num_images_per_shard=FLAGS.num_images_per_shard))
+
+
+if __name__ == '__main__':
+ flags.mark_flags_as_required([
+ 'image_directory',
+ 'input_annotations_file',
+ 'output_tfrecord_prefix'
+ ])
+ app.run(main)
diff --git a/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf1_test.py b/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf1_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f3569e1384857641de7c767ae76f0d9023d7291
--- /dev/null
+++ b/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf1_test.py
@@ -0,0 +1,201 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for create_cococameratraps_tfexample_main."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import datetime
+import json
+import os
+import tempfile
+import unittest
+import numpy as np
+
+from PIL import Image
+import tensorflow.compat.v1 as tf
+from object_detection.dataset_tools.context_rcnn import create_cococameratraps_tfexample_main
+from object_detection.utils import tf_version
+from apache_beam import runners
+
+
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
+class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
+
+ IMAGE_HEIGHT = 360
+ IMAGE_WIDTH = 480
+
+ def _write_random_images_to_directory(self, directory, num_frames):
+ for frame_num in range(num_frames):
+ img = np.random.randint(0, high=256,
+ size=(self.IMAGE_HEIGHT, self.IMAGE_WIDTH, 3),
+ dtype=np.uint8)
+ pil_image = Image.fromarray(img)
+ fname = 'im_' + str(frame_num) + '.jpg'
+ pil_image.save(os.path.join(directory, fname), 'JPEG')
+
+ def _create_json_file(self, directory, num_frames, keep_bboxes=False):
+ json_dict = {'images': [], 'annotations': []}
+ json_dict['categories'] = [{'id': 0, 'name': 'empty'},
+ {'id': 1, 'name': 'animal'}]
+ for idx in range(num_frames):
+ im = {'id': 'im_' + str(idx),
+ 'file_name': 'im_' + str(idx) + '.jpg',
+ 'height': self.IMAGE_HEIGHT,
+ 'width': self.IMAGE_WIDTH,
+ 'seq_id': 'seq_1',
+ 'seq_num_frames': num_frames,
+ 'frame_num': idx,
+ 'location': 'loc_' + str(idx),
+ 'date_captured': str(datetime.datetime.now())
+ }
+ json_dict['images'].append(im)
+ ann = {'id': 'ann' + str(idx),
+ 'image_id': 'im_' + str(idx),
+ 'category_id': 1,
+ }
+ if keep_bboxes:
+ ann['bbox'] = [0.0 * self.IMAGE_WIDTH,
+ 0.1 * self.IMAGE_HEIGHT,
+ 0.5 * self.IMAGE_WIDTH,
+ 0.5 * self.IMAGE_HEIGHT]
+ json_dict['annotations'].append(ann)
+
+ json_path = os.path.join(directory, 'test_file.json')
+ with tf.io.gfile.GFile(json_path, 'w') as f:
+ json.dump(json_dict, f)
+ return json_path
+
+ def assert_expected_example_bbox(self, example):
+ self.assertAllClose(
+ example.features.feature['image/object/bbox/ymin'].float_list.value,
+ [0.1])
+ self.assertAllClose(
+ example.features.feature['image/object/bbox/xmin'].float_list.value,
+ [0.0])
+ self.assertAllClose(
+ example.features.feature['image/object/bbox/ymax'].float_list.value,
+ [0.6])
+ self.assertAllClose(
+ example.features.feature['image/object/bbox/xmax'].float_list.value,
+ [0.5])
+ self.assertAllClose(
+ example.features.feature['image/object/class/label']
+ .int64_list.value, [1])
+ self.assertAllEqual(
+ example.features.feature['image/object/class/text']
+ .bytes_list.value, [b'animal'])
+ self.assertAllClose(
+ example.features.feature['image/class/label']
+ .int64_list.value, [1])
+ self.assertAllEqual(
+ example.features.feature['image/class/text']
+ .bytes_list.value, [b'animal'])
+
+ # Check other essential attributes.
+ self.assertAllEqual(
+ example.features.feature['image/height'].int64_list.value,
+ [self.IMAGE_HEIGHT])
+ self.assertAllEqual(
+ example.features.feature['image/width'].int64_list.value,
+ [self.IMAGE_WIDTH])
+ self.assertAllEqual(
+ example.features.feature['image/source_id'].bytes_list.value,
+ [b'im_0'])
+ self.assertTrue(
+ example.features.feature['image/encoded'].bytes_list.value)
+
+ def assert_expected_example(self, example):
+ self.assertAllClose(
+ example.features.feature['image/object/bbox/ymin'].float_list.value,
+ [])
+ self.assertAllClose(
+ example.features.feature['image/object/bbox/xmin'].float_list.value,
+ [])
+ self.assertAllClose(
+ example.features.feature['image/object/bbox/ymax'].float_list.value,
+ [])
+ self.assertAllClose(
+ example.features.feature['image/object/bbox/xmax'].float_list.value,
+ [])
+ self.assertAllClose(
+ example.features.feature['image/object/class/label']
+ .int64_list.value, [1])
+ self.assertAllEqual(
+ example.features.feature['image/object/class/text']
+ .bytes_list.value, [b'animal'])
+ self.assertAllClose(
+ example.features.feature['image/class/label']
+ .int64_list.value, [1])
+ self.assertAllEqual(
+ example.features.feature['image/class/text']
+ .bytes_list.value, [b'animal'])
+
+ # Check other essential attributes.
+ self.assertAllEqual(
+ example.features.feature['image/height'].int64_list.value,
+ [self.IMAGE_HEIGHT])
+ self.assertAllEqual(
+ example.features.feature['image/width'].int64_list.value,
+ [self.IMAGE_WIDTH])
+ self.assertAllEqual(
+ example.features.feature['image/source_id'].bytes_list.value,
+ [b'im_0'])
+ self.assertTrue(
+ example.features.feature['image/encoded'].bytes_list.value)
+
+ def test_beam_pipeline(self):
+ runner = runners.DirectRunner()
+ num_frames = 1
+ temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
+ json_path = self._create_json_file(temp_dir, num_frames)
+ output_tfrecord = temp_dir+'/output'
+ self._write_random_images_to_directory(temp_dir, num_frames)
+ pipeline = create_cococameratraps_tfexample_main.create_pipeline(
+ temp_dir, json_path,
+ output_tfrecord_prefix=output_tfrecord)
+ runner.run(pipeline)
+ filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
+ actual_output = []
+ record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
+ for record in record_iterator:
+ actual_output.append(record)
+ self.assertEqual(len(actual_output), num_frames)
+ self.assert_expected_example(tf.train.Example.FromString(
+ actual_output[0]))
+
+ def test_beam_pipeline_bbox(self):
+ runner = runners.DirectRunner()
+ num_frames = 1
+ temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
+ json_path = self._create_json_file(temp_dir, num_frames, keep_bboxes=True)
+ output_tfrecord = temp_dir+'/output'
+ self._write_random_images_to_directory(temp_dir, num_frames)
+ pipeline = create_cococameratraps_tfexample_main.create_pipeline(
+ temp_dir, json_path,
+ output_tfrecord_prefix=output_tfrecord,
+ keep_bboxes=True)
+ runner.run(pipeline)
+ filenames = tf.io.gfile.glob(output_tfrecord+'-?????-of-?????')
+ actual_output = []
+ record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
+ for record in record_iterator:
+ actual_output.append(record)
+ self.assertEqual(len(actual_output), num_frames)
+ self.assert_expected_example_bbox(tf.train.Example.FromString(
+ actual_output[0]))
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/research/object_detection/dataset_tools/context_rcnn/generate_detection_data.py b/research/object_detection/dataset_tools/context_rcnn/generate_detection_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..95c16c1358a15ece03aaa9e80353e1ebf2c17166
--- /dev/null
+++ b/research/object_detection/dataset_tools/context_rcnn/generate_detection_data.py
@@ -0,0 +1,262 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""A Beam job to generate detection data for camera trap images.
+
+This tool runs inference with an exported Object Detection model in
+`saved_model` format and produce raw detection boxes on images in tf.Examples,
+with the assumption that the bounding box class label will match the image-level
+class label in the tf.Example.
+
+Steps to generate a detection dataset:
+1. Use object_detection/export_inference_graph.py to get a `saved_model` for
+ inference. The input node must accept a tf.Example proto.
+2. Run this tool with `saved_model` from step 1 and a TFRecord of tf.Example
+ protos containing images for inference.
+
+Example Usage:
+--------------
+python tensorflow_models/object_detection/export_inference_graph.py \
+ --alsologtostderr \
+ --input_type tf_example \
+ --pipeline_config_path path/to/detection_model.config \
+ --trained_checkpoint_prefix path/to/model.ckpt \
+ --output_directory path/to/exported_model_directory
+
+python generate_detection_data.py \
+ --alsologtostderr \
+ --detection_input_tfrecord path/to/input_tfrecord@X \
+ --detection_output_tfrecord path/to/output_tfrecord@X \
+ --detection_model_dir path/to/exported_model_directory/saved_model
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import threading
+from absl import app
+from absl import flags
+import apache_beam as beam
+import tensorflow.compat.v1 as tf
+from apache_beam import runners
+
+
+flags.DEFINE_string('detection_input_tfrecord', None, 'TFRecord containing '
+ 'images in tf.Example format for object detection.')
+flags.DEFINE_string('detection_output_tfrecord', None,
+ 'TFRecord containing detections in tf.Example format.')
+flags.DEFINE_string('detection_model_dir', None, 'Path to directory containing'
+ ' an object detection SavedModel.')
+flags.DEFINE_float('confidence_threshold', 0.9,
+ 'Min confidence to keep bounding boxes')
+flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
+
+FLAGS = flags.FLAGS
+
+
+class GenerateDetectionDataFn(beam.DoFn):
+ """Generates detection data for camera trap images.
+
+ This Beam DoFn performs inference with an object detection `saved_model` and
+ produces detection boxes for camera trap data, matched to the
+ object class.
+ """
+ session_lock = threading.Lock()
+
+ def __init__(self, model_dir, confidence_threshold):
+ """Initialization function.
+
+ Args:
+ model_dir: A directory containing saved model.
+ confidence_threshold: the confidence threshold for boxes to keep
+ """
+ self._model_dir = model_dir
+ self._confidence_threshold = confidence_threshold
+ self._session = None
+ self._num_examples_processed = beam.metrics.Metrics.counter(
+ 'detection_data_generation', 'num_tf_examples_processed')
+
+ def start_bundle(self):
+ self._load_inference_model()
+
+ def _load_inference_model(self):
+ # Because initialization of the tf.Session is expensive we share
+ # one instance across all threads in the worker. This is possible since
+ # tf.Session.run() is thread safe.
+ with self.session_lock:
+ if self._session is None:
+ graph = tf.Graph()
+ self._session = tf.Session(graph=graph)
+ with graph.as_default():
+ meta_graph = tf.saved_model.loader.load(
+ self._session, [tf.saved_model.tag_constants.SERVING],
+ self._model_dir)
+ signature = meta_graph.signature_def['serving_default']
+ input_tensor_name = signature.inputs['inputs'].name
+ self._input = graph.get_tensor_by_name(input_tensor_name)
+ self._boxes_node = graph.get_tensor_by_name(
+ signature.outputs['detection_boxes'].name)
+ self._scores_node = graph.get_tensor_by_name(
+ signature.outputs['detection_scores'].name)
+ self._num_detections_node = graph.get_tensor_by_name(
+ signature.outputs['num_detections'].name)
+
+ def process(self, tfrecord_entry):
+ return self._run_inference_and_generate_detections(tfrecord_entry)
+
+ def _run_inference_and_generate_detections(self, tfrecord_entry):
+ input_example = tf.train.Example.FromString(tfrecord_entry)
+ if input_example.features.feature[
+ 'image/object/bbox/ymin'].float_list.value:
+ # There are already ground truth boxes for this image, just keep them.
+ return [input_example]
+
+ detection_boxes, detection_scores, num_detections = self._session.run(
+ [self._boxes_node, self._scores_node, self._num_detections_node],
+ feed_dict={self._input: [tfrecord_entry]})
+
+ example = tf.train.Example()
+
+ num_detections = int(num_detections[0])
+
+ image_class_labels = input_example.features.feature[
+ 'image/object/class/label'].int64_list.value
+
+ image_class_texts = input_example.features.feature[
+ 'image/object/class/text'].bytes_list.value
+
+ # Ignore any images with multiple classes,
+ # we can't match the class to the box.
+ if len(image_class_labels) > 1:
+ return []
+
+ # Don't add boxes for images already labeled empty (for now)
+ if len(image_class_labels) == 1:
+ # Add boxes over confidence threshold.
+ for idx, score in enumerate(detection_scores[0]):
+ if score >= self._confidence_threshold and idx < num_detections:
+ example.features.feature[
+ 'image/object/bbox/ymin'].float_list.value.extend([
+ detection_boxes[0, idx, 0]])
+ example.features.feature[
+ 'image/object/bbox/xmin'].float_list.value.extend([
+ detection_boxes[0, idx, 1]])
+ example.features.feature[
+ 'image/object/bbox/ymax'].float_list.value.extend([
+ detection_boxes[0, idx, 2]])
+ example.features.feature[
+ 'image/object/bbox/xmax'].float_list.value.extend([
+ detection_boxes[0, idx, 3]])
+
+ # Add box scores and class texts and labels.
+ example.features.feature[
+ 'image/object/class/score'].float_list.value.extend(
+ [score])
+
+ example.features.feature[
+ 'image/object/class/label'].int64_list.value.extend(
+ [image_class_labels[0]])
+
+ example.features.feature[
+ 'image/object/class/text'].bytes_list.value.extend(
+ [image_class_texts[0]])
+
+ # Add other essential example attributes
+ example.features.feature['image/encoded'].bytes_list.value.extend(
+ input_example.features.feature['image/encoded'].bytes_list.value)
+ example.features.feature['image/height'].int64_list.value.extend(
+ input_example.features.feature['image/height'].int64_list.value)
+ example.features.feature['image/width'].int64_list.value.extend(
+ input_example.features.feature['image/width'].int64_list.value)
+ example.features.feature['image/source_id'].bytes_list.value.extend(
+ input_example.features.feature['image/source_id'].bytes_list.value)
+ example.features.feature['image/location'].bytes_list.value.extend(
+ input_example.features.feature['image/location'].bytes_list.value)
+
+ example.features.feature['image/date_captured'].bytes_list.value.extend(
+ input_example.features.feature['image/date_captured'].bytes_list.value)
+
+ example.features.feature['image/class/text'].bytes_list.value.extend(
+ input_example.features.feature['image/class/text'].bytes_list.value)
+ example.features.feature['image/class/label'].int64_list.value.extend(
+ input_example.features.feature['image/class/label'].int64_list.value)
+
+ example.features.feature['image/seq_id'].bytes_list.value.extend(
+ input_example.features.feature['image/seq_id'].bytes_list.value)
+ example.features.feature['image/seq_num_frames'].int64_list.value.extend(
+ input_example.features.feature['image/seq_num_frames'].int64_list.value)
+ example.features.feature['image/seq_frame_num'].int64_list.value.extend(
+ input_example.features.feature['image/seq_frame_num'].int64_list.value)
+
+ self._num_examples_processed.inc(1)
+ return [example]
+
+
+def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
+ confidence_threshold, num_shards):
+ """Returns a Beam pipeline to run object detection inference.
+
+ Args:
+ input_tfrecord: A TFRecord of tf.train.Example protos containing images.
+ output_tfrecord: A TFRecord of tf.train.Example protos that contain images
+ in the input TFRecord and the detections from the model.
+ model_dir: Path to `saved_model` to use for inference.
+ confidence_threshold: Threshold to use when keeping detection results.
+ num_shards: The number of output shards.
+ Returns:
+ pipeline: A Beam pipeline.
+ """
+ def pipeline(root):
+ input_collection = (
+ root | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
+ input_tfrecord,
+ coder=beam.coders.BytesCoder()))
+ output_collection = input_collection | 'RunInference' >> beam.ParDo(
+ GenerateDetectionDataFn(model_dir, confidence_threshold))
+ output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
+ _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
+ output_tfrecord,
+ num_shards=num_shards,
+ coder=beam.coders.ProtoCoder(tf.train.Example))
+ return pipeline
+
+
+def main(_):
+ """Runs the Beam pipeline that performs inference.
+
+ Args:
+ _: unused
+ """
+ # must create before flags are used
+ runner = runners.DirectRunner()
+
+ dirname = os.path.dirname(FLAGS.detection_output_tfrecord)
+ tf.io.gfile.makedirs(dirname)
+ runner.run(
+ construct_pipeline(FLAGS.detection_input_tfrecord,
+ FLAGS.detection_output_tfrecord,
+ FLAGS.detection_model_dir,
+ FLAGS.confidence_threshold,
+ FLAGS.num_shards))
+
+
+if __name__ == '__main__':
+ flags.mark_flags_as_required([
+ 'detection_input_tfrecord',
+ 'detection_output_tfrecord',
+ 'detection_model_dir'
+ ])
+ app.run(main)
diff --git a/research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf1_test.py b/research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf1_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..279183110b2e60d4dedd56af80a7cb45d33a8367
--- /dev/null
+++ b/research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf1_test.py
@@ -0,0 +1,270 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for generate_detection_data."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import contextlib
+import os
+import tempfile
+import unittest
+import numpy as np
+import six
+import tensorflow.compat.v1 as tf
+
+from object_detection import exporter
+from object_detection.builders import model_builder
+from object_detection.core import model
+from object_detection.dataset_tools.context_rcnn import generate_detection_data
+from object_detection.protos import pipeline_pb2
+from object_detection.utils import tf_version
+from apache_beam import runners
+
+if six.PY2:
+ import mock # pylint: disable=g-import-not-at-top
+else:
+ mock = unittest.mock
+
+
+class FakeModel(model.DetectionModel):
+  """Stub DetectionModel whose postprocess emits fixed detection tensors."""
+
+  def preprocess(self, inputs):
+    # The fake model never reads the true image shapes, so none are built.
+    return tf.identity(inputs), []
+
+  def predict(self, preprocessed_inputs, true_image_shapes):
+    return dict(image=tf.layers.conv2d(preprocessed_inputs, 3, 1))
+
+  def postprocess(self, prediction_dict, true_image_shapes):
+    def as_float(values):
+      return tf.constant(values, tf.float32)
+    with tf.control_dependencies(prediction_dict.values()):
+      return {
+          'detection_boxes': as_float([[[0.0, 0.1, 0.5, 0.6],
+                                        [0.5, 0.5, 0.8, 0.8]]]),
+          'detection_scores': as_float([[0.95, 0.6]]),
+          'detection_multiclass_scores': as_float([[[0.1, 0.7, 0.2],
+                                                    [0.3, 0.1, 0.6]]]),
+          'detection_classes': as_float([[0, 1]]),
+          'num_detections': as_float([2])
+      }
+
+  def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
+    """Intentionally a no-op; returns None."""
+
+  def restore_from_objects(self, fine_tune_checkpoint_type):
+    """Intentionally a no-op; returns None."""
+
+  def loss(self, prediction_dict, true_image_shapes):
+    """Intentionally a no-op; returns None."""
+
+  def regularization_losses(self):
+    """Intentionally a no-op; returns None."""
+
+  def updates(self):
+    """Intentionally a no-op; returns None."""
+
+
+@contextlib.contextmanager
+def InMemoryTFRecord(entries):
+  with tempfile.NamedTemporaryFile(delete=False) as temp:
+    filename = temp.name  # Handle is closed on exit; delete=False keeps file.
+  try:
+    with tf.python_io.TFRecordWriter(filename) as writer:
+      for value in entries:
+        writer.write(value)
+    yield filename  # Path of the TFRecord file holding `entries`.
+  finally:
+    os.unlink(filename)  # Always remove the temporary file afterwards.
+
+
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
+class GenerateDetectionDataTest(tf.test.TestCase):
+ """Tests GenerateDetectionDataFn and construct_pipeline against a FakeModel."""
+ def _save_checkpoint_from_mock_model(self, checkpoint_path):
+ """Builds a FakeModel graph and saves its variables to a checkpoint.
+
+ Args:
+ checkpoint_path: Path to save checkpoint from Fake model.
+ """
+ g = tf.Graph()
+ with g.as_default():
+ mock_model = FakeModel(num_classes=5)
+ preprocessed_inputs, true_image_shapes = mock_model.preprocess(
+ tf.placeholder(tf.float32, shape=[None, None, None, 3]))
+ predictions = mock_model.predict(preprocessed_inputs, true_image_shapes)
+ mock_model.postprocess(predictions, true_image_shapes)
+ tf.train.get_or_create_global_step()
+ saver = tf.train.Saver()
+ init = tf.global_variables_initializer()
+ with self.test_session(graph=g) as sess:
+ sess.run(init)
+ saver.save(sess, checkpoint_path)
+
+ def _export_saved_model(self):  # Returns the path of the written SavedModel.
+ tmp_dir = self.get_temp_dir()
+ checkpoint_path = os.path.join(tmp_dir, 'model.ckpt')
+ self._save_checkpoint_from_mock_model(checkpoint_path)
+ output_directory = os.path.join(tmp_dir, 'output')
+ saved_model_path = os.path.join(output_directory, 'saved_model')
+ tf.io.gfile.makedirs(output_directory)
+ with mock.patch.object(
+ model_builder, 'build', autospec=True) as mock_builder:
+ mock_builder.return_value = FakeModel(num_classes=5)  # build() -> fake.
+ pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
+ pipeline_config.eval_config.use_moving_averages = False
+ detection_model = model_builder.build(pipeline_config.model,
+ is_training=False)
+ outputs, placeholder_tensor = exporter.build_detection_graph(
+ input_type='tf_example',
+ detection_model=detection_model,
+ input_shape=None,
+ output_collection_name='inference_op',
+ graph_hook_fn=None)
+ output_node_names = ','.join(outputs.keys())
+ saver = tf.train.Saver()
+ input_saver_def = saver.as_saver_def()
+ frozen_graph_def = exporter.freeze_graph_with_def_protos(
+ input_graph_def=tf.get_default_graph().as_graph_def(),
+ input_saver_def=input_saver_def,
+ input_checkpoint=checkpoint_path,
+ output_node_names=output_node_names,
+ restore_op_name='save/restore_all',
+ filename_tensor_name='save/Const:0',
+ output_graph='',
+ clear_devices=True,
+ initializer_nodes='')
+ exporter.write_saved_model(
+ saved_model_path=saved_model_path,
+ frozen_graph_def=frozen_graph_def,
+ inputs=placeholder_tensor,
+ outputs=outputs)
+ return saved_model_path
+
+ def _create_tf_example(self):  # Returns a serialized tf.train.Example.
+ with self.test_session():
+ encoded_image = tf.image.encode_jpeg(
+ tf.constant(np.ones((4, 6, 3)).astype(np.uint8))).eval()
+
+ def BytesFeature(value):
+ return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
+
+ def Int64Feature(value):
+ return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
+
+ example = tf.train.Example(features=tf.train.Features(feature={
+ 'image/encoded': BytesFeature(encoded_image),
+ 'image/source_id': BytesFeature(b'image_id'),
+ 'image/height': Int64Feature(4),
+ 'image/width': Int64Feature(6),
+ 'image/object/class/label': Int64Feature(5),
+ 'image/object/class/text': BytesFeature(b'hyena'),
+ 'image/class/label': Int64Feature(5),
+ 'image/class/text': BytesFeature(b'hyena'),
+ }))
+
+ return example.SerializeToString()
+
+ def assert_expected_example(self, example):  # `example`: a tf.train.Example.
+ # The expected box and score equal those of FakeModel's first detection.
+ self.assertAllClose(
+ example.features.feature['image/object/bbox/ymin'].float_list.value,
+ [0.0])
+ self.assertAllClose(
+ example.features.feature['image/object/bbox/xmin'].float_list.value,
+ [0.1])
+ self.assertAllClose(
+ example.features.feature['image/object/bbox/ymax'].float_list.value,
+ [0.5])
+ self.assertAllClose(
+ example.features.feature['image/object/bbox/xmax'].float_list.value,
+ [0.6])
+ self.assertAllClose(
+ example.features.feature['image/object/class/score']
+ .float_list.value, [0.95])
+ self.assertAllClose(
+ example.features.feature['image/object/class/label']
+ .int64_list.value, [5])
+ self.assertAllEqual(
+ example.features.feature['image/object/class/text']
+ .bytes_list.value, [b'hyena'])
+ self.assertAllClose(
+ example.features.feature['image/class/label']
+ .int64_list.value, [5])
+ self.assertAllEqual(
+ example.features.feature['image/class/text']
+ .bytes_list.value, [b'hyena'])
+
+ # Check other essential attributes.
+ self.assertAllEqual(
+ example.features.feature['image/height'].int64_list.value, [4])
+ self.assertAllEqual(
+ example.features.feature['image/width'].int64_list.value, [6])
+ self.assertAllEqual(
+ example.features.feature['image/source_id'].bytes_list.value,
+ [b'image_id'])
+ self.assertTrue(
+ example.features.feature['image/encoded'].bytes_list.value)
+
+ def test_generate_detection_data_fn(self):  # Drives the DoFn directly.
+ saved_model_path = self._export_saved_model()
+ confidence_threshold = 0.8
+ inference_fn = generate_detection_data.GenerateDetectionDataFn(
+ saved_model_path, confidence_threshold)
+ inference_fn.start_bundle()
+ generated_example = self._create_tf_example()
+ self.assertAllEqual(tf.train.Example.FromString(
+ generated_example).features.feature['image/object/class/label']
+ .int64_list.value, [5])
+ self.assertAllEqual(tf.train.Example.FromString(
+ generated_example).features.feature['image/object/class/text']
+ .bytes_list.value, [b'hyena'])
+ output = inference_fn.process(generated_example)
+ output_example = output[0]
+
+ self.assertAllEqual(
+ output_example.features.feature['image/object/class/label']
+ .int64_list.value, [5])
+ self.assertAllEqual(output_example.features.feature['image/width']
+ .int64_list.value, [6])
+
+ self.assert_expected_example(output_example)
+
+ def test_beam_pipeline(self):  # Runs the whole pipeline on one record.
+ with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
+ runner = runners.DirectRunner()
+ temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
+ output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
+ saved_model_path = self._export_saved_model()
+ confidence_threshold = 0.8
+ num_shards = 1
+ pipeline = generate_detection_data.construct_pipeline(
+ input_tfrecord, output_tfrecord, saved_model_path,
+ confidence_threshold, num_shards)
+ runner.run(pipeline)
+ filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')  # Shards.
+ actual_output = []
+ record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
+ for record in record_iterator:
+ actual_output.append(record)
+ self.assertEqual(len(actual_output), 1)
+ self.assert_expected_example(tf.train.Example.FromString(
+ actual_output[0]))
+
+
+if __name__ == '__main__':  # Run the tests when executed as a script.
+ tf.test.main()
diff --git a/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data.py b/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..a147c4e88339f44ff417dc38b60cff28ffe010ed
--- /dev/null
+++ b/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data.py
@@ -0,0 +1,378 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""A Beam job to generate embedding data for camera trap images.
+
+This tool runs inference with an exported Object Detection model in
+`saved_model` format and produces raw embeddings for camera trap data. These
+embeddings contain an object-centric feature embedding from Faster R-CNN, the
+datetime that the image was taken (normalized in a specific way), and the
+position of the object of interest. By default, only the highest-scoring object
+embedding is included.
+
+Steps to generate an embedding dataset:
+1. Use object_detection/export_inference_graph.py to get a Faster R-CNN
+ `saved_model` for inference. The input node must accept a tf.Example proto.
+2. Run this tool with `saved_model` from step 1 and a TFRecord of tf.Example
+ protos containing images for inference.
+
+Example Usage:
+--------------
+python tensorflow_models/object_detection/export_inference_graph.py \
+ --alsologtostderr \
+ --input_type tf_example \
+ --pipeline_config_path path/to/faster_rcnn_model.config \
+ --trained_checkpoint_prefix path/to/model.ckpt \
+ --output_directory path/to/exported_model_directory
+
+python generate_embedding_data.py \
+ --alsologtostderr \
+ --embedding_input_tfrecord path/to/input_tfrecords* \
+ --embedding_output_tfrecord path/to/output_tfrecords \
+ --embedding_model_dir path/to/exported_model_directory/saved_model
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import datetime
+import os
+import threading
+from absl import app
+from absl import flags
+import apache_beam as beam
+import numpy as np
+import six
+import tensorflow.compat.v1 as tf
+from apache_beam import runners
+
+flags.DEFINE_string('embedding_input_tfrecord', None, 'TFRecord containing '
+                    'images in tf.Example format for object detection.')
+flags.DEFINE_string('embedding_output_tfrecord', None,
+                    'TFRecord containing embeddings in tf.Example format.')
+flags.DEFINE_string('embedding_model_dir', None, 'Path to directory containing '
+                    'an object detection SavedModel with '
+                    'detection_box_classifier_features in the output.')
+flags.DEFINE_integer('top_k_embedding_count', 1,
+                     'The number of top k embeddings to add to the memory bank.'
+                     )
+flags.DEFINE_integer('bottom_k_embedding_count', 0,
+                     'The number of bottom k embeddings to add to the memory '
+                     'bank.')
+flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
+
+
+FLAGS = flags.FLAGS
+
+
+class GenerateEmbeddingDataFn(beam.DoFn):
+  """Generates embedding data for camera trap images.
+
+  This Beam DoFn performs inference with an object detection `saved_model` and
+  produces contextual embedding vectors.
+
+  For every input tf.train.Example with a parseable `image/date_captured`, one
+  output tf.train.Example is emitted holding the embeddings plus a copy of the
+  input's image, sequence and annotation features.
+  """
+  # Guards the lazy creation of the session in _load_inference_model.
+  session_lock = threading.Lock()
+
+  # (feature key, value-list field) pairs copied unchanged from the input
+  # example to the output example, in this order.
+  _PASSTHROUGH_FEATURES = (
+      ('image/encoded', 'bytes_list'),
+      ('image/height', 'int64_list'),
+      ('image/width', 'int64_list'),
+      ('image/source_id', 'bytes_list'),
+      ('image/location', 'bytes_list'),
+      ('image/date_captured', 'bytes_list'),
+      ('image/class/text', 'bytes_list'),
+      ('image/class/label', 'int64_list'),
+      ('image/seq_id', 'bytes_list'),
+      ('image/seq_num_frames', 'int64_list'),
+      ('image/seq_frame_num', 'int64_list'),
+      ('image/object/bbox/ymax', 'float_list'),
+      ('image/object/bbox/ymin', 'float_list'),
+      ('image/object/bbox/xmax', 'float_list'),
+      ('image/object/bbox/xmin', 'float_list'),
+      ('image/object/class/score', 'float_list'),
+      ('image/object/class/label', 'int64_list'),
+      ('image/object/class/text', 'bytes_list'),
+  )
+
+  def __init__(self, model_dir, top_k_embedding_count,
+               bottom_k_embedding_count):
+    """Initialization function.
+
+    Args:
+      model_dir: A directory containing saved model.
+      top_k_embedding_count: the number of high-confidence embeddings to store
+      bottom_k_embedding_count: the number of low-confidence embeddings to store
+    """
+    self._model_dir = model_dir
+    self._session = None
+    self._num_examples_processed = beam.metrics.Metrics.counter(
+        'embedding_data_generation', 'num_tf_examples_processed')
+    self._top_k_embedding_count = top_k_embedding_count
+    self._bottom_k_embedding_count = bottom_k_embedding_count
+
+  def start_bundle(self):
+    """Ensures the inference model is loaded."""
+    self._load_inference_model()
+
+  def _load_inference_model(self):
+    """Loads the SavedModel and caches the tensors used for inference.
+
+    Does nothing when this instance already has a session.
+    """
+    # Because initialization of the tf.Session is expensive we share
+    # one instance across all threads in the worker. This is possible since
+    # tf.Session.run() is thread safe.
+    with self.session_lock:
+      if self._session is None:
+        graph = tf.Graph()
+        self._session = tf.Session(graph=graph)
+        with graph.as_default():
+          meta_graph = tf.saved_model.loader.load(
+              self._session, [tf.saved_model.tag_constants.SERVING],
+              self._model_dir)
+        signature = meta_graph.signature_def['serving_default']
+        input_tensor_name = signature.inputs['inputs'].name
+        detection_features_name = signature.outputs['detection_features'].name
+        detection_boxes_name = signature.outputs['detection_boxes'].name
+        num_detections_name = signature.outputs['num_detections'].name
+        self._input = graph.get_tensor_by_name(input_tensor_name)
+        self._embedding_node = graph.get_tensor_by_name(detection_features_name)
+        self._box_node = graph.get_tensor_by_name(detection_boxes_name)
+        self._scores_node = graph.get_tensor_by_name(
+            signature.outputs['detection_scores'].name)
+        self._num_detections = graph.get_tensor_by_name(num_detections_name)
+        tf.logging.info(signature.outputs['detection_features'].name)
+        tf.logging.info(signature.outputs['detection_boxes'].name)
+        tf.logging.info(signature.outputs['num_detections'].name)
+
+  def process(self, tfrecord_entry):
+    """Generates the embedding example for one serialized tf.train.Example.
+
+    Args:
+      tfrecord_entry: A serialized tf.train.Example.
+
+    Returns:
+      A list with the generated tf.train.Example, or an empty list when the
+      entry has no parseable `image/date_captured`.
+    """
+    return self._run_inference_and_generate_embedding(tfrecord_entry)
+
+  def _run_inference_and_generate_embedding(self, tfrecord_entry):
+    """Runs detection on one record and builds its embedding tf.Example.
+
+    Args:
+      tfrecord_entry: A serialized tf.train.Example.
+
+    Returns:
+      A one-element list with the output tf.train.Example, or an empty list
+      when `image/date_captured` is missing or cannot be parsed.
+    """
+    input_example = tf.train.Example.FromString(tfrecord_entry)
+
+    def get_date_captured(example):
+      """Parses `image/date_captured` into a datetime."""
+      return datetime.datetime.strptime(
+          six.ensure_str(
+              example.features.feature[
+                  'image/date_captured'].bytes_list.value[0]),
+          '%Y-%m-%d %H:%M:%S')
+
+    try:
+      date_captured = get_date_captured(input_example)
+    except Exception:  # pylint: disable=broad-except
+      # we require date_captured to be available for all images
+      return []
+
+    def embed_date_captured(date_captured):
+      """Encodes the datetime of the image.
+
+      Returns:
+        A numpy array [year, month, day, hour, minute], each divided by a fixed
+        range (the year relative to 1990-2030).
+      """
+      month_max = 12.0
+      day_max = 31.0
+      hour_max = 24.0
+      minute_max = 60.0
+      min_year = 1990.0
+      max_year = 2030.0
+      return np.asarray([
+          (date_captured.year - min_year) / float(max_year - min_year),
+          (date_captured.month - 1) / month_max,
+          (date_captured.day - 1) / day_max,
+          date_captured.hour / hour_max,
+          date_captured.minute / minute_max,
+      ])
+
+    def embed_position_and_size(box):
+      """Encodes the bounding box of the object of interest."""
+      ymin = box[0]
+      xmin = box[1]
+      ymax = box[2]
+      xmax = box[3]
+      w = xmax - xmin
+      h = ymax - ymin
+      x = xmin + w / 2.0
+      y = ymin + h / 2.0
+      return np.asarray([x, y, w, h])
+
+    # NOTE(review): fromtimestamp(0) is the epoch expressed in local time, so
+    # date_captured is effectively treated as local time here -- confirm.
+    unix_time = (
+        (date_captured - datetime.datetime.fromtimestamp(0)).total_seconds())
+
+    example = tf.train.Example()
+    example.features.feature['image/unix_time'].float_list.value.extend(
+        [unix_time])
+
+    # Fetch num_detections whole and index the returned array below. Slicing
+    # the tensor here (`self._num_detections[0]`) would add a new op to the
+    # graph for every processed record.
+    (detection_features, detection_boxes, num_detections,
+     detection_scores) = self._session.run(
+         [
+             self._embedding_node, self._box_node, self._num_detections,
+             self._scores_node
+         ],
+         feed_dict={self._input: [tfrecord_entry]})
+
+    num_detections = int(num_detections[0])
+    embed_all = []
+    score_all = []
+
+    detection_features = np.asarray(detection_features)
+
+    def get_bb_embedding(detection_features, detection_boxes, detection_scores,
+                         index):
+      """Returns (pooled feature + box encoding, score) for one detection."""
+      embedding = detection_features[0][index]
+      pooled_embedding = np.mean(np.mean(embedding, axis=1), axis=0)
+
+      box = detection_boxes[0][index]
+      position_embedding = embed_position_and_size(box)
+
+      score = detection_scores[0][index]
+      return np.concatenate((pooled_embedding, position_embedding)), score
+
+    temporal_embedding = embed_date_captured(date_captured)
+
+    def append_embedding(index):
+      """Appends the embedding and score of detection `index`."""
+      bb_embedding, score = get_bb_embedding(
+          detection_features, detection_boxes, detection_scores, index)
+      embed_all.extend(bb_embedding)
+      embed_all.extend(temporal_embedding)
+      score_all.append(score)
+
+    embedding_count = 0
+    for index in range(min(num_detections, self._top_k_embedding_count)):
+      append_embedding(index)
+      embedding_count += 1
+
+    for index in range(
+        max(0, num_detections - 1),
+        max(-1, num_detections - 1 - self._bottom_k_embedding_count), -1):
+      append_embedding(index)
+      embedding_count += 1
+
+    # Fall back to the first detection slot so an embedding is always stored.
+    if embedding_count == 0:
+      append_embedding(0)
+
+    # Takes max in case embedding_count is 0.
+    embedding_length = len(embed_all) // max(1, embedding_count)
+
+    embed_all = np.asarray(embed_all)
+
+    example.features.feature['image/embedding'].float_list.value.extend(
+        embed_all)
+    example.features.feature['image/embedding_score'].float_list.value.extend(
+        score_all)
+    example.features.feature['image/embedding_length'].int64_list.value.append(
+        embedding_length)
+    example.features.feature['image/embedding_count'].int64_list.value.append(
+        embedding_count)
+
+    # Add other essential example attributes
+    for key, list_field in self._PASSTHROUGH_FEATURES:
+      getattr(example.features.feature[key], list_field).value.extend(
+          getattr(input_example.features.feature[key], list_field).value)
+
+    self._num_examples_processed.inc(1)
+    return [example]
+
+
+def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
+                       top_k_embedding_count, bottom_k_embedding_count,
+                       num_shards):
+  """Returns a function that builds the embedding-generation Beam pipeline.
+
+  Args:
+    input_tfrecord: A TFRecord of tf.train.Example protos containing images.
+    output_tfrecord: Where the TFRecord of output tf.train.Example is written.
+    model_dir: Path to `saved_model` to use for inference.
+    top_k_embedding_count: The number of high-confidence embeddings to store.
+    bottom_k_embedding_count: The number of low-confidence embeddings to store.
+    num_shards: The number of output shards.
+  """
+  def pipeline(root):
+    read_records = beam.io.tfrecordio.ReadFromTFRecord(
+        input_tfrecord, coder=beam.coders.BytesCoder())
+    extract_embedding = beam.ParDo(
+        GenerateEmbeddingDataFn(model_dir, top_k_embedding_count,
+                                bottom_k_embedding_count))
+    write_examples = beam.io.tfrecordio.WriteToTFRecord(
+        output_tfrecord, num_shards=num_shards,
+        coder=beam.coders.ProtoCoder(tf.train.Example))
+    _ = (root
+         | 'ReadInputTFRecord' >> read_records
+         | 'ExtractEmbedding' >> extract_embedding
+         | 'Reshuffle' >> beam.Reshuffle()
+         | 'WritetoDisk' >> write_examples)
+  return pipeline
+
+
+def main(_):
+  """Runs the Beam pipeline that generates embedding data.
+
+  Args:
+    _: Unused positional command-line arguments.
+  """
+  # must create before flags are used
+  runner = runners.DirectRunner()
+
+  dirname = os.path.dirname(FLAGS.embedding_output_tfrecord)
+  if dirname:  # A bare output filename has no parent directory to create.
+    tf.io.gfile.makedirs(dirname)
+  runner.run(construct_pipeline(
+      FLAGS.embedding_input_tfrecord, FLAGS.embedding_output_tfrecord,
+      FLAGS.embedding_model_dir, FLAGS.top_k_embedding_count,
+      FLAGS.bottom_k_embedding_count, FLAGS.num_shards))
+
+
+if __name__ == '__main__':
+  # Fail at startup, before any work is done, if one of these is missing.
+  required_flags = ['embedding_input_tfrecord',
+                    'embedding_output_tfrecord',
+                    'embedding_model_dir']
+  flags.mark_flags_as_required(required_flags)
+  app.run(main)
diff --git a/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data_tf1_test.py b/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data_tf1_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..836bd59fb02e7b71e906cbdc0a56fd0e67fc02d4
--- /dev/null
+++ b/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data_tf1_test.py
@@ -0,0 +1,340 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for generate_embedding_data."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import contextlib
+import os
+import tempfile
+import unittest
+import numpy as np
+import six
+import tensorflow.compat.v1 as tf
+from object_detection import exporter
+from object_detection.builders import model_builder
+from object_detection.core import model
+from object_detection.dataset_tools.context_rcnn import generate_embedding_data
+from object_detection.protos import pipeline_pb2
+from object_detection.utils import tf_version
+from apache_beam import runners
+
+if six.PY2:
+ import mock # pylint: disable=g-import-not-at-top
+else:
+ mock = unittest.mock
+
+
+class FakeModel(model.DetectionModel):
+  """Stub DetectionModel emitting fixed detections and detection features."""
+
+  def preprocess(self, inputs):
+    # The fake model never reads the true image shapes, so none are built.
+    return tf.identity(inputs), []
+
+  def predict(self, preprocessed_inputs, true_image_shapes):
+    return dict(image=tf.layers.conv2d(preprocessed_inputs, 3, 1))
+
+  def postprocess(self, prediction_dict, true_image_shapes):
+    def as_float(values):
+      return tf.constant(values, tf.float32)
+    with tf.control_dependencies(prediction_dict.values()):
+      # Features of all ones for the two fake detections: one 10x10 map with
+      # 100 channels per detection.
+      num_features = 100
+      feature_dims = 10
+      classifier_feature = np.ones(
+          (2, feature_dims, feature_dims, num_features),
+          dtype=np.float32).tolist()
+      return {
+          'detection_boxes': as_float([[[0.0, 0.1, 0.5, 0.6],
+                                        [0.5, 0.5, 0.8, 0.8]]]),
+          'detection_scores': as_float([[0.95, 0.6]]),
+          'detection_multiclass_scores': as_float([[[0.1, 0.7, 0.2],
+                                                    [0.3, 0.1, 0.6]]]),
+          'detection_classes': as_float([[0, 1]]),
+          'num_detections': as_float([2]),
+          'detection_features': as_float([classifier_feature])
+      }
+
+  def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
+    """Intentionally a no-op; returns None."""
+
+  def restore_from_objects(self, fine_tune_checkpoint_type):
+    """Intentionally a no-op; returns None."""
+
+  def loss(self, prediction_dict, true_image_shapes):
+    """Intentionally a no-op; returns None."""
+
+  def regularization_losses(self):
+    """Intentionally a no-op; returns None."""
+
+  def updates(self):
+    """Intentionally a no-op; returns None."""
+
+
+@contextlib.contextmanager
+def InMemoryTFRecord(entries):
+  with tempfile.NamedTemporaryFile(delete=False) as temp:
+    filename = temp.name  # Handle is closed on exit; delete=False keeps file.
+  try:
+    with tf.python_io.TFRecordWriter(filename) as writer:
+      for value in entries:
+        writer.write(value)
+    yield filename  # Path of the TFRecord file holding `entries`.
+  finally:
+    os.unlink(filename)  # Always remove the temporary file afterwards.
+
+
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
+class GenerateEmbeddingData(tf.test.TestCase):
+
+ def _save_checkpoint_from_mock_model(self, checkpoint_path):
+ """Builds a FakeModel graph and saves its variables to a checkpoint.
+
+ Args:
+ checkpoint_path: Path to save checkpoint from Fake model.
+ """
+ g = tf.Graph()
+ with g.as_default():
+ mock_model = FakeModel(num_classes=5)
+ preprocessed_inputs, true_image_shapes = mock_model.preprocess(
+ tf.placeholder(tf.float32, shape=[None, None, None, 3]))
+ predictions = mock_model.predict(preprocessed_inputs, true_image_shapes)
+ mock_model.postprocess(predictions, true_image_shapes)
+ tf.train.get_or_create_global_step()
+ saver = tf.train.Saver()
+ init = tf.global_variables_initializer()
+ with self.test_session(graph=g) as sess:
+ sess.run(init)  # Variables must be initialized before they can be saved.
+ saver.save(sess, checkpoint_path)
+
+ def _export_saved_model(self):  # Returns the path of the written SavedModel.
+ tmp_dir = self.get_temp_dir()
+ checkpoint_path = os.path.join(tmp_dir, 'model.ckpt')
+ self._save_checkpoint_from_mock_model(checkpoint_path)
+ output_directory = os.path.join(tmp_dir, 'output')
+ saved_model_path = os.path.join(output_directory, 'saved_model')
+ tf.io.gfile.makedirs(output_directory)
+ with mock.patch.object(
+ model_builder, 'build', autospec=True) as mock_builder:
+ mock_builder.return_value = FakeModel(num_classes=5)  # build() -> fake.
+ pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
+ pipeline_config.eval_config.use_moving_averages = False
+ detection_model = model_builder.build(pipeline_config.model,
+ is_training=False)
+ outputs, placeholder_tensor = exporter.build_detection_graph(
+ input_type='tf_example',
+ detection_model=detection_model,
+ input_shape=None,
+ output_collection_name='inference_op',
+ graph_hook_fn=None)
+ output_node_names = ','.join(outputs.keys())
+ saver = tf.train.Saver()
+ input_saver_def = saver.as_saver_def()
+ frozen_graph_def = exporter.freeze_graph_with_def_protos(
+ input_graph_def=tf.get_default_graph().as_graph_def(),
+ input_saver_def=input_saver_def,
+ input_checkpoint=checkpoint_path,
+ output_node_names=output_node_names,
+ restore_op_name='save/restore_all',
+ filename_tensor_name='save/Const:0',
+ output_graph='',
+ clear_devices=True,
+ initializer_nodes='')
+ exporter.write_saved_model(
+ saved_model_path=saved_model_path,
+ frozen_graph_def=frozen_graph_def,
+ inputs=placeholder_tensor,
+ outputs=outputs)
+ return saved_model_path
+
+ def _create_tf_example(self):  # Returns a serialized tf.train.Example.
+ with self.test_session():
+ encoded_image = tf.image.encode_jpeg(
+ tf.constant(np.ones((4, 4, 3)).astype(np.uint8))).eval()
+
+ def BytesFeature(value):
+ return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
+
+ def Int64Feature(value):
+ return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
+
+ def FloatFeature(value):
+ return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))
+
+ # NOTE(review): the declared height/width (400x600) differ from the 4x4 image.
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded': BytesFeature(encoded_image),
+ 'image/source_id': BytesFeature(b'image_id'),
+ 'image/height': Int64Feature(400),
+ 'image/width': Int64Feature(600),
+ 'image/class/label': Int64Feature(5),
+ 'image/class/text': BytesFeature(b'hyena'),
+ 'image/object/bbox/xmin': FloatFeature(0.1),
+ 'image/object/bbox/xmax': FloatFeature(0.6),
+ 'image/object/bbox/ymin': FloatFeature(0.0),
+ 'image/object/bbox/ymax': FloatFeature(0.5),
+ 'image/object/class/score': FloatFeature(0.95),
+ 'image/object/class/label': Int64Feature(5),
+ 'image/object/class/text': BytesFeature(b'hyena'),
+ 'image/date_captured': BytesFeature(b'2019-10-20 12:12:12')
+ }))
+
+ return example.SerializeToString()
+
+ def assert_expected_example(self, example, topk=False, botk=False):
+ # Check embeddings; two are expected when either topk or botk is set.
+ if topk or botk:
+ self.assertEqual(len(
+ example.features.feature['image/embedding'].float_list.value),
+ 218)  # Two embeddings of length 109.
+ self.assertAllEqual(
+ example.features.feature['image/embedding_count'].int64_list.value,
+ [2])
+ else:
+ self.assertEqual(len(
+ example.features.feature['image/embedding'].float_list.value),
+ 109)  # 100 pooled features + 4 box values + 5 date values.
+ self.assertAllEqual(
+ example.features.feature['image/embedding_count'].int64_list.value,
+ [1])
+
+ self.assertAllEqual(
+ example.features.feature['image/embedding_length'].int64_list.value,
+ [109])
+
+ # Check annotations; the values match those written by _create_tf_example.
+ self.assertAllClose(
+ example.features.feature['image/object/bbox/ymin'].float_list.value,
+ [0.0])
+ self.assertAllClose(
+ example.features.feature['image/object/bbox/xmin'].float_list.value,
+ [0.1])
+ self.assertAllClose(
+ example.features.feature['image/object/bbox/ymax'].float_list.value,
+ [0.5])
+ self.assertAllClose(
+ example.features.feature['image/object/bbox/xmax'].float_list.value,
+ [0.6])
+ self.assertAllClose(
+ example.features.feature['image/object/class/score']
+ .float_list.value, [0.95])
+ self.assertAllClose(
+ example.features.feature['image/object/class/label']
+ .int64_list.value, [5])
+ self.assertAllEqual(
+ example.features.feature['image/object/class/text']
+ .bytes_list.value, [b'hyena'])
+ self.assertAllClose(
+ example.features.feature['image/class/label']
+ .int64_list.value, [5])
+ self.assertAllEqual(
+ example.features.feature['image/class/text']
+ .bytes_list.value, [b'hyena'])
+
+ # Check other essential attributes.
+ self.assertAllEqual(
+ example.features.feature['image/height'].int64_list.value, [400])
+ self.assertAllEqual(
+ example.features.feature['image/width'].int64_list.value, [600])
+ self.assertAllEqual(
+ example.features.feature['image/source_id'].bytes_list.value,
+ [b'image_id'])
+ self.assertTrue(
+ example.features.feature['image/encoded'].bytes_list.value)
+
+ def test_generate_embedding_data_fn(self):
+ saved_model_path = self._export_saved_model()
+ top_k_embedding_count = 1
+ bottom_k_embedding_count = 0
+ inference_fn = generate_embedding_data.GenerateEmbeddingDataFn(
+ saved_model_path, top_k_embedding_count, bottom_k_embedding_count)
+ inference_fn.start_bundle()
+ generated_example = self._create_tf_example()
+ self.assertAllEqual(tf.train.Example.FromString(
+ generated_example).features.feature['image/object/class/label']
+ .int64_list.value, [5])
+ self.assertAllEqual(tf.train.Example.FromString(
+ generated_example).features.feature['image/object/class/text']
+ .bytes_list.value, [b'hyena'])
+ output = inference_fn.process(generated_example)
+ output_example = output[0]
+ self.assert_expected_example(output_example)
+
+ def test_generate_embedding_data_with_top_k_boxes(self):
+ saved_model_path = self._export_saved_model()
+ top_k_embedding_count = 2
+ bottom_k_embedding_count = 0
+ inference_fn = generate_embedding_data.GenerateEmbeddingDataFn(
+ saved_model_path, top_k_embedding_count, bottom_k_embedding_count)
+ inference_fn.start_bundle()
+ generated_example = self._create_tf_example()
+ self.assertAllEqual(
+ tf.train.Example.FromString(generated_example).features
+ .feature['image/object/class/label'].int64_list.value, [5])
+ self.assertAllEqual(
+ tf.train.Example.FromString(generated_example).features
+ .feature['image/object/class/text'].bytes_list.value, [b'hyena'])
+ output = inference_fn.process(generated_example)
+ output_example = output[0]
+ self.assert_expected_example(output_example, topk=True)
+
+ def test_generate_embedding_data_with_bottom_k_boxes(self):
+ saved_model_path = self._export_saved_model()
+ top_k_embedding_count = 0
+ bottom_k_embedding_count = 2
+ inference_fn = generate_embedding_data.GenerateEmbeddingDataFn(
+ saved_model_path, top_k_embedding_count, bottom_k_embedding_count)
+ inference_fn.start_bundle()
+ generated_example = self._create_tf_example()
+ self.assertAllEqual(
+ tf.train.Example.FromString(generated_example).features
+ .feature['image/object/class/label'].int64_list.value, [5])
+ self.assertAllEqual(
+ tf.train.Example.FromString(generated_example).features
+ .feature['image/object/class/text'].bytes_list.value, [b'hyena'])
+ output = inference_fn.process(generated_example)
+ output_example = output[0]
+ self.assert_expected_example(output_example, botk=True)
+
+ def test_beam_pipeline(self):
+ with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
+ runner = runners.DirectRunner()
+ temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
+ output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
+ saved_model_path = self._export_saved_model()
+ top_k_embedding_count = 1
+ bottom_k_embedding_count = 0
+ num_shards = 1
+ pipeline = generate_embedding_data.construct_pipeline(
+ input_tfrecord, output_tfrecord, saved_model_path,
+ top_k_embedding_count, bottom_k_embedding_count, num_shards)
+ runner.run(pipeline)
+ filenames = tf.io.gfile.glob(
+ output_tfrecord + '-?????-of-?????')
+ actual_output = []
+ record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
+ for record in record_iterator:
+ actual_output.append(record)
+ self.assertEqual(len(actual_output), 1)
+ self.assert_expected_example(tf.train.Example.FromString(
+ actual_output[0]))
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/research/object_detection/dataset_tools/create_coco_tf_record.py b/research/object_detection/dataset_tools/create_coco_tf_record.py
index 51ed389105f827335de68ec9c85e04c0083242a5..2703c427e9bae8ebca5233f1ddaf7c42e5f1b82e 100644
--- a/research/object_detection/dataset_tools/create_coco_tf_record.py
+++ b/research/object_detection/dataset_tools/create_coco_tf_record.py
@@ -14,6 +14,9 @@
# ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection.
+This tool supports data generation for object detection (boxes, masks),
+keypoint detection, and DensePose.
+
Please note that this tool creates sharded output files.
Example usage:
@@ -63,7 +66,18 @@ tf.flags.DEFINE_string('train_keypoint_annotations_file', '',
'Training annotations JSON file.')
tf.flags.DEFINE_string('val_keypoint_annotations_file', '',
'Validation annotations JSON file.')
+# DensePose is only available for COCO 2014.
+tf.flags.DEFINE_string('train_densepose_annotations_file', '',
+ 'Training annotations JSON file for DensePose.')
+tf.flags.DEFINE_string('val_densepose_annotations_file', '',
+ 'Validation annotations JSON file for DensePose.')
tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
+# Whether to only produce images/annotations on person class (for keypoint /
+# densepose task).
+tf.flags.DEFINE_boolean('remove_non_person_annotations', False, 'Whether to '
+ 'remove all annotations for non-person objects.')
+tf.flags.DEFINE_boolean('remove_non_person_images', False, 'Whether to '
+ 'remove all examples that do not contain a person.')
FLAGS = flags.FLAGS
@@ -77,13 +91,33 @@ _COCO_KEYPOINT_NAMES = [
b'left_knee', b'right_knee', b'left_ankle', b'right_ankle'
]
+_COCO_PART_NAMES = [
+ b'torso_back', b'torso_front', b'right_hand', b'left_hand', b'left_foot',
+ b'right_foot', b'right_upper_leg_back', b'left_upper_leg_back',
+ b'right_upper_leg_front', b'left_upper_leg_front', b'right_lower_leg_back',
+ b'left_lower_leg_back', b'right_lower_leg_front', b'left_lower_leg_front',
+ b'left_upper_arm_back', b'right_upper_arm_back', b'left_upper_arm_front',
+ b'right_upper_arm_front', b'left_lower_arm_back', b'right_lower_arm_back',
+ b'left_lower_arm_front', b'right_lower_arm_front', b'right_face',
+ b'left_face',
+]
+
+_DP_PART_ID_OFFSET = 1
+
+
+def clip_to_unit(x):
+ return min(max(x, 0.0), 1.0)
+
def create_tf_example(image,
annotations_list,
image_dir,
category_index,
include_masks=False,
- keypoint_annotations_dict=None):
+ keypoint_annotations_dict=None,
+ densepose_annotations_dict=None,
+ remove_non_person_annotations=False,
+ remove_non_person_images=False):
"""Converts image and annotations to a tf.Example proto.
Args:
@@ -108,10 +142,23 @@ def create_tf_example(image,
dictionary with keys: [u'keypoints', u'num_keypoints'] represeting the
keypoint information for this person object annotation. If None, then
no keypoint annotations will be populated.
+ densepose_annotations_dict: A dictionary that maps from annotation_id to a
+ dictionary with keys: [u'dp_I', u'dp_U', u'dp_V', u'dp_x', u'dp_y',
+ u'bbox'] representing part surface coordinates. For more information
+ see http://densepose.org/.
+ remove_non_person_annotations: Whether to remove any annotations that are
+ not the "person" class.
+ remove_non_person_images: Whether to remove any images that do not contain
+ at least one "person" annotation.
Returns:
+ key: SHA256 hash of the image.
example: The converted tf.Example
num_annotations_skipped: Number of (invalid) annotations that were ignored.
+ num_keypoint_annotation_skipped: Number of keypoint annotations that were
+ skipped.
+ num_densepose_annotation_skipped: Number of DensePose annotations that were
+ skipped.
Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
@@ -146,6 +193,16 @@ def create_tf_example(image,
num_annotations_skipped = 0
num_keypoint_annotation_used = 0
num_keypoint_annotation_skipped = 0
+ dp_part_index = []
+ dp_x = []
+ dp_y = []
+ dp_u = []
+ dp_v = []
+ dp_num_points = []
+ densepose_keys = ['dp_I', 'dp_U', 'dp_V', 'dp_x', 'dp_y', 'bbox']
+ include_densepose = densepose_annotations_dict is not None
+ num_densepose_annotation_used = 0
+ num_densepose_annotation_skipped = 0
for object_annotations in annotations_list:
(x, y, width, height) = tuple(object_annotations['bbox'])
if width <= 0 or height <= 0:
@@ -154,14 +211,18 @@ def create_tf_example(image,
if x + width > image_width or y + height > image_height:
num_annotations_skipped += 1
continue
+ category_id = int(object_annotations['category_id'])
+ category_name = category_index[category_id]['name'].encode('utf8')
+ if remove_non_person_annotations and category_name != b'person':
+ num_annotations_skipped += 1
+ continue
xmin.append(float(x) / image_width)
xmax.append(float(x + width) / image_width)
ymin.append(float(y) / image_height)
ymax.append(float(y + height) / image_height)
is_crowd.append(object_annotations['iscrowd'])
- category_id = int(object_annotations['category_id'])
category_ids.append(category_id)
- category_names.append(category_index[category_id]['name'].encode('utf8'))
+ category_names.append(category_name)
area.append(object_annotations['area'])
if include_masks:
@@ -197,6 +258,40 @@ def create_tf_example(image,
keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES))
keypoints_name.extend(_COCO_KEYPOINT_NAMES)
num_keypoints.append(0)
+
+ if include_densepose:
+ annotation_id = object_annotations['id']
+ if (annotation_id in densepose_annotations_dict and
+ all(key in densepose_annotations_dict[annotation_id]
+ for key in densepose_keys)):
+ dp_annotations = densepose_annotations_dict[annotation_id]
+ num_densepose_annotation_used += 1
+ dp_num_points.append(len(dp_annotations['dp_I']))
+ dp_part_index.extend([int(i - _DP_PART_ID_OFFSET)
+ for i in dp_annotations['dp_I']])
+ # DensePose surface coordinates are defined on a [256, 256] grid
+ # relative to each instance box (i.e. absolute coordinates in range
+ # [0., 256.]). The following converts the coordinates
+ # so that they are expressed in normalized image coordinates.
+ dp_x_box_rel = [
+ clip_to_unit(val / 256.) for val in dp_annotations['dp_x']]
+ dp_x_norm = [(float(x) + x_box_rel * width) / image_width
+ for x_box_rel in dp_x_box_rel]
+ dp_y_box_rel = [
+ clip_to_unit(val / 256.) for val in dp_annotations['dp_y']]
+ dp_y_norm = [(float(y) + y_box_rel * height) / image_height
+ for y_box_rel in dp_y_box_rel]
+ dp_x.extend(dp_x_norm)
+ dp_y.extend(dp_y_norm)
+ dp_u.extend(dp_annotations['dp_U'])
+ dp_v.extend(dp_annotations['dp_V'])
+ else:
+ dp_num_points.append(0)
+
+ if (remove_non_person_images and
+ not any(name == b'person' for name in category_names)):
+ return (key, None, num_annotations_skipped,
+ num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
feature_dict = {
'image/height':
dataset_util.int64_feature(image_height),
@@ -243,15 +338,34 @@ def create_tf_example(image,
dataset_util.bytes_list_feature(keypoints_name))
num_keypoint_annotation_skipped = (
len(keypoint_annotations_dict) - num_keypoint_annotation_used)
+ if include_densepose:
+ feature_dict['image/object/densepose/num'] = (
+ dataset_util.int64_list_feature(dp_num_points))
+ feature_dict['image/object/densepose/part_index'] = (
+ dataset_util.int64_list_feature(dp_part_index))
+ feature_dict['image/object/densepose/x'] = (
+ dataset_util.float_list_feature(dp_x))
+ feature_dict['image/object/densepose/y'] = (
+ dataset_util.float_list_feature(dp_y))
+ feature_dict['image/object/densepose/u'] = (
+ dataset_util.float_list_feature(dp_u))
+ feature_dict['image/object/densepose/v'] = (
+ dataset_util.float_list_feature(dp_v))
+ num_densepose_annotation_skipped = (
+ len(densepose_annotations_dict) - num_densepose_annotation_used)
example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
- return key, example, num_annotations_skipped, num_keypoint_annotation_skipped
+ return (key, example, num_annotations_skipped,
+ num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
output_path, include_masks,
num_shards,
- keypoint_annotations_file=''):
+ keypoint_annotations_file='',
+ densepose_annotations_file='',
+ remove_non_person_annotations=False,
+ remove_non_person_images=False):
"""Loads COCO annotation json files and converts to tf.Record format.
Args:
@@ -264,6 +378,12 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_file: JSON file containing the person keypoint
annotations. If empty, then no person keypoint annotations will be
generated.
+ densepose_annotations_file: JSON file containing the DensePose annotations.
+ If empty, then no DensePose annotations will be generated.
+ remove_non_person_annotations: Whether to remove any annotations that are
+ not the "person" class.
+ remove_non_person_images: Whether to remove any images that do not contain
+ at least one "person" annotation.
"""
with contextlib2.ExitStack() as tf_record_close_stack, \
tf.gfile.GFile(annotations_file, 'r') as fid:
@@ -288,7 +408,8 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
if image_id not in annotations_index:
missing_annotation_count += 1
annotations_index[image_id] = []
- logging.info('%d images are missing annotations.', missing_annotation_count)
+ logging.info('%d images are missing annotations.',
+ missing_annotation_count)
keypoint_annotations_index = {}
if keypoint_annotations_file:
@@ -301,8 +422,20 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_index[image_id] = {}
keypoint_annotations_index[image_id][annotation['id']] = annotation
+ densepose_annotations_index = {}
+ if densepose_annotations_file:
+ with tf.gfile.GFile(densepose_annotations_file, 'r') as fid:
+ densepose_groundtruth_data = json.load(fid)
+ if 'annotations' in densepose_groundtruth_data:
+ for annotation in densepose_groundtruth_data['annotations']:
+ image_id = annotation['image_id']
+ if image_id not in densepose_annotations_index:
+ densepose_annotations_index[image_id] = {}
+ densepose_annotations_index[image_id][annotation['id']] = annotation
+
total_num_annotations_skipped = 0
total_num_keypoint_annotations_skipped = 0
+ total_num_densepose_annotations_skipped = 0
for idx, image in enumerate(images):
if idx % 100 == 0:
logging.info('On image %d of %d', idx, len(images))
@@ -312,19 +445,31 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_dict = {}
if image['id'] in keypoint_annotations_index:
keypoint_annotations_dict = keypoint_annotations_index[image['id']]
- (_, tf_example, num_annotations_skipped,
- num_keypoint_annotations_skipped) = create_tf_example(
+ densepose_annotations_dict = None
+ if densepose_annotations_file:
+ densepose_annotations_dict = {}
+ if image['id'] in densepose_annotations_index:
+ densepose_annotations_dict = densepose_annotations_index[image['id']]
+ (_, tf_example, num_annotations_skipped, num_keypoint_annotations_skipped,
+ num_densepose_annotations_skipped) = create_tf_example(
image, annotations_list, image_dir, category_index, include_masks,
- keypoint_annotations_dict)
+ keypoint_annotations_dict, densepose_annotations_dict,
+ remove_non_person_annotations, remove_non_person_images)
total_num_annotations_skipped += num_annotations_skipped
total_num_keypoint_annotations_skipped += num_keypoint_annotations_skipped
+ total_num_densepose_annotations_skipped += (
+ num_densepose_annotations_skipped)
shard_idx = idx % num_shards
- output_tfrecords[shard_idx].write(tf_example.SerializeToString())
+ if tf_example:
+ output_tfrecords[shard_idx].write(tf_example.SerializeToString())
logging.info('Finished writing, skipped %d annotations.',
total_num_annotations_skipped)
if keypoint_annotations_file:
logging.info('Finished writing, skipped %d keypoint annotations.',
total_num_keypoint_annotations_skipped)
+ if densepose_annotations_file:
+ logging.info('Finished writing, skipped %d DensePose annotations.',
+ total_num_densepose_annotations_skipped)
def main(_):
@@ -347,20 +492,26 @@ def main(_):
train_output_path,
FLAGS.include_masks,
num_shards=100,
- keypoint_annotations_file=FLAGS.train_keypoint_annotations_file)
+ keypoint_annotations_file=FLAGS.train_keypoint_annotations_file,
+ densepose_annotations_file=FLAGS.train_densepose_annotations_file,
+ remove_non_person_annotations=FLAGS.remove_non_person_annotations,
+ remove_non_person_images=FLAGS.remove_non_person_images)
_create_tf_record_from_coco_annotations(
FLAGS.val_annotations_file,
FLAGS.val_image_dir,
val_output_path,
FLAGS.include_masks,
- num_shards=100,
- keypoint_annotations_file=FLAGS.val_keypoint_annotations_file)
+ num_shards=50,
+ keypoint_annotations_file=FLAGS.val_keypoint_annotations_file,
+ densepose_annotations_file=FLAGS.val_densepose_annotations_file,
+ remove_non_person_annotations=FLAGS.remove_non_person_annotations,
+ remove_non_person_images=FLAGS.remove_non_person_images)
_create_tf_record_from_coco_annotations(
FLAGS.testdev_annotations_file,
FLAGS.test_image_dir,
testdev_output_path,
FLAGS.include_masks,
- num_shards=100)
+ num_shards=50)
if __name__ == '__main__':
diff --git a/research/object_detection/dataset_tools/create_coco_tf_record_test.py b/research/object_detection/dataset_tools/create_coco_tf_record_test.py
index 0bcc8be9c7437734414e73e43cae8effb7c95681..659142b7b7022a4243025146162eaac4b8c9f165 100644
--- a/research/object_detection/dataset_tools/create_coco_tf_record_test.py
+++ b/research/object_detection/dataset_tools/create_coco_tf_record_test.py
@@ -89,7 +89,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
}
(_, example,
- num_annotations_skipped, _) = create_coco_tf_record.create_tf_example(
+ num_annotations_skipped, _, _) = create_coco_tf_record.create_tf_example(
image, annotations_list, image_dir, category_index)
self.assertEqual(num_annotations_skipped, 0)
@@ -156,7 +156,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
}
(_, example,
- num_annotations_skipped, _) = create_coco_tf_record.create_tf_example(
+ num_annotations_skipped, _, _) = create_coco_tf_record.create_tf_example(
image, annotations_list, image_dir, category_index, include_masks=True)
self.assertEqual(num_annotations_skipped, 0)
@@ -259,14 +259,14 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
}
}
- (_, example, _,
- num_keypoint_annotation_skipped) = create_coco_tf_record.create_tf_example(
- image,
- annotations_list,
- image_dir,
- category_index,
- include_masks=False,
- keypoint_annotations_dict=keypoint_annotations_dict)
+ _, example, _, num_keypoint_annotation_skipped, _ = (
+ create_coco_tf_record.create_tf_example(
+ image,
+ annotations_list,
+ image_dir,
+ category_index,
+ include_masks=False,
+ keypoint_annotations_dict=keypoint_annotations_dict))
self.assertEqual(num_keypoint_annotation_skipped, 0)
self._assertProtoEqual(
@@ -310,6 +310,132 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
example.features.feature[
'image/object/keypoint/visibility'].int64_list.value, vv)
+ def test_create_tf_example_with_dense_pose(self):
+ image_dir = self.get_temp_dir()
+ image_file_name = 'tmp_image.jpg'
+ image_data = np.random.randint(low=0, high=256, size=(256, 256, 3)).astype(
+ np.uint8)
+ save_path = os.path.join(image_dir, image_file_name)
+ image = PIL.Image.fromarray(image_data, 'RGB')
+ image.save(save_path)
+
+ image = {
+ 'file_name': image_file_name,
+ 'height': 256,
+ 'width': 256,
+ 'id': 11,
+ }
+
+ min_x, min_y = 64, 64
+ max_x, max_y = 128, 128
+ keypoints = []
+ num_visible_keypoints = 0
+ xv = []
+ yv = []
+ vv = []
+ for _ in range(17):
+ xc = min_x + int(np.random.rand()*(max_x - min_x))
+ yc = min_y + int(np.random.rand()*(max_y - min_y))
+ vis = np.random.randint(0, 3)
+ xv.append(xc)
+ yv.append(yc)
+ vv.append(vis)
+ keypoints.extend([xc, yc, vis])
+ num_visible_keypoints += (vis > 0)
+
+ annotations_list = [{
+ 'area': 0.5,
+ 'iscrowd': False,
+ 'image_id': 11,
+ 'bbox': [64, 64, 128, 128],
+ 'category_id': 1,
+ 'id': 1000
+ }]
+
+ num_points = 45
+ dp_i = np.random.randint(1, 25, (num_points,)).astype(np.float32)
+ dp_u = np.random.randn(num_points)
+ dp_v = np.random.randn(num_points)
+ dp_x = np.random.rand(num_points)*256.
+ dp_y = np.random.rand(num_points)*256.
+ densepose_annotations_dict = {
+ 1000: {
+ 'dp_I': dp_i,
+ 'dp_U': dp_u,
+ 'dp_V': dp_v,
+ 'dp_x': dp_x,
+ 'dp_y': dp_y,
+ 'bbox': [64, 64, 128, 128],
+ }
+ }
+
+ category_index = {
+ 1: {
+ 'name': 'person',
+ 'id': 1
+ }
+ }
+
+ _, example, _, _, num_densepose_annotation_skipped = (
+ create_coco_tf_record.create_tf_example(
+ image,
+ annotations_list,
+ image_dir,
+ category_index,
+ include_masks=False,
+ densepose_annotations_dict=densepose_annotations_dict))
+
+ self.assertEqual(num_densepose_annotation_skipped, 0)
+ self._assertProtoEqual(
+ example.features.feature['image/height'].int64_list.value, [256])
+ self._assertProtoEqual(
+ example.features.feature['image/width'].int64_list.value, [256])
+ self._assertProtoEqual(
+ example.features.feature['image/filename'].bytes_list.value,
+ [six.b(image_file_name)])
+ self._assertProtoEqual(
+ example.features.feature['image/source_id'].bytes_list.value,
+ [six.b(str(image['id']))])
+ self._assertProtoEqual(
+ example.features.feature['image/format'].bytes_list.value,
+ [six.b('jpeg')])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/xmin'].float_list.value,
+ [0.25])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/ymin'].float_list.value,
+ [0.25])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/xmax'].float_list.value,
+ [0.75])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/ymax'].float_list.value,
+ [0.75])
+ self._assertProtoEqual(
+ example.features.feature['image/object/class/text'].bytes_list.value,
+ [six.b('person')])
+ self._assertProtoEqual(
+ example.features.feature['image/object/densepose/num'].int64_list.value,
+ [num_points])
+ self.assertAllEqual(
+ example.features.feature[
+ 'image/object/densepose/part_index'].int64_list.value,
+ dp_i.astype(np.int64) - create_coco_tf_record._DP_PART_ID_OFFSET)
+ self.assertAllClose(
+ example.features.feature['image/object/densepose/u'].float_list.value,
+ dp_u)
+ self.assertAllClose(
+ example.features.feature['image/object/densepose/v'].float_list.value,
+ dp_v)
+ expected_dp_x = (64 + dp_x * 128. / 256.) / 256.
+ expected_dp_y = (64 + dp_y * 128. / 256.) / 256.
+ self.assertAllClose(
+ example.features.feature['image/object/densepose/x'].float_list.value,
+ expected_dp_x)
+ self.assertAllClose(
+ example.features.feature['image/object/densepose/y'].float_list.value,
+ expected_dp_y)
+
def test_create_sharded_tf_record(self):
tmp_dir = self.get_temp_dir()
image_paths = ['tmp1_image.jpg', 'tmp2_image.jpg']
diff --git a/research/object_detection/dataset_tools/seq_example_util_test.py b/research/object_detection/dataset_tools/seq_example_util_test.py
index 81fd4f54fc4ceec442b0962b0d11a0cfdcf5623d..fd721954be896b4044735dd67928044e413422e7 100644
--- a/research/object_detection/dataset_tools/seq_example_util_test.py
+++ b/research/object_detection/dataset_tools/seq_example_util_test.py
@@ -24,10 +24,18 @@ import six
import tensorflow.compat.v1 as tf
from object_detection.dataset_tools import seq_example_util
+from object_detection.utils import tf_version
class SeqExampleUtilTest(tf.test.TestCase):
+ def materialize_tensors(self, list_of_tensors):
+ if tf_version.is_tf2():
+ return [tensor.numpy() for tensor in list_of_tensors]
+ else:
+ with self.cached_session() as sess:
+ return sess.run(list_of_tensors)
+
def test_make_unlabeled_example(self):
num_frames = 5
image_height = 100
@@ -41,8 +49,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
image_source_ids = [str(idx) for idx in range(num_frames)]
images_list = tf.unstack(images, axis=0)
encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list]
- with tf.Session() as sess:
- encoded_images = sess.run(encoded_images_list)
+ encoded_images = self.materialize_tensors(encoded_images_list)
seq_example = seq_example_util.make_sequence_example(
dataset_name=dataset_name,
video_id=video_id,
@@ -109,8 +116,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
dtype=tf.int32), dtype=tf.uint8)
images_list = tf.unstack(images, axis=0)
encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list]
- with tf.Session() as sess:
- encoded_images = sess.run(encoded_images_list)
+ encoded_images = self.materialize_tensors(encoded_images_list)
timestamps = [100000, 110000]
is_annotated = [1, 0]
bboxes = [
@@ -208,8 +214,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
dtype=tf.int32), dtype=tf.uint8)
images_list = tf.unstack(images, axis=0)
encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list]
- with tf.Session() as sess:
- encoded_images = sess.run(encoded_images_list)
+ encoded_images = self.materialize_tensors(encoded_images_list)
bboxes = [
np.array([[0., 0., 0.75, 0.75],
[0., 0., 1., 1.]], dtype=np.float32),
@@ -283,7 +288,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
[0.75, 1.],
seq_feature_dict['region/bbox/xmax'].feature[0].float_list.value[:])
self.assertAllEqual(
- ['cat', 'frog'],
+ [b'cat', b'frog'],
seq_feature_dict['region/label/string'].feature[0].bytes_list.value[:])
self.assertAllClose(
[0.],
@@ -327,7 +332,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
[0.75],
seq_feature_dict['region/bbox/xmax'].feature[1].float_list.value[:])
self.assertAllEqual(
- ['cat'],
+ [b'cat'],
seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:])
self.assertAllClose(
[],
diff --git a/research/object_detection/dataset_tools/tf_record_creation_util_test.py b/research/object_detection/dataset_tools/tf_record_creation_util_test.py
index 2873a6d146fbdb8ae62c558abe8f62e76943b515..5722c86472e617f5e2e2aba916ad9e90c418948b 100644
--- a/research/object_detection/dataset_tools/tf_record_creation_util_test.py
+++ b/research/object_detection/dataset_tools/tf_record_creation_util_test.py
@@ -42,7 +42,7 @@ class OpenOutputTfrecordsTests(tf.test.TestCase):
tf_record_path = '{}-{:05d}-of-00010'.format(
os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), idx)
records = list(tf.python_io.tf_record_iterator(tf_record_path))
- self.assertAllEqual(records, ['test_{}'.format(idx)])
+ self.assertAllEqual(records, ['test_{}'.format(idx).encode('utf-8')])
if __name__ == '__main__':
diff --git a/research/object_detection/dockerfiles/tf1/Dockerfile b/research/object_detection/dockerfiles/tf1/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..76c0c1f07866557776a329cf215cf046c043ebb3
--- /dev/null
+++ b/research/object_detection/dockerfiles/tf1/Dockerfile
@@ -0,0 +1,44 @@
+FROM tensorflow/tensorflow:1.15.2-gpu-py3
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Install apt dependencies
+RUN apt-get update && apt-get install -y \
+ git \
+ gpg-agent \
+ python3-cairocffi \
+ protobuf-compiler \
+ python3-pil \
+ python3-lxml \
+ python3-tk \
+ wget
+
+# Install gcloud and gsutil commands
+# https://cloud.google.com/sdk/docs/quickstart-debian-ubuntu
+RUN export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
+ echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
+ curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
+ apt-get update -y && apt-get install google-cloud-sdk -y
+
+# Add new user to avoid running as root
+RUN useradd -ms /bin/bash tensorflow
+USER tensorflow
+WORKDIR /home/tensorflow
+
+# Install pip dependencies
+RUN pip3 install --user absl-py
+RUN pip3 install --user contextlib2
+RUN pip3 install --user Cython
+RUN pip3 install --user jupyter
+RUN pip3 install --user matplotlib
+RUN pip3 install --user pycocotools
+RUN pip3 install --user tf-slim
+
+# Copy this version of the model garden into the image
+COPY --chown=tensorflow . /home/tensorflow/models
+
+# Compile protobuf configs
+RUN (cd /home/tensorflow/models/research/ && protoc object_detection/protos/*.proto --python_out=.)
+
+ENV PYTHONPATH $PYTHONPATH:/home/tensorflow/models/research/:/home/tensorflow/models/research/slim
+ENV TF_CPP_MIN_LOG_LEVEL 3
diff --git a/research/object_detection/dockerfiles/tf1/README.md b/research/object_detection/dockerfiles/tf1/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..5e3e6d052daff9bd2ca6a0e6de2e118ee5f29417
--- /dev/null
+++ b/research/object_detection/dockerfiles/tf1/README.md
@@ -0,0 +1,11 @@
+# TensorFlow Object Detection on Docker
+
+These instructions are experimental.
+
+## Building and running:
+
+```bash
+# From the root of the git repository
+docker build -f research/object_detection/dockerfiles/tf1/Dockerfile -t od .
+docker run -it od
+```
diff --git a/research/object_detection/dockerfiles/tf2/Dockerfile b/research/object_detection/dockerfiles/tf2/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..c3d5e2e8f7c08d94857f8e25f15ed43710cba76e
--- /dev/null
+++ b/research/object_detection/dockerfiles/tf2/Dockerfile
@@ -0,0 +1,44 @@
+FROM tensorflow/tensorflow:2.2.0-gpu
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Install apt dependencies
+RUN apt-get update && apt-get install -y \
+ git \
+ gpg-agent \
+ python3-cairocffi \
+ protobuf-compiler \
+ python3-pil \
+ python3-lxml \
+ python3-tk \
+ wget
+
+# Install gcloud and gsutil commands
+# https://cloud.google.com/sdk/docs/quickstart-debian-ubuntu
+RUN export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
+ echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
+ curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
+ apt-get update -y && apt-get install google-cloud-sdk -y
+
+# Add new user to avoid running as root
+RUN useradd -ms /bin/bash tensorflow
+USER tensorflow
+WORKDIR /home/tensorflow
+
+# Install pip dependencies
+RUN pip3 install --user absl-py
+RUN pip3 install --user contextlib2
+RUN pip3 install --user Cython
+RUN pip3 install --user jupyter
+RUN pip3 install --user matplotlib
+RUN pip3 install --user pycocotools
+RUN pip3 install --user tf-slim
+
+# Copy this version of the model garden into the image
+COPY --chown=tensorflow . /home/tensorflow/models
+
+# Compile protobuf configs
+RUN (cd /home/tensorflow/models/research/ && protoc object_detection/protos/*.proto --python_out=.)
+
+ENV PYTHONPATH $PYTHONPATH:/home/tensorflow/models/research/:/home/tensorflow/models/research/slim
+ENV TF_CPP_MIN_LOG_LEVEL 3
diff --git a/research/object_detection/dockerfiles/tf2/README.md b/research/object_detection/dockerfiles/tf2/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..58b58db85769f826842e378ca1054cb6f3e392e8
--- /dev/null
+++ b/research/object_detection/dockerfiles/tf2/README.md
@@ -0,0 +1,11 @@
+# TensorFlow Object Detection on Docker
+
+These instructions are experimental.
+
+## Building and running:
+
+```bash
+# From the root of the git repository
+docker build -f research/object_detection/dockerfiles/tf2/Dockerfile -t od .
+docker run -it od
+```
diff --git a/research/object_detection/eval_util.py b/research/object_detection/eval_util.py
index e2d1255b5ae99fc49eb66e620e09e6a61fbcda6e..3b365df19a8093ad2c2a2ad39b8dd46f6d1a82c7 100644
--- a/research/object_detection/eval_util.py
+++ b/research/object_detection/eval_util.py
@@ -52,6 +52,8 @@ EVAL_METRICS_CLASS_DICT = {
coco_evaluation.CocoKeypointEvaluator,
'coco_mask_metrics':
coco_evaluation.CocoMaskEvaluator,
+ 'coco_panoptic_metrics':
+ coco_evaluation.CocoPanopticSegmentationEvaluator,
'oid_challenge_detection_metrics':
object_detection_evaluation.OpenImagesDetectionChallengeEvaluator,
'oid_challenge_segmentation_metrics':
diff --git a/research/object_detection/eval_util_test.py b/research/object_detection/eval_util_test.py
index f2f66405f7236d0806ccdeb55e84553fb20a2ee4..d0623f1fcda50482ee98eccb2e2e62ef10b88be3 100644
--- a/research/object_detection/eval_util_test.py
+++ b/research/object_detection/eval_util_test.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import unittest
from absl.testing import parameterized
import numpy as np
@@ -30,6 +31,7 @@ from object_detection.core import standard_fields as fields
from object_detection.metrics import coco_evaluation
from object_detection.protos import eval_pb2
from object_detection.utils import test_case
+from object_detection.utils import tf_version
class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
@@ -127,6 +129,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
{'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
{'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
)
+ @unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
def test_get_eval_metric_ops_for_coco_detections(self, batch_size=1,
max_gt_boxes=None,
scale_to_absolute=False):
@@ -155,6 +158,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
{'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
{'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
)
+ @unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
def test_get_eval_metric_ops_for_coco_detections_and_masks(
self, batch_size=1, max_gt_boxes=None, scale_to_absolute=False):
eval_config = eval_pb2.EvalConfig()
@@ -185,6 +189,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
{'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
{'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
)
+ @unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
def test_get_eval_metric_ops_for_coco_detections_and_resized_masks(
self, batch_size=1, max_gt_boxes=None, scale_to_absolute=False):
eval_config = eval_pb2.EvalConfig()
@@ -210,6 +215,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP'])
self.assertAlmostEqual(1.0, metrics['DetectionMasks_Precision/mAP'])
+ @unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
def test_get_eval_metric_ops_raises_error_with_unsupported_metric(self):
eval_config = eval_pb2.EvalConfig()
eval_config.metrics_set.extend(['unsupported_metric'])
@@ -334,63 +340,67 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
dtype=np.float32)
detection_keypoints = np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]],
dtype=np.float32)
- detections = {
- detection_fields.detection_boxes:
- tf.constant(detection_boxes),
- detection_fields.detection_scores:
- tf.constant([[1.], [1.]]),
- detection_fields.detection_classes:
- tf.constant([[1], [2]]),
- detection_fields.num_detections:
- tf.constant([1, 1]),
- detection_fields.detection_keypoints:
- tf.tile(
- tf.reshape(
- tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
- multiples=[2, 1, 1, 1])
- }
-
- gt_boxes = detection_boxes
- groundtruth = {
- input_data_fields.groundtruth_boxes:
- tf.constant(gt_boxes),
- input_data_fields.groundtruth_classes:
- tf.constant([[1.], [1.]]),
- input_data_fields.groundtruth_keypoints:
- tf.tile(
- tf.reshape(
- tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
- multiples=[2, 1, 1, 1])
- }
-
- image = tf.zeros((2, 100, 100, 3), dtype=tf.float32)
-
- true_image_shapes = tf.constant([[100, 100, 3], [50, 100, 3]])
- original_image_spatial_shapes = tf.constant([[200, 200], [150, 300]])
-
- result = eval_util.result_dict_for_batched_example(
- image, key, detections, groundtruth,
- scale_to_absolute=True,
- true_image_shapes=true_image_shapes,
- original_image_spatial_shapes=original_image_spatial_shapes,
- max_gt_boxes=tf.constant(1))
-
- with self.test_session() as sess:
- result = sess.run(result)
- self.assertAllEqual(
- [[[0., 0., 200., 200.]], [[0.0, 0.0, 150., 150.]]],
- result[input_data_fields.groundtruth_boxes])
- self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
- [[[0., 0.], [150., 150.], [300., 300.]]]],
- result[input_data_fields.groundtruth_keypoints])
-
- # Predictions from the model are not scaled.
- self.assertAllEqual(
- [[[0., 0., 200., 200.]], [[0.0, 0.0, 75., 150.]]],
- result[detection_fields.detection_boxes])
- self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
- [[[0., 0.], [75., 150.], [150., 300.]]]],
- result[detection_fields.detection_keypoints])
+ def graph_fn():
+ detections = {
+ detection_fields.detection_boxes:
+ tf.constant(detection_boxes),
+ detection_fields.detection_scores:
+ tf.constant([[1.], [1.]]),
+ detection_fields.detection_classes:
+ tf.constant([[1], [2]]),
+ detection_fields.num_detections:
+ tf.constant([1, 1]),
+ detection_fields.detection_keypoints:
+ tf.tile(
+ tf.reshape(
+ tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
+ multiples=[2, 1, 1, 1])
+ }
+
+ gt_boxes = detection_boxes
+ groundtruth = {
+ input_data_fields.groundtruth_boxes:
+ tf.constant(gt_boxes),
+ input_data_fields.groundtruth_classes:
+ tf.constant([[1.], [1.]]),
+ input_data_fields.groundtruth_keypoints:
+ tf.tile(
+ tf.reshape(
+ tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
+ multiples=[2, 1, 1, 1])
+ }
+
+ image = tf.zeros((2, 100, 100, 3), dtype=tf.float32)
+
+ true_image_shapes = tf.constant([[100, 100, 3], [50, 100, 3]])
+ original_image_spatial_shapes = tf.constant([[200, 200], [150, 300]])
+
+ result = eval_util.result_dict_for_batched_example(
+ image, key, detections, groundtruth,
+ scale_to_absolute=True,
+ true_image_shapes=true_image_shapes,
+ original_image_spatial_shapes=original_image_spatial_shapes,
+ max_gt_boxes=tf.constant(1))
+ return (result[input_data_fields.groundtruth_boxes],
+ result[input_data_fields.groundtruth_keypoints],
+ result[detection_fields.detection_boxes],
+ result[detection_fields.detection_keypoints])
+ (gt_boxes, gt_keypoints, detection_boxes,
+ detection_keypoints) = self.execute_cpu(graph_fn, [])
+ self.assertAllEqual(
+ [[[0., 0., 200., 200.]], [[0.0, 0.0, 150., 150.]]],
+ gt_boxes)
+ self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
+ [[[0., 0.], [150., 150.], [300., 300.]]]],
+ gt_keypoints)
+
+ # Predictions from the model are not scaled.
+ self.assertAllEqual(
+ [[[0., 0., 200., 200.]], [[0.0, 0.0, 75., 150.]]],
+ detection_boxes)
+ self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
+ [[[0., 0.], [75., 150.], [150., 300.]]]],
+ detection_keypoints)
if __name__ == '__main__':
diff --git a/research/object_detection/export_inference_graph.py b/research/object_detection/export_inference_graph.py
index bcb5c40b3b8c2b3bbfc95c00baa348889954dbb8..5a0ee0dde056afacca9a876c7456cb82a82f3192 100644
--- a/research/object_detection/export_inference_graph.py
+++ b/research/object_detection/export_inference_graph.py
@@ -134,6 +134,30 @@ flags.DEFINE_string('config_override', '',
'text proto to override pipeline_config_path.')
flags.DEFINE_boolean('write_inference_graph', False,
'If true, writes inference graph to disk.')
+flags.DEFINE_string('additional_output_tensor_names', None,
+ 'Additional Tensors to output, to be specified as a comma '
+ 'separated list of tensor names.')
+flags.DEFINE_boolean('use_side_inputs', False,
+ 'If True, uses side inputs as well as image inputs.')
+flags.DEFINE_string('side_input_shapes', None,
+ 'If use_side_inputs is True, this explicitly sets '
+ 'the shape of the side input tensors to a fixed size. The '
+ 'dimensions are to be provided as a comma-separated list '
+ 'of integers. A value of -1 can be used for unknown '
+ 'dimensions. A `/` denotes a break, starting the shape of '
+ 'the next side input tensor. This flag is required if '
+ 'using side inputs.')
+flags.DEFINE_string('side_input_types', None,
+ 'If use_side_inputs is True, this explicitly sets '
+ 'the type of the side input tensors. The '
+ 'dimensions are to be provided as a comma-separated list '
+ 'of types, each of `string`, `integer`, or `float`. '
+ 'This flag is required if using side inputs.')
+flags.DEFINE_string('side_input_names', None,
+ 'If use_side_inputs is True, this explicitly sets '
+ 'the names of the side input tensors required by the model '
+ 'assuming the names will be a comma-separated list of '
+ 'strings. This flag is required if using side inputs.')
tf.app.flags.mark_flag_as_required('pipeline_config_path')
tf.app.flags.mark_flag_as_required('trained_checkpoint_prefix')
tf.app.flags.mark_flag_as_required('output_directory')
@@ -152,10 +176,30 @@ def main(_):
]
else:
input_shape = None
+ if FLAGS.use_side_inputs:
+ side_input_shapes, side_input_names, side_input_types = (
+ exporter.parse_side_inputs(
+ FLAGS.side_input_shapes,
+ FLAGS.side_input_names,
+ FLAGS.side_input_types))
+ else:
+ side_input_shapes = None
+ side_input_names = None
+ side_input_types = None
+ if FLAGS.additional_output_tensor_names:
+ additional_output_tensor_names = list(
+ FLAGS.additional_output_tensor_names.split(','))
+ else:
+ additional_output_tensor_names = None
exporter.export_inference_graph(
FLAGS.input_type, pipeline_config, FLAGS.trained_checkpoint_prefix,
FLAGS.output_directory, input_shape=input_shape,
- write_inference_graph=FLAGS.write_inference_graph)
+ write_inference_graph=FLAGS.write_inference_graph,
+ additional_output_tensor_names=additional_output_tensor_names,
+ use_side_inputs=FLAGS.use_side_inputs,
+ side_input_shapes=side_input_shapes,
+ side_input_names=side_input_names,
+ side_input_types=side_input_types)
if __name__ == '__main__':
diff --git a/research/object_detection/export_tflite_ssd_graph_lib.py b/research/object_detection/export_tflite_ssd_graph_lib.py
index 229daab00a2ea2288ce854c508eb4ea48f63bacc..f72e9525bfd75b58c874cba5b790cbac710cb9dd 100644
--- a/research/object_detection/export_tflite_ssd_graph_lib.py
+++ b/research/object_detection/export_tflite_ssd_graph_lib.py
@@ -24,16 +24,19 @@ import tensorflow.compat.v1 as tf
from tensorflow.core.framework import attr_value_pb2
from tensorflow.core.framework import types_pb2
from tensorflow.core.protobuf import saver_pb2
-from tensorflow.tools.graph_transforms import TransformGraph
from object_detection import exporter
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import model_builder
from object_detection.builders import post_processing_builder
from object_detection.core import box_list
+from object_detection.utils import tf_version
_DEFAULT_NUM_CHANNELS = 3
_DEFAULT_NUM_COORD_BOX = 4
+if tf_version.is_tf1():
+ from tensorflow.tools.graph_transforms import TransformGraph # pylint: disable=g-import-not-at-top
+
def get_const_center_size_encoded_anchors(anchors):
"""Exports center-size encoded anchors as a constant tensor.
diff --git a/research/object_detection/export_tflite_ssd_graph_lib_test.py b/research/object_detection/export_tflite_ssd_graph_lib_tf1_test.py
similarity index 98%
rename from research/object_detection/export_tflite_ssd_graph_lib_test.py
rename to research/object_detection/export_tflite_ssd_graph_lib_tf1_test.py
index 5b6082d109c7824651c0cfdce95e41a5126fa653..3625b9f651c157f52f690b1c9adf1e7ce19f2b94 100644
--- a/research/object_detection/export_tflite_ssd_graph_lib_test.py
+++ b/research/object_detection/export_tflite_ssd_graph_lib_tf1_test.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
+import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
@@ -32,6 +33,7 @@ from object_detection.core import model
from object_detection.protos import graph_rewriter_pb2
from object_detection.protos import pipeline_pb2
from object_detection.protos import post_processing_pb2
+from object_detection.utils import tf_version
# pylint: disable=g-import-not-at-top
@@ -72,6 +74,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, from_detection_checkpoint):
pass
+ def restore_from_objects(self, fine_tune_checkpoint_type):
+ pass
+
def loss(self, prediction_dict, true_image_shapes):
pass
@@ -82,6 +87,7 @@ class FakeModel(model.DetectionModel):
pass
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class ExportTfliteGraphTest(tf.test.TestCase):
def _save_checkpoint_from_mock_model(self,
@@ -413,7 +419,7 @@ class ExportTfliteGraphTest(tf.test.TestCase):
tflite_graph_file = self._export_graph_with_postprocessing_op(
pipeline_config)
self.assertTrue(os.path.exists(tflite_graph_file))
- mock_get.assert_called_once()
+ self.assertEqual(1, mock_get.call_count)
if __name__ == '__main__':
diff --git a/research/object_detection/exporter.py b/research/object_detection/exporter.py
index 676e34debea179e0c772bc392362f1a43b24bc75..61c5f7f22db46c88c8bc5c1803b281da4c020967 100644
--- a/research/object_detection/exporter.py
+++ b/research/object_detection/exporter.py
@@ -39,6 +39,54 @@ except ImportError:
freeze_graph_with_def_protos = freeze_graph.freeze_graph_with_def_protos
+def parse_side_inputs(side_input_shapes_string, side_input_names_string,
+ side_input_types_string):
+ """Parses side input flags.
+
+ Args:
+ side_input_shapes_string: The shape of the side input tensors, provided as a
+ comma-separated list of integers. A value of -1 is used for unknown
+ dimensions. A `/` denotes a break, starting the shape of the next side
+ input tensor.
+ side_input_names_string: The names of the side input tensors, provided as a
+ comma-separated list of strings.
+ side_input_types_string: The type of the side input tensors, provided as a
+ comma-separated list of types, each of `string`, `int`, or `float`.
+
+ Returns:
+ side_input_shapes: A list of shapes.
+ side_input_names: A list of strings.
+ side_input_types: A list of tensorflow dtypes.
+
+ """
+ if side_input_shapes_string:
+ side_input_shapes = []
+ for side_input_shape_list in side_input_shapes_string.split('/'):
+ side_input_shape = [
+ int(dim) if dim != '-1' else None
+ for dim in side_input_shape_list.split(',')
+ ]
+ side_input_shapes.append(side_input_shape)
+ else:
+ raise ValueError('When using side_inputs, side_input_shapes must be '
+ 'specified in the input flags.')
+ if side_input_names_string:
+ side_input_names = list(side_input_names_string.split(','))
+ else:
+ raise ValueError('When using side_inputs, side_input_names must be '
+ 'specified in the input flags.')
+ if side_input_types_string:
+ typelookup = {'float': tf.float32, 'int': tf.int32, 'string': tf.string}
+ side_input_types = [
+ typelookup[side_input_type]
+ for side_input_type in side_input_types_string.split(',')
+ ]
+ else:
+ raise ValueError('When using side_inputs, side_input_types must be '
+ 'specified in the input flags.')
+ return side_input_shapes, side_input_names, side_input_types
+
+
def rewrite_nn_resize_op(is_quantized=False):
"""Replaces a custom nearest-neighbor resize op with the Tensorflow version.
@@ -140,6 +188,14 @@ def _image_tensor_input_placeholder(input_shape=None):
return input_tensor, input_tensor
+def _side_input_tensor_placeholder(side_input_shape, side_input_name,
+ side_input_type):
+ """Returns side input placeholder and side input tensor."""
+ side_input_tensor = tf.placeholder(
+ dtype=side_input_type, shape=side_input_shape, name=side_input_name)
+ return side_input_tensor, side_input_tensor
+
+
def _tf_example_input_placeholder(input_shape=None):
"""Returns input that accepts a batch of strings with tf examples.
@@ -200,7 +256,7 @@ input_placeholder_fn_map = {
'image_tensor': _image_tensor_input_placeholder,
'encoded_image_string_tensor':
_encoded_image_string_tensor_input_placeholder,
- 'tf_example': _tf_example_input_placeholder,
+ 'tf_example': _tf_example_input_placeholder
}
@@ -312,7 +368,7 @@ def write_saved_model(saved_model_path,
Args:
saved_model_path: Path to write SavedModel.
frozen_graph_def: tf.GraphDef holding frozen graph.
- inputs: The input placeholder tensor.
+ inputs: A tensor dictionary containing the inputs to a DetectionModel.
outputs: A tensor dictionary containing the outputs of a DetectionModel.
"""
with tf.Graph().as_default():
@@ -322,8 +378,13 @@ def write_saved_model(saved_model_path,
builder = tf.saved_model.builder.SavedModelBuilder(saved_model_path)
- tensor_info_inputs = {
- 'inputs': tf.saved_model.utils.build_tensor_info(inputs)}
+ tensor_info_inputs = {}
+ if isinstance(inputs, dict):
+ for k, v in inputs.items():
+ tensor_info_inputs[k] = tf.saved_model.utils.build_tensor_info(v)
+ else:
+ tensor_info_inputs['inputs'] = tf.saved_model.utils.build_tensor_info(
+ inputs)
tensor_info_outputs = {}
for k, v in outputs.items():
tensor_info_outputs[k] = tf.saved_model.utils.build_tensor_info(v)
@@ -364,11 +425,11 @@ def write_graph_and_checkpoint(inference_graph_def,
def _get_outputs_from_inputs(input_tensors, detection_model,
- output_collection_name):
+ output_collection_name, **side_inputs):
inputs = tf.cast(input_tensors, dtype=tf.float32)
preprocessed_inputs, true_image_shapes = detection_model.preprocess(inputs)
output_tensors = detection_model.predict(
- preprocessed_inputs, true_image_shapes)
+ preprocessed_inputs, true_image_shapes, **side_inputs)
postprocessed_tensors = detection_model.postprocess(
output_tensors, true_image_shapes)
return add_output_tensor_nodes(postprocessed_tensors,
@@ -376,32 +437,45 @@ def _get_outputs_from_inputs(input_tensors, detection_model,
def build_detection_graph(input_type, detection_model, input_shape,
- output_collection_name, graph_hook_fn):
+ output_collection_name, graph_hook_fn,
+ use_side_inputs=False, side_input_shapes=None,
+ side_input_names=None, side_input_types=None):
"""Build the detection graph."""
if input_type not in input_placeholder_fn_map:
raise ValueError('Unknown input type: {}'.format(input_type))
placeholder_args = {}
+ side_inputs = {}
if input_shape is not None:
if (input_type != 'image_tensor' and
input_type != 'encoded_image_string_tensor' and
- input_type != 'tf_example'):
+ input_type != 'tf_example' and
+ input_type != 'tf_sequence_example'):
raise ValueError('Can only specify input shape for `image_tensor`, '
- '`encoded_image_string_tensor`, or `tf_example` '
- 'inputs.')
+ '`encoded_image_string_tensor`, `tf_example`, '
+ ' or `tf_sequence_example` inputs.')
placeholder_args['input_shape'] = input_shape
placeholder_tensor, input_tensors = input_placeholder_fn_map[input_type](
**placeholder_args)
+ placeholder_tensors = {'inputs': placeholder_tensor}
+ if use_side_inputs:
+ for idx, side_input_name in enumerate(side_input_names):
+ side_input_placeholder, side_input = _side_input_tensor_placeholder(
+ side_input_shapes[idx], side_input_name, side_input_types[idx])
+ print(side_input)
+ side_inputs[side_input_name] = side_input
+ placeholder_tensors[side_input_name] = side_input_placeholder
outputs = _get_outputs_from_inputs(
input_tensors=input_tensors,
detection_model=detection_model,
- output_collection_name=output_collection_name)
+ output_collection_name=output_collection_name,
+ **side_inputs)
# Add global step to the graph.
slim.get_or_create_global_step()
if graph_hook_fn: graph_hook_fn()
- return outputs, placeholder_tensor
+ return outputs, placeholder_tensors
def _export_inference_graph(input_type,
@@ -414,7 +488,11 @@ def _export_inference_graph(input_type,
output_collection_name='inference_op',
graph_hook_fn=None,
write_inference_graph=False,
- temp_checkpoint_prefix=''):
+ temp_checkpoint_prefix='',
+ use_side_inputs=False,
+ side_input_shapes=None,
+ side_input_names=None,
+ side_input_types=None):
"""Export helper."""
tf.gfile.MakeDirs(output_directory)
frozen_graph_path = os.path.join(output_directory,
@@ -422,12 +500,16 @@ def _export_inference_graph(input_type,
saved_model_path = os.path.join(output_directory, 'saved_model')
model_path = os.path.join(output_directory, 'model.ckpt')
- outputs, placeholder_tensor = build_detection_graph(
+ outputs, placeholder_tensor_dict = build_detection_graph(
input_type=input_type,
detection_model=detection_model,
input_shape=input_shape,
output_collection_name=output_collection_name,
- graph_hook_fn=graph_hook_fn)
+ graph_hook_fn=graph_hook_fn,
+ use_side_inputs=use_side_inputs,
+ side_input_shapes=side_input_shapes,
+ side_input_names=side_input_names,
+ side_input_types=side_input_types)
profile_inference_graph(tf.get_default_graph())
saver_kwargs = {}
@@ -464,7 +546,8 @@ def _export_inference_graph(input_type,
f.write(str(inference_graph_def))
if additional_output_tensor_names is not None:
- output_node_names = ','.join(outputs.keys()+additional_output_tensor_names)
+ output_node_names = ','.join(list(outputs.keys())+(
+ additional_output_tensor_names))
else:
output_node_names = ','.join(outputs.keys())
@@ -480,7 +563,7 @@ def _export_inference_graph(input_type,
initializer_nodes='')
write_saved_model(saved_model_path, frozen_graph_def,
- placeholder_tensor, outputs)
+ placeholder_tensor_dict, outputs)
def export_inference_graph(input_type,
@@ -490,7 +573,11 @@ def export_inference_graph(input_type,
input_shape=None,
output_collection_name='inference_op',
additional_output_tensor_names=None,
- write_inference_graph=False):
+ write_inference_graph=False,
+ use_side_inputs=False,
+ side_input_shapes=None,
+ side_input_names=None,
+ side_input_types=None):
"""Exports inference graph for the model specified in the pipeline config.
Args:
@@ -506,6 +593,13 @@ def export_inference_graph(input_type,
additional_output_tensor_names: list of additional output
tensors to include in the frozen graph.
write_inference_graph: If true, writes inference graph to disk.
+ use_side_inputs: If True, the model requires side_inputs.
+ side_input_shapes: List of shapes of the side input tensors,
+ required if use_side_inputs is True.
+ side_input_names: List of names of the side input tensors,
+ required if use_side_inputs is True.
+ side_input_types: List of types of the side input tensors,
+ required if use_side_inputs is True.
"""
detection_model = model_builder.build(pipeline_config.model,
is_training=False)
@@ -524,7 +618,11 @@ def export_inference_graph(input_type,
input_shape,
output_collection_name,
graph_hook_fn=graph_rewriter_fn,
- write_inference_graph=write_inference_graph)
+ write_inference_graph=write_inference_graph,
+ use_side_inputs=use_side_inputs,
+ side_input_shapes=side_input_shapes,
+ side_input_names=side_input_names,
+ side_input_types=side_input_types)
pipeline_config.eval_config.use_moving_averages = False
config_util.save_pipeline_config(pipeline_config, output_directory)
diff --git a/research/object_detection/exporter_lib_tf2_test.py b/research/object_detection/exporter_lib_tf2_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..99cbf263bece871d1a7d3b5a9f92e22c3f356412
--- /dev/null
+++ b/research/object_detection/exporter_lib_tf2_test.py
@@ -0,0 +1,240 @@
+# Lint as: python2, python3
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Test for exporter_lib_v2.py."""
+
+from __future__ import division
+import io
+import os
+import unittest
+from absl.testing import parameterized
+import numpy as np
+from PIL import Image
+import six
+
+import tensorflow.compat.v2 as tf
+
+from object_detection import exporter_lib_v2
+from object_detection.builders import model_builder
+from object_detection.core import model
+from object_detection.core import standard_fields as fields
+from object_detection.protos import pipeline_pb2
+from object_detection.utils import dataset_util
+from object_detection.utils import tf_version
+
+if six.PY2:
+ import mock # pylint: disable=g-importing-member,g-import-not-at-top
+else:
+ from unittest import mock # pylint: disable=g-importing-member,g-import-not-at-top
+
+
+class FakeModel(model.DetectionModel):
+
+ def __init__(self, conv_weight_scalar=1.0):
+ super(FakeModel, self).__init__(num_classes=2)
+ self._conv = tf.keras.layers.Conv2D(
+ filters=1, kernel_size=1, strides=(1, 1), padding='valid',
+ kernel_initializer=tf.keras.initializers.Constant(
+ value=conv_weight_scalar))
+
+ def preprocess(self, inputs):
+ true_image_shapes = [] # Doesn't matter for the fake model.
+ return tf.identity(inputs), true_image_shapes
+
+ def predict(self, preprocessed_inputs, true_image_shapes):
+ return {'image': self._conv(preprocessed_inputs)}
+
+ def postprocess(self, prediction_dict, true_image_shapes):
+ predict_tensor_sum = tf.reduce_sum(prediction_dict['image'])
+ with tf.control_dependencies(list(prediction_dict.values())):
+ postprocessed_tensors = {
+ 'detection_boxes': tf.constant([[[0.0, 0.0, 0.5, 0.5],
+ [0.5, 0.5, 0.8, 0.8]],
+ [[0.5, 0.5, 1.0, 1.0],
+ [0.0, 0.0, 0.0, 0.0]]], tf.float32),
+ 'detection_scores': predict_tensor_sum + tf.constant(
+ [[0.7, 0.6], [0.9, 0.0]], tf.float32),
+ 'detection_classes': tf.constant([[0, 1],
+ [1, 0]], tf.float32),
+ 'num_detections': tf.constant([2, 1], tf.float32),
+ }
+ return postprocessed_tensors
+
+ def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
+ pass
+
+ def restore_from_objects(self, fine_tune_checkpoint_type):
+ pass
+
+ def loss(self, prediction_dict, true_image_shapes):
+ pass
+
+ def regularization_losses(self):
+ pass
+
+ def updates(self):
+ pass
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class ExportInferenceGraphTest(tf.test.TestCase, parameterized.TestCase):
+
+ def _save_checkpoint_from_mock_model(
+ self, checkpoint_dir, conv_weight_scalar=6.0):
+ mock_model = FakeModel(conv_weight_scalar)
+ fake_image = tf.zeros(shape=[1, 10, 10, 3], dtype=tf.float32)
+ preprocessed_inputs, true_image_shapes = mock_model.preprocess(fake_image)
+ predictions = mock_model.predict(preprocessed_inputs, true_image_shapes)
+ mock_model.postprocess(predictions, true_image_shapes)
+
+ ckpt = tf.train.Checkpoint(model=mock_model)
+ exported_checkpoint_manager = tf.train.CheckpointManager(
+ ckpt, checkpoint_dir, max_to_keep=1)
+ exported_checkpoint_manager.save(checkpoint_number=0)
+
+ @parameterized.parameters(
+ {'input_type': 'image_tensor'},
+ {'input_type': 'encoded_image_string_tensor'},
+ {'input_type': 'tf_example'},
+ )
+ def test_export_yields_correct_directory_structure(
+ self, input_type='image_tensor'):
+ tmp_dir = self.get_temp_dir()
+ self._save_checkpoint_from_mock_model(tmp_dir)
+ with mock.patch.object(
+ model_builder, 'build', autospec=True) as mock_builder:
+ mock_builder.return_value = FakeModel()
+ output_directory = os.path.join(tmp_dir, 'output')
+ pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
+ exporter_lib_v2.export_inference_graph(
+ input_type=input_type,
+ pipeline_config=pipeline_config,
+ trained_checkpoint_dir=tmp_dir,
+ output_directory=output_directory)
+ self.assertTrue(os.path.exists(os.path.join(
+ output_directory, 'saved_model', 'saved_model.pb')))
+ self.assertTrue(os.path.exists(os.path.join(
+ output_directory, 'saved_model', 'variables', 'variables.index')))
+ self.assertTrue(os.path.exists(os.path.join(
+ output_directory, 'saved_model', 'variables',
+ 'variables.data-00000-of-00001')))
+ self.assertTrue(os.path.exists(os.path.join(
+ output_directory, 'checkpoint', 'ckpt-0.index')))
+ self.assertTrue(os.path.exists(os.path.join(
+ output_directory, 'checkpoint', 'ckpt-0.data-00000-of-00001')))
+ self.assertTrue(os.path.exists(os.path.join(
+ output_directory, 'pipeline.config')))
+
+ def get_dummy_input(self, input_type):
+ """Get dummy input for the given input type."""
+
+ if input_type == 'image_tensor':
+ return np.zeros(shape=(1, 20, 20, 3), dtype=np.uint8)
+ if input_type == 'float_image_tensor':
+ return np.zeros(shape=(1, 20, 20, 3), dtype=np.float32)
+ elif input_type == 'encoded_image_string_tensor':
+ image = Image.new('RGB', (20, 20))
+ byte_io = io.BytesIO()
+ image.save(byte_io, 'PNG')
+ return [byte_io.getvalue()]
+ elif input_type == 'tf_example':
+ image_tensor = tf.zeros((20, 20, 3), dtype=tf.uint8)
+ encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).numpy()
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature(six.b('jpeg')),
+ 'image/source_id':
+ dataset_util.bytes_feature(six.b('image_id')),
+ })).SerializeToString()
+ return [example]
+
+ @parameterized.parameters(
+ {'input_type': 'image_tensor'},
+ {'input_type': 'encoded_image_string_tensor'},
+ {'input_type': 'tf_example'},
+ {'input_type': 'float_image_tensor'},
+ )
+ def test_export_saved_model_and_run_inference(
+ self, input_type='image_tensor'):
+ tmp_dir = self.get_temp_dir()
+ self._save_checkpoint_from_mock_model(tmp_dir)
+ with mock.patch.object(
+ model_builder, 'build', autospec=True) as mock_builder:
+ mock_builder.return_value = FakeModel()
+ output_directory = os.path.join(tmp_dir, 'output')
+ pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
+ exporter_lib_v2.export_inference_graph(
+ input_type=input_type,
+ pipeline_config=pipeline_config,
+ trained_checkpoint_dir=tmp_dir,
+ output_directory=output_directory)
+
+ saved_model_path = os.path.join(output_directory, 'saved_model')
+ detect_fn = tf.saved_model.load(saved_model_path)
+ image = self.get_dummy_input(input_type)
+ detections = detect_fn(image)
+
+ detection_fields = fields.DetectionResultFields
+ self.assertAllClose(detections[detection_fields.detection_boxes],
+ [[[0.0, 0.0, 0.5, 0.5],
+ [0.5, 0.5, 0.8, 0.8]],
+ [[0.5, 0.5, 1.0, 1.0],
+ [0.0, 0.0, 0.0, 0.0]]])
+ self.assertAllClose(detections[detection_fields.detection_scores],
+ [[0.7, 0.6], [0.9, 0.0]])
+ self.assertAllClose(detections[detection_fields.detection_classes],
+ [[1, 2], [2, 1]])
+ self.assertAllClose(detections[detection_fields.num_detections], [2, 1])
+
+ def test_export_checkpoint_and_run_inference_with_image(self):
+ tmp_dir = self.get_temp_dir()
+ self._save_checkpoint_from_mock_model(tmp_dir, conv_weight_scalar=2.0)
+ with mock.patch.object(
+ model_builder, 'build', autospec=True) as mock_builder:
+ mock_builder.return_value = FakeModel()
+ output_directory = os.path.join(tmp_dir, 'output')
+ pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
+ exporter_lib_v2.export_inference_graph(
+ input_type='image_tensor',
+ pipeline_config=pipeline_config,
+ trained_checkpoint_dir=tmp_dir,
+ output_directory=output_directory)
+
+ mock_model = FakeModel()
+ ckpt = tf.compat.v2.train.Checkpoint(
+ model=mock_model)
+ checkpoint_dir = os.path.join(tmp_dir, 'output', 'checkpoint')
+ manager = tf.compat.v2.train.CheckpointManager(
+ ckpt, checkpoint_dir, max_to_keep=7)
+ ckpt.restore(manager.latest_checkpoint).expect_partial()
+
+ fake_image = tf.ones(shape=[1, 5, 5, 3], dtype=tf.float32)
+ preprocessed_inputs, true_image_shapes = mock_model.preprocess(fake_image)
+ predictions = mock_model.predict(preprocessed_inputs, true_image_shapes)
+ detections = mock_model.postprocess(predictions, true_image_shapes)
+
+ # 150 = conv_weight_scalar * height * width * channels = 2 * 5 * 5 * 3.
+ self.assertAllClose(detections['detection_scores'],
+ [[150 + 0.7, 150 + 0.6], [150 + 0.9, 150 + 0.0]])
+
+
+if __name__ == '__main__':
+ tf.enable_v2_behavior()
+ tf.test.main()
diff --git a/research/object_detection/exporter_lib_v2.py b/research/object_detection/exporter_lib_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..a7ecb45adb14f1b20c2291a3cf67376ad07194eb
--- /dev/null
+++ b/research/object_detection/exporter_lib_v2.py
@@ -0,0 +1,182 @@
+# Lint as: python2, python3
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Functions to export object detection inference graph."""
+import os
+import tensorflow.compat.v2 as tf
+from object_detection.builders import model_builder
+from object_detection.core import standard_fields as fields
+from object_detection.data_decoders import tf_example_decoder
+from object_detection.utils import config_util
+
+
+def _decode_image(encoded_image_string_tensor):
+ image_tensor = tf.image.decode_image(encoded_image_string_tensor,
+ channels=3)
+ image_tensor.set_shape((None, None, 3))
+ return image_tensor
+
+
+def _decode_tf_example(tf_example_string_tensor):
+ tensor_dict = tf_example_decoder.TfExampleDecoder().decode(
+ tf_example_string_tensor)
+ image_tensor = tensor_dict[fields.InputDataFields.image]
+ return image_tensor
+
+
+class DetectionInferenceModule(tf.Module):
+ """Detection Inference Module."""
+
+ def __init__(self, detection_model):
+ """Initializes a module for detection.
+
+ Args:
+ detection_model: The detection model to use for inference.
+ """
+ self._model = detection_model
+
+ def _run_inference_on_images(self, image):
+ """Cast image to float and run inference.
+
+ Args:
+ image: uint8 or float32 Tensor of shape [1, None, None, 3]
+ Returns:
+ Tensor dictionary holding detections.
+ """
+ label_id_offset = 1
+
+ image = tf.cast(image, tf.float32)
+ image, shapes = self._model.preprocess(image)
+ prediction_dict = self._model.predict(image, shapes)
+ detections = self._model.postprocess(prediction_dict, shapes)
+ classes_field = fields.DetectionResultFields.detection_classes
+ detections[classes_field] = (
+ tf.cast(detections[classes_field], tf.float32) + label_id_offset)
+
+ for key, val in detections.items():
+ detections[key] = tf.cast(val, tf.float32)
+
+ return detections
+
+
+class DetectionFromImageModule(DetectionInferenceModule):
+ """Detection Inference Module for image inputs."""
+
+ @tf.function(
+ input_signature=[
+ tf.TensorSpec(shape=[1, None, None, 3], dtype=tf.uint8)])
+ def __call__(self, input_tensor):
+ return self._run_inference_on_images(input_tensor)
+
+
+class DetectionFromFloatImageModule(DetectionInferenceModule):
+ """Detection Inference Module for float image inputs."""
+
+ @tf.function(
+ input_signature=[
+ tf.TensorSpec(shape=[1, None, None, 3], dtype=tf.float32)])
+ def __call__(self, input_tensor):
+ return self._run_inference_on_images(input_tensor)
+
+
+class DetectionFromEncodedImageModule(DetectionInferenceModule):
+ """Detection Inference Module for encoded image string inputs."""
+
+ @tf.function(input_signature=[tf.TensorSpec(shape=[1], dtype=tf.string)])
+ def __call__(self, input_tensor):
+ with tf.device('cpu:0'):
+ image = tf.map_fn(
+ _decode_image,
+ elems=input_tensor,
+ dtype=tf.uint8,
+ parallel_iterations=32,
+ back_prop=False)
+ return self._run_inference_on_images(image)
+
+
+class DetectionFromTFExampleModule(DetectionInferenceModule):
+ """Detection Inference Module for TF.Example inputs."""
+
+ @tf.function(input_signature=[tf.TensorSpec(shape=[1], dtype=tf.string)])
+ def __call__(self, input_tensor):
+ with tf.device('cpu:0'):
+ image = tf.map_fn(
+ _decode_tf_example,
+ elems=input_tensor,
+ dtype=tf.uint8,
+ parallel_iterations=32,
+ back_prop=False)
+ return self._run_inference_on_images(image)
+
+DETECTION_MODULE_MAP = {
+ 'image_tensor': DetectionFromImageModule,
+ 'encoded_image_string_tensor':
+ DetectionFromEncodedImageModule,
+ 'tf_example': DetectionFromTFExampleModule,
+ 'float_image_tensor': DetectionFromFloatImageModule
+}
+
+
+def export_inference_graph(input_type,
+ pipeline_config,
+ trained_checkpoint_dir,
+ output_directory):
+ """Exports inference graph for the model specified in the pipeline config.
+
+ This function creates `output_directory` if it does not already exist,
+ which will hold a copy of the pipeline config with filename `pipeline.config`,
+ and two subdirectories named `checkpoint` and `saved_model`
+ (containing the exported checkpoint and SavedModel respectively).
+
+ Args:
+ input_type: Type of input for the graph. Can be one of ['image_tensor',
+ 'encoded_image_string_tensor', 'tf_example', 'float_image_tensor'].
+ pipeline_config: pipeline_pb2.TrainEvalPipelineConfig proto.
+ trained_checkpoint_dir: Path to the directory holding the trained checkpoint.
+ output_directory: Path to write outputs.
+ Raises:
+ ValueError: if input_type is invalid.
+ """
+ output_checkpoint_directory = os.path.join(output_directory, 'checkpoint')
+ output_saved_model_directory = os.path.join(output_directory, 'saved_model')
+
+ detection_model = model_builder.build(pipeline_config.model,
+ is_training=False)
+
+ ckpt = tf.train.Checkpoint(
+ model=detection_model)
+ manager = tf.train.CheckpointManager(
+ ckpt, trained_checkpoint_dir, max_to_keep=1)
+ status = ckpt.restore(manager.latest_checkpoint).expect_partial()
+
+ if input_type not in DETECTION_MODULE_MAP:
+ raise ValueError('Unrecognized `input_type`')
+ detection_module = DETECTION_MODULE_MAP[input_type](detection_model)
+ # Getting the concrete function traces the graph and forces variables to
+ # be constructed --- only after this can we save the checkpoint and
+ # saved model.
+ concrete_function = detection_module.__call__.get_concrete_function()
+ status.assert_existing_objects_matched()
+
+ exported_checkpoint_manager = tf.train.CheckpointManager(
+ ckpt, output_checkpoint_directory, max_to_keep=1)
+ exported_checkpoint_manager.save(checkpoint_number=0)
+
+ tf.saved_model.save(detection_module,
+ output_saved_model_directory,
+ signatures=concrete_function)
+
+ config_util.save_pipeline_config(pipeline_config, output_directory)
diff --git a/research/object_detection/exporter_main_v2.py b/research/object_detection/exporter_main_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..a2ba8456039d4584e5998d619f36747d58018418
--- /dev/null
+++ b/research/object_detection/exporter_main_v2.py
@@ -0,0 +1,126 @@
+# Lint as: python2, python3
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+r"""Tool to export an object detection model for inference.
+
+Prepares an object detection tensorflow graph for inference using model
+configuration and a trained checkpoint. Outputs associated checkpoint files,
+a SavedModel, and a copy of the model config.
+
+The inference graph contains one of four input nodes depending on the user
+specified option.
+ * `image_tensor`: Accepts a uint8 4-D tensor of shape [1, None, None, 3]
+ * `float_image_tensor`: Accepts a float32 4-D tensor of shape
+ [1, None, None, 3]
+ * `encoded_image_string_tensor`: Accepts a 1-D string tensor of shape [None]
+ containing encoded PNG or JPEG images. Image resolutions are expected to be
+ the same if more than 1 image is provided.
+ * `tf_example`: Accepts a 1-D string tensor of shape [None] containing
+ serialized TFExample protos. Image resolutions are expected to be the same
+ if more than 1 image is provided.
+
+and the following output nodes returned by the model.postprocess(..):
+ * `num_detections`: Outputs float32 tensors of the form [batch]
+ that specifies the number of valid boxes per image in the batch.
+ * `detection_boxes`: Outputs float32 tensors of the form
+ [batch, num_boxes, 4] containing detected boxes.
+ * `detection_scores`: Outputs float32 tensors of the form
+ [batch, num_boxes] containing class scores for the detections.
+ * `detection_classes`: Outputs float32 tensors of the form
+ [batch, num_boxes] containing classes for the detections.
+
+
+Example Usage:
+--------------
+python exporter_main_v2.py \
+ --input_type image_tensor \
+ --pipeline_config_path path/to/ssd_inception_v2.config \
+ --trained_checkpoint_dir path/to/checkpoint \
+ --output_directory path/to/exported_model_directory
+
+The expected output would be in the directory
+path/to/exported_model_directory (which is created if it does not exist)
+holding two subdirectories (corresponding to checkpoint and SavedModel,
+respectively) and a copy of the pipeline config.
+
+Config overrides (see the `config_override` flag) are text protobufs
+(also of type pipeline_pb2.TrainEvalPipelineConfig) which are used to override
+certain fields in the provided pipeline_config_path. These are useful for
+making small changes to the inference graph that differ from the training or
+eval config.
+
+Example Usage (in which we change the second stage post-processing score
+threshold to be 0.5):
+
+python exporter_main_v2.py \
+ --input_type image_tensor \
+ --pipeline_config_path path/to/ssd_inception_v2.config \
+ --trained_checkpoint_dir path/to/checkpoint \
+ --output_directory path/to/exported_model_directory \
+ --config_override " \
+ model{ \
+ faster_rcnn { \
+ second_stage_post_processing { \
+ batch_non_max_suppression { \
+ score_threshold: 0.5 \
+ } \
+ } \
+ } \
+ }"
+"""
+from absl import app
+from absl import flags
+
+import tensorflow.compat.v2 as tf
+from google.protobuf import text_format
+from object_detection import exporter_lib_v2
+from object_detection.protos import pipeline_pb2
+
+tf.enable_v2_behavior()
+
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string('input_type', 'image_tensor', 'Type of input node. Can be '
+ 'one of [`image_tensor`, `encoded_image_string_tensor`, '
+ '`tf_example`, `float_image_tensor`]')
+flags.DEFINE_string('pipeline_config_path', None,
+ 'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
+ 'file.')
+flags.DEFINE_string('trained_checkpoint_dir', None,
+ 'Path to trained checkpoint directory')
+flags.DEFINE_string('output_directory', None, 'Path to write outputs.')
+flags.DEFINE_string('config_override', '',
+ 'pipeline_pb2.TrainEvalPipelineConfig '
+ 'text proto to override pipeline_config_path.')
+
+flags.mark_flag_as_required('pipeline_config_path')
+flags.mark_flag_as_required('trained_checkpoint_dir')
+flags.mark_flag_as_required('output_directory')
+
+
+def main(_):
+ pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
+ with tf.io.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
+ text_format.Merge(f.read(), pipeline_config)
+ text_format.Merge(FLAGS.config_override, pipeline_config)
+ exporter_lib_v2.export_inference_graph(
+ FLAGS.input_type, pipeline_config, FLAGS.trained_checkpoint_dir,
+ FLAGS.output_directory)
+
+
+if __name__ == '__main__':
+ app.run(main)
diff --git a/research/object_detection/exporter_test.py b/research/object_detection/exporter_tf1_test.py
similarity index 99%
rename from research/object_detection/exporter_test.py
rename to research/object_detection/exporter_tf1_test.py
index babe41d1e2a76b513f1a00bfefd5c33a8a07f690..b33bafd8db4f77627d6a64a1035f8c08bf6c09ee 100644
--- a/research/object_detection/exporter_test.py
+++ b/research/object_detection/exporter_tf1_test.py
@@ -19,6 +19,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
+import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
@@ -33,12 +34,13 @@ from object_detection.core import model
from object_detection.protos import graph_rewriter_pb2
from object_detection.protos import pipeline_pb2
from object_detection.utils import ops
+from object_detection.utils import tf_version
from object_detection.utils import variables_helper
if six.PY2:
import mock # pylint: disable=g-import-not-at-top
else:
- from unittest import mock # pylint: disable=g-import-not-at-top
+ mock = unittest.mock # pylint: disable=g-import-not-at-top, g-importing-member
# pylint: disable=g-import-not-at-top
try:
@@ -103,6 +105,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass
+ def restore_from_objects(self, fine_tune_checkpoint_type):
+ pass
+
def loss(self, prediction_dict, true_image_shapes):
pass
@@ -113,6 +118,7 @@ class FakeModel(model.DetectionModel):
pass
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class ExportInferenceGraphTest(tf.test.TestCase):
def _save_checkpoint_from_mock_model(self,
diff --git a/research/object_detection/g3doc/context_rcnn.md b/research/object_detection/g3doc/context_rcnn.md
new file mode 100644
index 0000000000000000000000000000000000000000..8d132b15b28ecc2d33581c9087804bb7d87000cf
--- /dev/null
+++ b/research/object_detection/g3doc/context_rcnn.md
@@ -0,0 +1,199 @@
+# Context R-CNN
+
+Context R-CNN is an object detection model that uses contextual features to
+improve object detection. See https://arxiv.org/abs/1912.03538 for more details.
+
+## Table of Contents
+
+* [Preparing Context Data for Context R-CNN](#preparing-context-data-for-context-r-cnn)
+ + [Generating TfRecords from a set of images and a COCO-CameraTraps style
+ JSON](#generating-tfrecords-from-a-set-of-images-and-a-coco-cameratraps-style-json)
+ + [Generating weakly-supervised bounding box labels for image-labeled data](#generating-weakly-supervised-bounding-box-labels-for-image-labeled-data)
+ + [Generating and saving contextual features for each image](#generating-and-saving-contextual-features-for-each-image)
+ + [Building up contextual memory banks and storing them for each context
+ group](#building-up-contextual-memory-banks-and-storing-them-for-each-context-group)
+* [Training a Context R-CNN Model](#training-a-context-r-cnn-model)
+* [Exporting a Context R-CNN Model](#exporting-a-context-r-cnn-model)
+
+## Preparing Context Data for Context R-CNN
+
+In this section, we will walk through the process of generating TfRecords with
+contextual features. We focus on building context from object-centric features
+generated with a pre-trained Faster R-CNN model, but you can adapt the provided
+code to use alternative feature extractors.
+
+Each of these data processing scripts uses Apache Beam, which can be installed
+using
+
+```
+pip install apache-beam
+```
+
+and can be run locally, or on a cluster for efficient processing of large
+amounts of data. Note that generate_detection_data.py and
+generate_embedding_data.py both involve running inference, and may be very slow
+to run locally. See the
+[Apache Beam documentation](https://beam.apache.org/documentation/runners/dataflow/)
+for more information, and Google Cloud Documentation for a tutorial on
+[running Beam jobs on DataFlow](https://cloud.google.com/dataflow/docs/quickstarts/quickstart-python).
+
+### Generating TfRecords from a set of images and a COCO-CameraTraps style JSON
+
+If your data is already stored in TfRecords, you can skip this first step.
+
+We assume a COCO-CameraTraps json format, as described on
+[LILA.science](https://github.com/microsoft/CameraTraps/blob/master/data_management/README.md).
+
+COCO-CameraTraps is a format that adds static-camera-specific fields, such as a
+location ID and datetime, to the well-established COCO format. To generate
+appropriate context later on, be sure you have specified each contextual group
+with a different location ID, which in the static camera case would be the ID of
+the camera, as well as the datetime each photo was taken. We assume that empty
+images will be labeled 'empty' with class id 0.
+
+To generate TfRecords from your database and local image folder, run
+
+```
+python object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_main.py \
+ --alsologtostderr \
+ --output_tfrecord_prefix="/path/to/output/tfrecord/location/prefix" \
+ --image_directory="/path/to/image/folder/" \
+ --input_annotations_file="path/to/annotations.json"
+```
+
+### Generating weakly-supervised bounding box labels for image-labeled data
+
+If all your data already has bounding box labels you can skip this step.
+
+Many camera trap datasets do not have bounding box labels, or only have bounding
+box labels for some of the data. We have provided code to add bounding boxes
+from a pretrained model (such as the
+[Microsoft AI for Earth MegaDetector](https://github.com/microsoft/CameraTraps/blob/master/megadetector.md))
+and match the boxes to the image-level class label.
+
+To export your pretrained detection model, run
+
+```
+python object_detection/export_inference_graph.py \
+ --alsologtostderr \
+ --input_type tf_example \
+ --pipeline_config_path path/to/faster_rcnn_model.config \
+ --trained_checkpoint_prefix path/to/model.ckpt \
+ --output_directory path/to/exported_model_directory
+```
+
+To add bounding boxes to your dataset using the above model, run
+
+```
+python object_detection/dataset_tools/context_rcnn/generate_detection_data.py \
+ --alsologtostderr \
+ --input_tfrecord path/to/input_tfrecord@X \
+ --output_tfrecord path/to/output_tfrecord@X \
+ --model_dir path/to/exported_model_directory/saved_model
+```
+
+If an image already has bounding box labels, those labels are left unchanged. If
+an image is labeled 'empty' (class ID 0), we will not generate boxes for that
+image.
+
+### Generating and saving contextual features for each image
+
+We next extract and store features for each image from a pretrained model. This
+model can be the same model as above, or be a class-specific detection model
+trained on data from your classes of interest.
+
+To export your pretrained detection model, run
+
+```
+python object_detection/export_inference_graph.py \
+ --alsologtostderr \
+ --input_type tf_example \
+ --pipeline_config_path path/to/pipeline.config \
+ --trained_checkpoint_prefix path/to/model.ckpt \
+ --output_directory path/to/exported_model_directory \
+ --additional_output_tensor_names detection_features
+```
+
+Make sure that you have set `output_final_box_features: true` within
+your config file before exporting. This is needed to export the features as an
+output, but it does not need to be set during training.
+
+To generate and save contextual features for your data, run
+
+```
+python object_detection/dataset_tools/context_rcnn/generate_embedding_data.py \
+ --alsologtostderr \
+ --embedding_input_tfrecord path/to/input_tfrecords* \
+ --embedding_output_tfrecord path/to/output_tfrecords \
+ --embedding_model_dir path/to/exported_model_directory/saved_model
+```
+
+### Building up contextual memory banks and storing them for each context group
+
+To build the context features you just added for each image into memory banks,
+run
+
+```
+python object_detection/dataset_tools/context_rcnn/add_context_to_examples.py \
+ --input_tfrecord path/to/input_tfrecords* \
+ --output_tfrecord path/to/output_tfrecords \
+ --sequence_key image/location \
+ --time_horizon month
+```
+
+where the input_tfrecords for add_context_to_examples.py are the
+output_tfrecords from generate_embedding_data.py.
+
+For all options, see add_context_to_examples.py. By default, this code builds
+TfSequenceExamples, which are more data efficient (this allows you to store the
+context features once for each context group, as opposed to once per image). If
+you would like to export TfExamples instead, set flag `--output_type
+tf_example`.
+
+If you use TfSequenceExamples, you must be sure to set `input_type:
+TF_SEQUENCE_EXAMPLE` within your Context R-CNN configs for both
+train_input_reader and test_input_reader. See
+`object_detection/test_data/context_rcnn_camera_trap.config`
+for an example.
+
+## Training a Context R-CNN Model
+
+To train a Context R-CNN model, you must first set up your config file. See
+`test_data/context_rcnn_camera_trap.config` for an example. The important
+difference between this config and a Faster R-CNN config is the inclusion of a
+`context_config` within the model, which defines the necessary Context R-CNN
+parameters.
+
+```
+context_config {
+ max_num_context_features: 2000
+ context_feature_length: 2057
+ }
+```
+
+Once your config file has been updated with your local paths, you can follow
+along with documentation for running [locally](running_locally.md), or
+[on the cloud](running_on_cloud.md).
+
+## Exporting a Context R-CNN Model
+
+Since Context R-CNN takes context features as well as images as input, we have
+to explicitly define the other inputs ("side_inputs") to the model when
+exporting, as below. This example is shown with default context feature shapes.
+
+```
+python object_detection/export_inference_graph.py \
+ --input_type image_tensor \
+ --input_shape 1,-1,-1,3 \
+ --pipeline_config_path /path/to/context_rcnn_model/pipeline.config \
+ --trained_checkpoint_prefix /path/to/context_rcnn_model/model.ckpt \
+ --output_directory /path/to/output_directory \
+ --use_side_inputs True \
+ --side_input_shapes 1,2000,2057/1 \
+ --side_input_names context_features,valid_context_size \
+ --side_input_types float,int
+
+```
+
+If you have questions about Context R-CNN, please contact
+[Sara Beery](https://beerys.github.io/).
diff --git a/research/object_detection/g3doc/detection_model_zoo.md b/research/object_detection/g3doc/detection_model_zoo.md
index b13fe6a3123ab1336afe0b7e8660753893040cb7..cb515b813ba8296005da503703bb659b9cb8b9dd 100644
--- a/research/object_detection/g3doc/detection_model_zoo.md
+++ b/research/object_detection/g3doc/detection_model_zoo.md
@@ -1,32 +1,34 @@
# Tensorflow detection model zoo
-We provide a collection of detection models pre-trained on the [COCO
-dataset](http://cocodataset.org), the [Kitti dataset](http://www.cvlibs.net/datasets/kitti/),
-the
+We provide a collection of detection models pre-trained on the
+[COCO dataset](http://cocodataset.org), the
+[Kitti dataset](http://www.cvlibs.net/datasets/kitti/), the
[Open Images dataset](https://storage.googleapis.com/openimages/web/index.html),
-the [AVA v2.1 dataset](https://research.google.com/ava/) and the
-[iNaturalist Species Detection Dataset](https://github.com/visipedia/inat_comp/blob/master/2017/README.md#bounding-boxes).
+the [AVA v2.1 dataset](https://research.google.com/ava/), the
+[iNaturalist Species Detection Dataset](https://github.com/visipedia/inat_comp/blob/master/2017/README.md#bounding-boxes)
+and the
+[Snapshot Serengeti Dataset](http://lila.science/datasets/snapshot-serengeti).
These models can be useful for out-of-the-box inference if you are interested in
categories already in those datasets. They are also useful for initializing your
models when training on novel datasets.
In the table below, we list each such pre-trained model including:
-* a model name that corresponds to a config file that was used to train this
- model in the `samples/configs` directory,
-* a download link to a tar.gz file containing the pre-trained model,
-* model speed --- we report running time in ms per 600x600 image (including all
- pre and post-processing), but please be
- aware that these timings depend highly on one's specific hardware
- configuration (these timings were performed using an Nvidia
- GeForce GTX TITAN X card) and should be treated more as relative timings in
- many cases. Also note that desktop GPU timing does not always reflect mobile
- run time. For example Mobilenet V2 is faster on mobile devices than Mobilenet
- V1, but is slightly slower on desktop GPU.
-* detector performance on subset of the COCO validation set or Open Images test split as measured by the dataset-specific mAP measure.
- Here, higher is better, and we only report bounding box mAP rounded to the
- nearest integer.
-* Output types (`Boxes`, and `Masks` if applicable )
+* a model name that corresponds to a config file that was used to train this
+ model in the `samples/configs` directory,
+* a download link to a tar.gz file containing the pre-trained model,
+* model speed --- we report running time in ms per 600x600 image (including
+ all pre and post-processing), but please be aware that these timings depend
+ highly on one's specific hardware configuration (these timings were
+ performed using an Nvidia GeForce GTX TITAN X card) and should be treated
+ more as relative timings in many cases. Also note that desktop GPU timing
+ does not always reflect mobile run time. For example Mobilenet V2 is faster
+ on mobile devices than Mobilenet V1, but is slightly slower on desktop GPU.
+* detector performance on subset of the COCO validation set, Open Images test
+ split, iNaturalist test split, or Snapshot Serengeti LILA.science test
+ split, as measured by the dataset-specific mAP measure. Here, higher is
+ better, and we only report bounding box mAP rounded to the nearest integer.
+* Output types (`Boxes`, and `Masks` if applicable )
You can un-tar each tar.gz file via, e.g.,:
@@ -53,118 +55,133 @@ Inside the un-tar'ed directory, you will find:
Some remarks on frozen inference graphs:
-* If you try to evaluate the frozen graph, you may find performance numbers for
- some of the models to be slightly lower than what we report in the below
- tables. This is because we discard detections with scores below a
- threshold (typically 0.3) when creating the frozen graph. This corresponds
- effectively to picking a point on the precision recall curve of
- a detector (and discarding the part past that point), which negatively impacts
- standard mAP metrics.
-* Our frozen inference graphs are generated using the
- [v1.12.0](https://github.com/tensorflow/tensorflow/tree/v1.12.0)
- release version of Tensorflow and we do not guarantee that these will work
- with other versions; this being said, each frozen inference graph can be
- regenerated using your current version of Tensorflow by re-running the
- [exporter](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/exporting_models.md),
- pointing it at the model directory as well as the corresponding config file in
- [samples/configs](https://github.com/tensorflow/models/tree/master/research/object_detection/samples/configs).
-
+* If you try to evaluate the frozen graph, you may find performance numbers
+ for some of the models to be slightly lower than what we report in the below
+ tables. This is because we discard detections with scores below a threshold
+ (typically 0.3) when creating the frozen graph. This corresponds effectively
+ to picking a point on the precision recall curve of a detector (and
+ discarding the part past that point), which negatively impacts standard mAP
+ metrics.
+* Our frozen inference graphs are generated using the
+ [v1.12.0](https://github.com/tensorflow/tensorflow/tree/v1.12.0) release
+ version of Tensorflow and we do not guarantee that these will work with
+ other versions; this being said, each frozen inference graph can be
+ regenerated using your current version of Tensorflow by re-running the
+ [exporter](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/exporting_models.md),
+ pointing it at the model directory as well as the corresponding config file
+ in
+ [samples/configs](https://github.com/tensorflow/models/tree/master/research/object_detection/samples/configs).
## COCO-trained models
-| Model name | Speed (ms) | COCO mAP[^1] | Outputs |
-| ------------ | :--------------: | :--------------: | :-------------: |
-| [ssd_mobilenet_v1_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz) | 30 | 21 | Boxes |
-| [ssd_mobilenet_v1_0.75_depth_coco ☆](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_0.75_depth_300x300_coco14_sync_2018_07_03.tar.gz) | 26 | 18 | Boxes |
-| [ssd_mobilenet_v1_quantized_coco ☆](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_quantized_300x300_coco14_sync_2018_07_18.tar.gz) | 29 | 18 | Boxes |
-| [ssd_mobilenet_v1_0.75_depth_quantized_coco ☆](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_0.75_depth_quantized_300x300_coco14_sync_2018_07_18.tar.gz) | 29 | 16 | Boxes |
-| [ssd_mobilenet_v1_ppn_coco ☆](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_ppn_shared_box_predictor_300x300_coco14_sync_2018_07_03.tar.gz) | 26 | 20 | Boxes |
-| [ssd_mobilenet_v1_fpn_coco ☆](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz) | 56 | 32 | Boxes |
-| [ssd_resnet_50_fpn_coco ☆](http://download.tensorflow.org/models/object_detection/ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz) | 76 | 35 | Boxes |
-| [ssd_mobilenet_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz) | 31 | 22 | Boxes |
-| [ssd_mobilenet_v2_quantized_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_quantized_300x300_coco_2019_01_03.tar.gz) | 29 | 22 | Boxes |
-| [ssdlite_mobilenet_v2_coco](http://download.tensorflow.org/models/object_detection/ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz) | 27 | 22 | Boxes |
-| [ssd_inception_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2018_01_28.tar.gz) | 42 | 24 | Boxes |
-| [faster_rcnn_inception_v2_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz) | 58 | 28 | Boxes |
-| [faster_rcnn_resnet50_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2018_01_28.tar.gz) | 89 | 30 | Boxes |
-| [faster_rcnn_resnet50_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_lowproposals_coco_2018_01_28.tar.gz) | 64 | | Boxes |
-| [rfcn_resnet101_coco](http://download.tensorflow.org/models/object_detection/rfcn_resnet101_coco_2018_01_28.tar.gz) | 92 | 30 | Boxes |
-| [faster_rcnn_resnet101_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_2018_01_28.tar.gz) | 106 | 32 | Boxes |
-| [faster_rcnn_resnet101_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_lowproposals_coco_2018_01_28.tar.gz) | 82 | | Boxes |
-| [faster_rcnn_inception_resnet_v2_atrous_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_coco_2018_01_28.tar.gz) | 620 | 37 | Boxes |
-| [faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco_2018_01_28.tar.gz) | 241 | | Boxes |
-| [faster_rcnn_nas](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_coco_2018_01_28.tar.gz) | 1833 | 43 | Boxes |
-| [faster_rcnn_nas_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_lowproposals_coco_2018_01_28.tar.gz) | 540 | | Boxes |
-| [mask_rcnn_inception_resnet_v2_atrous_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_resnet_v2_atrous_coco_2018_01_28.tar.gz) | 771 | 36 | Masks |
-| [mask_rcnn_inception_v2_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz) | 79 | 25 | Masks |
-| [mask_rcnn_resnet101_atrous_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_resnet101_atrous_coco_2018_01_28.tar.gz) | 470 | 33 | Masks |
-| [mask_rcnn_resnet50_atrous_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_resnet50_atrous_coco_2018_01_28.tar.gz) | 343 | 29 | Masks |
-
-Note: The asterisk (☆) at the end of model name indicates that this model supports TPU training.
-
-Note: If you download the tar.gz file of quantized models and un-tar, you will get different set of files - a checkpoint, a config file and tflite frozen graphs (txt/binary).
-
+Model name | Speed (ms) | COCO mAP[^1] | Outputs
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------: | :----------: | :-----:
+[ssd_mobilenet_v1_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz) | 30 | 21 | Boxes
+[ssd_mobilenet_v1_0.75_depth_coco ☆](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_0.75_depth_300x300_coco14_sync_2018_07_03.tar.gz) | 26 | 18 | Boxes
+[ssd_mobilenet_v1_quantized_coco ☆](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_quantized_300x300_coco14_sync_2018_07_18.tar.gz) | 29 | 18 | Boxes
+[ssd_mobilenet_v1_0.75_depth_quantized_coco ☆](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_0.75_depth_quantized_300x300_coco14_sync_2018_07_18.tar.gz) | 29 | 16 | Boxes
+[ssd_mobilenet_v1_ppn_coco ☆](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_ppn_shared_box_predictor_300x300_coco14_sync_2018_07_03.tar.gz) | 26 | 20 | Boxes
+[ssd_mobilenet_v1_fpn_coco ☆](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz) | 56 | 32 | Boxes
+[ssd_resnet_50_fpn_coco ☆](http://download.tensorflow.org/models/object_detection/ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz) | 76 | 35 | Boxes
+[ssd_mobilenet_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz) | 31 | 22 | Boxes
+[ssd_mobilenet_v2_quantized_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_quantized_300x300_coco_2019_01_03.tar.gz) | 29 | 22 | Boxes
+[ssdlite_mobilenet_v2_coco](http://download.tensorflow.org/models/object_detection/ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz) | 27 | 22 | Boxes
+[ssd_inception_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2018_01_28.tar.gz) | 42 | 24 | Boxes
+[faster_rcnn_inception_v2_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz) | 58 | 28 | Boxes
+[faster_rcnn_resnet50_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2018_01_28.tar.gz) | 89 | 30 | Boxes
+[faster_rcnn_resnet50_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_lowproposals_coco_2018_01_28.tar.gz) | 64 | | Boxes
+[rfcn_resnet101_coco](http://download.tensorflow.org/models/object_detection/rfcn_resnet101_coco_2018_01_28.tar.gz) | 92 | 30 | Boxes
+[faster_rcnn_resnet101_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_2018_01_28.tar.gz) | 106 | 32 | Boxes
+[faster_rcnn_resnet101_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_lowproposals_coco_2018_01_28.tar.gz) | 82 | | Boxes
+[faster_rcnn_inception_resnet_v2_atrous_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_coco_2018_01_28.tar.gz) | 620 | 37 | Boxes
+[faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco_2018_01_28.tar.gz) | 241 | | Boxes
+[faster_rcnn_nas](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_coco_2018_01_28.tar.gz) | 1833 | 43 | Boxes
+[faster_rcnn_nas_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_lowproposals_coco_2018_01_28.tar.gz) | 540 | | Boxes
+[mask_rcnn_inception_resnet_v2_atrous_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_resnet_v2_atrous_coco_2018_01_28.tar.gz) | 771 | 36 | Masks
+[mask_rcnn_inception_v2_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz) | 79 | 25 | Masks
+[mask_rcnn_resnet101_atrous_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_resnet101_atrous_coco_2018_01_28.tar.gz) | 470 | 33 | Masks
+[mask_rcnn_resnet50_atrous_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_resnet50_atrous_coco_2018_01_28.tar.gz) | 343 | 29 | Masks
+
+Note: The star (☆) at the end of a model name indicates that the model
+supports TPU training.
+
+Note: If you download the tar.gz file of quantized models and un-tar, you will
+get a different set of files - a checkpoint, a config file and tflite frozen
+graphs (txt/binary).
### Mobile models
Model name | Pixel 1 Latency (ms) | COCO mAP | Outputs
------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------------: | :------: | :-----:
-[ssd_mobiledet_cpu_coco](http://download.tensorflow.org/models/object_detection/ssdlite_mobiledet_cpu_320x320_coco_2020_05_19.tar.gz) | 113 | 24.0 | Boxes
-[ssd_mobilenet_v2_mnasfpn_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_mnasfpn_shared_box_predictor_320x320_coco_sync_2020_05_18.tar.gz) | 183 | 26.6 | Boxes
-[ssd_mobilenet_v3_large_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v3_large_coco_2020_01_14.tar.gz) | 119 | 22.6 | Boxes
-[ssd_mobilenet_v3_small_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v3_small_coco_2020_01_14.tar.gz) | 43 | 15.4 | Boxes
+[ssd_mobiledet_cpu_coco](http://download.tensorflow.org/models/object_detection/ssdlite_mobiledet_cpu_320x320_coco_2020_05_19.tar.gz) | 113 | 24.0 | Boxes
+[ssd_mobilenet_v2_mnasfpn_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_mnasfpn_shared_box_predictor_320x320_coco_sync_2020_05_18.tar.gz) | 183 | 26.6 | Boxes
+[ssd_mobilenet_v3_large_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v3_large_coco_2020_01_14.tar.gz) | 119 | 22.6 | Boxes
+[ssd_mobilenet_v3_small_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v3_small_coco_2020_01_14.tar.gz) | 43 | 15.4 | Boxes
### Pixel4 Edge TPU models
-Model name | Pixel 4 Edge TPU Latency (ms) | COCO mAP (fp32/uint8) | Outputs
------------------------------------------------------------------------------------------------------------------------------------ | :------------------: | :------: | :-----:
-[ssd_mobiledet_edgetpu_coco](http://download.tensorflow.org/models/object_detection/ssdlite_mobiledet_edgetpu_320x320_coco_2020_05_19.tar.gz) | 6.9 | 25.9/25.6 | Boxes
-[ssd_mobilenet_edgetpu_coco](https://storage.cloud.google.com/mobilenet_edgetpu/checkpoints/ssdlite_mobilenet_edgetpu_coco_quant.tar.gz) | 6.6 | -/24.3 | Boxes
+
+Model name | Pixel 4 Edge TPU Latency (ms) | COCO mAP (fp32/uint8) | Outputs
+--------------------------------------------------------------------------------------------------------------------------------------------- | :---------------------------: | :-------------------: | :-----:
+[ssd_mobiledet_edgetpu_coco](http://download.tensorflow.org/models/object_detection/ssdlite_mobiledet_edgetpu_320x320_coco_2020_05_19.tar.gz) | 6.9 | 25.9/25.6 | Boxes
+[ssd_mobilenet_edgetpu_coco](https://storage.cloud.google.com/mobilenet_edgetpu/checkpoints/ssdlite_mobilenet_edgetpu_coco_quant.tar.gz) | 6.6 | -/24.3 | Boxes
### Pixel4 DSP models
-Model name | Pixel 4 DSP Latency (ms) | COCO mAP (fp32/uint8) | Outputs
------------------------------------------------------------------------------------------------------------------------------------ | :------------------: | :------: | :-----:
-[ssd_mobiledet_dsp_coco](http://download.tensorflow.org/models/object_detection/ssdlite_mobiledet_dsp_320x320_coco_2020_05_19.tar.gz) | 12.3 | 28.9/28.8 | Boxes
+
+Model name | Pixel 4 DSP Latency (ms) | COCO mAP (fp32/uint8) | Outputs
+------------------------------------------------------------------------------------------------------------------------------------- | :----------------------: | :-------------------: | :-----:
+[ssd_mobiledet_dsp_coco](http://download.tensorflow.org/models/object_detection/ssdlite_mobiledet_dsp_320x320_coco_2020_05_19.tar.gz) | 12.3 | 28.9/28.8 | Boxes
## Kitti-trained models
-Model name | Speed (ms) | Pascal mAP@0.5 | Outputs
------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :---: | :-------------: | :-----:
-[faster_rcnn_resnet101_kitti](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_kitti_2018_01_28.tar.gz) | 79 | 87 | Boxes
+Model name | Speed (ms) | Pascal mAP@0.5 | Outputs
+----------------------------------------------------------------------------------------------------------------------------------- | :--------: | :------------: | :-----:
+[faster_rcnn_resnet101_kitti](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_kitti_2018_01_28.tar.gz) | 79 | 87 | Boxes
## Open Images-trained models
Model name | Speed (ms) | Open Images mAP@0.5[^2] | Outputs
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------: | :---------------------: | :-----:
-[faster_rcnn_inception_resnet_v2_atrous_oidv2](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_2018_01_28.tar.gz) | 727 | 37 | Boxes
+[faster_rcnn_inception_resnet_v2_atrous_oidv2](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_2018_01_28.tar.gz) | 727 | 37 | Boxes
[faster_rcnn_inception_resnet_v2_atrous_lowproposals_oidv2](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2018_01_28.tar.gz) | 347 | | Boxes
[facessd_mobilenet_v2_quantized_open_image_v4](http://download.tensorflow.org/models/object_detection/facessd_mobilenet_v2_quantized_320x320_open_image_v4.tar.gz) [^3] | 20 | 73 (faces) | Boxes
-Model name | Speed (ms) | Open Images mAP@0.5[^4] | Outputs
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------: | :---------------------: | :-----:
-[faster_rcnn_inception_resnet_v2_atrous_oidv4](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_v4_2018_12_12.tar.gz) | 425 | 54 | Boxes
-[ssd_mobilenetv2_oidv4](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_oid_v4_2018_12_12.tar.gz) | 89 | 36 | Boxes
-[ssd_resnet_101_fpn_oidv4](http://download.tensorflow.org/models/object_detection/ssd_resnet101_v1_fpn_shared_box_predictor_oid_512x512_sync_2019_01_20.tar.gz) | 237 | 38 | Boxes
-## iNaturalist Species-trained models
+Model name | Speed (ms) | Open Images mAP@0.5[^4] | Outputs
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------: | :---------------------: | :-----:
+[faster_rcnn_inception_resnet_v2_atrous_oidv4](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_v4_2018_12_12.tar.gz) | 425 | 54 | Boxes
+[ssd_mobilenetv2_oidv4](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_oid_v4_2018_12_12.tar.gz) | 89 | 36 | Boxes
+[ssd_resnet_101_fpn_oidv4](http://download.tensorflow.org/models/object_detection/ssd_resnet101_v1_fpn_shared_box_predictor_oid_512x512_sync_2019_01_20.tar.gz) | 237 | 38 | Boxes
-Model name | Speed (ms) | Pascal mAP@0.5 | Outputs
------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :---: | :-------------: | :-----:
-[faster_rcnn_resnet101_fgvc](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_fgvc_2018_07_19.tar.gz) | 395 | 58 | Boxes
-[faster_rcnn_resnet50_fgvc](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_fgvc_2018_07_19.tar.gz) | 366 | 55 | Boxes
+## iNaturalist Species-trained models
+Model name | Speed (ms) | Pascal mAP@0.5 | Outputs
+--------------------------------------------------------------------------------------------------------------------------------- | :--------: | :------------: | :-----:
+[faster_rcnn_resnet101_fgvc](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_fgvc_2018_07_19.tar.gz) | 395 | 58 | Boxes
+[faster_rcnn_resnet50_fgvc](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_fgvc_2018_07_19.tar.gz) | 366 | 55 | Boxes
## AVA v2.1 trained models
-Model name | Speed (ms) | Pascal mAP@0.5 | Outputs
------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :---: | :-------------: | :-----:
-[faster_rcnn_resnet101_ava_v2.1](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_ava_v2.1_2018_04_30.tar.gz) | 93 | 11 | Boxes
-
-
-[^1]: See [MSCOCO evaluation protocol](http://cocodataset.org/#detections-eval). The COCO mAP numbers here are evaluated on COCO 14 minival set (note that our split is different from COCO 17 Val). A full list of image ids used in our split could be fould [here](https://github.com/tensorflow/models/blob/master/research/object_detection/data/mscoco_minival_ids.txt).
-
-
-[^2]: This is PASCAL mAP with a slightly different way of true positives computation: see [Open Images evaluation protocols](evaluation_protocols.md), oid_V2_detection_metrics.
-
-[^3]: Non-face boxes are dropped during training and non-face groundtruth boxes are ignored when evaluating.
-
-[^4]: This is Open Images Challenge metric: see [Open Images evaluation protocols](evaluation_protocols.md), oid_challenge_detection_metrics.
-
+Model name | Speed (ms) | Pascal mAP@0.5 | Outputs
+----------------------------------------------------------------------------------------------------------------------------------------- | :--------: | :------------: | :-----:
+[faster_rcnn_resnet101_ava_v2.1](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_ava_v2.1_2018_04_30.tar.gz) | 93 | 11 | Boxes
+
+## Snapshot Serengeti Camera Trap trained models
+
+Model name | COCO mAP@0.5 | Outputs
+--------------------------------------------------------------------------------------------------------------------------------------------------------------- | :----------: | :-----:
+[faster_rcnn_resnet101_snapshot_serengeti](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_snapshot_serengeti_2020_06_10.tar.gz) | 38 | Boxes
+[context_rcnn_resnet101_snapshot_serengeti](http://download.tensorflow.org/models/object_detection/context_rcnn_resnet101_snapshot_serengeti_2020_06_10.tar.gz) | 56 | Boxes
+
+[^1]: See [MSCOCO evaluation protocol](http://cocodataset.org/#detections-eval).
+ The COCO mAP numbers here are evaluated on COCO 14 minival set (note that
+ our split is different from COCO 17 Val). A full list of image ids used in
+ our split could be found
+ [here](https://github.com/tensorflow/models/blob/master/research/object_detection/data/mscoco_minival_ids.txt).
+[^2]: This is PASCAL mAP with a slightly different way of true positives
+ computation: see
+ [Open Images evaluation protocols](evaluation_protocols.md),
+ oid_V2_detection_metrics.
+[^3]: Non-face boxes are dropped during training and non-face groundtruth boxes
+ are ignored when evaluating.
+[^4]: This is the Open Images Challenge metric: see
+ [Open Images evaluation protocols](evaluation_protocols.md),
+ oid_challenge_detection_metrics.
diff --git a/research/object_detection/inference/detection_inference_test.py b/research/object_detection/inference/detection_inference_tf1_test.py
similarity index 98%
rename from research/object_detection/inference/detection_inference_test.py
rename to research/object_detection/inference/detection_inference_tf1_test.py
index 6d35f2b688d4ccfc885b854251e27d2a47c24d4b..899da1298765425c667fbcdfd341fad713724d9f 100644
--- a/research/object_detection/inference/detection_inference_test.py
+++ b/research/object_detection/inference/detection_inference_tf1_test.py
@@ -15,7 +15,7 @@
r"""Tests for detection_inference.py."""
import os
-
+import unittest
import numpy as np
from PIL import Image
import six
@@ -25,6 +25,7 @@ from google.protobuf import text_format
from object_detection.core import standard_fields
from object_detection.inference import detection_inference
from object_detection.utils import dataset_util
+from object_detection.utils import tf_version
def get_mock_tfrecord_path():
@@ -74,6 +75,7 @@ def create_mock_graph():
fl.write(graph_def.SerializeToString())
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class InferDetectionsTests(tf.test.TestCase):
def test_simple(self):
diff --git a/research/object_detection/inputs.py b/research/object_detection/inputs.py
index 7512a56b15f9785b3868053e2a970bbd15801cb6..a3eb2f0bd2514a723442a568e124a579eb801794 100644
--- a/research/object_detection/inputs.py
+++ b/research/object_detection/inputs.py
@@ -64,7 +64,6 @@ def _multiclass_scores_or_one_hot_labels(multiclass_scores,
[tf.shape(groundtruth_boxes)[0], num_classes])
def false_fn():
return tf.one_hot(groundtruth_classes, num_classes)
-
return tf.cond(tf.size(multiclass_scores) > 0, true_fn, false_fn)
@@ -1006,14 +1005,21 @@ def get_reduce_to_frame_fn(input_reader_config, is_training):
`reduce_to_frame_fn` for the dataset builder
"""
if input_reader_config.input_type != (
- input_reader_pb2.InputType.TF_SEQUENCE_EXAMPLE):
- return lambda d: d
+ input_reader_pb2.InputType.Value('TF_SEQUENCE_EXAMPLE')):
+ return lambda dataset, dataset_map_fn, batch_size, config: dataset
else:
- def reduce_to_frame(dataset):
+ def reduce_to_frame(dataset, dataset_map_fn, batch_size,
+ input_reader_config):
"""Returns a function reducing sequence tensors to single frame tensors.
Args:
dataset: A tf dataset containing sequence tensors.
+ dataset_map_fn: A function that handles whether to
+ map_with_legacy_function for this dataset
+ batch_size: used if map_with_legacy_function is true to determine
+ num_parallel_calls
+ input_reader_config: used if map_with_legacy_function is true to
+ determine num_parallel_calls
Returns:
A tf dataset containing single frame tensors.
@@ -1046,13 +1052,14 @@ def get_reduce_to_frame_fn(input_reader_config, is_training):
# Copy all context tensors.
out_tensor_dict[key] = tensor_dict[key]
return out_tensor_dict
- dataset = dataset.map(get_single_frame, tf.data.experimental.AUTOTUNE)
+ dataset = dataset_map_fn(dataset, get_single_frame, batch_size,
+ input_reader_config)
else:
- dataset = dataset.map(util_ops.tile_context_tensors,
- tf.data.experimental.AUTOTUNE)
+ dataset = dataset_map_fn(dataset, util_ops.tile_context_tensors,
+ batch_size, input_reader_config)
dataset = dataset.unbatch()
# Decode frame here as SequenceExample tensors contain encoded images.
- dataset = dataset.map(util_ops.decode_image,
- tf.data.experimental.AUTOTUNE)
+ dataset = dataset_map_fn(dataset, util_ops.decode_image, batch_size,
+ input_reader_config)
return dataset
return reduce_to_frame
diff --git a/research/object_detection/inputs_test.py b/research/object_detection/inputs_test.py
index 78e268b25d1c2fd6eab22e4384b0d2172a5ff8a7..1fca6538f071d11605ef1f83db24d184d3e6ab8d 100644
--- a/research/object_detection/inputs_test.py
+++ b/research/object_detection/inputs_test.py
@@ -20,10 +20,11 @@ from __future__ import print_function
import functools
import os
+import unittest
from absl import logging
from absl.testing import parameterized
-
import numpy as np
+import six
import tensorflow.compat.v1 as tf
from object_detection import inputs
@@ -31,6 +32,13 @@ from object_detection.core import preprocessor
from object_detection.core import standard_fields as fields
from object_detection.utils import config_util
from object_detection.utils import test_case
+from object_detection.utils import test_utils
+from object_detection.utils import tf_version
+
+if six.PY2:
+ import mock # pylint: disable=g-import-not-at-top
+else:
+ from unittest import mock # pylint: disable=g-import-not-at-top, g-importing-member
FLAGS = tf.flags.FLAGS
@@ -86,7 +94,8 @@ def _make_initializable_iterator(dataset):
return iterator
-class InputsTest(test_case.TestCase, parameterized.TestCase):
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only tests under TF2.X.')
+class InputFnTest(test_case.TestCase, parameterized.TestCase):
def test_faster_rcnn_resnet50_train_input(self):
"""Tests the training input function for FasterRcnnResnet50."""
@@ -402,7 +411,7 @@ class InputsTest(test_case.TestCase, parameterized.TestCase):
def test_ssd_inceptionV2_eval_input_with_additional_channels(
self, eval_batch_size=1):
- """Tests the eval input function for SSDInceptionV2 with additional channels.
+ """Tests the eval input function for SSDInceptionV2 with additional channel.
Args:
eval_batch_size: Batch size for eval set.
@@ -638,24 +647,20 @@ class DataAugmentationFnTest(test_case.TestCase):
data_augmentation_fn = functools.partial(
inputs.augment_input_data,
data_augmentation_options=data_augmentation_options)
- tensor_dict = {
- fields.InputDataFields.image:
- tf.constant(np.random.rand(10, 10, 3).astype(np.float32)),
- fields.InputDataFields.groundtruth_boxes:
- tf.constant(np.array([[.5, .5, 1., 1.]], np.float32))
- }
- augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict)
- with self.test_session() as sess:
- augmented_tensor_dict_out = sess.run(augmented_tensor_dict)
-
- self.assertAllEqual(
- augmented_tensor_dict_out[fields.InputDataFields.image].shape,
- [20, 20, 3]
- )
- self.assertAllClose(
- augmented_tensor_dict_out[fields.InputDataFields.groundtruth_boxes],
- [[10, 10, 20, 20]]
- )
+ def graph_fn():
+ tensor_dict = {
+ fields.InputDataFields.image:
+ tf.constant(np.random.rand(10, 10, 3).astype(np.float32)),
+ fields.InputDataFields.groundtruth_boxes:
+ tf.constant(np.array([[.5, .5, 1., 1.]], np.float32))
+ }
+ augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict)
+ return (augmented_tensor_dict[fields.InputDataFields.image],
+ augmented_tensor_dict[fields.InputDataFields.
+ groundtruth_boxes])
+ image, groundtruth_boxes = self.execute_cpu(graph_fn, [])
+ self.assertAllEqual(image.shape, [20, 20, 3])
+ self.assertAllClose(groundtruth_boxes, [[10, 10, 20, 20]])
def test_apply_image_and_box_augmentation_with_scores(self):
data_augmentation_options = [
@@ -669,37 +674,28 @@ class DataAugmentationFnTest(test_case.TestCase):
data_augmentation_fn = functools.partial(
inputs.augment_input_data,
data_augmentation_options=data_augmentation_options)
- tensor_dict = {
- fields.InputDataFields.image:
- tf.constant(np.random.rand(10, 10, 3).astype(np.float32)),
- fields.InputDataFields.groundtruth_boxes:
- tf.constant(np.array([[.5, .5, 1., 1.]], np.float32)),
- fields.InputDataFields.groundtruth_classes:
- tf.constant(np.array([1.0], np.float32)),
- fields.InputDataFields.groundtruth_weights:
- tf.constant(np.array([0.8], np.float32)),
- }
- augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict)
- with self.test_session() as sess:
- augmented_tensor_dict_out = sess.run(augmented_tensor_dict)
-
- self.assertAllEqual(
- augmented_tensor_dict_out[fields.InputDataFields.image].shape,
- [20, 20, 3]
- )
- self.assertAllClose(
- augmented_tensor_dict_out[fields.InputDataFields.groundtruth_boxes],
- [[10, 10, 20, 20]]
- )
- self.assertAllClose(
- augmented_tensor_dict_out[fields.InputDataFields.groundtruth_classes],
- [1.0]
- )
- self.assertAllClose(
- augmented_tensor_dict_out[
- fields.InputDataFields.groundtruth_weights],
- [0.8]
- )
+ def graph_fn():
+ tensor_dict = {
+ fields.InputDataFields.image:
+ tf.constant(np.random.rand(10, 10, 3).astype(np.float32)),
+ fields.InputDataFields.groundtruth_boxes:
+ tf.constant(np.array([[.5, .5, 1., 1.]], np.float32)),
+ fields.InputDataFields.groundtruth_classes:
+ tf.constant(np.array([1.0], np.float32)),
+ fields.InputDataFields.groundtruth_weights:
+ tf.constant(np.array([0.8], np.float32)),
+ }
+ augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict)
+ return (augmented_tensor_dict[fields.InputDataFields.image],
+ augmented_tensor_dict[fields.InputDataFields.groundtruth_boxes],
+ augmented_tensor_dict[fields.InputDataFields.groundtruth_classes],
+ augmented_tensor_dict[fields.InputDataFields.groundtruth_weights])
+ (image, groundtruth_boxes,
+ groundtruth_classes, groundtruth_weights) = self.execute_cpu(graph_fn, [])
+ self.assertAllEqual(image.shape, [20, 20, 3])
+ self.assertAllClose(groundtruth_boxes, [[10, 10, 20, 20]])
+ self.assertAllClose(groundtruth_classes.shape, [1.0])
+ self.assertAllClose(groundtruth_weights, [0.8])
def test_include_masks_in_data_augmentation(self):
data_augmentation_options = [
@@ -712,21 +708,20 @@ class DataAugmentationFnTest(test_case.TestCase):
data_augmentation_fn = functools.partial(
inputs.augment_input_data,
data_augmentation_options=data_augmentation_options)
- tensor_dict = {
- fields.InputDataFields.image:
- tf.constant(np.random.rand(10, 10, 3).astype(np.float32)),
- fields.InputDataFields.groundtruth_instance_masks:
- tf.constant(np.zeros([2, 10, 10], np.uint8))
- }
- augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict)
- with self.test_session() as sess:
- augmented_tensor_dict_out = sess.run(augmented_tensor_dict)
-
- self.assertAllEqual(
- augmented_tensor_dict_out[fields.InputDataFields.image].shape,
- [20, 20, 3])
- self.assertAllEqual(augmented_tensor_dict_out[
- fields.InputDataFields.groundtruth_instance_masks].shape, [2, 20, 20])
+ def graph_fn():
+ tensor_dict = {
+ fields.InputDataFields.image:
+ tf.constant(np.random.rand(10, 10, 3).astype(np.float32)),
+ fields.InputDataFields.groundtruth_instance_masks:
+ tf.constant(np.zeros([2, 10, 10], np.uint8))
+ }
+ augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict)
+ return (augmented_tensor_dict[fields.InputDataFields.image],
+ augmented_tensor_dict[fields.InputDataFields.
+ groundtruth_instance_masks])
+ image, masks = self.execute_cpu(graph_fn, [])
+ self.assertAllEqual(image.shape, [20, 20, 3])
+ self.assertAllEqual(masks.shape, [2, 20, 20])
def test_include_keypoints_in_data_augmentation(self):
data_augmentation_options = [
@@ -740,30 +735,24 @@ class DataAugmentationFnTest(test_case.TestCase):
data_augmentation_fn = functools.partial(
inputs.augment_input_data,
data_augmentation_options=data_augmentation_options)
- tensor_dict = {
- fields.InputDataFields.image:
- tf.constant(np.random.rand(10, 10, 3).astype(np.float32)),
- fields.InputDataFields.groundtruth_boxes:
- tf.constant(np.array([[.5, .5, 1., 1.]], np.float32)),
- fields.InputDataFields.groundtruth_keypoints:
- tf.constant(np.array([[[0.5, 1.0], [0.5, 0.5]]], np.float32))
- }
- augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict)
- with self.test_session() as sess:
- augmented_tensor_dict_out = sess.run(augmented_tensor_dict)
-
- self.assertAllEqual(
- augmented_tensor_dict_out[fields.InputDataFields.image].shape,
- [20, 20, 3]
- )
- self.assertAllClose(
- augmented_tensor_dict_out[fields.InputDataFields.groundtruth_boxes],
- [[10, 10, 20, 20]]
- )
- self.assertAllClose(
- augmented_tensor_dict_out[fields.InputDataFields.groundtruth_keypoints],
- [[[10, 20], [10, 10]]]
- )
+ def graph_fn():
+ tensor_dict = {
+ fields.InputDataFields.image:
+ tf.constant(np.random.rand(10, 10, 3).astype(np.float32)),
+ fields.InputDataFields.groundtruth_boxes:
+ tf.constant(np.array([[.5, .5, 1., 1.]], np.float32)),
+ fields.InputDataFields.groundtruth_keypoints:
+ tf.constant(np.array([[[0.5, 1.0], [0.5, 0.5]]], np.float32))
+ }
+ augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict)
+ return (augmented_tensor_dict[fields.InputDataFields.image],
+ augmented_tensor_dict[fields.InputDataFields.groundtruth_boxes],
+ augmented_tensor_dict[fields.InputDataFields.
+ groundtruth_keypoints])
+ image, boxes, keypoints = self.execute_cpu(graph_fn, [])
+ self.assertAllEqual(image.shape, [20, 20, 3])
+ self.assertAllClose(boxes, [[10, 10, 20, 20]])
+ self.assertAllClose(keypoints, [[[10, 20], [10, 10]]])
def _fake_model_preprocessor_fn(image):
@@ -787,85 +776,82 @@ class DataTransformationFnTest(test_case.TestCase, parameterized.TestCase):
def test_combine_additional_channels_if_present(self):
image = np.random.rand(4, 4, 3).astype(np.float32)
additional_channels = np.random.rand(4, 4, 2).astype(np.float32)
- tensor_dict = {
- fields.InputDataFields.image:
- tf.constant(image),
- fields.InputDataFields.image_additional_channels:
- tf.constant(additional_channels),
- fields.InputDataFields.groundtruth_classes:
- tf.constant(np.array([1, 1], np.int32))
- }
+ def graph_fn(image, additional_channels):
+ tensor_dict = {
+ fields.InputDataFields.image: image,
+ fields.InputDataFields.image_additional_channels: additional_channels,
+ fields.InputDataFields.groundtruth_classes:
+ tf.constant([1, 1], tf.int32)
+ }
- input_transformation_fn = functools.partial(
- inputs.transform_input_data,
- model_preprocess_fn=_fake_model_preprocessor_fn,
- image_resizer_fn=_fake_image_resizer_fn,
- num_classes=1)
- with self.test_session() as sess:
- transformed_inputs = sess.run(
- input_transformation_fn(tensor_dict=tensor_dict))
- self.assertAllEqual(transformed_inputs[fields.InputDataFields.image].dtype,
- tf.float32)
- self.assertAllEqual(transformed_inputs[fields.InputDataFields.image].shape,
- [4, 4, 5])
- self.assertAllClose(transformed_inputs[fields.InputDataFields.image],
- np.concatenate((image, additional_channels), axis=2))
+ input_transformation_fn = functools.partial(
+ inputs.transform_input_data,
+ model_preprocess_fn=_fake_model_preprocessor_fn,
+ image_resizer_fn=_fake_image_resizer_fn,
+ num_classes=1)
+ out_tensors = input_transformation_fn(tensor_dict=tensor_dict)
+ return out_tensors[fields.InputDataFields.image]
+ out_image = self.execute_cpu(graph_fn, [image, additional_channels])
+ self.assertEqual(out_image.dtype, np.float32)
+ self.assertAllEqual(out_image.shape, [4, 4, 5])
+ self.assertAllClose(out_image, np.concatenate((image, additional_channels),
+ axis=2))
def test_use_multiclass_scores_when_present(self):
- image = np.random.rand(4, 4, 3).astype(np.float32)
- tensor_dict = {
- fields.InputDataFields.image:
- tf.constant(image),
- fields.InputDataFields.groundtruth_boxes:
- tf.constant(np.array([[.5, .5, 1, 1], [.5, .5, 1, 1]], np.float32)),
- fields.InputDataFields.multiclass_scores:
- tf.constant(np.array([0.2, 0.3, 0.5, 0.1, 0.6, 0.3], np.float32)),
- fields.InputDataFields.groundtruth_classes:
- tf.constant(np.array([1, 2], np.int32))
- }
-
- input_transformation_fn = functools.partial(
- inputs.transform_input_data,
- model_preprocess_fn=_fake_model_preprocessor_fn,
- image_resizer_fn=_fake_image_resizer_fn,
- num_classes=3, use_multiclass_scores=True)
- with self.test_session() as sess:
- transformed_inputs = sess.run(
- input_transformation_fn(tensor_dict=tensor_dict))
+ def graph_fn():
+ tensor_dict = {
+ fields.InputDataFields.image: tf.constant(np.random.rand(4, 4, 3).
+ astype(np.float32)),
+ fields.InputDataFields.groundtruth_boxes:
+ tf.constant(np.array([[.5, .5, 1, 1], [.5, .5, 1, 1]],
+ np.float32)),
+ fields.InputDataFields.multiclass_scores:
+ tf.constant(np.array([0.2, 0.3, 0.5, 0.1, 0.6, 0.3], np.float32)),
+ fields.InputDataFields.groundtruth_classes:
+ tf.constant(np.array([1, 2], np.int32))
+ }
+ input_transformation_fn = functools.partial(
+ inputs.transform_input_data,
+ model_preprocess_fn=_fake_model_preprocessor_fn,
+ image_resizer_fn=_fake_image_resizer_fn,
+ num_classes=3, use_multiclass_scores=True)
+ transformed_inputs = input_transformation_fn(tensor_dict=tensor_dict)
+ return transformed_inputs[fields.InputDataFields.groundtruth_classes]
+ groundtruth_classes = self.execute_cpu(graph_fn, [])
self.assertAllClose(
np.array([[0.2, 0.3, 0.5], [0.1, 0.6, 0.3]], np.float32),
- transformed_inputs[fields.InputDataFields.groundtruth_classes])
+ groundtruth_classes)
+ @unittest.skipIf(tf_version.is_tf2(), ('Skipping due to different behaviour '
+ 'in TF 2.X'))
def test_use_multiclass_scores_when_not_present(self):
- image = np.random.rand(4, 4, 3).astype(np.float32)
- tensor_dict = {
- fields.InputDataFields.image:
- tf.constant(image),
- fields.InputDataFields.groundtruth_boxes:
- tf.constant(np.array([[.5, .5, 1, 1], [.5, .5, 1, 1]], np.float32)),
- fields.InputDataFields.multiclass_scores:
- tf.placeholder(tf.float32),
- fields.InputDataFields.groundtruth_classes:
- tf.constant(np.array([1, 2], np.int32))
- }
+ def graph_fn():
+ zero_num_elements = tf.random.uniform([], minval=0, maxval=1,
+ dtype=tf.int32)
+ tensor_dict = {
+ fields.InputDataFields.image:
+ tf.constant(np.random.rand(4, 4, 3).astype(np.float32)),
+ fields.InputDataFields.groundtruth_boxes:
+ tf.constant(np.array([[.5, .5, 1, 1], [.5, .5, 1, 1]],
+ np.float32)),
+ fields.InputDataFields.multiclass_scores: tf.zeros(zero_num_elements),
+ fields.InputDataFields.groundtruth_classes:
+ tf.constant(np.array([1, 2], np.int32))
+ }
- input_transformation_fn = functools.partial(
- inputs.transform_input_data,
- model_preprocess_fn=_fake_model_preprocessor_fn,
- image_resizer_fn=_fake_image_resizer_fn,
- num_classes=3, use_multiclass_scores=True)
- with self.test_session() as sess:
- transformed_inputs = sess.run(
- input_transformation_fn(tensor_dict=tensor_dict),
- feed_dict={
- tensor_dict[fields.InputDataFields.multiclass_scores]:
- np.array([], dtype=np.float32)
- })
+ input_transformation_fn = functools.partial(
+ inputs.transform_input_data,
+ model_preprocess_fn=_fake_model_preprocessor_fn,
+ image_resizer_fn=_fake_image_resizer_fn,
+ num_classes=3, use_multiclass_scores=True)
+ transformed_inputs = input_transformation_fn(tensor_dict=tensor_dict)
+ return transformed_inputs[fields.InputDataFields.groundtruth_classes]
+ groundtruth_classes = self.execute_cpu(graph_fn, [])
self.assertAllClose(
np.array([[0, 1, 0], [0, 0, 1]], np.float32),
- transformed_inputs[fields.InputDataFields.groundtruth_classes])
+ groundtruth_classes)
@parameterized.parameters(
{'labeled_classes': [1, 2]},
@@ -916,385 +902,395 @@ class DataTransformationFnTest(test_case.TestCase, parameterized.TestCase):
transformed_inputs[fields.InputDataFields.groundtruth_labeled_classes])
def test_returns_correct_class_label_encodings(self):
- tensor_dict = {
- fields.InputDataFields.image:
- tf.constant(np.random.rand(4, 4, 3).astype(np.float32)),
- fields.InputDataFields.groundtruth_boxes:
- tf.constant(np.array([[0, 0, 1, 1], [.5, .5, 1, 1]], np.float32)),
- fields.InputDataFields.groundtruth_classes:
- tf.constant(np.array([3, 1], np.int32))
- }
- num_classes = 3
- input_transformation_fn = functools.partial(
- inputs.transform_input_data,
- model_preprocess_fn=_fake_model_preprocessor_fn,
- image_resizer_fn=_fake_image_resizer_fn,
- num_classes=num_classes)
- with self.test_session() as sess:
- transformed_inputs = sess.run(
- input_transformation_fn(tensor_dict=tensor_dict))
-
- self.assertAllClose(
- transformed_inputs[fields.InputDataFields.groundtruth_classes],
- [[0, 0, 1], [1, 0, 0]])
- self.assertAllClose(
- transformed_inputs[fields.InputDataFields.groundtruth_confidences],
- [[0, 0, 1], [1, 0, 0]])
+ def graph_fn():
+ tensor_dict = {
+ fields.InputDataFields.image:
+ tf.constant(np.random.rand(4, 4, 3).astype(np.float32)),
+ fields.InputDataFields.groundtruth_boxes:
+ tf.constant(np.array([[0, 0, 1, 1], [.5, .5, 1, 1]], np.float32)),
+ fields.InputDataFields.groundtruth_classes:
+ tf.constant(np.array([3, 1], np.int32))
+ }
+ num_classes = 3
+ input_transformation_fn = functools.partial(
+ inputs.transform_input_data,
+ model_preprocess_fn=_fake_model_preprocessor_fn,
+ image_resizer_fn=_fake_image_resizer_fn,
+ num_classes=num_classes)
+ transformed_inputs = input_transformation_fn(tensor_dict=tensor_dict)
+ return (transformed_inputs[fields.InputDataFields.groundtruth_classes],
+ transformed_inputs[fields.InputDataFields.
+ groundtruth_confidences])
+ (groundtruth_classes, groundtruth_confidences) = self.execute_cpu(graph_fn,
+ [])
+ self.assertAllClose(groundtruth_classes, [[0, 0, 1], [1, 0, 0]])
+ self.assertAllClose(groundtruth_confidences, [[0, 0, 1], [1, 0, 0]])
def test_returns_correct_labels_with_unrecognized_class(self):
- tensor_dict = {
- fields.InputDataFields.image:
- tf.constant(np.random.rand(4, 4, 3).astype(np.float32)),
- fields.InputDataFields.groundtruth_boxes:
- tf.constant(
- np.array([[0, 0, 1, 1], [.2, .2, 4, 4], [.5, .5, 1, 1]],
- np.float32)),
- fields.InputDataFields.groundtruth_area:
- tf.constant(np.array([.5, .4, .3])),
- fields.InputDataFields.groundtruth_classes:
- tf.constant(np.array([3, -1, 1], np.int32)),
- fields.InputDataFields.groundtruth_keypoints:
- tf.constant(
- np.array([[[.1, .1]], [[.2, .2]], [[.5, .5]]],
- np.float32)),
- fields.InputDataFields.groundtruth_keypoint_visibilities:
- tf.constant([[True, True], [False, False], [True, True]]),
- fields.InputDataFields.groundtruth_instance_masks:
- tf.constant(np.random.rand(3, 4, 4).astype(np.float32)),
- fields.InputDataFields.groundtruth_is_crowd:
- tf.constant([False, True, False]),
- fields.InputDataFields.groundtruth_difficult:
- tf.constant(np.array([0, 0, 1], np.int32))
- }
-
- num_classes = 3
- input_transformation_fn = functools.partial(
- inputs.transform_input_data,
- model_preprocess_fn=_fake_model_preprocessor_fn,
- image_resizer_fn=_fake_image_resizer_fn,
- num_classes=num_classes)
- with self.test_session() as sess:
- transformed_inputs = sess.run(
- input_transformation_fn(tensor_dict=tensor_dict))
+ def graph_fn():
+ tensor_dict = {
+ fields.InputDataFields.image:
+ tf.constant(np.random.rand(4, 4, 3).astype(np.float32)),
+ fields.InputDataFields.groundtruth_boxes:
+ tf.constant(
+ np.array([[0, 0, 1, 1], [.2, .2, 4, 4], [.5, .5, 1, 1]],
+ np.float32)),
+ fields.InputDataFields.groundtruth_area:
+ tf.constant(np.array([.5, .4, .3])),
+ fields.InputDataFields.groundtruth_classes:
+ tf.constant(np.array([3, -1, 1], np.int32)),
+ fields.InputDataFields.groundtruth_keypoints:
+ tf.constant(
+ np.array([[[.1, .1]], [[.2, .2]], [[.5, .5]]],
+ np.float32)),
+ fields.InputDataFields.groundtruth_keypoint_visibilities:
+ tf.constant([[True, True], [False, False], [True, True]]),
+ fields.InputDataFields.groundtruth_instance_masks:
+ tf.constant(np.random.rand(3, 4, 4).astype(np.float32)),
+ fields.InputDataFields.groundtruth_is_crowd:
+ tf.constant([False, True, False]),
+ fields.InputDataFields.groundtruth_difficult:
+ tf.constant(np.array([0, 0, 1], np.int32))
+ }
- self.assertAllClose(
- transformed_inputs[fields.InputDataFields.groundtruth_classes],
- [[0, 0, 1], [1, 0, 0]])
- self.assertAllEqual(
- transformed_inputs[fields.InputDataFields.num_groundtruth_boxes], 2)
- self.assertAllClose(
- transformed_inputs[fields.InputDataFields.groundtruth_area], [.5, .3])
- self.assertAllEqual(
- transformed_inputs[fields.InputDataFields.groundtruth_confidences],
- [[0, 0, 1], [1, 0, 0]])
- self.assertAllClose(
- transformed_inputs[fields.InputDataFields.groundtruth_boxes],
- [[0, 0, 1, 1], [.5, .5, 1, 1]])
- self.assertAllClose(
- transformed_inputs[fields.InputDataFields.groundtruth_keypoints],
- [[[.1, .1]], [[.5, .5]]])
- self.assertAllEqual(
- transformed_inputs[
- fields.InputDataFields.groundtruth_keypoint_visibilities],
- [[True, True], [True, True]])
- self.assertAllEqual(
- transformed_inputs[
- fields.InputDataFields.groundtruth_instance_masks].shape, [2, 4, 4])
- self.assertAllEqual(
- transformed_inputs[fields.InputDataFields.groundtruth_is_crowd],
- [False, False])
- self.assertAllEqual(
- transformed_inputs[fields.InputDataFields.groundtruth_difficult],
- [0, 1])
+ num_classes = 3
+ input_transformation_fn = functools.partial(
+ inputs.transform_input_data,
+ model_preprocess_fn=_fake_model_preprocessor_fn,
+ image_resizer_fn=_fake_image_resizer_fn,
+ num_classes=num_classes)
+ transformed_inputs = input_transformation_fn(tensor_dict)
+ return (transformed_inputs[fields.InputDataFields.groundtruth_classes],
+ transformed_inputs[fields.InputDataFields.num_groundtruth_boxes],
+ transformed_inputs[fields.InputDataFields.groundtruth_area],
+ transformed_inputs[fields.InputDataFields.
+ groundtruth_confidences],
+ transformed_inputs[fields.InputDataFields.groundtruth_boxes],
+ transformed_inputs[fields.InputDataFields.groundtruth_keypoints],
+ transformed_inputs[fields.InputDataFields.
+ groundtruth_keypoint_visibilities],
+ transformed_inputs[fields.InputDataFields.
+ groundtruth_instance_masks],
+ transformed_inputs[fields.InputDataFields.groundtruth_is_crowd],
+ transformed_inputs[fields.InputDataFields.groundtruth_difficult])
+ (groundtruth_classes, num_groundtruth_boxes, groundtruth_area,
+ groundtruth_confidences, groundtruth_boxes, groundtruth_keypoints,
+ groundtruth_keypoint_visibilities, groundtruth_instance_masks,
+ groundtruth_is_crowd, groundtruth_difficult) = self.execute_cpu(graph_fn,
+ [])
+
+ self.assertAllClose(groundtruth_classes, [[0, 0, 1], [1, 0, 0]])
+ self.assertAllEqual(num_groundtruth_boxes, 2)
+ self.assertAllClose(groundtruth_area, [.5, .3])
+ self.assertAllEqual(groundtruth_confidences, [[0, 0, 1], [1, 0, 0]])
+ self.assertAllClose(groundtruth_boxes, [[0, 0, 1, 1], [.5, .5, 1, 1]])
+ self.assertAllClose(groundtruth_keypoints, [[[.1, .1]], [[.5, .5]]])
+ self.assertAllEqual(groundtruth_keypoint_visibilities,
+ [[True, True], [True, True]])
+ self.assertAllEqual(groundtruth_instance_masks.shape, [2, 4, 4])
+ self.assertAllEqual(groundtruth_is_crowd, [False, False])
+ self.assertAllEqual(groundtruth_difficult, [0, 1])
def test_returns_correct_merged_boxes(self):
- tensor_dict = {
- fields.InputDataFields.image:
- tf.constant(np.random.rand(4, 4, 3).astype(np.float32)),
- fields.InputDataFields.groundtruth_boxes:
- tf.constant(np.array([[.5, .5, 1, 1], [.5, .5, 1, 1]], np.float32)),
- fields.InputDataFields.groundtruth_classes:
- tf.constant(np.array([3, 1], np.int32))
- }
-
- num_classes = 3
- input_transformation_fn = functools.partial(
- inputs.transform_input_data,
- model_preprocess_fn=_fake_model_preprocessor_fn,
- image_resizer_fn=_fake_image_resizer_fn,
- num_classes=num_classes,
- merge_multiple_boxes=True)
+ def graph_fn():
+ tensor_dict = {
+ fields.InputDataFields.image:
+ tf.constant(np.random.rand(4, 4, 3).astype(np.float32)),
+ fields.InputDataFields.groundtruth_boxes:
+ tf.constant(np.array([[.5, .5, 1, 1], [.5, .5, 1, 1]],
+ np.float32)),
+ fields.InputDataFields.groundtruth_classes:
+ tf.constant(np.array([3, 1], np.int32))
+ }
- with self.test_session() as sess:
- transformed_inputs = sess.run(
- input_transformation_fn(tensor_dict=tensor_dict))
+ num_classes = 3
+ input_transformation_fn = functools.partial(
+ inputs.transform_input_data,
+ model_preprocess_fn=_fake_model_preprocessor_fn,
+ image_resizer_fn=_fake_image_resizer_fn,
+ num_classes=num_classes,
+ merge_multiple_boxes=True)
+ transformed_inputs = input_transformation_fn(tensor_dict)
+ return (transformed_inputs[fields.InputDataFields.groundtruth_boxes],
+ transformed_inputs[fields.InputDataFields.groundtruth_classes],
+ transformed_inputs[fields.InputDataFields.
+ groundtruth_confidences],
+ transformed_inputs[fields.InputDataFields.num_groundtruth_boxes])
+ (groundtruth_boxes, groundtruth_classes, groundtruth_confidences,
+ num_groundtruth_boxes) = self.execute_cpu(graph_fn, [])
self.assertAllClose(
- transformed_inputs[fields.InputDataFields.groundtruth_boxes],
+ groundtruth_boxes,
[[.5, .5, 1., 1.]])
self.assertAllClose(
- transformed_inputs[fields.InputDataFields.groundtruth_classes],
+ groundtruth_classes,
[[1, 0, 1]])
self.assertAllClose(
- transformed_inputs[fields.InputDataFields.groundtruth_confidences],
+ groundtruth_confidences,
[[1, 0, 1]])
self.assertAllClose(
- transformed_inputs[fields.InputDataFields.num_groundtruth_boxes],
+ num_groundtruth_boxes,
1)
def test_returns_correct_groundtruth_confidences_when_input_present(self):
- tensor_dict = {
- fields.InputDataFields.image:
- tf.constant(np.random.rand(4, 4, 3).astype(np.float32)),
- fields.InputDataFields.groundtruth_boxes:
- tf.constant(np.array([[0, 0, 1, 1], [.5, .5, 1, 1]], np.float32)),
- fields.InputDataFields.groundtruth_classes:
- tf.constant(np.array([3, 1], np.int32)),
- fields.InputDataFields.groundtruth_confidences:
- tf.constant(np.array([1.0, -1.0], np.float32))
- }
- num_classes = 3
- input_transformation_fn = functools.partial(
- inputs.transform_input_data,
- model_preprocess_fn=_fake_model_preprocessor_fn,
- image_resizer_fn=_fake_image_resizer_fn,
- num_classes=num_classes)
- with self.test_session() as sess:
- transformed_inputs = sess.run(
- input_transformation_fn(tensor_dict=tensor_dict))
-
+ def graph_fn():
+ tensor_dict = {
+ fields.InputDataFields.image:
+ tf.constant(np.random.rand(4, 4, 3).astype(np.float32)),
+ fields.InputDataFields.groundtruth_boxes:
+ tf.constant(np.array([[0, 0, 1, 1], [.5, .5, 1, 1]], np.float32)),
+ fields.InputDataFields.groundtruth_classes:
+ tf.constant(np.array([3, 1], np.int32)),
+ fields.InputDataFields.groundtruth_confidences:
+ tf.constant(np.array([1.0, -1.0], np.float32))
+ }
+ num_classes = 3
+ input_transformation_fn = functools.partial(
+ inputs.transform_input_data,
+ model_preprocess_fn=_fake_model_preprocessor_fn,
+ image_resizer_fn=_fake_image_resizer_fn,
+ num_classes=num_classes)
+ transformed_inputs = input_transformation_fn(tensor_dict)
+ return (transformed_inputs[fields.InputDataFields.groundtruth_classes],
+ transformed_inputs[fields.InputDataFields.
+ groundtruth_confidences])
+ groundtruth_classes, groundtruth_confidences = self.execute_cpu(graph_fn,
+ [])
self.assertAllClose(
- transformed_inputs[fields.InputDataFields.groundtruth_classes],
+ groundtruth_classes,
[[0, 0, 1], [1, 0, 0]])
self.assertAllClose(
- transformed_inputs[fields.InputDataFields.groundtruth_confidences],
+ groundtruth_confidences,
[[0, 0, 1], [-1, 0, 0]])
def test_returns_resized_masks(self):
- tensor_dict = {
- fields.InputDataFields.image:
- tf.constant(np.random.rand(4, 4, 3).astype(np.float32)),
- fields.InputDataFields.groundtruth_instance_masks:
- tf.constant(np.random.rand(2, 4, 4).astype(np.float32)),
- fields.InputDataFields.groundtruth_classes:
- tf.constant(np.array([3, 1], np.int32)),
- fields.InputDataFields.original_image_spatial_shape:
- tf.constant(np.array([4, 4], np.int32))
- }
+ def graph_fn():
+ tensor_dict = {
+ fields.InputDataFields.image:
+ tf.constant(np.random.rand(4, 4, 3).astype(np.float32)),
+ fields.InputDataFields.groundtruth_instance_masks:
+ tf.constant(np.random.rand(2, 4, 4).astype(np.float32)),
+ fields.InputDataFields.groundtruth_classes:
+ tf.constant(np.array([3, 1], np.int32)),
+ fields.InputDataFields.original_image_spatial_shape:
+ tf.constant(np.array([4, 4], np.int32))
+ }
- def fake_image_resizer_fn(image, masks=None):
- resized_image = tf.image.resize_images(image, [8, 8])
- results = [resized_image]
- if masks is not None:
- resized_masks = tf.transpose(
- tf.image.resize_images(tf.transpose(masks, [1, 2, 0]), [8, 8]),
- [2, 0, 1])
- results.append(resized_masks)
- results.append(tf.shape(resized_image))
- return results
-
- num_classes = 3
- input_transformation_fn = functools.partial(
- inputs.transform_input_data,
- model_preprocess_fn=_fake_model_preprocessor_fn,
- image_resizer_fn=fake_image_resizer_fn,
- num_classes=num_classes,
- retain_original_image=True)
- with self.test_session() as sess:
- transformed_inputs = sess.run(
- input_transformation_fn(tensor_dict=tensor_dict))
- self.assertAllEqual(transformed_inputs[
- fields.InputDataFields.original_image].dtype, tf.uint8)
- self.assertAllEqual(transformed_inputs[
- fields.InputDataFields.original_image_spatial_shape], [4, 4])
- self.assertAllEqual(transformed_inputs[
- fields.InputDataFields.original_image].shape, [8, 8, 3])
- self.assertAllEqual(transformed_inputs[
- fields.InputDataFields.groundtruth_instance_masks].shape, [2, 8, 8])
+ def fake_image_resizer_fn(image, masks=None):
+ resized_image = tf.image.resize_images(image, [8, 8])
+ results = [resized_image]
+ if masks is not None:
+ resized_masks = tf.transpose(
+ tf.image.resize_images(tf.transpose(masks, [1, 2, 0]), [8, 8]),
+ [2, 0, 1])
+ results.append(resized_masks)
+ results.append(tf.shape(resized_image))
+ return results
+
+ num_classes = 3
+ input_transformation_fn = functools.partial(
+ inputs.transform_input_data,
+ model_preprocess_fn=_fake_model_preprocessor_fn,
+ image_resizer_fn=fake_image_resizer_fn,
+ num_classes=num_classes,
+ retain_original_image=True)
+ transformed_inputs = input_transformation_fn(tensor_dict)
+ return (transformed_inputs[fields.InputDataFields.original_image],
+ transformed_inputs[fields.InputDataFields.
+ original_image_spatial_shape],
+ transformed_inputs[fields.InputDataFields.
+ groundtruth_instance_masks])
+ (original_image, original_image_shape,
+ groundtruth_instance_masks) = self.execute_cpu(graph_fn, [])
+ self.assertEqual(original_image.dtype, np.uint8)
+ self.assertAllEqual(original_image_shape, [4, 4])
+ self.assertAllEqual(original_image.shape, [8, 8, 3])
+ self.assertAllEqual(groundtruth_instance_masks.shape, [2, 8, 8])
def test_applies_model_preprocess_fn_to_image_tensor(self):
np_image = np.random.randint(256, size=(4, 4, 3))
- tensor_dict = {
- fields.InputDataFields.image:
- tf.constant(np_image),
- fields.InputDataFields.groundtruth_classes:
- tf.constant(np.array([3, 1], np.int32))
- }
-
- def fake_model_preprocessor_fn(image):
- return (image / 255., tf.expand_dims(tf.shape(image)[1:], axis=0))
+ def graph_fn(image):
+ tensor_dict = {
+ fields.InputDataFields.image: image,
+ fields.InputDataFields.groundtruth_classes:
+ tf.constant(np.array([3, 1], np.int32))
+ }
- num_classes = 3
- input_transformation_fn = functools.partial(
- inputs.transform_input_data,
- model_preprocess_fn=fake_model_preprocessor_fn,
- image_resizer_fn=_fake_image_resizer_fn,
- num_classes=num_classes)
+ def fake_model_preprocessor_fn(image):
+ return (image / 255., tf.expand_dims(tf.shape(image)[1:], axis=0))
- with self.test_session() as sess:
- transformed_inputs = sess.run(
- input_transformation_fn(tensor_dict=tensor_dict))
- self.assertAllClose(transformed_inputs[fields.InputDataFields.image],
- np_image / 255.)
- self.assertAllClose(transformed_inputs[fields.InputDataFields.
- true_image_shape],
- [4, 4, 3])
+ num_classes = 3
+ input_transformation_fn = functools.partial(
+ inputs.transform_input_data,
+ model_preprocess_fn=fake_model_preprocessor_fn,
+ image_resizer_fn=_fake_image_resizer_fn,
+ num_classes=num_classes)
+ transformed_inputs = input_transformation_fn(tensor_dict)
+ return (transformed_inputs[fields.InputDataFields.image],
+ transformed_inputs[fields.InputDataFields.true_image_shape])
+ image, true_image_shape = self.execute_cpu(graph_fn, [np_image])
+ self.assertAllClose(image, np_image / 255.)
+ self.assertAllClose(true_image_shape, [4, 4, 3])
def test_applies_data_augmentation_fn_to_tensor_dict(self):
np_image = np.random.randint(256, size=(4, 4, 3))
- tensor_dict = {
- fields.InputDataFields.image:
- tf.constant(np_image),
- fields.InputDataFields.groundtruth_classes:
- tf.constant(np.array([3, 1], np.int32))
- }
-
- def add_one_data_augmentation_fn(tensor_dict):
- return {key: value + 1 for key, value in tensor_dict.items()}
+ def graph_fn(image):
+ tensor_dict = {
+ fields.InputDataFields.image: image,
+ fields.InputDataFields.groundtruth_classes:
+ tf.constant(np.array([3, 1], np.int32))
+ }
- num_classes = 4
- input_transformation_fn = functools.partial(
- inputs.transform_input_data,
- model_preprocess_fn=_fake_model_preprocessor_fn,
- image_resizer_fn=_fake_image_resizer_fn,
- num_classes=num_classes,
- data_augmentation_fn=add_one_data_augmentation_fn)
- with self.test_session() as sess:
- augmented_tensor_dict = sess.run(
- input_transformation_fn(tensor_dict=tensor_dict))
+ def add_one_data_augmentation_fn(tensor_dict):
+ return {key: value + 1 for key, value in tensor_dict.items()}
- self.assertAllEqual(augmented_tensor_dict[fields.InputDataFields.image],
- np_image + 1)
- self.assertAllEqual(
- augmented_tensor_dict[fields.InputDataFields.groundtruth_classes],
+ num_classes = 4
+ input_transformation_fn = functools.partial(
+ inputs.transform_input_data,
+ model_preprocess_fn=_fake_model_preprocessor_fn,
+ image_resizer_fn=_fake_image_resizer_fn,
+ num_classes=num_classes,
+ data_augmentation_fn=add_one_data_augmentation_fn)
+ transformed_inputs = input_transformation_fn(tensor_dict)
+ return (transformed_inputs[fields.InputDataFields.image],
+ transformed_inputs[fields.InputDataFields.groundtruth_classes])
+ image, groundtruth_classes = self.execute_cpu(graph_fn, [np_image])
+ self.assertAllEqual(image, np_image + 1)
+ self.assertAllEqual(
+ groundtruth_classes,
[[0, 0, 0, 1], [0, 1, 0, 0]])
def test_applies_data_augmentation_fn_before_model_preprocess_fn(self):
np_image = np.random.randint(256, size=(4, 4, 3))
- tensor_dict = {
- fields.InputDataFields.image:
- tf.constant(np_image),
- fields.InputDataFields.groundtruth_classes:
- tf.constant(np.array([3, 1], np.int32))
- }
-
- def mul_two_model_preprocessor_fn(image):
- return (image * 2, tf.expand_dims(tf.shape(image)[1:], axis=0))
+ def graph_fn(image):
+ tensor_dict = {
+ fields.InputDataFields.image: image,
+ fields.InputDataFields.groundtruth_classes:
+ tf.constant(np.array([3, 1], np.int32))
+ }
- def add_five_to_image_data_augmentation_fn(tensor_dict):
- tensor_dict[fields.InputDataFields.image] += 5
- return tensor_dict
+ def mul_two_model_preprocessor_fn(image):
+ return (image * 2, tf.expand_dims(tf.shape(image)[1:], axis=0))
- num_classes = 4
- input_transformation_fn = functools.partial(
- inputs.transform_input_data,
- model_preprocess_fn=mul_two_model_preprocessor_fn,
- image_resizer_fn=_fake_image_resizer_fn,
- num_classes=num_classes,
- data_augmentation_fn=add_five_to_image_data_augmentation_fn)
- with self.test_session() as sess:
- augmented_tensor_dict = sess.run(
- input_transformation_fn(tensor_dict=tensor_dict))
+ def add_five_to_image_data_augmentation_fn(tensor_dict):
+ tensor_dict[fields.InputDataFields.image] += 5
+ return tensor_dict
- self.assertAllEqual(augmented_tensor_dict[fields.InputDataFields.image],
- (np_image + 5) * 2)
+ num_classes = 4
+ input_transformation_fn = functools.partial(
+ inputs.transform_input_data,
+ model_preprocess_fn=mul_two_model_preprocessor_fn,
+ image_resizer_fn=_fake_image_resizer_fn,
+ num_classes=num_classes,
+ data_augmentation_fn=add_five_to_image_data_augmentation_fn)
+ transformed_inputs = input_transformation_fn(tensor_dict)
+ return transformed_inputs[fields.InputDataFields.image]
+ image = self.execute_cpu(graph_fn, [np_image])
+ self.assertAllEqual(image, (np_image + 5) * 2)
def test_resize_with_padding(self):
+ def graph_fn():
+ tensor_dict = {
+ fields.InputDataFields.image:
+ tf.constant(np.random.rand(100, 50, 3).astype(np.float32)),
+ fields.InputDataFields.groundtruth_boxes:
+ tf.constant(np.array([[.5, .5, 1, 1], [.0, .0, .5, .5]],
+ np.float32)),
+ fields.InputDataFields.groundtruth_classes:
+ tf.constant(np.array([1, 2], np.int32)),
+ fields.InputDataFields.groundtruth_keypoints:
+ tf.constant([[[0.1, 0.2]], [[0.3, 0.4]]]),
+ }
- tensor_dict = {
- fields.InputDataFields.image:
- tf.constant(np.random.rand(100, 50, 3).astype(np.float32)),
- fields.InputDataFields.groundtruth_boxes:
- tf.constant(np.array([[.5, .5, 1, 1], [.0, .0, .5, .5]],
- np.float32)),
- fields.InputDataFields.groundtruth_classes:
- tf.constant(np.array([1, 2], np.int32)),
- fields.InputDataFields.groundtruth_keypoints:
- tf.constant([[[0.1, 0.2]], [[0.3, 0.4]]]),
- }
-
- num_classes = 3
- input_transformation_fn = functools.partial(
- inputs.transform_input_data,
- model_preprocess_fn=_fake_resize50_preprocess_fn,
- image_resizer_fn=_fake_image_resizer_fn,
- num_classes=num_classes,)
-
- with self.test_session() as sess:
- transformed_inputs = sess.run(
- input_transformation_fn(tensor_dict=tensor_dict))
+ num_classes = 3
+ input_transformation_fn = functools.partial(
+ inputs.transform_input_data,
+ model_preprocess_fn=_fake_resize50_preprocess_fn,
+ image_resizer_fn=_fake_image_resizer_fn,
+ num_classes=num_classes,)
+ transformed_inputs = input_transformation_fn(tensor_dict)
+ return (transformed_inputs[fields.InputDataFields.groundtruth_boxes],
+ transformed_inputs[fields.InputDataFields.groundtruth_keypoints])
+ groundtruth_boxes, groundtruth_keypoints = self.execute_cpu(graph_fn, [])
self.assertAllClose(
- transformed_inputs[fields.InputDataFields.groundtruth_boxes],
+ groundtruth_boxes,
[[.5, .25, 1., .5], [.0, .0, .5, .25]])
self.assertAllClose(
- transformed_inputs[fields.InputDataFields.groundtruth_keypoints],
+ groundtruth_keypoints,
[[[.1, .1]], [[.3, .2]]])
def test_groundtruth_keypoint_weights(self):
- tensor_dict = {
- fields.InputDataFields.image:
- tf.constant(np.random.rand(100, 50, 3).astype(np.float32)),
- fields.InputDataFields.groundtruth_boxes:
- tf.constant(np.array([[.5, .5, 1, 1], [.0, .0, .5, .5]],
- np.float32)),
- fields.InputDataFields.groundtruth_classes:
- tf.constant(np.array([1, 2], np.int32)),
- fields.InputDataFields.groundtruth_keypoints:
- tf.constant([[[0.1, 0.2], [0.3, 0.4]],
- [[0.5, 0.6], [0.7, 0.8]]]),
- fields.InputDataFields.groundtruth_keypoint_visibilities:
- tf.constant([[True, False], [True, True]]),
- }
-
- num_classes = 3
- keypoint_type_weight = [1.0, 2.0]
- input_transformation_fn = functools.partial(
- inputs.transform_input_data,
- model_preprocess_fn=_fake_resize50_preprocess_fn,
- image_resizer_fn=_fake_image_resizer_fn,
- num_classes=num_classes,
- keypoint_type_weight=keypoint_type_weight)
+ def graph_fn():
+ tensor_dict = {
+ fields.InputDataFields.image:
+ tf.constant(np.random.rand(100, 50, 3).astype(np.float32)),
+ fields.InputDataFields.groundtruth_boxes:
+ tf.constant(np.array([[.5, .5, 1, 1], [.0, .0, .5, .5]],
+ np.float32)),
+ fields.InputDataFields.groundtruth_classes:
+ tf.constant(np.array([1, 2], np.int32)),
+ fields.InputDataFields.groundtruth_keypoints:
+ tf.constant([[[0.1, 0.2], [0.3, 0.4]],
+ [[0.5, 0.6], [0.7, 0.8]]]),
+ fields.InputDataFields.groundtruth_keypoint_visibilities:
+ tf.constant([[True, False], [True, True]]),
+ }
- with self.test_session() as sess:
- transformed_inputs = sess.run(
- input_transformation_fn(tensor_dict=tensor_dict))
+ num_classes = 3
+ keypoint_type_weight = [1.0, 2.0]
+ input_transformation_fn = functools.partial(
+ inputs.transform_input_data,
+ model_preprocess_fn=_fake_resize50_preprocess_fn,
+ image_resizer_fn=_fake_image_resizer_fn,
+ num_classes=num_classes,
+ keypoint_type_weight=keypoint_type_weight)
+ transformed_inputs = input_transformation_fn(tensor_dict=tensor_dict)
+ return (transformed_inputs[fields.InputDataFields.groundtruth_keypoints],
+ transformed_inputs[fields.InputDataFields.
+ groundtruth_keypoint_weights])
+
+ groundtruth_keypoints, groundtruth_keypoint_weights = self.execute_cpu(
+ graph_fn, [])
self.assertAllClose(
- transformed_inputs[fields.InputDataFields.groundtruth_keypoints],
+ groundtruth_keypoints,
[[[0.1, 0.1], [0.3, 0.2]],
[[0.5, 0.3], [0.7, 0.4]]])
self.assertAllClose(
- transformed_inputs[fields.InputDataFields.groundtruth_keypoint_weights],
+ groundtruth_keypoint_weights,
[[1.0, 0.0], [1.0, 2.0]])
def test_groundtruth_keypoint_weights_default(self):
- tensor_dict = {
- fields.InputDataFields.image:
- tf.constant(np.random.rand(100, 50, 3).astype(np.float32)),
- fields.InputDataFields.groundtruth_boxes:
- tf.constant(np.array([[.5, .5, 1, 1], [.0, .0, .5, .5]],
- np.float32)),
- fields.InputDataFields.groundtruth_classes:
- tf.constant(np.array([1, 2], np.int32)),
- fields.InputDataFields.groundtruth_keypoints:
- tf.constant([[[0.1, 0.2], [0.3, 0.4]],
- [[0.5, 0.6], [0.7, 0.8]]]),
- }
-
- num_classes = 3
- input_transformation_fn = functools.partial(
- inputs.transform_input_data,
- model_preprocess_fn=_fake_resize50_preprocess_fn,
- image_resizer_fn=_fake_image_resizer_fn,
- num_classes=num_classes)
+ def graph_fn():
+ tensor_dict = {
+ fields.InputDataFields.image:
+ tf.constant(np.random.rand(100, 50, 3).astype(np.float32)),
+ fields.InputDataFields.groundtruth_boxes:
+ tf.constant(np.array([[.5, .5, 1, 1], [.0, .0, .5, .5]],
+ np.float32)),
+ fields.InputDataFields.groundtruth_classes:
+ tf.constant(np.array([1, 2], np.int32)),
+ fields.InputDataFields.groundtruth_keypoints:
+ tf.constant([[[0.1, 0.2], [0.3, 0.4]],
+ [[0.5, 0.6], [0.7, 0.8]]]),
+ }
- with self.test_session() as sess:
- transformed_inputs = sess.run(
- input_transformation_fn(tensor_dict=tensor_dict))
+ num_classes = 3
+ input_transformation_fn = functools.partial(
+ inputs.transform_input_data,
+ model_preprocess_fn=_fake_resize50_preprocess_fn,
+ image_resizer_fn=_fake_image_resizer_fn,
+ num_classes=num_classes)
+ transformed_inputs = input_transformation_fn(tensor_dict=tensor_dict)
+ return (transformed_inputs[fields.InputDataFields.groundtruth_keypoints],
+ transformed_inputs[fields.InputDataFields.
+ groundtruth_keypoint_weights])
+ groundtruth_keypoints, groundtruth_keypoint_weights = self.execute_cpu(
+ graph_fn, [])
self.assertAllClose(
- transformed_inputs[fields.InputDataFields.groundtruth_keypoints],
+ groundtruth_keypoints,
[[[0.1, 0.1], [0.3, 0.2]],
[[0.5, 0.3], [0.7, 0.4]]])
self.assertAllClose(
- transformed_inputs[fields.InputDataFields.groundtruth_keypoint_weights],
+ groundtruth_keypoint_weights,
[[1.0, 1.0], [1.0, 1.0]])
@@ -1303,15 +1299,15 @@ class PadInputDataToStaticShapesFnTest(test_case.TestCase):
def test_pad_images_boxes_and_classes(self):
input_tensor_dict = {
fields.InputDataFields.image:
- tf.placeholder(tf.float32, [None, None, 3]),
+ tf.random.uniform([3, 3, 3]),
fields.InputDataFields.groundtruth_boxes:
- tf.placeholder(tf.float32, [None, 4]),
+ tf.random.uniform([2, 4]),
fields.InputDataFields.groundtruth_classes:
- tf.placeholder(tf.int32, [None, 3]),
+ tf.random.uniform([2, 3], minval=0, maxval=2, dtype=tf.int32),
fields.InputDataFields.true_image_shape:
- tf.placeholder(tf.int32, [3]),
+ tf.constant([3, 3, 3]),
fields.InputDataFields.original_image_spatial_shape:
- tf.placeholder(tf.int32, [2])
+ tf.constant([3, 3])
}
padded_tensor_dict = inputs.pad_input_data_to_static_shapes(
tensor_dict=input_tensor_dict,
@@ -1336,69 +1332,35 @@ class PadInputDataToStaticShapesFnTest(test_case.TestCase):
.shape.as_list(), [3, 3])
def test_clip_boxes_and_classes(self):
- input_tensor_dict = {
- fields.InputDataFields.groundtruth_boxes:
- tf.placeholder(tf.float32, [None, 4]),
- fields.InputDataFields.groundtruth_classes:
- tf.placeholder(tf.int32, [None, 3]),
- fields.InputDataFields.num_groundtruth_boxes:
- tf.placeholder(tf.int32, [])
- }
- padded_tensor_dict = inputs.pad_input_data_to_static_shapes(
- tensor_dict=input_tensor_dict,
- max_num_boxes=3,
- num_classes=3,
- spatial_image_shape=[5, 6])
-
- self.assertAllEqual(
- padded_tensor_dict[fields.InputDataFields.groundtruth_boxes]
- .shape.as_list(), [3, 4])
- self.assertAllEqual(
- padded_tensor_dict[fields.InputDataFields.groundtruth_classes]
- .shape.as_list(), [3, 3])
-
- with self.test_session() as sess:
- out_tensor_dict = sess.run(
- padded_tensor_dict,
- feed_dict={
- input_tensor_dict[fields.InputDataFields.groundtruth_boxes]:
- np.random.rand(5, 4),
- input_tensor_dict[fields.InputDataFields.groundtruth_classes]:
- np.random.rand(2, 3),
- input_tensor_dict[fields.InputDataFields.num_groundtruth_boxes]:
- 5,
- })
-
- self.assertAllEqual(
- out_tensor_dict[fields.InputDataFields.groundtruth_boxes].shape, [3, 4])
- self.assertAllEqual(
- out_tensor_dict[fields.InputDataFields.groundtruth_classes].shape,
- [3, 3])
- self.assertEqual(
- out_tensor_dict[fields.InputDataFields.num_groundtruth_boxes],
- 3)
-
- def test_do_not_pad_dynamic_images(self):
- input_tensor_dict = {
- fields.InputDataFields.image:
- tf.placeholder(tf.float32, [None, None, 3]),
- }
- padded_tensor_dict = inputs.pad_input_data_to_static_shapes(
- tensor_dict=input_tensor_dict,
- max_num_boxes=3,
- num_classes=3,
- spatial_image_shape=[None, None])
-
- self.assertAllEqual(
- padded_tensor_dict[fields.InputDataFields.image].shape.as_list(),
- [None, None, 3])
+ def graph_fn():
+ input_tensor_dict = {
+ fields.InputDataFields.groundtruth_boxes:
+ tf.random.uniform([5, 4]),
+ fields.InputDataFields.groundtruth_classes:
+ tf.random.uniform([2, 3], maxval=10, dtype=tf.int32),
+ fields.InputDataFields.num_groundtruth_boxes:
+ tf.constant(5)
+ }
+ padded_tensor_dict = inputs.pad_input_data_to_static_shapes(
+ tensor_dict=input_tensor_dict,
+ max_num_boxes=3,
+ num_classes=3,
+ spatial_image_shape=[5, 6])
+ return (padded_tensor_dict[fields.InputDataFields.groundtruth_boxes],
+ padded_tensor_dict[fields.InputDataFields.groundtruth_classes],
+ padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes])
+ (groundtruth_boxes, groundtruth_classes,
+ num_groundtruth_boxes) = self.execute_cpu(graph_fn, [])
+ self.assertAllEqual(groundtruth_boxes.shape, [3, 4])
+ self.assertAllEqual(groundtruth_classes.shape, [3, 3])
+ self.assertEqual(num_groundtruth_boxes, 3)
def test_images_and_additional_channels(self):
input_tensor_dict = {
fields.InputDataFields.image:
- tf.placeholder(tf.float32, [None, None, 5]),
+ test_utils.image_with_dynamic_shape(4, 3, 5),
fields.InputDataFields.image_additional_channels:
- tf.placeholder(tf.float32, [None, None, 2]),
+ test_utils.image_with_dynamic_shape(4, 3, 2),
}
padded_tensor_dict = inputs.pad_input_data_to_static_shapes(
tensor_dict=input_tensor_dict,
@@ -1418,11 +1380,11 @@ class PadInputDataToStaticShapesFnTest(test_case.TestCase):
def test_images_and_additional_channels_errors(self):
input_tensor_dict = {
fields.InputDataFields.image:
- tf.placeholder(tf.float32, [None, None, 3]),
+ test_utils.image_with_dynamic_shape(10, 10, 3),
fields.InputDataFields.image_additional_channels:
- tf.placeholder(tf.float32, [None, None, 2]),
+ test_utils.image_with_dynamic_shape(10, 10, 2),
fields.InputDataFields.original_image:
- tf.placeholder(tf.float32, [None, None, 3]),
+ test_utils.image_with_dynamic_shape(10, 10, 3),
}
with self.assertRaises(ValueError):
_ = inputs.pad_input_data_to_static_shapes(
@@ -1434,7 +1396,7 @@ class PadInputDataToStaticShapesFnTest(test_case.TestCase):
def test_gray_images(self):
input_tensor_dict = {
fields.InputDataFields.image:
- tf.placeholder(tf.float32, [None, None, 1]),
+ test_utils.image_with_dynamic_shape(4, 4, 1),
}
padded_tensor_dict = inputs.pad_input_data_to_static_shapes(
tensor_dict=input_tensor_dict,
@@ -1449,9 +1411,9 @@ class PadInputDataToStaticShapesFnTest(test_case.TestCase):
def test_gray_images_and_additional_channels(self):
input_tensor_dict = {
fields.InputDataFields.image:
- tf.placeholder(tf.float32, [None, None, 3]),
+ test_utils.image_with_dynamic_shape(4, 4, 3),
fields.InputDataFields.image_additional_channels:
- tf.placeholder(tf.float32, [None, None, 2]),
+ test_utils.image_with_dynamic_shape(4, 4, 2),
}
# pad_input_data_to_static_shape assumes that image is already concatenated
# with additional channels.
@@ -1469,11 +1431,14 @@ class PadInputDataToStaticShapesFnTest(test_case.TestCase):
.shape.as_list(), [5, 6, 2])
def test_keypoints(self):
+ keypoints = test_utils.keypoints_with_dynamic_shape(10, 16, 4)
+ visibilities = tf.cast(tf.random.uniform(tf.shape(keypoints)[:-1], minval=0,
+ maxval=2, dtype=tf.int32), tf.bool)
input_tensor_dict = {
fields.InputDataFields.groundtruth_keypoints:
- tf.placeholder(tf.float32, [None, 16, 4]),
+ test_utils.keypoints_with_dynamic_shape(10, 16, 4),
fields.InputDataFields.groundtruth_keypoint_visibilities:
- tf.placeholder(tf.bool, [None, 16]),
+ visibilities
}
padded_tensor_dict = inputs.pad_input_data_to_static_shapes(
tensor_dict=input_tensor_dict,
@@ -1493,39 +1458,76 @@ class PadInputDataToStaticShapesFnTest(test_case.TestCase):
context_memory_size = 8
context_feature_length = 10
max_num_context_features = 20
- input_tensor_dict = {
- fields.InputDataFields.context_features:
- tf.placeholder(tf.float32,
- [context_memory_size, context_feature_length]),
- fields.InputDataFields.context_feature_length:
- tf.placeholder(tf.float32, [])
- }
- padded_tensor_dict = inputs.pad_input_data_to_static_shapes(
- tensor_dict=input_tensor_dict,
- max_num_boxes=3,
- num_classes=3,
- spatial_image_shape=[5, 6],
- max_num_context_features=max_num_context_features,
- context_feature_length=context_feature_length)
+ def graph_fn():
+ input_tensor_dict = {
+ fields.InputDataFields.context_features:
+ tf.ones([context_memory_size, context_feature_length]),
+ fields.InputDataFields.context_feature_length:
+ tf.constant(context_feature_length)
+ }
+ padded_tensor_dict = inputs.pad_input_data_to_static_shapes(
+ tensor_dict=input_tensor_dict,
+ max_num_boxes=3,
+ num_classes=3,
+ spatial_image_shape=[5, 6],
+ max_num_context_features=max_num_context_features,
+ context_feature_length=context_feature_length)
- self.assertAllEqual(
- padded_tensor_dict[
- fields.InputDataFields.context_features].shape.as_list(),
- [max_num_context_features, context_feature_length])
+ self.assertAllEqual(
+ padded_tensor_dict[
+ fields.InputDataFields.context_features].shape.as_list(),
+ [max_num_context_features, context_feature_length])
+ return padded_tensor_dict[fields.InputDataFields.valid_context_size]
- with self.test_session() as sess:
- feed_dict = {
- input_tensor_dict[fields.InputDataFields.context_features]:
- np.ones([context_memory_size, context_feature_length],
- dtype=np.float32),
- input_tensor_dict[fields.InputDataFields.context_feature_length]:
- context_feature_length
+ valid_context_size = self.execute_cpu(graph_fn, [])
+ self.assertEqual(valid_context_size, context_memory_size)
+
+
+class NegativeSizeTest(test_case.TestCase):
+ """Test for inputs and related functions."""
+
+ def test_negative_size_error(self):
+ """Test that error is raised for negative size boxes."""
+
+ def graph_fn():
+ tensors = {
+ fields.InputDataFields.image: tf.zeros((128, 128, 3)),
+ fields.InputDataFields.groundtruth_classes:
+ tf.constant([1, 1], tf.int32),
+ fields.InputDataFields.groundtruth_boxes:
+ tf.constant([[0.5, 0.5, 0.4, 0.5]], tf.float32)
}
- padded_tensor_dict_out = sess.run(padded_tensor_dict, feed_dict=feed_dict)
+ tensors = inputs.transform_input_data(
+ tensors, _fake_model_preprocessor_fn, _fake_image_resizer_fn,
+ num_classes=10)
+ return tensors[fields.InputDataFields.groundtruth_boxes]
+ with self.assertRaises(tf.errors.InvalidArgumentError):
+ self.execute_cpu(graph_fn, [])
+
+ def test_negative_size_no_assert(self):
+ """Test that negative size boxes are filtered out without assert.
+
+ This test simulates the behaviour when we run on TPU and Assert ops are
+ not supported.
+ """
- self.assertEqual(
- padded_tensor_dict_out[fields.InputDataFields.valid_context_size],
- context_memory_size)
+ tensors = {
+ fields.InputDataFields.image: tf.zeros((128, 128, 3)),
+ fields.InputDataFields.groundtruth_classes:
+ tf.constant([1, 1], tf.int32),
+ fields.InputDataFields.groundtruth_boxes:
+ tf.constant([[0.5, 0.5, 0.4, 0.5], [0.5, 0.5, 0.6, 0.6]],
+ tf.float32)
+ }
+
+ with mock.patch.object(tf, 'Assert') as tf_assert:
+ tf_assert.return_value = tf.no_op()
+ tensors = inputs.transform_input_data(
+ tensors, _fake_model_preprocessor_fn, _fake_image_resizer_fn,
+ num_classes=10)
+
+ self.assertAllClose(tensors[fields.InputDataFields.groundtruth_boxes],
+ [[0.5, 0.5, 0.6, 0.6]])
if __name__ == '__main__':
diff --git a/research/object_detection/legacy/trainer_test.py b/research/object_detection/legacy/trainer_tf1_test.py
similarity index 97%
rename from research/object_detection/legacy/trainer_test.py
rename to research/object_detection/legacy/trainer_tf1_test.py
index 3a5d073048933e98278f423f348c77b3cc2860ae..0cde654e6a8bba2cfedea939e67d44698f882e04 100644
--- a/research/object_detection/legacy/trainer_test.py
+++ b/research/object_detection/legacy/trainer_tf1_test.py
@@ -14,7 +14,7 @@
# ==============================================================================
"""Tests for object_detection.trainer."""
-
+import unittest
import tensorflow.compat.v1 as tf
import tf_slim as slim
from google.protobuf import text_format
@@ -24,6 +24,7 @@ from object_detection.core import model
from object_detection.core import standard_fields as fields
from object_detection.legacy import trainer
from object_detection.protos import train_pb2
+from object_detection.utils import tf_version
NUMBER_OF_CLASSES = 2
@@ -184,6 +185,9 @@ class FakeDetectionModel(model.DetectionModel):
"""
return {var.op.name: var for var in tf.global_variables()}
+ def restore_from_objects(self, fine_tune_checkpoint_type):
+ pass
+
def updates(self):
"""Returns a list of update operators for this model.
@@ -197,6 +201,7 @@ class FakeDetectionModel(model.DetectionModel):
pass
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class TrainerTest(tf.test.TestCase):
def test_configure_trainer_and_train_two_steps(self):
diff --git a/research/object_detection/matchers/bipartite_matcher_test.py b/research/object_detection/matchers/bipartite_matcher_tf1_test.py
similarity index 94%
rename from research/object_detection/matchers/bipartite_matcher_test.py
rename to research/object_detection/matchers/bipartite_matcher_tf1_test.py
index 1617cbbc3876f5aa1da90918557d68ecaa25360a..314546ad4ee507d3024746044d4d4a30bc92e85d 100644
--- a/research/object_detection/matchers/bipartite_matcher_test.py
+++ b/research/object_detection/matchers/bipartite_matcher_tf1_test.py
@@ -14,14 +14,18 @@
# ==============================================================================
"""Tests for object_detection.core.bipartite_matcher."""
-
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
-from object_detection.matchers import bipartite_matcher
from object_detection.utils import test_case
+from object_detection.utils import tf_version
+
+if tf_version.is_tf1():
+ from object_detection.matchers import bipartite_matcher # pylint: disable=g-import-not-at-top
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class GreedyBipartiteMatcherTest(test_case.TestCase):
def test_get_expected_matches_when_all_rows_are_valid(self):
diff --git a/research/object_detection/meta_architectures/center_net_meta_arch.py b/research/object_detection/meta_architectures/center_net_meta_arch.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d4f9a2ba9737c5dd86fe22226954ddfbb16c959
--- /dev/null
+++ b/research/object_detection/meta_architectures/center_net_meta_arch.py
@@ -0,0 +1,2379 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""The CenterNet meta architecture as described in the "Objects as Points" paper [1].
+
+[1]: https://arxiv.org/abs/1904.07850
+
+"""
+
+import abc
+import collections
+import functools
+import numpy as np
+import tensorflow.compat.v1 as tf
+import tensorflow.compat.v2 as tf2
+
+from object_detection.core import box_list
+from object_detection.core import box_list_ops
+from object_detection.core import keypoint_ops
+from object_detection.core import model
+from object_detection.core import standard_fields as fields
+from object_detection.core import target_assigner as cn_assigner
+from object_detection.utils import shape_utils
+
+# Number of channels needed to predict size and offsets.
+NUM_OFFSET_CHANNELS = 2
+NUM_SIZE_CHANNELS = 2
+
+# Error range for detecting peaks.
+PEAK_EPSILON = 1e-6
+
+# Constants shared between all keypoint tasks.
+UNMATCHED_KEYPOINT_SCORE = 0.1
+KEYPOINT_CANDIDATE_SEARCH_SCALE = 0.3
+
+
+class CenterNetFeatureExtractor(tf.keras.Model):
+ """Base class for feature extractors for the CenterNet meta architecture.
+
+ Child classes are expected to override the _output_model property which will
+ return 1 or more tensors predicted by the feature extractor.
+
+ """
+ __metaclass__ = abc.ABCMeta
+
+ def __init__(self, name=None, channel_means=(0., 0., 0.),
+ channel_stds=(1., 1., 1.), bgr_ordering=False):
+ """Initializes a CenterNet feature extractor.
+
+ Args:
+ name: str, the name used for the underlying keras model.
+ channel_means: A tuple of floats, denoting the mean of each channel
+ which will be subtracted from it. If None or empty, we use 0s.
+ channel_stds: A tuple of floats, denoting the standard deviation of each
+ channel. Each channel will be divided by its standard deviation value.
+ If None or empty, we use 1s.
+ bgr_ordering: bool, if set will change the channel ordering to be in the
+ [blue, green, red] order.
+ """
+ super(CenterNetFeatureExtractor, self).__init__(name=name)
+
+ if channel_means is None or len(channel_means) == 0: # pylint:disable=g-explicit-length-test
+ channel_means = [0., 0., 0.]
+
+ if channel_stds is None or len(channel_stds) == 0: # pylint:disable=g-explicit-length-test
+ channel_stds = [1., 1., 1.]
+
+ self._channel_means = channel_means
+ self._channel_stds = channel_stds
+ self._bgr_ordering = bgr_ordering
+
+ def preprocess(self, inputs):
+ """Converts a batch of unscaled images to a scale suitable for the model.
+
+ This method normalizes the image using the given `channel_means` and
+ `channel_stds` values at initialization time while optionally flipping
+ the channel order if `bgr_ordering` is set.
+
+ Args:
+ inputs: a [batch, height, width, channels] float32 tensor
+
+ Returns:
+ outputs: a [batch, height, width, channels] float32 tensor
+
+ """
+
+ if self._bgr_ordering:
+ red, green, blue = tf.unstack(inputs, axis=3)
+ inputs = tf.stack([blue, green, red], axis=3)
+
+ channel_means = tf.reshape(tf.constant(self._channel_means),
+ [1, 1, 1, -1])
+ channel_stds = tf.reshape(tf.constant(self._channel_stds),
+ [1, 1, 1, -1])
+
+ return (inputs - channel_means)/channel_stds
+
+ @property
+ @abc.abstractmethod
+ def out_stride(self):
+ """The stride in the output image of the network."""
+ pass
+
+ @property
+ @abc.abstractmethod
+ def num_feature_outputs(self):
+ """The number of feature outputs returned by the feature extractor."""
+ pass
+
+
+def make_prediction_net(num_out_channels, kernel_size=3, num_filters=256,
+ bias_fill=None):
+ """Creates a network to predict the given number of output channels.
+
+ This function is intended to make the prediction heads for the CenterNet
+ meta architecture.
+
+ Args:
+ num_out_channels: Number of output channels.
+ kernel_size: The size of the conv kernel in the intermediate layer.
+ num_filters: The number of filters in the intermediate conv layer.
+ bias_fill: If not None, is used to initialize the bias in the final conv
+ layer.
+
+ Returns:
+ net: A keras module which when called on an input tensor of size
+ [batch_size, height, width, num_in_channels] returns an output
+ of size [batch_size, height, width, num_out_channels]
+ """
+
+ out_conv = tf.keras.layers.Conv2D(num_out_channels, kernel_size=1)
+
+ if bias_fill is not None:
+ out_conv.bias_initializer = tf.keras.initializers.constant(bias_fill)
+
+ net = tf.keras.Sequential(
+ [tf.keras.layers.Conv2D(num_filters, kernel_size=kernel_size,
+ padding='same'),
+ tf.keras.layers.ReLU(),
+ out_conv]
+ )
+
+ return net
+
+
+def _to_float32(x):
+ return tf.cast(x, tf.float32)
+
+
+def _get_shape(tensor, num_dims):
+ tf.Assert(tensor.get_shape().ndims == num_dims, [tensor])
+ return shape_utils.combined_static_and_dynamic_shape(tensor)
+
+
+def _flatten_spatial_dimensions(batch_images):
+ batch_size, height, width, channels = _get_shape(batch_images, 4)
+ return tf.reshape(batch_images, [batch_size, height * width,
+ channels])
+
+
+def top_k_feature_map_locations(feature_map, max_pool_kernel_size=3, k=100,
+ per_channel=False):
+ """Returns the top k scores and their locations in a feature map.
+
+ Given a feature map, the top k values (based on activation) are returned. If
+ `per_channel` is True, the top k values **per channel** are returned.
+
+ The `max_pool_kernel_size` argument allows for selecting local peaks in a
+ region. This filtering is done per channel, so nothing prevents two values at
+ the same location from being returned.
+
+ Args:
+ feature_map: [batch, height, width, channels] float32 feature map.
+ max_pool_kernel_size: integer, the max pool kernel size to use to pull off
+ peak score locations in a neighborhood (independently for each channel).
+ For example, to make sure no two neighboring values (in the same channel)
+ are returned, set max_pool_kernel_size=3. If None or 1, will not apply max
+ pooling.
+ k: The number of highest scoring locations to return.
+ per_channel: If True, will return the top k scores and locations per
+ feature map channel. If False, the top k across the entire feature map
+ (height x width x channels) are returned.
+
+ Returns:
+ Tuple of
+ scores: A [batch, N] float32 tensor with scores from the feature map in
+ descending order. If per_channel is False, N = k. Otherwise,
+ N = k * channels, and the first k elements correspond to channel 0, the
+ second k correspond to channel 1, etc.
+ y_indices: A [batch, N] int tensor with y indices of the top k feature map
+ locations. If per_channel is False, N = k. Otherwise,
+ N = k * channels.
+ x_indices: A [batch, N] int tensor with x indices of the top k feature map
+ locations. If per_channel is False, N = k. Otherwise,
+ N = k * channels.
+ channel_indices: A [batch, N] int tensor with channel indices of the top k
+ feature map locations. If per_channel is False, N = k. Otherwise,
+ N = k * channels.
+ """
+ if not max_pool_kernel_size or max_pool_kernel_size == 1:
+ feature_map_peaks = feature_map
+ else:
+ feature_map_max_pool = tf.nn.max_pool(
+ feature_map, ksize=max_pool_kernel_size, strides=1, padding='SAME')
+
+ feature_map_peak_mask = tf.math.abs(
+ feature_map - feature_map_max_pool) < PEAK_EPSILON
+
+ # Zero out everything that is not a peak.
+ feature_map_peaks = (
+ feature_map * _to_float32(feature_map_peak_mask))
+
+ batch_size, _, width, num_channels = _get_shape(feature_map, 4)
+
+ if per_channel:
+ # Perform top k over batch and channels.
+ feature_map_peaks_transposed = tf.transpose(feature_map_peaks,
+ perm=[0, 3, 1, 2])
+ feature_map_peaks_transposed = tf.reshape(
+ feature_map_peaks_transposed, [batch_size, num_channels, -1])
+ scores, peak_flat_indices = tf.math.top_k(feature_map_peaks_transposed, k=k)
+ # Convert the indices such that they represent the location in the full
+ # (flattened) feature map of size [batch, height * width * channels].
+ channel_idx = tf.range(num_channels)[tf.newaxis, :, tf.newaxis]
+ peak_flat_indices = num_channels * peak_flat_indices + channel_idx
+ scores = tf.reshape(scores, [batch_size, -1])
+ peak_flat_indices = tf.reshape(peak_flat_indices, [batch_size, -1])
+ else:
+ feature_map_peaks_flat = tf.reshape(feature_map_peaks, [batch_size, -1])
+ scores, peak_flat_indices = tf.math.top_k(feature_map_peaks_flat, k=k)
+
+ # Get x, y and channel indices corresponding to the top indices in the flat
+ # array.
+ y_indices, x_indices, channel_indices = (
+ row_col_channel_indices_from_flattened_indices(
+ peak_flat_indices, width, num_channels))
+ return scores, y_indices, x_indices, channel_indices
+
+
+def prediction_tensors_to_boxes(detection_scores, y_indices, x_indices,
+ channel_indices, height_width_predictions,
+ offset_predictions):
+ """Converts CenterNet class-center, offset and size predictions to boxes.
+
+ Args:
+ detection_scores: A [batch, num_boxes] float32 tensor with detection
+ scores in range [0, 1].
+ y_indices: A [batch, num_boxes] int32 tensor with y indices corresponding to
+ object center locations (expressed in output coordinate frame).
+ x_indices: A [batch, num_boxes] int32 tensor with x indices corresponding to
+ object center locations (expressed in output coordinate frame).
+ channel_indices: A [batch, num_boxes] int32 tensor with channel indices
+ corresponding to object classes.
+ height_width_predictions: A float tensor of shape [batch_size, height,
+ width, 2] representing the height and width of a box centered at each
+ pixel.
+ offset_predictions: A float tensor of shape [batch_size, height, width, 2]
+ representing the y and x offsets of a box centered at each pixel. This
+ helps reduce the error from downsampling.
+
+ Returns:
+ detection_boxes: A tensor of shape [batch_size, num_boxes, 4] holding the
+ raw bounding box coordinates of boxes.
+ detection_classes: An integer tensor of shape [batch_size, num_boxes]
+ indicating the predicted class for each box.
+ detection_scores: A float tensor of shape [batch_size, num_boxes] indicating
+ the score for each box.
+ num_detections: An integer tensor of shape [batch_size,] indicating the
+ number of boxes detected for each sample in the batch.
+
+ """
+ _, _, width, _ = _get_shape(height_width_predictions, 4)
+
+ peak_spatial_indices = flattened_indices_from_row_col_indices(
+ y_indices, x_indices, width)
+ y_indices = _to_float32(y_indices)
+ x_indices = _to_float32(x_indices)
+
+ height_width_flat = _flatten_spatial_dimensions(height_width_predictions)
+ offsets_flat = _flatten_spatial_dimensions(offset_predictions)
+
+ height_width = tf.gather(height_width_flat, peak_spatial_indices,
+ batch_dims=1)
+ offsets = tf.gather(offsets_flat, peak_spatial_indices, batch_dims=1)
+
+ heights, widths = tf.unstack(height_width, axis=2)
+ y_offsets, x_offsets = tf.unstack(offsets, axis=2)
+
+ detection_classes = channel_indices
+
+ num_detections = tf.reduce_sum(tf.to_int32(detection_scores > 0), axis=1)
+
+ boxes = tf.stack([y_indices + y_offsets - heights / 2.0,
+ x_indices + x_offsets - widths / 2.0,
+ y_indices + y_offsets + heights / 2.0,
+ x_indices + x_offsets + widths / 2.0], axis=2)
+
+ return boxes, detection_classes, detection_scores, num_detections
+
+
+def prediction_tensors_to_keypoint_candidates(
+ keypoint_heatmap_predictions,
+ keypoint_heatmap_offsets,
+ keypoint_score_threshold=0.1,
+ max_pool_kernel_size=1,
+ max_candidates=20):
+ """Convert keypoint heatmap predictions and offsets to keypoint candidates.
+
+ Args:
+ keypoint_heatmap_predictions: A float tensor of shape [batch_size, height,
+ width, num_keypoints] representing the per-keypoint heatmaps.
+ keypoint_heatmap_offsets: A float tensor of shape [batch_size, height,
+ width, 2] (or [batch_size, height, width, 2 * num_keypoints] if
+ 'per_keypoint_offset' is set True) representing the per-keypoint offsets.
+ keypoint_score_threshold: float, the threshold for considering a keypoint
+ a candidate.
+ max_pool_kernel_size: integer, the max pool kernel size to use to pull off
+ peak score locations in a neighborhood. For example, to make sure no two
+ neighboring values for the same keypoint are returned, set
+ max_pool_kernel_size=3. If None or 1, will not apply any local filtering.
+ max_candidates: integer, maximum number of keypoint candidates per
+ keypoint type.
+
+ Returns:
+ keypoint_candidates: A tensor of shape
+ [batch_size, max_candidates, num_keypoints, 2] holding the
+ location of keypoint candidates in [y, x] format (expressed in absolute
+ coordinates in the output coordinate frame).
+ keypoint_scores: A float tensor of shape
+ [batch_size, max_candidates, num_keypoints] with the scores for each
+ keypoint candidate. The scores come directly from the heatmap predictions.
+ num_keypoint_candidates: An integer tensor of shape
+ [batch_size, num_keypoints] with the number of candidates for each
+ keypoint type, as it's possible to filter some candidates due to the score
+ threshold.
+ """
+ batch_size, _, width, num_keypoints = _get_shape(
+ keypoint_heatmap_predictions, 4)
+ # Get x, y and channel indices corresponding to the top indices in the
+ # keypoint heatmap predictions.
+ # Note that the top k candidates are produced for **each keypoint type**.
+ # Might be worth eventually trying top k in the feature map, independent of
+ # the keypoint type.
+ keypoint_scores, y_indices, x_indices, channel_indices = (
+ top_k_feature_map_locations(keypoint_heatmap_predictions,
+ max_pool_kernel_size=max_pool_kernel_size,
+ k=max_candidates,
+ per_channel=True))
+
+ peak_spatial_indices = flattened_indices_from_row_col_indices(
+ y_indices, x_indices, width)
+ y_indices = _to_float32(y_indices)
+ x_indices = _to_float32(x_indices)
+
+ offsets_flat = _flatten_spatial_dimensions(keypoint_heatmap_offsets)
+
+ selected_offsets = tf.gather(offsets_flat, peak_spatial_indices, batch_dims=1)
+ _, num_indices, num_channels = _get_shape(selected_offsets, 3)
+ if num_channels > 2:
+ reshaped_offsets = tf.reshape(selected_offsets,
+ [batch_size, num_indices, -1, 2])
+ offsets = tf.gather(reshaped_offsets, channel_indices, batch_dims=2)
+ else:
+ offsets = selected_offsets
+ y_offsets, x_offsets = tf.unstack(offsets, axis=2)
+
+ keypoint_candidates = tf.stack([y_indices + y_offsets,
+ x_indices + x_offsets], axis=2)
+ keypoint_candidates = tf.reshape(
+ keypoint_candidates,
+ [batch_size, num_keypoints, max_candidates, 2])
+ keypoint_candidates = tf.transpose(keypoint_candidates, [0, 2, 1, 3])
+ keypoint_scores = tf.reshape(
+ keypoint_scores,
+ [batch_size, num_keypoints, max_candidates])
+ keypoint_scores = tf.transpose(keypoint_scores, [0, 2, 1])
+ num_candidates = tf.reduce_sum(
+ tf.to_int32(keypoint_scores >= keypoint_score_threshold), axis=1)
+
+ return keypoint_candidates, keypoint_scores, num_candidates
+
+
+def regressed_keypoints_at_object_centers(regressed_keypoint_predictions,
+ y_indices, x_indices):
+ """Returns the regressed keypoints at specified object centers.
+
+ The original keypoint predictions are regressed relative to each feature map
+ location. The returned keypoints are expressed in absolute coordinates in the
+ output frame (i.e. the center offsets are added to each individual regressed
+ set of keypoints).
+
+ Args:
+ regressed_keypoint_predictions: A float tensor of shape
+ [batch_size, height, width, 2 * num_keypoints] holding regressed
+ keypoints. The last dimension has keypoint coordinates ordered as follows:
+ [y0, x0, y1, x1, ..., y{J-1}, x{J-1}] where J is the number of keypoints.
+ y_indices: A [batch, num_instances] int tensor holding y indices for object
+ centers. These indices correspond to locations in the output feature map.
+ x_indices: A [batch, num_instances] int tensor holding x indices for object
+ centers. These indices correspond to locations in the output feature map.
+
+ Returns:
+ A float tensor of shape [batch_size, num_objects, 2 * num_keypoints] where
+ regressed keypoints are gathered at the provided locations, and converted
+ to absolute coordinates in the output coordinate frame.
+ """
+ batch_size, _, width, _ = _get_shape(regressed_keypoint_predictions, 4)
+ flattened_indices = flattened_indices_from_row_col_indices(
+ y_indices, x_indices, width)
+ _, num_instances = _get_shape(flattened_indices, 2)
+
+ regressed_keypoints_flat = _flatten_spatial_dimensions(
+ regressed_keypoint_predictions)
+
+ relative_regressed_keypoints = tf.gather(
+ regressed_keypoints_flat, flattened_indices, batch_dims=1)
+ relative_regressed_keypoints = tf.reshape(
+ relative_regressed_keypoints,
+ [batch_size, num_instances, -1, 2])
+ relative_regressed_keypoints_y, relative_regressed_keypoints_x = tf.unstack(
+ relative_regressed_keypoints, axis=3)
+ y_indices = _to_float32(tf.expand_dims(y_indices, axis=-1))
+ x_indices = _to_float32(tf.expand_dims(x_indices, axis=-1))
+ absolute_regressed_keypoints = tf.stack(
+ [y_indices + relative_regressed_keypoints_y,
+ x_indices + relative_regressed_keypoints_x],
+ axis=3)
+ return tf.reshape(absolute_regressed_keypoints,
+ [batch_size, num_instances, -1])
+
+
+def refine_keypoints(regressed_keypoints, keypoint_candidates, keypoint_scores,
+ num_keypoint_candidates, bboxes=None,
+ unmatched_keypoint_score=0.1, box_scale=1.2,
+ candidate_search_scale=0.3,
+ candidate_ranking_mode='min_distance'):
+ """Refines regressed keypoints by snapping to the nearest candidate keypoints.
+
+ The initial regressed keypoints represent a full set of keypoints regressed
+ from the centers of the objects. The keypoint candidates are estimated
+ independently from heatmaps, and are not associated with any object instances.
+ This function refines the regressed keypoints by "snapping" to the
+ nearest/highest score/highest score-distance ratio (depending on the
+ candidate_ranking_mode) candidate of the same keypoint type (e.g. "nose").
+ If no candidates are nearby, the regressed keypoint remains unchanged.
+
+ In order to snap a regressed keypoint to a candidate keypoint, the following
+ must be satisfied:
+ - the candidate keypoint must be of the same type as the regressed keypoint
+ - the candidate keypoint must not lie outside the predicted boxes (or the
+ boxes which enclose the regressed keypoints for the instance if `bboxes` is
+ not provided). Note that the box is scaled by
+ `box_scale` in height and width, to provide some margin around the
+ keypoints
+ - the distance to the closest candidate keypoint cannot exceed
+ candidate_search_scale * max(height, width), where height and width refer to
+ the bounding box for the instance.
+
+ Note that the same candidate keypoint is allowed to snap to regressed
+ keypoints in different instances.
+
+ Args:
+ regressed_keypoints: A float tensor of shape
+ [batch_size, num_instances, num_keypoints, 2] with the initial regressed
+ keypoints.
+ keypoint_candidates: A tensor of shape
+ [batch_size, max_candidates, num_keypoints, 2] holding the location of
+ keypoint candidates in [y, x] format (expressed in absolute coordinates in
+ the output coordinate frame).
+ keypoint_scores: A float tensor of shape
+ [batch_size, max_candidates, num_keypoints] indicating the scores for
+ keypoint candidates.
+ num_keypoint_candidates: An integer tensor of shape
+ [batch_size, num_keypoints] indicating the number of valid candidates for
+ each keypoint type, as there may be padding (dim 1) of
+ `keypoint_candidates` and `keypoint_scores`.
+ bboxes: A tensor of shape [batch_size, num_instances, 4] with predicted
+ bounding boxes for each instance, expressed in the output coordinate
+ frame. If not provided, boxes will be computed from regressed keypoints.
+ unmatched_keypoint_score: float, the default score to use for regressed
+ keypoints that are not successfully snapped to a nearby candidate.
+ box_scale: float, the multiplier to expand the bounding boxes (either the
+ provided boxes or those which tightly cover the regressed keypoints) for
+ an instance. This scale is typically larger than 1.0 when not providing
+ `bboxes`.
+ candidate_search_scale: float, the scale parameter that multiplies the
+ largest dimension of a bounding box. The resulting distance becomes a
+ search radius for candidates in the vicinity of each regressed keypoint.
+ candidate_ranking_mode: A string as one of ['min_distance',
+ 'score_distance_ratio'] indicating how to select the candidate. If invalid
+ value is provided, a ValueError will be raised.
+
+ Returns:
+ A tuple with:
+ refined_keypoints: A float tensor of shape
+ [batch_size, num_instances, num_keypoints, 2] with the final, refined
+ keypoints.
+ refined_scores: A float tensor of shape
+ [batch_size, num_instances, num_keypoints] with scores associated with all
+ instances and keypoints in `refined_keypoints`.
+
+ Raises:
+ ValueError: if provided candidate_ranking_mode is not one of
+ ['min_distance', 'score_distance_ratio']
+ """
+ batch_size, num_instances, num_keypoints, _ = (
+ shape_utils.combined_static_and_dynamic_shape(regressed_keypoints))
+ max_candidates = keypoint_candidates.shape[1]
+
+ # Replace all invalid (i.e. padded) keypoint candidates with NaN.
+ # This will prevent them from being considered.
+ range_tiled = tf.tile(
+ tf.reshape(tf.range(max_candidates), [1, max_candidates, 1]),
+ [batch_size, 1, num_keypoints])
+ num_candidates_tiled = tf.tile(tf.expand_dims(num_keypoint_candidates, 1),
+ [1, max_candidates, 1])
+ invalid_candidates = range_tiled >= num_candidates_tiled
+ nan_mask = tf.where(
+ invalid_candidates,
+ np.nan * tf.ones_like(invalid_candidates, dtype=tf.float32),
+ tf.ones_like(invalid_candidates, dtype=tf.float32))
+ keypoint_candidates_with_nans = tf.math.multiply(
+ keypoint_candidates, tf.expand_dims(nan_mask, -1))
+
+ # Pairwise squared distances between regressed keypoints and candidate
+ # keypoints (for a single keypoint type).
+ # Shape [batch_size, num_instances, max_candidates, num_keypoints].
+ regressed_keypoint_expanded = tf.expand_dims(regressed_keypoints,
+ axis=2)
+ keypoint_candidates_expanded = tf.expand_dims(
+ keypoint_candidates_with_nans, axis=1)
+ sqrd_distances = tf.math.reduce_sum(
+ tf.math.squared_difference(regressed_keypoint_expanded,
+ keypoint_candidates_expanded),
+ axis=-1)
+ distances = tf.math.sqrt(sqrd_distances)
+
+ # Determine the candidates that have the minimum distance to the regressed
+ # keypoints. Shape [batch_size, num_instances, num_keypoints].
+ min_distances = tf.math.reduce_min(distances, axis=2)
+ if candidate_ranking_mode == 'min_distance':
+ nearby_candidate_inds = tf.math.argmin(distances, axis=2)
+ elif candidate_ranking_mode == 'score_distance_ratio':
+ # tiled_keypoint_scores:
+ # Shape [batch_size, num_instances, max_candidates, num_keypoints].
+ tiled_keypoint_scores = tf.tile(
+ tf.expand_dims(keypoint_scores, axis=1),
+ multiples=[1, num_instances, 1, 1])
+ ranking_scores = tiled_keypoint_scores / (distances + 1e-6)
+ nearby_candidate_inds = tf.math.argmax(ranking_scores, axis=2)
+ else:
+ raise ValueError('Not recognized candidate_ranking_mode: %s' %
+ candidate_ranking_mode)
+
+ # Gather the coordinates and scores corresponding to the closest candidates.
+ # Shape of tensors are [batch_size, num_instances, num_keypoints, 2] and
+ # [batch_size, num_instances, num_keypoints], respectively.
+ nearby_candidate_coords, nearby_candidate_scores = (
+ _gather_candidates_at_indices(keypoint_candidates, keypoint_scores,
+ nearby_candidate_inds))
+
+ if bboxes is None:
+ # Create bboxes from regressed keypoints.
+ # Shape [batch_size * num_instances, 4].
+ regressed_keypoints_flattened = tf.reshape(
+ regressed_keypoints, [-1, num_keypoints, 2])
+ bboxes_flattened = keypoint_ops.keypoints_to_enclosing_bounding_boxes(
+ regressed_keypoints_flattened)
+ else:
+ bboxes_flattened = tf.reshape(bboxes, [-1, 4])
+
+ # Scale the bounding boxes.
+ # Shape [batch_size, num_instances, 4].
+ boxlist = box_list.BoxList(bboxes_flattened)
+ boxlist_scaled = box_list_ops.scale_height_width(
+ boxlist, box_scale, box_scale)
+ bboxes_scaled = boxlist_scaled.get()
+ bboxes = tf.reshape(bboxes_scaled, [batch_size, num_instances, 4])
+
+ # Get ymin, xmin, ymax, xmax bounding box coordinates, tiled per keypoint.
+ # Shape [batch_size, num_instances, num_keypoints].
+ bboxes_tiled = tf.tile(tf.expand_dims(bboxes, 2), [1, 1, num_keypoints, 1])
+ ymin, xmin, ymax, xmax = tf.unstack(bboxes_tiled, axis=3)
+
+ # Produce a mask that indicates whether the original regressed keypoint
+ # should be used instead of a candidate keypoint.
+ # Shape [batch_size, num_instances, num_keypoints].
+ search_radius = (
+ tf.math.maximum(ymax - ymin, xmax - xmin) * candidate_search_scale)
+ mask = (tf.cast(nearby_candidate_coords[:, :, :, 0] < ymin, tf.int32) +
+ tf.cast(nearby_candidate_coords[:, :, :, 0] > ymax, tf.int32) +
+ tf.cast(nearby_candidate_coords[:, :, :, 1] < xmin, tf.int32) +
+ tf.cast(nearby_candidate_coords[:, :, :, 1] > xmax, tf.int32) +
+ # Filter out the chosen candidate with score lower than unmatched
+ # keypoint score.
+ tf.cast(nearby_candidate_scores <
+ unmatched_keypoint_score, tf.int32) +
+ tf.cast(min_distances > search_radius, tf.int32))
+ mask = mask > 0
+
+ # Create refined keypoints where candidate keypoints replace original
+ # regressed keypoints if they are in the vicinity of the regressed keypoints.
+ # Shape [batch_size, num_instances, num_keypoints, 2].
+ refined_keypoints = tf.where(
+ tf.tile(tf.expand_dims(mask, -1), [1, 1, 1, 2]),
+ regressed_keypoints,
+ nearby_candidate_coords)
+
+ # Update keypoints scores. In the case where we use the original regressed
+ # keypoints, we use a default score of `unmatched_keypoint_score`.
+ # Shape [batch_size, num_instances, num_keypoints].
+ refined_scores = tf.where(
+ mask,
+ unmatched_keypoint_score * tf.ones_like(nearby_candidate_scores),
+ nearby_candidate_scores)
+
+ return refined_keypoints, refined_scores
+
+
+def _pad_to_full_keypoint_dim(keypoint_coords, keypoint_scores, keypoint_inds,
+ num_total_keypoints):
+ """Scatter keypoint elements into tensors with full keypoints dimension.
+
+ Args:
+ keypoint_coords: a [batch_size, num_instances, num_keypoints, 2] float32
+ tensor.
+ keypoint_scores: a [batch_size, num_instances, num_keypoints] float32
+ tensor.
+ keypoint_inds: a list of integers that indicate the keypoint indices for
+ this specific keypoint class. These indices are used to scatter into
+ tensors that have a `num_total_keypoints` dimension.
+ num_total_keypoints: The total number of keypoints that this model predicts.
+
+ Returns:
+ A tuple with
+ keypoint_coords_padded: a
+ [batch_size, num_instances, num_total_keypoints,2] float32 tensor.
+ keypoint_scores_padded: a [batch_size, num_instances, num_total_keypoints]
+ float32 tensor.
+ """
+ batch_size, num_instances, _, _ = (
+ shape_utils.combined_static_and_dynamic_shape(keypoint_coords))
+ kpt_coords_transposed = tf.transpose(keypoint_coords, [2, 0, 1, 3])
+ kpt_scores_transposed = tf.transpose(keypoint_scores, [2, 0, 1])
+ kpt_inds_tensor = tf.expand_dims(keypoint_inds, axis=-1)
+ kpt_coords_scattered = tf.scatter_nd(
+ indices=kpt_inds_tensor,
+ updates=kpt_coords_transposed,
+ shape=[num_total_keypoints, batch_size, num_instances, 2])
+ kpt_scores_scattered = tf.scatter_nd(
+ indices=kpt_inds_tensor,
+ updates=kpt_scores_transposed,
+ shape=[num_total_keypoints, batch_size, num_instances])
+ keypoint_coords_padded = tf.transpose(kpt_coords_scattered, [1, 2, 0, 3])
+ keypoint_scores_padded = tf.transpose(kpt_scores_scattered, [1, 2, 0])
+ return keypoint_coords_padded, keypoint_scores_padded
+
+
+def _pad_to_full_instance_dim(keypoint_coords, keypoint_scores, instance_inds,
+ max_instances):
+ """Scatter keypoint elements into tensors with full instance dimension.
+
+ Args:
+ keypoint_coords: a [batch_size, num_instances, num_keypoints, 2] float32
+ tensor.
+ keypoint_scores: a [batch_size, num_instances, num_keypoints] float32
+ tensor.
+ instance_inds: a list of integers that indicate the instance indices for
+ these keypoints. These indices are used to scatter into tensors
+ that have a `max_instances` dimension.
+ max_instances: The maximum number of instances detected by the model.
+
+ Returns:
+ A tuple with
+ keypoint_coords_padded: a [batch_size, max_instances, num_keypoints, 2]
+ float32 tensor.
+ keypoint_scores_padded: a [batch_size, max_instances, num_keypoints]
+ float32 tensor.
+ """
+ batch_size, _, num_keypoints, _ = (
+ shape_utils.combined_static_and_dynamic_shape(keypoint_coords))
+ kpt_coords_transposed = tf.transpose(keypoint_coords, [1, 0, 2, 3])
+ kpt_scores_transposed = tf.transpose(keypoint_scores, [1, 0, 2])
+ instance_inds = tf.expand_dims(instance_inds, axis=-1)
+ kpt_coords_scattered = tf.scatter_nd(
+ indices=instance_inds,
+ updates=kpt_coords_transposed,
+ shape=[max_instances, batch_size, num_keypoints, 2])
+ kpt_scores_scattered = tf.scatter_nd(
+ indices=instance_inds,
+ updates=kpt_scores_transposed,
+ shape=[max_instances, batch_size, num_keypoints])
+ keypoint_coords_padded = tf.transpose(kpt_coords_scattered, [1, 0, 2, 3])
+ keypoint_scores_padded = tf.transpose(kpt_scores_scattered, [1, 0, 2])
+ return keypoint_coords_padded, keypoint_scores_padded
+
+
+def _gather_candidates_at_indices(keypoint_candidates, keypoint_scores,
+ indices):
+ """Gathers keypoint candidate coordinates and scores at indices.
+
+ Args:
+ keypoint_candidates: a float tensor of shape [batch_size, max_candidates,
+ num_keypoints, 2] with candidate coordinates.
+ keypoint_scores: a float tensor of shape [batch_size, max_candidates,
+ num_keypoints] with keypoint scores.
+ indices: an integer tensor of shape [batch_size, num_indices, num_keypoints]
+ with indices.
+
+ Returns:
+ A tuple with
+ gathered_keypoint_candidates: a float tensor of shape [batch_size,
+ num_indices, num_keypoints, 2] with gathered coordinates.
+ gathered_keypoint_scores: a float tensor of shape [batch_size,
+ num_indices, num_keypoints] with gathered scores.
+ """
+ # Transpose tensors so that all batch dimensions are up front.
+ keypoint_candidates_transposed = tf.transpose(keypoint_candidates,
+ [0, 2, 1, 3])
+ keypoint_scores_transposed = tf.transpose(keypoint_scores, [0, 2, 1])
+ nearby_candidate_inds_transposed = tf.transpose(indices,
+ [0, 2, 1])
+ nearby_candidate_coords_tranposed = tf.gather(
+ keypoint_candidates_transposed, nearby_candidate_inds_transposed,
+ batch_dims=2)
+ nearby_candidate_scores_transposed = tf.gather(
+ keypoint_scores_transposed, nearby_candidate_inds_transposed,
+ batch_dims=2)
+ gathered_keypoint_candidates = tf.transpose(nearby_candidate_coords_tranposed,
+ [0, 2, 1, 3])
+ gathered_keypoint_scores = tf.transpose(nearby_candidate_scores_transposed,
+ [0, 2, 1])
+ return gathered_keypoint_candidates, gathered_keypoint_scores
+
+
+def flattened_indices_from_row_col_indices(row_indices, col_indices, num_cols):
+ """Get the index in a flattened array given row and column indices."""
+ return (row_indices * num_cols) + col_indices
+
+
+def row_col_channel_indices_from_flattened_indices(indices, num_cols,
+ num_channels):
+ """Computes row, column and channel indices from flattened indices.
+
+ Args:
+ indices: An integer tensor of any shape holding the indices in the flattened
+ space.
+ num_cols: Number of columns in the image (width).
+ num_channels: Number of channels in the image.
+
+ Returns:
+ row_indices: The row indices corresponding to each of the input indices.
+ Same shape as indices.
+ col_indices: The column indices corresponding to each of the input indices.
+ Same shape as indices.
+ channel_indices: The channel indices corresponding to each of the input
+ indices.
+
+ """
+ row_indices = (indices // num_channels) // num_cols
+ col_indices = (indices // num_channels) % num_cols
+ channel_indices = indices % num_channels
+
+ return row_indices, col_indices, channel_indices
+
+
+def get_valid_anchor_weights_in_flattened_image(true_image_shapes, height,
+ width):
+ """Computes valid anchor weights for an image assuming pixels will be flattened.
+
+ This function is useful when we only want to penalize valid areas in the
+ image in the case when padding is used. The function assumes that the loss
+ function will be applied after flattening the spatial dimensions and returns
+ anchor weights accordingly.
+
+ Args:
+ true_image_shapes: An integer tensor of shape [batch_size, 3] representing
+ the true image shape (without padding) for each sample in the batch.
+ height: height of the prediction from the network.
+ width: width of the prediction from the network.
+
+ Returns:
+ valid_anchor_weights: a float tensor of shape [batch_size, height * width]
+ with 1s in locations where the spatial coordinates fall within the height
+ and width in true_image_shapes.
+ """
+
+ indices = tf.reshape(tf.range(height * width), [1, -1])
+ batch_size = tf.shape(true_image_shapes)[0]
+ batch_indices = tf.ones((batch_size, 1), dtype=tf.int32) * indices
+
+ y_coords, x_coords, _ = row_col_channel_indices_from_flattened_indices(
+ batch_indices, width, 1)
+
+ max_y, max_x = true_image_shapes[:, 0], true_image_shapes[:, 1]
+ max_x = _to_float32(tf.expand_dims(max_x, 1))
+ max_y = _to_float32(tf.expand_dims(max_y, 1))
+
+ x_coords = _to_float32(x_coords)
+ y_coords = _to_float32(y_coords)
+
+ valid_mask = tf.math.logical_and(x_coords < max_x, y_coords < max_y)
+
+ return _to_float32(valid_mask)
+
+
+def convert_strided_predictions_to_normalized_boxes(boxes, stride,
+ true_image_shapes):
+ """Converts predictions in the output space to normalized boxes.
+
+ Boxes falling outside the valid image boundary are clipped to be on the
+ boundary.
+
+ Args:
+ boxes: A tensor of shape [batch_size, num_boxes, 4] holding the raw
+ coordinates of boxes in the model's output space.
+ stride: The stride in the output space.
+ true_image_shapes: A tensor of shape [batch_size, 3] representing the true
+ shape of the input not considering padding.
+
+ Returns:
+ boxes: A tensor of shape [batch_size, num_boxes, 4] representing the
+ coordinates of the normalized boxes.
+ """
+
+ def _normalize_boxlist(args):
+
+ boxes, height, width = args
+ boxes = box_list_ops.scale(boxes, stride, stride)
+ boxes = box_list_ops.to_normalized_coordinates(boxes, height, width)
+ boxes = box_list_ops.clip_to_window(boxes, [0., 0., 1., 1.],
+ filter_nonoverlapping=False)
+ return boxes
+
+ box_lists = [box_list.BoxList(boxes) for boxes in tf.unstack(boxes, axis=0)]
+ true_heights, true_widths, _ = tf.unstack(true_image_shapes, axis=1)
+
+ true_heights_list = tf.unstack(true_heights, axis=0)
+ true_widths_list = tf.unstack(true_widths, axis=0)
+
+ box_lists = list(map(_normalize_boxlist,
+ zip(box_lists, true_heights_list, true_widths_list)))
+ boxes = tf.stack([box_list_instance.get() for
+ box_list_instance in box_lists], axis=0)
+
+ return boxes
+
+
+def convert_strided_predictions_to_normalized_keypoints(
+ keypoint_coords, keypoint_scores, stride, true_image_shapes,
+ clip_out_of_frame_keypoints=False):
+ """Converts predictions in the output space to normalized keypoints.
+
+ If clip_out_of_frame_keypoints=False, keypoint coordinates falling outside
+ the valid image boundary are normalized but not clipped; If
+ clip_out_of_frame_keypoints=True, keypoint coordinates falling outside the
+ valid image boundary are clipped to the closest image boundary and the scores
+ will be set to 0.0.
+
+ Args:
+ keypoint_coords: A tensor of shape
+ [batch_size, num_instances, num_keypoints, 2] holding the raw coordinates
+ of keypoints in the model's output space.
+ keypoint_scores: A tensor of shape
+ [batch_size, num_instances, num_keypoints] holding the keypoint scores.
+ stride: The stride in the output space.
+ true_image_shapes: A tensor of shape [batch_size, 3] representing the true
+ shape of the input not considering padding.
+ clip_out_of_frame_keypoints: A boolean indicating whether keypoints outside
+ the image boundary should be clipped. If True, keypoint coords will be
+ clipped to image boundary. If False, keypoints are normalized but not
+ filtered based on their location.
+
+ Returns:
+ keypoint_coords_normalized: A tensor of shape
+ [batch_size, num_instances, num_keypoints, 2] representing the coordinates
+ of the normalized keypoints.
+ keypoint_scores: A tensor of shape
+ [batch_size, num_instances, num_keypoints] representing the updated
+ keypoint scores.
+ """
+ # Get the batch size of the keypoint tensor.
+ batch_size, _, _, _ = (
+ shape_utils.combined_static_and_dynamic_shape(keypoint_coords))
+
+ # Scale and normalize keypoints.
+ true_heights, true_widths, _ = tf.unstack(true_image_shapes, axis=1)
+ yscale = float(stride) / tf.cast(true_heights, tf.float32)
+ xscale = float(stride) / tf.cast(true_widths, tf.float32)
+ yx_scale = tf.stack([yscale, xscale], axis=1)
+ keypoint_coords_normalized = keypoint_coords * tf.reshape(
+ yx_scale, [batch_size, 1, 1, 2])
+
+ if clip_out_of_frame_keypoints:
+ # Determine the keypoints that are in the true image regions.
+ valid_indices = tf.logical_and(
+ tf.logical_and(keypoint_coords_normalized[:, :, :, 0] >= 0.0,
+ keypoint_coords_normalized[:, :, :, 0] <= 1.0),
+ tf.logical_and(keypoint_coords_normalized[:, :, :, 1] >= 0.0,
+ keypoint_coords_normalized[:, :, :, 1] <= 1.0))
+ batch_window = tf.tile(
+ tf.constant([[0.0, 0.0, 1.0, 1.0]], dtype=tf.float32),
+ multiples=[batch_size, 1])
+ def clip_to_window(inputs):
+ keypoints, window = inputs
+ return keypoint_ops.clip_to_window(keypoints, window)
+ keypoint_coords_normalized = tf.map_fn(
+ clip_to_window, (keypoint_coords_normalized, batch_window),
+ dtype=tf.float32, back_prop=False)
+ keypoint_scores = tf.where(valid_indices, keypoint_scores,
+ tf.zeros_like(keypoint_scores))
+ return keypoint_coords_normalized, keypoint_scores
+
+
+def convert_strided_predictions_to_instance_masks(
+ boxes, classes, masks, stride, mask_height, mask_width,
+ true_image_shapes, score_threshold=0.5):
+ """Converts predicted full-image masks into instance masks.
+
+ For each predicted detection box:
+ * Crop and resize the predicted mask based on the detected bounding box
+ coordinates and class prediction. Uses bilinear resampling.
+ * Binarize the mask using the provided score threshold.
+
+ Args:
+ boxes: A tensor of shape [batch, max_detections, 4] holding the predicted
+ boxes, in normalized coordinates (relative to the true image dimensions).
+ classes: An integer tensor of shape [batch, max_detections] containing the
+ detected class for each box (0-indexed).
+ masks: A [batch, output_height, output_width, num_classes] float32
+ tensor with class probabilities.
+ stride: The stride in the output space.
+ mask_height: The desired resized height for instance masks.
+ mask_width: The desired resized width for instance masks.
+ true_image_shapes: A tensor of shape [batch, 3] representing the true
+ shape of the inputs not considering padding.
+ score_threshold: The threshold at which to convert predicted mask
+ into foreground pixels.
+
+ Returns:
+ A [batch_size, max_detections, mask_height, mask_width] uint8 tensor with
+ predicted foreground mask for each instance. The masks take values in
+ {0, 1}.
+ """
+ _, output_height, output_width, _ = (
+ shape_utils.combined_static_and_dynamic_shape(masks))
+ input_height = stride * output_height
+ input_width = stride * output_width
+
+ # Boxes are in normalized coordinates relative to true image shapes. Convert
+ # coordinates to be normalized relative to input image shapes (since masks
+ # may still have padding).
+ # Then crop and resize each mask.
+ def crop_and_threshold_masks(args):
+ """Crops masks based on detection boxes."""
+ boxes, classes, masks, true_height, true_width = args
+ boxlist = box_list.BoxList(boxes)
+ y_scale = true_height / input_height
+ x_scale = true_width / input_width
+ boxlist = box_list_ops.scale(boxlist, y_scale, x_scale)
+ boxes = boxlist.get()
+ # Convert masks from [input_height, input_width, num_classes] to
+ # [num_classes, input_height, input_width, 1].
+ masks_4d = tf.transpose(masks, perm=[2, 0, 1])[:, :, :, tf.newaxis]
+ cropped_masks = tf2.image.crop_and_resize(
+ masks_4d,
+ boxes=boxes,
+ box_indices=classes,
+ crop_size=[mask_height, mask_width],
+ method='bilinear')
+ masks_3d = tf.squeeze(cropped_masks, axis=3)
+ masks_binarized = tf.math.greater_equal(masks_3d, score_threshold)
+ return tf.cast(masks_binarized, tf.uint8)
+
+ true_heights, true_widths, _ = tf.unstack(true_image_shapes, axis=1)
+ masks_for_image = shape_utils.static_or_dynamic_map_fn(
+ crop_and_threshold_masks,
+ elems=[boxes, classes, masks, true_heights, true_widths],
+ dtype=tf.uint8,
+ back_prop=False)
+ masks = tf.stack(masks_for_image, axis=0)
+ return masks
+
+
+class ObjectDetectionParams(
+ collections.namedtuple('ObjectDetectionParams', [
+ 'localization_loss', 'scale_loss_weight', 'offset_loss_weight',
+ 'task_loss_weight'
+ ])):
+ """Namedtuple to host object detection related parameters.
+
+ This is a wrapper class over the fields that are either the hyper-parameters
+ or the loss functions needed for the object detection task. The class is
+ immutable after constructed. Please see the __new__ function for detailed
+ information for each field.
+ """
+
+ __slots__ = ()
+
+ def __new__(cls,
+ localization_loss,
+ scale_loss_weight,
+ offset_loss_weight,
+ task_loss_weight=1.0):
+ """Constructor with default values for ObjectDetectionParams.
+
+ Args:
+ localization_loss: an object_detection.core.losses.Loss object to compute
+ the loss for the center offset and height/width predictions in
+ CenterNet.
+ scale_loss_weight: float, The weight for localizing box size. Note that
+ the scale loss is dependent on the input image size, since we penalize
+ the raw height and width. This constant may need to be adjusted
+ depending on the input size.
+ offset_loss_weight: float, The weight for localizing center offsets.
+ task_loss_weight: float, the weight of the object detection loss.
+
+ Returns:
+ An initialized ObjectDetectionParams namedtuple.
+ """
+ return super(ObjectDetectionParams,
+ cls).__new__(cls, localization_loss, scale_loss_weight,
+ offset_loss_weight, task_loss_weight)
+
+
+class KeypointEstimationParams(
+ collections.namedtuple('KeypointEstimationParams', [
+ 'task_name', 'class_id', 'keypoint_indices', 'classification_loss',
+ 'localization_loss', 'keypoint_labels', 'keypoint_std_dev',
+ 'keypoint_heatmap_loss_weight', 'keypoint_offset_loss_weight',
+ 'keypoint_regression_loss_weight', 'keypoint_candidate_score_threshold',
+ 'heatmap_bias_init', 'num_candidates_per_keypoint', 'task_loss_weight',
+ 'peak_max_pool_kernel_size', 'unmatched_keypoint_score', 'box_scale',
+ 'candidate_search_scale', 'candidate_ranking_mode',
+ 'offset_peak_radius', 'per_keypoint_offset'
+ ])):
+ """Namedtuple to host keypoint estimation related parameters.
+
+ This is a wrapper class over the fields that are either the hyper-parameters
+ or the loss functions needed for the keypoint estimation task. The class is
+ immutable after constructed. Please see the __new__ function for detailed
+ information for each field.
+ """
+
+ __slots__ = ()
+
+ def __new__(cls,
+ task_name,
+ class_id,
+ keypoint_indices,
+ classification_loss,
+ localization_loss,
+ keypoint_labels=None,
+ keypoint_std_dev=None,
+ keypoint_heatmap_loss_weight=1.0,
+ keypoint_offset_loss_weight=1.0,
+ keypoint_regression_loss_weight=1.0,
+ keypoint_candidate_score_threshold=0.1,
+ heatmap_bias_init=-2.19,
+ num_candidates_per_keypoint=100,
+ task_loss_weight=1.0,
+ peak_max_pool_kernel_size=3,
+ unmatched_keypoint_score=0.1,
+ box_scale=1.2,
+ candidate_search_scale=0.3,
+ candidate_ranking_mode='min_distance',
+ offset_peak_radius=0,
+ per_keypoint_offset=False):
+ """Constructor with default values for KeypointEstimationParams.
+
+ Args:
+ task_name: string, the name of the task this namedtuple corresponds to.
+ Note that it should be an unique identifier of the task.
+ class_id: int, the ID of the class that contains the target keypoints to
+ be considered in this task. For example, if the task is human pose
+ estimation, the class id should correspond to the "human" class. Note
+ that the ID is 0-based, meaning that class 0 corresponds to the first
+ non-background object class.
+ keypoint_indices: A list of integers representing the indices of the
+ keypoints to be considered in this task. This is used to retrieve the
+ subset of the keypoints from gt_keypoints that should be considered in
+ this task.
+ classification_loss: an object_detection.core.losses.Loss object to
+ compute the loss for the class predictions in CenterNet.
+ localization_loss: an object_detection.core.losses.Loss object to compute
+ the loss for the center offset and height/width predictions in
+ CenterNet.
+ keypoint_labels: A list of strings representing the label text of each
+ keypoint, e.g. "nose", "left_shoulder". Note that the length of this
+ list should be equal to keypoint_indices.
+ keypoint_std_dev: A list of floats representing the standard deviation of the
+ Gaussian kernel used to generate the keypoint heatmap. It is to provide
+ the flexibility of using different sizes of Gaussian kernel for each
+ keypoint class.
+ keypoint_heatmap_loss_weight: float, The weight for the keypoint heatmap.
+ keypoint_offset_loss_weight: float, The weight for the keypoint offsets
+ loss.
+ keypoint_regression_loss_weight: float, The weight for keypoint regression
+ loss. Note that the loss is dependent on the input image size, since we
+ penalize the raw height and width. This constant may need to be adjusted
+ depending on the input size.
+ keypoint_candidate_score_threshold: float, The heatmap score threshold for
+ a keypoint to become a valid candidate.
+ heatmap_bias_init: float, the initial value of bias in the convolutional
+ kernel of the class prediction head. If set to None, the bias is
+ initialized with zeros.
+ num_candidates_per_keypoint: The maximum number of candidates to retrieve
+ for each keypoint.
+ task_loss_weight: float, the weight of the keypoint estimation loss.
+ peak_max_pool_kernel_size: Max pool kernel size to use to pull off peak
+ score locations in a neighborhood (independently for each keypoint
+ types).
+ unmatched_keypoint_score: The default score to use for regressed keypoints
+ that are not successfully snapped to a nearby candidate.
+ box_scale: The multiplier to expand the bounding boxes (either the
+ provided boxes or those which tightly cover the regressed keypoints).
+ candidate_search_scale: The scale parameter that multiplies the largest
+ dimension of a bounding box. The resulting distance becomes a search
+ radius for candidates in the vicinity of each regressed keypoint.
+ candidate_ranking_mode: One of ['min_distance', 'score_distance_ratio']
+ indicating how to select the keypoint candidate.
+ offset_peak_radius: The radius (in the unit of output pixel) around
+ groundtruth heatmap peak to assign the offset targets. If set 0, then
+ the offset target will only be assigned to the heatmap peak (same
+ behavior as the original paper).
+ per_keypoint_offset: A bool indicates whether to assign offsets for each
+ keypoint channel separately. If set False, the output offset target has
+ the shape [batch_size, out_height, out_width, 2] (same behavior as the
+ original paper). If set True, the output offset target has the shape
+ [batch_size, out_height, out_width, 2 * num_keypoints] (recommended when
+ the offset_peak_radius is not zero).
+
+ Returns:
+ An initialized KeypointEstimationParams namedtuple.
+ """
+ return super(KeypointEstimationParams, cls).__new__(
+ cls, task_name, class_id, keypoint_indices, classification_loss,
+ localization_loss, keypoint_labels, keypoint_std_dev,
+ keypoint_heatmap_loss_weight, keypoint_offset_loss_weight,
+ keypoint_regression_loss_weight, keypoint_candidate_score_threshold,
+ heatmap_bias_init, num_candidates_per_keypoint, task_loss_weight,
+ peak_max_pool_kernel_size, unmatched_keypoint_score, box_scale,
+ candidate_search_scale, candidate_ranking_mode, offset_peak_radius,
+ per_keypoint_offset)
+
+
+class ObjectCenterParams(
+    collections.namedtuple('ObjectCenterParams', [
+        'classification_loss', 'object_center_loss_weight', 'heatmap_bias_init',
+        'min_box_overlap_iou', 'max_box_predictions', 'use_only_known_classes'
+    ])):
+  """Namedtuple to store object center prediction related parameters.
+
+  The constructor argument `use_labeled_classes` is stored in the
+  `use_only_known_classes` field. A read-only `use_labeled_classes` property
+  exposes the same value, so it can also be read back under the name it was
+  passed in with.
+  """
+
+  __slots__ = ()
+
+  def __new__(cls,
+              classification_loss,
+              object_center_loss_weight,
+              heatmap_bias_init=-2.19,
+              min_box_overlap_iou=0.7,
+              max_box_predictions=100,
+              use_labeled_classes=False):
+    """Constructor with default values for ObjectCenterParams.
+
+    Args:
+      classification_loss: an object_detection.core.losses.Loss object to
+        compute the loss for the class predictions in CenterNet.
+      object_center_loss_weight: float, The weight for the object center loss.
+      heatmap_bias_init: float, the initial value of bias in the convolutional
+        kernel of the object center prediction head. If set to None, the bias
+        is initialized with zeros.
+      min_box_overlap_iou: float, the minimum IOU overlap that predicted boxes
+        need to have with groundtruth boxes to not be penalized. This is used
+        for computing the class specific center heatmaps.
+      max_box_predictions: int, the maximum number of boxes to predict.
+      use_labeled_classes: boolean, compute the loss only for the labeled
+        classes. Stored in the `use_only_known_classes` field.
+
+    Returns:
+      An initialized ObjectCenterParams namedtuple.
+    """
+    # Fields are passed by keyword so that the mapping from the constructor
+    # argument `use_labeled_classes` to the stored field name is explicit.
+    return super(ObjectCenterParams, cls).__new__(
+        cls,
+        classification_loss=classification_loss,
+        object_center_loss_weight=object_center_loss_weight,
+        heatmap_bias_init=heatmap_bias_init,
+        min_box_overlap_iou=min_box_overlap_iou,
+        max_box_predictions=max_box_predictions,
+        use_only_known_classes=use_labeled_classes)
+
+  @property
+  def use_labeled_classes(self):
+    """Read-only alias of the `use_only_known_classes` field."""
+    return self.use_only_known_classes
+
+
+class MaskParams(
+    collections.namedtuple('MaskParams', [
+        'classification_loss', 'task_loss_weight', 'mask_height', 'mask_width',
+        'score_threshold', 'heatmap_bias_init'
+    ])):
+  """Namedtuple holding the hyper-parameters of the mask prediction task."""
+
+  __slots__ = ()
+
+  def __new__(cls,
+              classification_loss,
+              task_loss_weight=1.0,
+              mask_height=256,
+              mask_width=256,
+              score_threshold=0.5,
+              heatmap_bias_init=-2.19):
+    """Creates a MaskParams tuple, filling in defaults where omitted.
+
+    Args:
+      classification_loss: an object_detection.core.losses.Loss object used to
+        compute the loss for the semantic segmentation predictions in
+        CenterNet.
+      task_loss_weight: float, the loss weight for the segmentation task.
+      mask_height: the height of the resized instance segmentation mask.
+      mask_width: the width of the resized instance segmentation mask.
+      score_threshold: the threshold at which predicted mask probabilities
+        (after passing through sigmoid) are converted into foreground pixels.
+      heatmap_bias_init: float, the initial value of bias in the convolutional
+        kernel of the semantic segmentation prediction head. If set to None,
+        the bias is initialized with zeros.
+
+    Returns:
+      An initialized MaskParams namedtuple.
+    """
+    base_new = super(MaskParams, cls).__new__
+    return base_new(
+        cls,
+        classification_loss=classification_loss,
+        task_loss_weight=task_loss_weight,
+        mask_height=mask_height,
+        mask_width=mask_width,
+        score_threshold=score_threshold,
+        heatmap_bias_init=heatmap_bias_init)
+
+
+# The following constants are used to generate the keys of the
+# (prediction, loss, target assigner,...) dictionaries used in the
+# CenterNetMetaArch class.
+#
+# DETECTION_TASK and SEGMENTATION_TASK key the target assigner dictionary.
+# OBJECT_CENTER keys both the prediction head and the target assigner
+# dictionaries. BOX_* and SEGMENTATION_HEATMAP key prediction heads directly.
+# The KEYPOINT_* values are head-name suffixes: they are joined with a task
+# name through get_keypoint_name() to form the per-task dictionary keys.
+DETECTION_TASK = 'detection_task'
+OBJECT_CENTER = 'object_center'
+BOX_SCALE = 'box/scale'
+BOX_OFFSET = 'box/offset'
+KEYPOINT_REGRESSION = 'keypoint/regression'
+KEYPOINT_HEATMAP = 'keypoint/heatmap'
+KEYPOINT_OFFSET = 'keypoint/offset'
+SEGMENTATION_TASK = 'segmentation_task'
+SEGMENTATION_HEATMAP = 'segmentation/heatmap'
+# NOTE(review): presumably prepended to the loss dictionary keys; its use is
+# not visible in this part of the file -- confirm against the loss() method.
+LOSS_KEY_PREFIX = 'Loss'
+
+
+def get_keypoint_name(task_name, head_name):
+  """Returns the dictionary key '<task_name>/<head_name>' for a keypoint head.
+
+  Args:
+    task_name: name of the keypoint estimation task.
+    head_name: name of the head, e.g. one of the KEYPOINT_* constants.
+
+  Returns:
+    The two names joined by a single '/'.
+  """
+  key_template = '%s/%s'
+  return key_template % (task_name, head_name)
+
+
+def get_num_instances_from_weights(groundtruth_weights_list):
+  """Counts the instances/boxes with non-zero weight across a batch.
+
+  Args:
+    groundtruth_weights_list: A list of float tensors with shape
+      [max_num_instances] representing whether there is an actual instance in
+      the image (with non-zero value) or is padded to match the
+      max_num_instances (with value 0.0). The list represents the batch
+      dimension.
+
+  Returns:
+    A scalar integer tensor indicating how many instances/boxes are in the
+    images in the batch. Since this value is usually used to normalize the
+    loss, it is clamped to a minimum of 1.
+  """
+  nonzero_counts_per_image = [
+      tf.math.count_nonzero(weights) for weights in groundtruth_weights_list
+  ]
+  total_instances = tf.reduce_sum(nonzero_counts_per_image)
+  # Never return 0: callers divide by this value.
+  return tf.maximum(total_instances, 1)
+
+
+class CenterNetMetaArch(model.DetectionModel):
+ """The CenterNet meta architecture [1].
+
+ [1]: https://arxiv.org/abs/1904.07850
+ """
+
+  def __init__(self,
+               is_training,
+               add_summaries,
+               num_classes,
+               feature_extractor,
+               image_resizer_fn,
+               object_center_params,
+               object_detection_params=None,
+               keypoint_params_dict=None,
+               mask_params=None):
+    """Creates a CenterNet model.
+
+    Args:
+      is_training: Set to True if this model is being built for training.
+      add_summaries: Whether to add tf summaries in the model. Not referenced
+        inside this constructor.
+      num_classes: int, The number of classes that the model should predict.
+      feature_extractor: A CenterNetFeatureExtractor to use to extract features
+        from an image. Its `num_feature_outputs` and `out_stride` attributes
+        are read here.
+      image_resizer_fn: a callable for image resizing. This callable always
+        takes a rank-3 image tensor (corresponding to a single image) and
+        returns a rank-3 image tensor, possibly with new spatial dimensions and
+        a 1-D tensor of shape [3] indicating shape of true image within the
+        resized image tensor as the resized image tensor could be padded. See
+        builders/image_resizer_builder.py.
+      object_center_params: An ObjectCenterParams namedtuple holding the
+        hyper-parameters for object center prediction. This is required by
+        either object detection or keypoint estimation tasks.
+      object_detection_params: An ObjectDetectionParams namedtuple holding the
+        hyper-parameters necessary for object detection. Please see the class
+        definition for more details.
+      keypoint_params_dict: A dictionary that maps from task name to the
+        corresponding KeypointEstimationParams namedtuple, holding the
+        hyper-parameters necessary for multiple keypoint estimations. Please
+        see the class definition for more details.
+      mask_params: A MaskParams namedtuple holding the hyper-parameters for
+        segmentation. Please see the class definition for more details.
+
+    Raises:
+      AssertionError: if both object_detection_params and keypoint_params_dict
+        are empty/None.
+    """
+    # At least one of the object detection / keypoint estimation tasks must be
+    # configured.
+    assert object_detection_params or keypoint_params_dict
+    self._is_training = is_training
+
+    # Per-task hyper-parameters.
+    self._center_params = object_center_params
+    self._od_params = object_detection_params
+    self._kp_params_dict = keypoint_params_dict
+    self._mask_params = mask_params
+
+    # Backbone. The Objects as Points paper attaches loss functions to multiple
+    # (`num_feature_outputs`) feature maps in the backbone, e.g. 2 for the
+    # hourglass backbone.
+    self._feature_extractor = feature_extractor
+    self._num_feature_outputs = feature_extractor.num_feature_outputs
+    self._stride = self._feature_extractor.out_stride
+    self._image_resizer_fn = image_resizer_fn
+
+    # One list of prediction nets per head, one net per feature output.
+    center_bias_init = self._center_params.heatmap_bias_init
+    self._prediction_head_dict = self._construct_prediction_heads(
+        num_classes,
+        self._num_feature_outputs,
+        class_prediction_bias_init=center_bias_init)
+
+    # One target assigner per configured task.
+    center_min_iou = self._center_params.min_box_overlap_iou
+    self._target_assigner_dict = self._initialize_target_assigners(
+        stride=self._stride, min_box_overlap_iou=center_min_iou)
+
+    # Will be used in VOD single_frame_meta_arch for tensor reshape.
+    self._batched_prediction_tensor_names = []
+
+    super(CenterNetMetaArch, self).__init__(num_classes)
+
+  @property
+  def batched_prediction_tensor_names(self):
+    """Returns the stored batched prediction tensor names.
+
+    Raises:
+      RuntimeError: if the stored list of names is still empty.
+    """
+    tensor_names = self._batched_prediction_tensor_names
+    if tensor_names:
+      return tensor_names
+    raise RuntimeError('Must call predict() method to get batched prediction '
+                       'tensor names.')
+
+  def _construct_prediction_heads(self, num_classes, num_feature_outputs,
+                                  class_prediction_bias_init):
+    """Constructs the prediction heads based on the specific parameters.
+
+    Heads are created for the object center and, depending on which task
+    parameters were supplied to the constructor, for box scale/offset, for
+    each keypoint task (heatmap, regression, offset) and for segmentation.
+
+    Args:
+      num_classes: An integer indicating how many classes in total to predict.
+      num_feature_outputs: An integer indicating how many feature outputs to
+        use for calculating the loss. The Objects as Points paper attaches loss
+        functions to multiple (`num_feature_outputs`) feature maps in the
+        backbone. E.g. for the hourglass backbone, `num_feature_outputs` is 2.
+      class_prediction_bias_init: float, the initial value of bias in the
+        convolutional kernel of the object center prediction head. If set to
+        None, the bias is initialized with zeros.
+
+    Returns:
+      A dictionary mapping each head name to a list of `num_feature_outputs`
+      keras modules generated by calling make_prediction_net.
+    """
+
+    def _make_heads(num_channels, **net_kwargs):
+      # One independent prediction net per backbone feature output.
+      return [
+          make_prediction_net(num_channels, **net_kwargs)
+          for _ in range(num_feature_outputs)
+      ]
+
+    prediction_heads = {}
+    prediction_heads[OBJECT_CENTER] = _make_heads(
+        num_classes, bias_fill=class_prediction_bias_init)
+
+    if self._od_params is not None:
+      prediction_heads[BOX_SCALE] = _make_heads(NUM_SIZE_CHANNELS)
+      prediction_heads[BOX_OFFSET] = _make_heads(NUM_OFFSET_CHANNELS)
+
+    if self._kp_params_dict is not None:
+      for task_name, kp_params in self._kp_params_dict.items():
+        num_keypoints = len(kp_params.keypoint_indices)
+        prediction_heads[get_keypoint_name(
+            task_name, KEYPOINT_HEATMAP)] = _make_heads(
+                num_keypoints, bias_fill=kp_params.heatmap_bias_init)
+        prediction_heads[get_keypoint_name(
+            task_name, KEYPOINT_REGRESSION)] = _make_heads(
+                NUM_OFFSET_CHANNELS * num_keypoints)
+        # A per-keypoint offset head predicts a separate offset for every
+        # keypoint channel; otherwise a single shared offset is predicted.
+        if kp_params.per_keypoint_offset:
+          num_offset_channels = NUM_OFFSET_CHANNELS * num_keypoints
+        else:
+          num_offset_channels = NUM_OFFSET_CHANNELS
+        prediction_heads[get_keypoint_name(
+            task_name, KEYPOINT_OFFSET)] = _make_heads(num_offset_channels)
+
+    if self._mask_params is not None:
+      # The segmentation head takes its bias initializer from the mask
+      # parameters (MaskParams.heatmap_bias_init) rather than reusing the
+      # object center bias, so the mask setting is actually honored.
+      prediction_heads[SEGMENTATION_HEATMAP] = _make_heads(
+          num_classes, bias_fill=self._mask_params.heatmap_bias_init)
+    return prediction_heads
+
+  def _initialize_target_assigners(self, stride, min_box_overlap_iou):
+    """Builds one target assigner per configured task.
+
+    Args:
+      stride: An integer indicating the stride of the image.
+      min_box_overlap_iou: float, the minimum IOU overlap that predicted boxes
+        need have with groundtruth boxes to not be penalized. This is used for
+        computing the class specific center heatmaps.
+
+    Returns:
+      A dictionary of initialized target assigners. It always holds an
+      OBJECT_CENTER entry; DETECTION_TASK, one entry per keypoint task name and
+      SEGMENTATION_TASK are added when the corresponding parameters are set.
+    """
+    assigners = {
+        OBJECT_CENTER:
+            cn_assigner.CenterNetCenterHeatmapTargetAssigner(
+                stride, min_box_overlap_iou),
+    }
+
+    if self._od_params is not None:
+      assigners[DETECTION_TASK] = cn_assigner.CenterNetBoxTargetAssigner(
+          stride)
+
+    keypoint_params = self._kp_params_dict
+    if keypoint_params is not None:
+      for task_name, kp_params in keypoint_params.items():
+        keypoint_assigner = cn_assigner.CenterNetKeypointTargetAssigner(
+            stride=stride,
+            class_id=kp_params.class_id,
+            keypoint_indices=kp_params.keypoint_indices,
+            keypoint_std_dev=kp_params.keypoint_std_dev,
+            peak_radius=kp_params.offset_peak_radius,
+            per_keypoint_offset=kp_params.per_keypoint_offset)
+        assigners[task_name] = keypoint_assigner
+
+    if self._mask_params is not None:
+      assigners[SEGMENTATION_TASK] = cn_assigner.CenterNetMaskTargetAssigner(
+          stride)
+
+    return assigners
+
+  def _compute_object_center_loss(self, input_height, input_width,
+                                  object_center_predictions, per_pixel_weights):
+    """Computes the object center heatmap loss.
+
+    Args:
+      input_height: An integer scalar tensor representing input image height.
+      input_width: An integer scalar tensor representing input image width.
+      object_center_predictions: A list of float tensors of shape [batch_size,
+        out_height, out_width, num_classes] representing the object center
+        feature maps.
+      per_pixel_weights: A float tensor of shape [batch_size,
+        out_height * out_width, 1] with 1s in locations where the spatial
+        coordinates fall within the height and width in true_image_shapes.
+
+    Returns:
+      A float scalar tensor representing the object center loss per instance.
+    """
+    center_params = self._center_params
+    boxes_per_image = self.groundtruth_lists(fields.BoxListFields.boxes)
+    classes_per_image = self.groundtruth_lists(fields.BoxListFields.classes)
+    weights_per_image = self.groundtruth_lists(fields.BoxListFields.weights)
+
+    loss_weights = per_pixel_weights
+    if center_params.use_only_known_classes:
+      labeled_classes = tf.stack(
+          self.groundtruth_lists(
+              fields.InputDataFields.groundtruth_labeled_classes),
+          axis=0)
+      labeled_shape = tf.shape(labeled_classes)
+      # Insert a singleton middle axis so the per-image labeled-class values
+      # broadcast against the per-pixel weights.
+      labeled_classes = tf.reshape(
+          labeled_classes, [labeled_shape[0], 1, labeled_shape[-1]])
+      loss_weights = per_pixel_weights * labeled_classes
+
+    # Convert the groundtruth to targets.
+    center_assigner = self._target_assigner_dict[OBJECT_CENTER]
+    target_heatmap = _flatten_spatial_dimensions(
+        center_assigner.assign_center_targets_from_boxes(
+            height=input_height,
+            width=input_width,
+            gt_boxes_list=boxes_per_image,
+            gt_classes_list=classes_per_image,
+            gt_weights_list=weights_per_image))
+    num_boxes = _to_float32(get_num_instances_from_weights(weights_per_image))
+
+    center_loss_fn = center_params.classification_loss
+    summed_loss = 0.0
+    # Accumulate the loss over every feature output head.
+    for head_output in object_center_predictions:
+      summed_loss += center_loss_fn(
+          _flatten_spatial_dimensions(head_output),
+          target_heatmap,
+          weights=loss_weights)
+
+    # Average over the heads and normalize by the number of boxes.
+    num_heads = float(len(object_center_predictions))
+    return tf.reduce_sum(summed_loss) / (num_heads * num_boxes)
+
+  def _compute_object_detection_losses(self, input_height, input_width,
+                                       prediction_dict, per_pixel_weights):
+    """Computes the weighted object detection losses.
+
+    Delegates to _compute_box_scale_and_offset_loss and scales the two
+    resulting losses by the weights stored in the object detection parameters.
+
+    Args:
+      input_height: An integer scalar tensor representing input image height.
+      input_width: An integer scalar tensor representing input image width.
+      prediction_dict: A dictionary holding predicted tensors output by
+        "predict" function. The BOX_SCALE and BOX_OFFSET entries are read.
+      per_pixel_weights: A float tensor of shape [batch_size,
+        out_height * out_width, 1] with 1s in locations where the spatial
+        coordinates fall within the height and width in true_image_shapes.
+        Not used by this method.
+
+    Returns:
+      A dictionary of scalar float tensors representing the weighted losses for
+      object detection task:
+        BOX_SCALE: the weighted scale (height/width) loss.
+        BOX_OFFSET: the weighted object offset loss.
+    """
+    box_losses = self._compute_box_scale_and_offset_loss(
+        scale_predictions=prediction_dict[BOX_SCALE],
+        offset_predictions=prediction_dict[BOX_OFFSET],
+        input_height=input_height,
+        input_width=input_width)
+    raw_scale_loss, raw_offset_loss = box_losses
+
+    od_params = self._od_params
+    return {
+        BOX_SCALE: od_params.scale_loss_weight * raw_scale_loss,
+        BOX_OFFSET: od_params.offset_loss_weight * raw_offset_loss,
+    }
+
+  def _compute_box_scale_and_offset_loss(self, input_height, input_width,
+                                         scale_predictions, offset_predictions):
+    """Computes the scale and offset losses of the object detection task.
+
+    Args:
+      input_height: An integer scalar tensor representing input image height.
+      input_width: An integer scalar tensor representing input image width.
+      scale_predictions: A list of float tensors of shape [batch_size,
+        out_height, out_width, 2] representing the prediction heads of the
+        model for object scale (i.e height and width).
+      offset_predictions: A list of float tensors of shape [batch_size,
+        out_height, out_width, 2] representing the prediction heads of the
+        model for object offset.
+
+    Returns:
+      A tuple of two losses:
+        scale_loss: A float scalar tensor representing the object height/width
+          loss normalized by total number of boxes.
+        offset_loss: A float scalar tensor representing the object offset loss
+          normalized by total number of boxes
+    """
+    # TODO(vighneshb) Explore a size invariant version of scale loss.
+    boxes_per_image = self.groundtruth_lists(fields.BoxListFields.boxes)
+    weights_per_image = self.groundtruth_lists(fields.BoxListFields.weights)
+    num_boxes = _to_float32(get_num_instances_from_weights(weights_per_image))
+    num_heads = float(len(scale_predictions))
+
+    box_assigner = self._target_assigner_dict[DETECTION_TASK]
+    box_targets = box_assigner.assign_size_and_offset_targets(
+        height=input_height,
+        width=input_width,
+        gt_boxes_list=boxes_per_image,
+        gt_weights_list=weights_per_image)
+    indices, size_targets, offset_targets, target_weights = box_targets
+    target_weights = tf.expand_dims(target_weights, -1)
+
+    box_loss_fn = self._od_params.localization_loss
+    gather_at_indices = cn_assigner.get_batch_predictions_from_indices
+    summed_scale_loss = 0
+    summed_offset_loss = 0
+    for scale_map, offset_map in zip(scale_predictions, offset_predictions):
+      # Scale (height/width) loss at the target locations.
+      summed_scale_loss += box_loss_fn(
+          gather_at_indices(scale_map, indices),
+          size_targets,
+          weights=target_weights)
+      # Offset loss at the same locations.
+      summed_offset_loss += box_loss_fn(
+          gather_at_indices(offset_map, indices),
+          offset_targets,
+          weights=target_weights)
+
+    # Average over the heads and normalize by the number of boxes.
+    normalizer = num_heads * num_boxes
+    scale_loss = tf.reduce_sum(summed_scale_loss) / normalizer
+    offset_loss = tf.reduce_sum(summed_offset_loss) / normalizer
+    return scale_loss, offset_loss
+
+  def _compute_keypoint_estimation_losses(self, task_name, input_height,
+                                          input_width, prediction_dict,
+                                          per_pixel_weights):
+    """Computes the weighted keypoint losses of one keypoint task.
+
+    Args:
+      task_name: A string representing the name of the keypoint task.
+      input_height: An integer scalar tensor representing input image height.
+      input_width: An integer scalar tensor representing input image width.
+      prediction_dict: A dictionary holding predicted tensors. The heatmap,
+        offset and regression entries of `task_name` are read.
+      per_pixel_weights: Per-pixel weights forwarded to the keypoint heatmap
+        loss.
+
+    Returns:
+      A dictionary mapping the task's heatmap, offset and regression keys to
+      the corresponding loss multiplied by its weight from the task's
+      keypoint parameters.
+    """
+    kp_params = self._kp_params_dict[task_name]
+    heatmap_key = get_keypoint_name(task_name, KEYPOINT_HEATMAP)
+    offset_key = get_keypoint_name(task_name, KEYPOINT_OFFSET)
+    regression_key = get_keypoint_name(task_name, KEYPOINT_REGRESSION)
+
+    # Arguments shared by the three per-head loss computations.
+    shared_kwargs = {
+        'input_height': input_height,
+        'input_width': input_width,
+        'task_name': task_name,
+    }
+    raw_heatmap_loss = self._compute_kp_heatmap_loss(
+        heatmap_predictions=prediction_dict[heatmap_key],
+        classification_loss_fn=kp_params.classification_loss,
+        per_pixel_weights=per_pixel_weights,
+        **shared_kwargs)
+    raw_offset_loss = self._compute_kp_offset_loss(
+        offset_predictions=prediction_dict[offset_key],
+        localization_loss_fn=kp_params.localization_loss,
+        **shared_kwargs)
+    raw_regression_loss = self._compute_kp_regression_loss(
+        regression_predictions=prediction_dict[regression_key],
+        localization_loss_fn=kp_params.localization_loss,
+        **shared_kwargs)
+
+    return {
+        heatmap_key:
+            kp_params.keypoint_heatmap_loss_weight * raw_heatmap_loss,
+        offset_key:
+            kp_params.keypoint_offset_loss_weight * raw_offset_loss,
+        regression_key:
+            kp_params.keypoint_regression_loss_weight * raw_regression_loss,
+    }
+
+  def _compute_kp_heatmap_loss(self, input_height, input_width, task_name,
+                               heatmap_predictions, classification_loss_fn,
+                               per_pixel_weights):
+    """Computes the keypoint heatmap loss of a keypoint estimation task.
+
+    Args:
+      input_height: An integer scalar tensor representing input image height.
+      input_width: An integer scalar tensor representing input image width.
+      task_name: A string representing the name of the keypoint task.
+      heatmap_predictions: A list of float tensors of shape [batch_size,
+        out_height, out_width, num_keypoints] representing the prediction heads
+        of the model for keypoint heatmap.
+      classification_loss_fn: An object_detection.core.losses.Loss object to
+        compute the loss for the class predictions in CenterNet.
+      per_pixel_weights: A float tensor of shape [batch_size,
+        out_height * out_width, 1] with 1s in locations where the spatial
+        coordinates fall within the height and width in true_image_shapes.
+
+    Returns:
+      loss: A float scalar tensor representing the object keypoint heatmap loss
+        normalized by number of instances.
+    """
+    keypoints_per_image = self.groundtruth_lists(
+        fields.BoxListFields.keypoints)
+    classes_per_image = self.groundtruth_lists(fields.BoxListFields.classes)
+    weights_per_image = self.groundtruth_lists(fields.BoxListFields.weights)
+    boxes_per_image = self.groundtruth_lists(fields.BoxListFields.boxes)
+
+    keypoint_assigner = self._target_assigner_dict[task_name]
+    heatmap_targets = keypoint_assigner.assign_keypoint_heatmap_targets(
+        height=input_height,
+        width=input_width,
+        gt_keypoints_list=keypoints_per_image,
+        gt_weights_list=weights_per_image,
+        gt_classes_list=classes_per_image,
+        gt_boxes_list=boxes_per_image)
+    target_heatmap, instances_per_kp_type, valid_mask = heatmap_targets
+
+    flat_valid_mask = _flatten_spatial_dimensions(
+        tf.expand_dims(valid_mask, axis=-1))
+    flat_target_heatmap = _flatten_spatial_dimensions(target_heatmap)
+
+    # Total number of keypoints over all keypoint types. It normalizes the
+    # loss, so it is kept at a minimum of 1 to avoid generating weird loss
+    # values when no keypoint is in the image batch.
+    total_keypoints = tf.cast(
+        tf.reduce_sum(instances_per_kp_type), dtype=tf.float32)
+    num_instances = tf.maximum(total_keypoints, 1.0)
+
+    summed_loss = 0.0
+    # Accumulate the loss over every feature output head.
+    for head_output in heatmap_predictions:
+      per_pixel_loss = classification_loss_fn(
+          _flatten_spatial_dimensions(head_output),
+          flat_target_heatmap,
+          weights=tf.ones_like(per_pixel_weights))
+      # Apply the weights after the loss function to have full control over it.
+      summed_loss += per_pixel_loss * per_pixel_weights * flat_valid_mask
+
+    num_heads = float(len(heatmap_predictions))
+    return tf.reduce_sum(summed_loss) / (num_heads * num_instances)
+
+  def _compute_kp_offset_loss(self, input_height, input_width, task_name,
+                              offset_predictions, localization_loss_fn):
+    """Computes the keypoint offset loss of a keypoint estimation task.
+
+    Args:
+      input_height: An integer scalar tensor representing input image height.
+      input_width: An integer scalar tensor representing input image width.
+      task_name: A string representing the name of the keypoint task.
+      offset_predictions: A list of float tensors of shape [batch_size,
+        out_height, out_width, 2] representing the prediction heads of the
+        model for keypoint offset. Predictions with more than 2 channels are
+        reshaped so that the last axis has size 2.
+      localization_loss_fn: An object_detection.core.losses.Loss object to
+        compute the loss for the keypoint offset predictions in CenterNet.
+
+    Returns:
+      loss: A float scalar tensor representing the keypoint offset loss
+        normalized by number of total keypoints.
+    """
+    keypoints_per_image = self.groundtruth_lists(
+        fields.BoxListFields.keypoints)
+    classes_per_image = self.groundtruth_lists(fields.BoxListFields.classes)
+    weights_per_image = self.groundtruth_lists(fields.BoxListFields.weights)
+
+    keypoint_assigner = self._target_assigner_dict[task_name]
+    offset_targets = keypoint_assigner.assign_keypoints_offset_targets(
+        height=input_height,
+        width=input_width,
+        gt_keypoints_list=keypoints_per_image,
+        gt_weights_list=weights_per_image,
+        gt_classes_list=classes_per_image)
+    target_indices, target_offsets, target_weights = offset_targets
+
+    summed_loss = 0.0
+    for offset_map in offset_predictions:
+      batch_size, out_height, out_width, channels = _get_shape(offset_map, 4)
+      if channels > 2:
+        # More than one (y, x) pair per location: split the channel axis into
+        # pairs before gathering.
+        offset_map = tf.reshape(
+            offset_map, shape=[batch_size, out_height, out_width, -1, 2])
+      gathered_offsets = cn_assigner.get_batch_predictions_from_indices(
+          offset_map, target_indices)
+      # The dimensions passed are not as per the doc string but the loss
+      # still computes the correct value.
+      uniform_weights = tf.expand_dims(tf.ones_like(target_weights), -1)
+      per_target_loss = localization_loss_fn(
+          gathered_offsets, target_offsets, weights=uniform_weights)
+      # Apply the weights after the loss function to have full control over it.
+      summed_loss += target_weights * tf.reduce_sum(per_target_loss, axis=1)
+
+    # Average over the heads and normalize by the total target weight, which
+    # is kept at a minimum of 1.
+    num_heads = float(len(offset_predictions))
+    weight_total = tf.maximum(tf.reduce_sum(target_weights), 1.0)
+    return tf.reduce_sum(summed_loss) / (num_heads * weight_total)
+
+  def _compute_kp_regression_loss(self, input_height, input_width, task_name,
+                                  regression_predictions, localization_loss_fn):
+    """Computes the keypoint regression loss of a keypoint estimation task.
+
+    Args:
+      input_height: An integer scalar tensor representing input image height.
+      input_width: An integer scalar tensor representing input image width.
+      task_name: A string representing the name of the keypoint task.
+      regression_predictions: A list of float tensors of shape [batch_size,
+        out_height, out_width, 2 * num_keypoints] representing the prediction
+        heads of the model for keypoint regression offset.
+      localization_loss_fn: An object_detection.core.losses.Loss object to
+        compute the loss for the keypoint regression offset predictions in
+        CenterNet.
+
+    Returns:
+      loss: A float scalar tensor representing the keypoint regression offset
+        loss normalized by number of total keypoints.
+    """
+    boxes_per_image = self.groundtruth_lists(fields.BoxListFields.boxes)
+    keypoints_per_image = self.groundtruth_lists(
+        fields.BoxListFields.keypoints)
+    classes_per_image = self.groundtruth_lists(fields.BoxListFields.classes)
+    weights_per_image = self.groundtruth_lists(fields.BoxListFields.weights)
+
+    keypoint_assigner = self._target_assigner_dict[task_name]
+    regression_targets = keypoint_assigner.assign_joint_regression_targets(
+        height=input_height,
+        width=input_width,
+        gt_keypoints_list=keypoints_per_image,
+        gt_classes_list=classes_per_image,
+        gt_weights_list=weights_per_image,
+        gt_boxes_list=boxes_per_image)
+    target_indices, target_offsets, target_weights = regression_targets
+
+    summed_loss = 0.0
+    for regression_map in regression_predictions:
+      batch_size, out_height, out_width, _ = _get_shape(regression_map, 4)
+      # Split the channel axis into (y, x) pairs before gathering.
+      paired_map = tf.reshape(
+          regression_map, shape=[batch_size, out_height, out_width, -1, 2])
+      gathered_regression = cn_assigner.get_batch_predictions_from_indices(
+          paired_map, target_indices)
+      uniform_weights = tf.expand_dims(tf.ones_like(target_weights), -1)
+      per_target_loss = localization_loss_fn(
+          gathered_regression, target_offsets, weights=uniform_weights)
+      # Apply the weights after the loss function to have full control over it.
+      summed_loss += target_weights * tf.reduce_sum(per_target_loss, axis=1)
+
+    # Average over the heads and normalize by the total target weight, which
+    # is kept at a minimum of 1.
+    num_heads = float(len(regression_predictions))
+    weight_total = tf.maximum(tf.reduce_sum(target_weights), 1.0)
+    return tf.reduce_sum(summed_loss) / (num_heads * weight_total)
+
+  def _compute_segmentation_losses(self, prediction_dict, per_pixel_weights):
+    """Computes all the losses associated with segmentation.
+
+    Args:
+      prediction_dict: The dictionary returned from the predict() method. Its
+        SEGMENTATION_HEATMAP entry is read.
+      per_pixel_weights: A float tensor of shape [batch_size,
+        out_height * out_width, 1] with 1s in locations where the spatial
+        coordinates fall within the height and width in true_image_shapes.
+
+    Returns:
+      A dictionary with a single SEGMENTATION_HEATMAP entry holding the mask
+      loss.
+    """
+    return {
+        SEGMENTATION_HEATMAP:
+            self._compute_mask_loss(prediction_dict[SEGMENTATION_HEATMAP],
+                                    per_pixel_weights),
+    }
+
+  def _compute_mask_loss(self, segmentation_predictions,
+                         per_pixel_weights):
+    """Computes the segmentation mask loss.
+
+    Args:
+      segmentation_predictions: A list of float32 tensors of shape [batch_size,
+        out_height, out_width, num_classes].
+      per_pixel_weights: A float tensor of shape [batch_size,
+        out_height * out_width, 1] with 1s in locations where the spatial
+        coordinates fall within the height and width in true_image_shapes.
+
+    Returns:
+      A float scalar tensor representing the mask loss.
+    """
+    masks_per_image = self.groundtruth_lists(fields.BoxListFields.masks)
+    classes_per_image = self.groundtruth_lists(fields.BoxListFields.classes)
+
+    # Convert the groundtruth to targets.
+    mask_assigner = self._target_assigner_dict[SEGMENTATION_TASK]
+    target_heatmap = _flatten_spatial_dimensions(
+        mask_assigner.assign_segmentation_targets(
+            gt_masks_list=masks_per_image,
+            gt_classes_list=classes_per_image))
+
+    segmentation_loss_fn = self._mask_params.classification_loss
+    weighted_pixel_count = tf.reduce_sum(per_pixel_weights)
+
+    summed_loss = 0.0
+    # Accumulate the loss over every feature output head.
+    for head_output in segmentation_predictions:
+      summed_loss += segmentation_loss_fn(
+          _flatten_spatial_dimensions(head_output),
+          target_heatmap,
+          weights=per_pixel_weights)
+
+    # TODO(ronnyvotel): Consider other ways to normalize loss.
+    num_heads = float(len(segmentation_predictions))
+    return tf.reduce_sum(summed_loss) / (num_heads * weighted_pixel_count)
+
+  def preprocess(self, inputs):
+    """Resizes the inputs and applies the feature extractor preprocessing.
+
+    Args:
+      inputs: The images handed to
+        shape_utils.resize_images_and_return_shapes together with the model's
+        image resizer function.
+
+    Returns:
+      A tuple of the resized inputs after the feature extractor's preprocess()
+      and the true image shapes returned by the resizing step.
+    """
+    resized_inputs, true_image_shapes = (
+        shape_utils.resize_images_and_return_shapes(
+            inputs, self._image_resizer_fn))
+    preprocessed_inputs = self._feature_extractor.preprocess(resized_inputs)
+    return (preprocessed_inputs, true_image_shapes)
+
+ def predict(self, preprocessed_inputs, _):
+ """Predicts CenterNet prediction tensors given an input batch.
+
+ Feature extractors may emit several feature maps (e.g. the hourglass
+ backbone produces two), so every head's output is a list of tensors.
+
+ Args:
+ preprocessed_inputs: a [batch, height, width, channels] float32 tensor
+ representing a batch of images.
+ _: Not referenced by this implementation.
+
+ Returns:
+ prediction_dict: a dictionary holding predicted tensors with
+ 'preprocessed_inputs' - The input image after being resized and
+ preprocessed by the feature extractor.
+ 'object_center' - A list of size num_feature_outputs containing
+ float tensors of size [batch_size, output_height, output_width,
+ num_classes] representing the predicted object center heatmap logits.
+ 'box/scale' - [optional] A list of size num_feature_outputs holding
+ float tensors of size [batch_size, output_height, output_width, 2]
+ representing the predicted box height and width at each output
+ location. This field exists only when object detection task is
+ specified.
+ 'box/offset' - [optional] A list of size num_feature_outputs holding
+ float tensors of size [batch_size, output_height, output_width, 2]
+ representing the predicted y and x offsets at each output location.
+ '$TASK_NAME/keypoint_heatmap' - [optional] A list of size
+ num_feature_outputs holding float tensors of size [batch_size,
+ output_height, output_width, num_keypoints] representing the predicted
+ keypoint heatmap logits.
+ '$TASK_NAME/keypoint_offset' - [optional] A list of size
+ num_feature_outputs holding float tensors of size [batch_size,
+ output_height, output_width, 2] representing the predicted keypoint
+ offsets at each output location.
+ '$TASK_NAME/keypoint_regression' - [optional] A list of size
+ num_feature_outputs holding float tensors of size [batch_size,
+ output_height, output_width, 2 * num_keypoints] representing the
+ predicted keypoint regression at each output location.
+ 'segmentation/heatmap' - [optional] A list of size num_feature_outputs
+ holding float tensors of size [batch_size, output_height,
+ output_width, num_classes] representing the mask logits.
+ Note the $TASK_NAME is provided by the KeypointEstimation namedtuple
+ used to differentiate between different keypoint tasks.
+ """
+ features_list = self._feature_extractor(preprocessed_inputs)
+ # Head i of every prediction task is applied to feature map i.
+ predictions = {}
+ for head_name, heads in self._prediction_head_dict.items():
+ predictions[head_name] = [
+ head(feature) for (feature, head) in zip(features_list, heads)
+ ]
+ predictions['preprocessed_inputs'] = preprocessed_inputs
+
+ self._batched_prediction_tensor_names = predictions.keys()
+ return predictions
+
+ def loss(self, prediction_dict, true_image_shapes, scope=None):
+ """Computes scalar loss tensors with respect to provided groundtruth.
+
+ This function implements the various CenterNet losses.
+
+ Args:
+ prediction_dict: a dictionary holding predicted tensors returned by
+ "predict" function.
+ true_image_shapes: int32 tensor of shape [batch, 3] where each row is of
+ the form [height, width, channels] indicating the shapes of true images
+ in the resized images, as resized images can be padded with zeros.
+ scope: Optional scope name. Not referenced in this method's body.
+
+ Returns:
+ A dictionary mapping the keys ['Loss/object_center', 'Loss/box/scale',
+ 'Loss/box/offset', 'Loss/$TASK_NAME/keypoint/heatmap',
+ 'Loss/$TASK_NAME/keypoint/offset',
+ 'Loss/$TASK_NAME/keypoint/regression', 'Loss/segmentation/heatmap'] to
+ scalar tensors corresponding to the losses for different tasks. Note the
+ $TASK_NAME is provided by the KeypointEstimation namedtuple used to
+ differentiate between different keypoint tasks.
+ """
+
+ _, input_height, input_width, _ = _get_shape(
+ prediction_dict['preprocessed_inputs'], 4)
+
+ output_height, output_width = (input_height // self._stride,
+ input_width // self._stride)
+
+ # TODO(vighneshb) Explore whether using floor here is safe.
+ output_true_image_shapes = tf.ceil(
+ tf.to_float(true_image_shapes) / self._stride)
+ valid_anchor_weights = get_valid_anchor_weights_in_flattened_image(
+ output_true_image_shapes, output_height, output_width)
+ valid_anchor_weights = tf.expand_dims(valid_anchor_weights, 2)
+
+ object_center_loss = self._compute_object_center_loss(
+ object_center_predictions=prediction_dict[OBJECT_CENTER],
+ input_height=input_height,
+ input_width=input_width,
+ per_pixel_weights=valid_anchor_weights)
+ losses = {
+ OBJECT_CENTER:
+ self._center_params.object_center_loss_weight * object_center_loss
+ }
+ if self._od_params is not None:
+ od_losses = self._compute_object_detection_losses(
+ input_height=input_height,
+ input_width=input_width,
+ prediction_dict=prediction_dict,
+ per_pixel_weights=valid_anchor_weights)
+ for key in od_losses:
+ od_losses[key] = od_losses[key] * self._od_params.task_loss_weight
+ losses.update(od_losses)
+
+ if self._kp_params_dict is not None:
+ for task_name, params in self._kp_params_dict.items():
+ kp_losses = self._compute_keypoint_estimation_losses(
+ task_name=task_name,
+ input_height=input_height,
+ input_width=input_width,
+ prediction_dict=prediction_dict,
+ per_pixel_weights=valid_anchor_weights)
+ for key in kp_losses:
+ kp_losses[key] = kp_losses[key] * params.task_loss_weight
+ losses.update(kp_losses)
+
+ if self._mask_params is not None:
+ seg_losses = self._compute_segmentation_losses(
+ prediction_dict=prediction_dict,
+ per_pixel_weights=valid_anchor_weights)
+ for key in seg_losses:
+ seg_losses[key] = seg_losses[key] * self._mask_params.task_loss_weight
+ losses.update(seg_losses)
+
+ # Prepend the LOSS_KEY_PREFIX to the keys in the dictionary such that the
+ # losses will be grouped together in Tensorboard.
+ return {'%s/%s' % (LOSS_KEY_PREFIX, key): val
+ for key, val in losses.items()}
+
+ def postprocess(self, prediction_dict, true_image_shapes, **params):
+ """Produces boxes given a prediction dict returned by predict().
+
+ Although predict returns a list of tensors, only the last tensor in
+ each list is used for making box predictions.
+
+ Args:
+ prediction_dict: a dictionary holding predicted tensors from "predict"
+ function.
+ true_image_shapes: int32 tensor of shape [batch, 3] where each row is of
+ the form [height, width, channels] indicating the shapes of true images
+ in the resized images, as resized images can be padded with zeros.
+ **params: Currently ignored.
+
+ Returns:
+ detections: a dictionary containing the following fields
+ detection_boxes: A tensor of shape [batch, max_detections, 4]
+ holding the predicted boxes.
+ detection_scores: A tensor of shape [batch, max_detections] holding
+ the predicted score for each box.
+ detection_classes: An integer tensor of shape [batch, max_detections]
+ containing the detected class for each box.
+ num_detections: An integer tensor of shape [batch] containing the
+ number of detected boxes for each sample in the batch.
+ detection_keypoints: (Optional) A float tensor of shape [batch,
+ max_detections, num_keypoints, 2] with normalized keypoints. Any
+ invalid keypoints have their coordinates and scores set to 0.0.
+ detection_keypoint_scores: (Optional) A float tensor of shape [batch,
+ max_detections, num_keypoints] with scores for each keypoint.
+ detection_masks: (Optional) An int tensor of shape [batch,
+ max_detections, mask_height, mask_width] with binarized masks for each
+ detection.
+ """
+ object_center_prob = tf.nn.sigmoid(prediction_dict[OBJECT_CENTER][-1])
+ # Get x, y and channel indices corresponding to the top indices in the class
+ # center predictions.
+ detection_scores, y_indices, x_indices, channel_indices = (
+ top_k_feature_map_locations(
+ object_center_prob, max_pool_kernel_size=3,
+ k=self._center_params.max_box_predictions))
+ # NOTE(review): box heads are read unconditionally; confirm they exist.
+ boxes_strided, classes, scores, num_detections = (
+ prediction_tensors_to_boxes(
+ detection_scores, y_indices, x_indices, channel_indices,
+ prediction_dict[BOX_SCALE][-1], prediction_dict[BOX_OFFSET][-1]))
+
+ boxes = convert_strided_predictions_to_normalized_boxes(
+ boxes_strided, self._stride, true_image_shapes)
+
+ postprocess_dict = {
+ fields.DetectionResultFields.detection_boxes: boxes,
+ fields.DetectionResultFields.detection_scores: scores,
+ fields.DetectionResultFields.detection_classes: classes,
+ fields.DetectionResultFields.num_detections: num_detections,
+ }
+
+ if self._kp_params_dict:
+ keypoints, keypoint_scores = self._postprocess_keypoints(
+ prediction_dict, classes, y_indices, x_indices,
+ boxes_strided, num_detections)
+ keypoints, keypoint_scores = (
+ convert_strided_predictions_to_normalized_keypoints(
+ keypoints, keypoint_scores, self._stride, true_image_shapes,
+ clip_out_of_frame_keypoints=True))
+ postprocess_dict.update({
+ fields.DetectionResultFields.detection_keypoints: keypoints,
+ fields.DetectionResultFields.detection_keypoint_scores:
+ keypoint_scores
+ })
+
+ if self._mask_params:
+ masks = tf.nn.sigmoid(prediction_dict[SEGMENTATION_HEATMAP][-1])
+ instance_masks = convert_strided_predictions_to_instance_masks(
+ boxes, classes, masks, self._stride, self._mask_params.mask_height,
+ self._mask_params.mask_width, true_image_shapes,
+ self._mask_params.score_threshold)
+ postprocess_dict.update({
+ fields.DetectionResultFields.detection_masks:
+ instance_masks
+ })
+ return postprocess_dict
+
+ def _postprocess_keypoints(self, prediction_dict, classes, y_indices,
+ x_indices, boxes, num_detections):
+ """Performs postprocessing on keypoint predictions.
+
+ Args:
+ prediction_dict: a dictionary holding predicted tensors, returned from the
+ predict() method. This dictionary should contain keypoint prediction
+ feature maps for each keypoint task.
+ classes: A [batch_size, max_detections] int tensor with class indices for
+ all detected objects.
+ y_indices: A [batch_size, max_detections] int tensor with y indices for
+ all object centers.
+ x_indices: A [batch_size, max_detections] int tensor with x indices for
+ all object centers.
+ boxes: A [batch_size, max_detections, 4] float32 tensor with bounding
+ boxes in (un-normalized) output space.
+ num_detections: A [batch_size] int tensor with the number of valid
+ detections for each image.
+
+ Returns:
+ A tuple of
+ keypoints: a [batch_size, max_detections, num_total_keypoints, 2] float32
+ tensor with keypoints in the output (strided) coordinate frame.
+ keypoint_scores: a [batch_size, max_detections, num_total_keypoints]
+ float32 tensor with keypoint scores.
+ """
+ total_num_keypoints = sum(len(kp_dict.keypoint_indices) for kp_dict
+ in self._kp_params_dict.values())
+ batch_size, max_detections, _ = _get_shape(boxes, 3)
+ kpt_coords_for_example_list = []
+ kpt_scores_for_example_list = []
+ for ex_ind in range(batch_size):
+ kpt_coords_for_class_list = []
+ kpt_scores_for_class_list = []
+ instance_inds_for_class_list = []
+ for task_name, kp_params in self._kp_params_dict.items():
+ keypoint_heatmap = prediction_dict[
+ get_keypoint_name(task_name, KEYPOINT_HEATMAP)][-1]
+ keypoint_offsets = prediction_dict[
+ get_keypoint_name(task_name, KEYPOINT_OFFSET)][-1]
+ keypoint_regression = prediction_dict[
+ get_keypoint_name(task_name, KEYPOINT_REGRESSION)][-1]
+ instance_inds = self._get_instance_indices(
+ classes, num_detections, ex_ind, kp_params.class_id)
+
+ def true_fn(
+ keypoint_heatmap, keypoint_offsets, keypoint_regression,
+ classes, y_indices, x_indices, boxes, instance_inds,
+ ex_ind, kp_params):
+ """Logic to execute when instance_inds is not empty."""
+ # Postprocess keypoints and scores for class and single image. Shapes
+ # are [1, num_instances_i, num_keypoints_i, 2] and
+ # [1, num_instances_i, num_keypoints_i], respectively. Note that
+ # num_instances_i and num_keypoints_i refers to the number of
+ # instances and keypoints for class i, respectively.
+ kpt_coords_for_class, kpt_scores_for_class = (
+ self._postprocess_keypoints_for_class_and_image(
+ keypoint_heatmap, keypoint_offsets, keypoint_regression,
+ classes, y_indices, x_indices, boxes, instance_inds,
+ ex_ind, kp_params))
+ # Expand keypoint dimension (with padding) so that coordinates and
+ # scores have shape [1, num_instances_i, num_total_keypoints, 2] and
+ # [1, num_instances_i, num_total_keypoints], respectively.
+ kpts_coords_for_class_padded, kpt_scores_for_class_padded = (
+ _pad_to_full_keypoint_dim(
+ kpt_coords_for_class, kpt_scores_for_class,
+ kp_params.keypoint_indices, total_num_keypoints))
+ return kpts_coords_for_class_padded, kpt_scores_for_class_padded
+
+ def false_fn():
+ """Logic to execute when instance_inds is empty."""
+ return (tf.zeros([1, 0, total_num_keypoints, 2], dtype=tf.float32),
+ tf.zeros([1, 0, total_num_keypoints], dtype=tf.float32))
+
+ true_fn = functools.partial(  # tf.cond calls branches with no args.
+ true_fn, keypoint_heatmap, keypoint_offsets, keypoint_regression,
+ classes, y_indices, x_indices, boxes, instance_inds, ex_ind,
+ kp_params)
+ results = tf.cond(tf.size(instance_inds) > 0, true_fn, false_fn)
+
+ kpt_coords_for_class_list.append(results[0])
+ kpt_scores_for_class_list.append(results[1])
+ instance_inds_for_class_list.append(instance_inds)
+
+ # Concatenate all keypoints across all classes (single example).
+ kpt_coords_for_example = tf.concat(kpt_coords_for_class_list, axis=1)
+ kpt_scores_for_example = tf.concat(kpt_scores_for_class_list, axis=1)
+ instance_inds_for_example = tf.concat(instance_inds_for_class_list,
+ axis=0)
+ # NOTE(review): Python `if` on a tensor; confirm eager/autograph use.
+ if tf.size(instance_inds_for_example) > 0:
+ # Scatter into tensor where instances align with original detection
+ # instances. New shape of keypoint coordinates and scores are
+ # [1, max_detections, num_total_keypoints, 2] and
+ # [1, max_detections, num_total_keypoints], respectively.
+ kpt_coords_for_example_all_det, kpt_scores_for_example_all_det = (
+ _pad_to_full_instance_dim(
+ kpt_coords_for_example, kpt_scores_for_example,
+ instance_inds_for_example,
+ self._center_params.max_box_predictions))
+ else:
+ kpt_coords_for_example_all_det = tf.zeros(
+ [1, max_detections, total_num_keypoints, 2], dtype=tf.float32)
+ kpt_scores_for_example_all_det = tf.zeros(
+ [1, max_detections, total_num_keypoints], dtype=tf.float32)
+
+ kpt_coords_for_example_list.append(kpt_coords_for_example_all_det)
+ kpt_scores_for_example_list.append(kpt_scores_for_example_all_det)
+
+ # Concatenate all keypoints and scores from all examples in the batch.
+ # Shapes are [batch_size, max_detections, num_total_keypoints, 2] and
+ # [batch_size, max_detections, num_total_keypoints], respectively.
+ keypoints = tf.concat(kpt_coords_for_example_list, axis=0)
+ keypoint_scores = tf.concat(kpt_scores_for_example_list, axis=0)
+
+ return keypoints, keypoint_scores
+
+ def _get_instance_indices(self, classes, num_detections, batch_index,
+ class_id):
+ """Gets the instance indices that match the target class ID.
+
+ Args:
+ classes: A [batch_size, max_detections] int tensor with class indices for
+ all detected objects.
+ num_detections: A [batch_size] int tensor with the number of valid
+ detections for each image.
+ batch_index: An integer specifying the index for an example in the batch.
+ class_id: Class id matched against the entries of `classes`.
+
+ Returns:
+ instance_inds: A [num_instances] int tensor where each element indicates
+ the instance location within the `classes` tensor. This is useful to
+ associate the refined keypoints with the original detections (i.e.
+ boxes).
+ """
+ classes = classes[batch_index:batch_index+1, ...]
+ _, max_detections = shape_utils.combined_static_and_dynamic_shape(
+ classes)
+ # Mask of valid detections (index < num_detections) with the target class.
+ valid_detections_with_kpt_class = tf.math.logical_and(
+ tf.range(max_detections) < num_detections[batch_index],
+ classes[0] == class_id)
+ instance_inds = tf.where(valid_detections_with_kpt_class)[:, 0]
+ return instance_inds
+
+ def _postprocess_keypoints_for_class_and_image(
+ self, keypoint_heatmap, keypoint_offsets, keypoint_regression, classes,
+ y_indices, x_indices, boxes, indices_with_kpt_class, batch_index,
+ kp_params):
+ """Postprocess keypoints for a single image and class.
+
+ This function performs the following postprocessing operations on a single
+ image and single keypoint class:
+ - Converts keypoints scores to range [0, 1] with sigmoid.
+ - Selects the detections listed in `indices_with_kpt_class`.
+ - Gathers the regressed keypoints at the detection (i.e. box) centers.
+ - Gathers keypoint candidates from the keypoint heatmaps.
+ - Snaps regressed keypoints to nearby keypoint candidates.
+
+ Args:
+ keypoint_heatmap: A [batch_size, height, width, num_keypoints] float32
+ tensor with keypoint heatmaps.
+ keypoint_offsets: A [batch_size, height, width, 2] float32 tensor with
+ local offsets to keypoint centers.
+ keypoint_regression: A [batch_size, height, width, 2 * num_keypoints]
+ float32 tensor with regressed offsets to all keypoints.
+ classes: A [batch_size, max_detections] int tensor with class indices for
+ all detected objects. Not referenced in this method's body.
+ y_indices: A [batch_size, max_detections] int tensor with y indices for
+ all object centers.
+ x_indices: A [batch_size, max_detections] int tensor with x indices for
+ all object centers.
+ boxes: A [batch_size, max_detections, 4] float32 tensor with detected
+ boxes in the output (strided) frame.
+ indices_with_kpt_class: A [num_instances] int tensor where each element
+ indicates the instance location within the `classes` tensor. This is
+ useful to associate the refined keypoints with the original detections
+ (i.e. boxes)
+ batch_index: An integer specifying the index for an example in the batch.
+ kp_params: A `KeypointEstimationParams` object with parameters for a
+ single keypoint class.
+
+ Returns:
+ A tuple of
+ refined_keypoints: A [1, num_instances, num_keypoints, 2] float32 tensor
+ with refined keypoints for a single class in a single image, expressed
+ in the output (strided) coordinate frame. Note that `num_instances` is a
+ dynamic dimension, and corresponds to the number of valid detections
+ for the specific class.
+ refined_scores: A [1, num_instances, num_keypoints] float32 tensor with
+ keypoint scores.
+ """
+ keypoint_indices = kp_params.keypoint_indices
+ num_keypoints = len(keypoint_indices)
+
+ keypoint_heatmap = tf.nn.sigmoid(
+ keypoint_heatmap[batch_index:batch_index+1, ...])
+ keypoint_offsets = keypoint_offsets[batch_index:batch_index+1, ...]
+ keypoint_regression = keypoint_regression[batch_index:batch_index+1, ...]
+ y_indices = y_indices[batch_index:batch_index+1, ...]
+ x_indices = x_indices[batch_index:batch_index+1, ...]
+ boxes = boxes[batch_index:batch_index+1, ...]  # This example only.
+ # Gather the feature map locations corresponding to the object class.
+ y_indices_for_kpt_class = tf.gather(y_indices, indices_with_kpt_class,
+ axis=1)
+ x_indices_for_kpt_class = tf.gather(x_indices, indices_with_kpt_class,
+ axis=1)
+ boxes_for_kpt_class = tf.gather(boxes, indices_with_kpt_class, axis=1)
+
+ # Gather the regressed keypoints. Final tensor has shape
+ # [1, num_instances, num_keypoints, 2].
+ regressed_keypoints_for_objects = regressed_keypoints_at_object_centers(
+ keypoint_regression, y_indices_for_kpt_class, x_indices_for_kpt_class)
+ regressed_keypoints_for_objects = tf.reshape(
+ regressed_keypoints_for_objects, [1, -1, num_keypoints, 2])
+
+ # Get the candidate keypoints and scores.
+ # The shape of keypoint_candidates and keypoint_scores is:
+ # [1, num_candidates_per_keypoint, num_keypoints, 2] and
+ # [1, num_candidates_per_keypoint, num_keypoints], respectively.
+ keypoint_candidates, keypoint_scores, num_keypoint_candidates = (
+ prediction_tensors_to_keypoint_candidates(
+ keypoint_heatmap, keypoint_offsets,
+ keypoint_score_threshold=(
+ kp_params.keypoint_candidate_score_threshold),
+ max_pool_kernel_size=kp_params.peak_max_pool_kernel_size,
+ max_candidates=kp_params.num_candidates_per_keypoint))
+
+ # Get the refined keypoints and scores, of shape
+ # [1, num_instances, num_keypoints, 2] and
+ # [1, num_instances, num_keypoints], respectively.
+ refined_keypoints, refined_scores = refine_keypoints(
+ regressed_keypoints_for_objects, keypoint_candidates, keypoint_scores,
+ num_keypoint_candidates, bboxes=boxes_for_kpt_class,
+ unmatched_keypoint_score=kp_params.unmatched_keypoint_score,
+ box_scale=kp_params.box_scale,
+ candidate_search_scale=kp_params.candidate_search_scale,
+ candidate_ranking_mode=kp_params.candidate_ranking_mode)
+
+ return refined_keypoints, refined_scores
+
+ def regularization_losses(self):
+ return []  # Always an empty list.
+
+ def restore_map(self,  # Unconditionally raises RuntimeError.
+ fine_tune_checkpoint_type='detection',
+ load_all_detection_checkpoint_vars=False):
+ raise RuntimeError('CenterNetMetaArch not supported under TF1.x.')
+
+ def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
+ """Returns a map of Trackable objects to load from a foreign checkpoint.
+
+ Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
+ or Checkpoint). This enables the model to initialize based on weights from
+ another task. For example, the feature extractor variables from a
+ classification model can be used to bootstrap training of an object
+ detector. When loading from an object detection model, the checkpoint model
+ should have the same parameters as this detection model with exception of
+ the num_classes parameter.
+
+ Note that this function is intended to be used to restore Keras-based
+ models when running Tensorflow 2, whereas restore_map (not implemented
+ in CenterNet) is intended to be used to restore Slim-based models when
+ running Tensorflow 1.x.
+
+ TODO(jonathanhuang): Make this function consistent with other
+ meta-architectures.
+
+ Args:
+ fine_tune_checkpoint_type: whether to restore from a full detection
+ checkpoint (with compatible variable names) or to restore from a
+ classification checkpoint for initialization prior to training.
+ `detection` (default) or `classification`; else raises ValueError.
+
+ Returns:
+ A dict with key 'feature_extractor' mapped to a Trackable object.
+ """
+
+ if fine_tune_checkpoint_type == 'classification':
+ return {'feature_extractor': self._feature_extractor.get_base_model()}
+
+ if fine_tune_checkpoint_type == 'detection':
+ return {'feature_extractor': self._feature_extractor.get_model()}
+
+ else:
+ raise ValueError('Not supported fine tune checkpoint type - {}'.format(
+ fine_tune_checkpoint_type))
+
+ def updates(self):  # Unconditionally raises RuntimeError.
+ raise RuntimeError('This model is intended to be used with model_lib_v2 '
+ 'which does not support updates()')
diff --git a/research/object_detection/meta_architectures/center_net_meta_arch_tf2_test.py b/research/object_detection/meta_architectures/center_net_meta_arch_tf2_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..298081b7bdf27ab4023b085adb10ad4cacd96921
--- /dev/null
+++ b/research/object_detection/meta_architectures/center_net_meta_arch_tf2_test.py
@@ -0,0 +1,1681 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the CenterNet Meta architecture code."""
+
+from __future__ import division
+
+import functools
+import unittest
+from absl.testing import parameterized
+import numpy as np
+import tensorflow.compat.v1 as tf
+
+from object_detection.core import losses
+from object_detection.core import preprocessor
+from object_detection.core import standard_fields as fields
+from object_detection.core import target_assigner as cn_assigner
+from object_detection.meta_architectures import center_net_meta_arch as cnma
+from object_detection.models import center_net_resnet_feature_extractor
+from object_detection.utils import test_case
+from object_detection.utils import tf_version
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class CenterNetMetaArchPredictionHeadTest(test_case.TestCase):
+ """Test CenterNet meta architecture prediction head."""
+
+ def test_prediction_head(self):
+ head = cnma.make_prediction_net(num_out_channels=7)
+ output = head(np.zeros((4, 128, 128, 8)))
+ # Only the channel dimension is expected to change (8 -> 7).
+ self.assertEqual((4, 128, 128, 7), output.shape)
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
+ """Test for CenterNet meta architecture related functions."""
+
+ def test_row_col_indices_from_flattened_indices(self):
+ """Tests that the computation of row, col, channel indices is correct."""
+
+ r_grid, c_grid, ch_grid = (np.zeros((5, 4, 3), dtype=int),
+ np.zeros((5, 4, 3), dtype=int),
+ np.zeros((5, 4, 3), dtype=int))
+
+ r_grid[..., 0] = r_grid[..., 1] = r_grid[..., 2] = np.array(
+ [[0, 0, 0, 0],
+ [1, 1, 1, 1],
+ [2, 2, 2, 2],
+ [3, 3, 3, 3],
+ [4, 4, 4, 4]]
+ )
+
+ c_grid[..., 0] = c_grid[..., 1] = c_grid[..., 2] = np.array(
+ [[0, 1, 2, 3],
+ [0, 1, 2, 3],
+ [0, 1, 2, 3],
+ [0, 1, 2, 3],
+ [0, 1, 2, 3]]
+ )
+
+ for i in range(3):
+ ch_grid[..., i] = i
+
+ indices = np.arange(60)
+ ri, ci, chi = cnma.row_col_channel_indices_from_flattened_indices(
+ indices, 4, 3)
+
+ np.testing.assert_array_equal(ri, r_grid.flatten())
+ np.testing.assert_array_equal(ci, c_grid.flatten())
+ np.testing.assert_array_equal(chi, ch_grid.flatten())
+
+ def test_flattened_indices_from_row_col_indices(self):
+ """Tests that a 3x4 (row, col) grid maps to flat indices 0..11."""
+ r = np.array(
+ [[0, 0, 0, 0],
+ [1, 1, 1, 1],
+ [2, 2, 2, 2]]
+ )
+
+ c = np.array(
+ [[0, 1, 2, 3],
+ [0, 1, 2, 3],
+ [0, 1, 2, 3]]
+ )
+
+ idx = cnma.flattened_indices_from_row_col_indices(r, c, 4)
+ np.testing.assert_array_equal(np.arange(12), idx.flatten())
+
+ def test_get_valid_anchor_weights_in_flattened_image(self):
+ """Tests that the anchor weights are valid upon flattening out."""
+
+ valid_weights = np.zeros((2, 5, 5), dtype=float)
+
+ valid_weights[0, :3, :4] = 1.0
+ valid_weights[1, :2, :2] = 1.0
+
+ def graph_fn():
+ true_image_shapes = tf.constant([[3, 4], [2, 2]])
+ w = cnma.get_valid_anchor_weights_in_flattened_image(
+ true_image_shapes, 5, 5)
+ return w
+
+ w = self.execute(graph_fn, [])
+ np.testing.assert_allclose(w, valid_weights.reshape(2, -1))
+ self.assertEqual((2, 25), w.shape)
+
+ def test_convert_strided_predictions_to_normalized_boxes(self):
+ """Tests that boxes have correct coordinates in normalized input space."""
+
+ def graph_fn():
+ boxes = np.zeros((2, 3, 4), dtype=np.float32)
+
+ boxes[0] = [[10, 20, 30, 40], [20, 30, 50, 100], [50, 60, 100, 180]]
+ boxes[1] = [[-5, -5, 5, 5], [45, 60, 110, 120], [150, 150, 200, 250]]
+
+ true_image_shapes = tf.constant([[100, 90, 3], [150, 150, 3]])
+
+ clipped_boxes = (
+ cnma.convert_strided_predictions_to_normalized_boxes(
+ boxes, 2, true_image_shapes))
+ return clipped_boxes
+
+ clipped_boxes = self.execute(graph_fn, [])
+ # Expected: boxes * stride / true (height, width), clipped to [0, 1].
+ expected_boxes = np.zeros((2, 3, 4), dtype=np.float32)
+ expected_boxes[0] = [[0.2, 4./9, 0.6, 8./9], [0.4, 2./3, 1, 1],
+ [1, 1, 1, 1]]
+ expected_boxes[1] = [[0., 0, 1./15, 1./15], [3./5, 4./5, 1, 1],
+ [1, 1, 1, 1]]
+
+ np.testing.assert_allclose(expected_boxes, clipped_boxes)
+
+ @parameterized.parameters(
+ {'clip_to_window': True},
+ {'clip_to_window': False}
+ )
+ def test_convert_strided_predictions_to_normalized_keypoints(
+ self, clip_to_window):
+ """Tests that keypoints have correct coordinates in normalized coords."""
+
+ keypoint_coords_np = np.array(
+ [
+ # Example 0.
+ [
+ [[-10., 8.], [60., 22.], [60., 120.]],
+ [[20., 20.], [0., 0.], [0., 0.]],
+ ],
+ # Example 1.
+ [
+ [[40., 50.], [20., 160.], [200., 150.]],
+ [[10., 0.], [40., 10.], [0., 0.]],
+ ],
+ ], dtype=np.float32)
+ keypoint_scores_np = np.array(
+ [
+ # Example 0.
+ [
+ [1.0, 0.9, 0.2],
+ [0.7, 0.0, 0.0],
+ ],
+ # Example 1.
+ [
+ [1.0, 1.0, 0.2],
+ [0.7, 0.6, 0.0],
+ ],
+ ], dtype=np.float32)
+
+ def graph_fn():
+ keypoint_coords = tf.constant(keypoint_coords_np, dtype=tf.float32)
+ keypoint_scores = tf.constant(keypoint_scores_np, dtype=tf.float32)
+ true_image_shapes = tf.constant([[320, 400, 3], [640, 640, 3]])
+ stride = 4
+
+ keypoint_coords_out, keypoint_scores_out = (
+ cnma.convert_strided_predictions_to_normalized_keypoints(
+ keypoint_coords, keypoint_scores, stride, true_image_shapes,
+ clip_to_window))
+ return keypoint_coords_out, keypoint_scores_out
+
+ keypoint_coords_out, keypoint_scores_out = self.execute(graph_fn, [])
+ # Clipping clamps coordinates to [0, 1] and zeroes the clamped scores.
+ if clip_to_window:
+ expected_keypoint_coords_np = np.array(
+ [
+ # Example 0.
+ [
+ [[0.0, 0.08], [0.75, 0.22], [0.75, 1.0]],
+ [[0.25, 0.2], [0., 0.], [0.0, 0.0]],
+ ],
+ # Example 1.
+ [
+ [[0.25, 0.3125], [0.125, 1.0], [1.0, 0.9375]],
+ [[0.0625, 0.], [0.25, 0.0625], [0., 0.]],
+ ],
+ ], dtype=np.float32)
+ expected_keypoint_scores_np = np.array(
+ [
+ # Example 0.
+ [
+ [0.0, 0.9, 0.0],
+ [0.7, 0.0, 0.0],
+ ],
+ # Example 1.
+ [
+ [1.0, 1.0, 0.0],
+ [0.7, 0.6, 0.0],
+ ],
+ ], dtype=np.float32)
+ else:
+ expected_keypoint_coords_np = np.array(
+ [
+ # Example 0.
+ [
+ [[-0.125, 0.08], [0.75, 0.22], [0.75, 1.2]],
+ [[0.25, 0.2], [0., 0.], [0., 0.]],
+ ],
+ # Example 1.
+ [
+ [[0.25, 0.3125], [0.125, 1.0], [1.25, 0.9375]],
+ [[0.0625, 0.], [0.25, 0.0625], [0., 0.]],
+ ],
+ ], dtype=np.float32)
+ expected_keypoint_scores_np = np.array(
+ [
+ # Example 0.
+ [
+ [1.0, 0.9, 0.2],
+ [0.7, 0.0, 0.0],
+ ],
+ # Example 1.
+ [
+ [1.0, 1.0, 0.2],
+ [0.7, 0.6, 0.0],
+ ],
+ ], dtype=np.float32)
+ np.testing.assert_allclose(expected_keypoint_coords_np, keypoint_coords_out)
+ np.testing.assert_allclose(expected_keypoint_scores_np, keypoint_scores_out)
+
+ def test_convert_strided_predictions_to_instance_masks(self):
+ """Tests instance masks for three boxes drawn from two class masks."""
+ def graph_fn():
+ boxes = tf.constant(
+ [
+ [[0.5, 0.5, 1.0, 1.0],
+ [0.0, 0.5, 0.5, 1.0],
+ [0.0, 0.0, 0.0, 0.0]],
+ ], tf.float32)
+ classes = tf.constant(
+ [
+ [0, 1, 0],
+ ], tf.int32)
+ masks_np = np.zeros((1, 4, 4, 2), dtype=np.float32)
+ masks_np[0, :, 2:, 0] = 1 # Class 0.
+ masks_np[0, :, :3, 1] = 1 # Class 1.
+ masks = tf.constant(masks_np)
+ true_image_shapes = tf.constant([[6, 8, 3]])
+ instance_masks = cnma.convert_strided_predictions_to_instance_masks(
+ boxes, classes, masks, stride=2, mask_height=2, mask_width=2,
+ true_image_shapes=true_image_shapes)
+ return instance_masks
+
+ instance_masks = self.execute_cpu(graph_fn, [])
+
+ expected_instance_masks = np.array(
+ [
+ [
+ # Mask 0 (class 0).
+ [[1, 1],
+ [1, 1]],
+ # Mask 1 (class 1).
+ [[1, 0],
+ [1, 0]],
+ # Mask 2 (class 0).
+ [[0, 0],
+ [0, 0]],
+ ]
+ ])
+ np.testing.assert_array_equal(expected_instance_masks, instance_masks)
+
+ def test_top_k_feature_map_locations(self):
+ feature_map_np = np.zeros((2, 3, 3, 2), dtype=np.float32)
+ feature_map_np[0, 2, 0, 1] = 1.0
+ feature_map_np[0, 2, 1, 1] = 0.9 # Gets filtered due to max pool.
+ feature_map_np[0, 0, 1, 0] = 0.7
+ feature_map_np[0, 2, 2, 0] = 0.5
+ feature_map_np[0, 2, 2, 1] = -0.3
+ feature_map_np[1, 2, 1, 1] = 0.7
+ feature_map_np[1, 1, 0, 0] = 0.4
+ feature_map_np[1, 1, 2, 0] = 0.1
+
+ def graph_fn():
+ feature_map = tf.constant(feature_map_np)
+ scores, y_inds, x_inds, channel_inds = (
+ cnma.top_k_feature_map_locations(
+ feature_map, max_pool_kernel_size=3, k=3))
+ return scores, y_inds, x_inds, channel_inds
+
+ scores, y_inds, x_inds, channel_inds = self.execute(graph_fn, [])
+
+ np.testing.assert_allclose([1.0, 0.7, 0.5], scores[0])
+ np.testing.assert_array_equal([2, 0, 2], y_inds[0])
+ np.testing.assert_array_equal([0, 1, 2], x_inds[0])
+ np.testing.assert_array_equal([1, 0, 0], channel_inds[0])
+
+ np.testing.assert_allclose([0.7, 0.4, 0.1], scores[1])
+ np.testing.assert_array_equal([2, 1, 1], y_inds[1])
+ np.testing.assert_array_equal([1, 0, 2], x_inds[1])
+ np.testing.assert_array_equal([1, 0, 0], channel_inds[1])
+
+ def test_top_k_feature_map_locations_no_pooling(self):
+ feature_map_np = np.zeros((2, 3, 3, 2), dtype=np.float32)
+ feature_map_np[0, 2, 0, 1] = 1.0
+ feature_map_np[0, 2, 1, 1] = 0.9  # Kept here (max_pool_kernel_size=1).
+ feature_map_np[0, 0, 1, 0] = 0.7
+ feature_map_np[0, 2, 2, 0] = 0.5
+ feature_map_np[0, 2, 2, 1] = -0.3
+ feature_map_np[1, 2, 1, 1] = 0.7
+ feature_map_np[1, 1, 0, 0] = 0.4
+ feature_map_np[1, 1, 2, 0] = 0.1
+
+ def graph_fn():
+ feature_map = tf.constant(feature_map_np)
+ scores, y_inds, x_inds, channel_inds = (
+ cnma.top_k_feature_map_locations(
+ feature_map, max_pool_kernel_size=1, k=3))
+ return scores, y_inds, x_inds, channel_inds
+
+ scores, y_inds, x_inds, channel_inds = self.execute(graph_fn, [])
+
+ np.testing.assert_allclose([1.0, 0.9, 0.7], scores[0])
+ np.testing.assert_array_equal([2, 2, 0], y_inds[0])
+ np.testing.assert_array_equal([0, 1, 1], x_inds[0])
+ np.testing.assert_array_equal([1, 1, 0], channel_inds[0])
+
+ np.testing.assert_allclose([0.7, 0.4, 0.1], scores[1])
+ np.testing.assert_array_equal([2, 1, 1], y_inds[1])
+ np.testing.assert_array_equal([1, 0, 2], x_inds[1])
+ np.testing.assert_array_equal([1, 0, 0], channel_inds[1])
+
+ def test_top_k_feature_map_locations_per_channel(self):
+   """Checks top-k peak selection with per_channel=True (k=2, kernel 3)."""
+   # (batch, y, x, channel) -> score for every non-zero heatmap entry.
+   peak_scores = {
+       (0, 2, 0, 0): 1.0,  # Selected.
+       (0, 2, 1, 0): 0.9,  # Gets filtered due to max pool.
+       (0, 0, 1, 0): 0.7,  # Selected.
+       (0, 2, 2, 1): 0.5,  # Selected.
+       (0, 0, 0, 1): 0.3,  # Selected.
+       (1, 2, 1, 0): 0.7,  # Selected.
+       (1, 1, 0, 0): 0.4,  # Gets filtered due to max pool.
+       (1, 1, 2, 0): 0.3,  # Gets filtered due to max pool.
+       (1, 1, 0, 1): 0.8,  # Selected.
+       (1, 1, 2, 1): 0.3,  # Selected.
+   }
+   heatmap_np = np.zeros((2, 3, 3, 2), dtype=np.float32)
+   for location, score in peak_scores.items():
+     heatmap_np[location] = score
+
+   def graph_fn():
+     top_scores, top_ys, top_xs, top_channels = (
+         cnma.top_k_feature_map_locations(
+             tf.constant(heatmap_np),
+             max_pool_kernel_size=3,
+             k=2,
+             per_channel=True))
+     return top_scores, top_ys, top_xs, top_channels
+
+   scores, y_inds, x_inds, channel_inds = self.execute(graph_fn, [])
+
+   # One (scores, y, x, channel) expectation tuple per batch example; each
+   # holds k=2 entries for channel 0 followed by k=2 entries for channel 1.
+   expected_per_example = [
+       ([1.0, 0.7, 0.5, 0.3], [2, 0, 2, 0], [0, 1, 2, 0], [0, 0, 1, 1]),
+       ([0.7, 0.0, 0.8, 0.3], [2, 0, 1, 1], [1, 0, 0, 2], [0, 0, 1, 1]),
+   ]
+   for i, (exp_scores, exp_ys, exp_xs, exp_channels) in enumerate(
+       expected_per_example):
+     np.testing.assert_allclose(exp_scores, scores[i])
+     np.testing.assert_array_equal(exp_ys, y_inds[i])
+     np.testing.assert_array_equal(exp_xs, x_inds[i])
+     np.testing.assert_array_equal(exp_channels, channel_inds[i])
+
+ def test_box_prediction(self):
+   """Checks boxes decoded from top-k centers, box sizes and offsets."""
+   class_pred = np.zeros((3, 128, 128, 5), dtype=np.float32)
+   hw_pred = np.zeros((3, 128, 128, 2), dtype=np.float32)
+   offset_pred = np.zeros((3, 128, 128, 2), dtype=np.float32)
+
+   # Each entry: (sample, y, x, class scores, [height, width], [dy, dx]).
+   planted_predictions = [
+       # Sample 1, 2 boxes
+       (0, 10, 20, [0.3, .7, 0.0, 0.0, 0.0], [40, 60], [1, 2]),
+       (0, 50, 60, [0.55, 0.0, 0.0, 0.0, 0.45], [50, 50], [0, 0]),
+       # Sample 2, 2 boxes (at same location)
+       (1, 100, 100, [0.0, 0.1, 0.9, 0.0, 0.0], [10, 10], [1, 3]),
+       # Sample 3, 3 boxes
+       (2, 60, 90, [0.0, 0.0, 0.0, 0.2, 0.8], [40, 30], [0, 0]),
+       (2, 65, 95, [0.0, 0.7, 0.3, 0.0, 0.0], [20, 20], [1, 2]),
+       (2, 75, 85, [1.0, 0.0, 0.0, 0.0, 0.0], [21, 25], [5, 2]),
+   ]
+   for sample, y, x, class_scores, height_width, yx_offset in (
+       planted_predictions):
+     class_pred[sample, y, x] = class_scores
+     hw_pred[sample, y, x] = height_width
+     offset_pred[sample, y, x] = yx_offset
+
+   def graph_fn():
+     class_pred_tensor = tf.constant(class_pred)
+     hw_pred_tensor = tf.constant(hw_pred)
+     offset_pred_tensor = tf.constant(offset_pred)
+
+     top_scores, top_ys, top_xs, top_channels = (
+         cnma.top_k_feature_map_locations(
+             class_pred_tensor, max_pool_kernel_size=3, k=2))
+
+     det_boxes, det_classes, det_scores, det_count = (
+         cnma.prediction_tensors_to_boxes(
+             top_scores, top_ys, top_xs, top_channels,
+             hw_pred_tensor, offset_pred_tensor))
+     return det_boxes, det_classes, det_scores, det_count
+
+   boxes, classes, scores, num_dets = self.execute(graph_fn, [])
+
+   np.testing.assert_array_equal(num_dets, [2, 2, 2])
+
+   expected_boxes = [
+       [[-9, -8, 31, 52], [25, 35, 75, 85]],
+       [[96, 98, 106, 108], [96, 98, 106, 108]],
+       [[69.5, 74.5, 90.5, 99.5], [40, 75, 80, 105]],
+   ]
+   for sample, sample_boxes in enumerate(expected_boxes):
+     np.testing.assert_allclose(sample_boxes, boxes[sample])
+
+   expected_classes = [[1, 0], [2, 1], [0, 4]]
+   for sample, sample_classes in enumerate(expected_classes):
+     np.testing.assert_array_equal(classes[sample], sample_classes)
+
+   np.testing.assert_allclose(scores[0], [.7, .55])
+   # Only the first score of sample 2 is compared.
+   np.testing.assert_allclose(scores[1][:1], [.9])
+   np.testing.assert_allclose(scores[2], [1., .8])
+
+ def test_keypoint_candidate_prediction(self):
+   """Checks keypoint candidates from a heatmap with a shared 2-channel offset."""
+   # (batch, y, x, keypoint channel) -> heatmap score.
+   heatmap_entries = {
+       (0, 0, 0, 0): 1.0,
+       (0, 2, 1, 0): 0.7,
+       (0, 1, 1, 0): 0.6,
+       (0, 0, 2, 1): 0.7,
+       (0, 1, 1, 1): 0.3,  # Filtered by low score.
+       (0, 2, 2, 1): 0.2,
+       (1, 1, 0, 0): 0.6,
+       (1, 2, 1, 0): 0.5,
+       (1, 0, 0, 0): 0.4,
+       (1, 0, 0, 1): 1.0,
+       (1, 0, 1, 1): 0.9,
+       (1, 2, 0, 1): 0.8,
+   }
+   keypoint_heatmap_np = np.zeros((2, 3, 3, 2), dtype=np.float32)
+   for location, score in heatmap_entries.items():
+     keypoint_heatmap_np[location] = score
+
+   # (batch, y, x) -> offset vector stored at that location.
+   offset_entries = {
+       (0, 0, 0): [0.5, 0.25],
+       (0, 2, 1): [-0.25, 0.5],
+       (0, 1, 1): [0.0, 0.0],
+       (0, 0, 2): [1.0, 0.0],
+       (0, 2, 2): [1.0, 1.0],
+       (1, 1, 0): [0.25, 0.5],
+       (1, 2, 1): [0.5, 0.0],
+       (1, 0, 0): [0.0, -0.5],
+       (1, 0, 1): [0.5, -0.5],
+       (1, 2, 0): [-1.0, -0.5],
+   }
+   keypoint_heatmap_offsets_np = np.zeros((2, 3, 3, 2), dtype=np.float32)
+   for location, offset in offset_entries.items():
+     keypoint_heatmap_offsets_np[location] = offset
+
+   def graph_fn():
+     heatmap = tf.constant(keypoint_heatmap_np, dtype=tf.float32)
+     heatmap_offsets = tf.constant(
+         keypoint_heatmap_offsets_np, dtype=tf.float32)
+
+     candidates, candidate_scores, candidate_counts = (
+         cnma.prediction_tensors_to_keypoint_candidates(
+             heatmap,
+             heatmap_offsets,
+             keypoint_score_threshold=0.5,
+             max_pool_kernel_size=1,
+             max_candidates=2))
+     return candidates, candidate_scores, candidate_counts
+
+   (keypoint_cands, keypoint_scores,
+    num_keypoint_candidates) = self.execute(graph_fn, [])
+
+   expected_keypoint_candidates = [
+       [  # Example 0.
+           [[0.5, 0.25], [1.0, 2.0]],  # Candidate 1 of keypoint 1, 2.
+           [[1.75, 1.5], [1.0, 1.0]],  # Candidate 2 of keypoint 1, 2.
+       ],
+       [  # Example 1.
+           [[1.25, 0.5], [0.0, -0.5]],  # Candidate 1 of keypoint 1, 2.
+           [[2.5, 1.0], [0.5, 0.5]],  # Candidate 2 of keypoint 1, 2.
+       ],
+   ]
+   expected_keypoint_scores = [
+       [  # Example 0.
+           [1.0, 0.7],  # Candidate 1 scores of keypoint 1, 2.
+           [0.7, 0.3],  # Candidate 2 scores of keypoint 1, 2.
+       ],
+       [  # Example 1.
+           [0.6, 1.0],  # Candidate 1 scores of keypoint 1, 2.
+           [0.5, 0.9],  # Candidate 2 scores of keypoint 1, 2.
+       ],
+   ]
+   expected_num_keypoint_candidates = [[2, 1], [2, 2]]
+
+   np.testing.assert_allclose(expected_keypoint_candidates, keypoint_cands)
+   np.testing.assert_allclose(expected_keypoint_scores, keypoint_scores)
+   np.testing.assert_array_equal(expected_num_keypoint_candidates,
+                                 num_keypoint_candidates)
+
+ def test_keypoint_candidate_prediction_per_keypoints(self):
+   """Checks keypoint candidates when the offset map has 4 channels.
+
+   The heatmap has 2 keypoint channels and the offset map has 4 channels;
+   the expected values below match those of the 2-channel offset test.
+   """
+   # (batch, y, x, keypoint channel) -> heatmap score.
+   heatmap_entries = {
+       (0, 0, 0, 0): 1.0,
+       (0, 2, 1, 0): 0.7,
+       (0, 1, 1, 0): 0.6,
+       (0, 0, 2, 1): 0.7,
+       (0, 1, 1, 1): 0.3,  # Filtered by low score.
+       (0, 2, 2, 1): 0.2,
+       (1, 1, 0, 0): 0.6,
+       (1, 2, 1, 0): 0.5,
+       (1, 0, 0, 0): 0.4,
+       (1, 0, 0, 1): 1.0,
+       (1, 0, 1, 1): 0.9,
+       (1, 2, 0, 1): 0.8,
+   }
+   keypoint_heatmap_np = np.zeros((2, 3, 3, 2), dtype=np.float32)
+   for location, score in heatmap_entries.items():
+     keypoint_heatmap_np[location] = score
+
+   # (batch, y, x) -> 4-channel offset vector stored at that location.
+   offset_entries = {
+       (0, 0, 0): [0.5, 0.25, 0.0, 0.0],
+       (0, 2, 1): [-0.25, 0.5, 0.0, 0.0],
+       (0, 1, 1): [0.0, 0.0, 0.0, 0.0],
+       (0, 0, 2): [0.0, 0.0, 1.0, 0.0],
+       (0, 2, 2): [0.0, 0.0, 1.0, 1.0],
+       (1, 1, 0): [0.25, 0.5, 0.0, 0.0],
+       (1, 2, 1): [0.5, 0.0, 0.0, 0.0],
+       (1, 0, 0): [0.0, 0.0, 0.0, -0.5],
+       (1, 0, 1): [0.0, 0.0, 0.5, -0.5],
+       (1, 2, 0): [0.0, 0.0, -1.0, -0.5],
+   }
+   keypoint_heatmap_offsets_np = np.zeros((2, 3, 3, 4), dtype=np.float32)
+   for location, offset in offset_entries.items():
+     keypoint_heatmap_offsets_np[location] = offset
+
+   def graph_fn():
+     heatmap = tf.constant(keypoint_heatmap_np, dtype=tf.float32)
+     heatmap_offsets = tf.constant(
+         keypoint_heatmap_offsets_np, dtype=tf.float32)
+
+     candidates, candidate_scores, candidate_counts = (
+         cnma.prediction_tensors_to_keypoint_candidates(
+             heatmap,
+             heatmap_offsets,
+             keypoint_score_threshold=0.5,
+             max_pool_kernel_size=1,
+             max_candidates=2))
+     return candidates, candidate_scores, candidate_counts
+
+   (keypoint_cands, keypoint_scores,
+    num_keypoint_candidates) = self.execute(graph_fn, [])
+
+   expected_keypoint_candidates = [
+       [  # Example 0.
+           [[0.5, 0.25], [1.0, 2.0]],  # Candidate 1 of keypoint 1, 2.
+           [[1.75, 1.5], [1.0, 1.0]],  # Candidate 2 of keypoint 1, 2.
+       ],
+       [  # Example 1.
+           [[1.25, 0.5], [0.0, -0.5]],  # Candidate 1 of keypoint 1, 2.
+           [[2.5, 1.0], [0.5, 0.5]],  # Candidate 2 of keypoint 1, 2.
+       ],
+   ]
+   expected_keypoint_scores = [
+       [  # Example 0.
+           [1.0, 0.7],  # Candidate 1 scores of keypoint 1, 2.
+           [0.7, 0.3],  # Candidate 2 scores of keypoint 1, 2.
+       ],
+       [  # Example 1.
+           [0.6, 1.0],  # Candidate 1 scores of keypoint 1, 2.
+           [0.5, 0.9],  # Candidate 2 scores of keypoint 1, 2.
+       ],
+   ]
+   expected_num_keypoint_candidates = [[2, 1], [2, 2]]
+
+   np.testing.assert_allclose(expected_keypoint_candidates, keypoint_cands)
+   np.testing.assert_allclose(expected_keypoint_scores, keypoint_scores)
+   np.testing.assert_array_equal(expected_num_keypoint_candidates,
+                                 num_keypoint_candidates)
+
+ def test_regressed_keypoints_at_object_centers(self):
+   """Compares gathered keypoint regressions against a NumPy reference."""
+   batch_size = 2
+   num_keypoints = 5
+   num_instances = 6
+   feature_map_np = np.random.randn(
+       batch_size, 10, 10, 2 * num_keypoints).astype(np.float32)
+   y_indices = np.random.choice(10, (batch_size, num_instances))
+   x_indices = np.random.choice(10, (batch_size, num_instances))
+   center_yx = np.stack([y_indices, x_indices], axis=2).astype(np.float32)
+
+   def graph_fn():
+     feature_map = tf.constant(feature_map_np, dtype=tf.float32)
+     return cnma.regressed_keypoints_at_object_centers(
+         feature_map,
+         tf.constant(y_indices, dtype=tf.int32),
+         tf.constant(x_indices, dtype=tf.int32))
+
+   gathered_regressed_keypoints = self.execute(graph_fn, [])
+
+   # Reference: pick the feature vector at every (y, x) center, view it as
+   # num_keypoints (y, x) pairs, add the center coordinates to each pair and
+   # flatten the pairs again.
+   gathered = np.stack(
+       [feature_map_np[b, y_indices[b], x_indices[b], :]
+        for b in range(batch_size)],
+       axis=0)
+   per_keypoint = np.reshape(
+       gathered, [batch_size, num_instances, num_keypoints, 2])
+   per_keypoint = per_keypoint + center_yx[:, :, np.newaxis, :]
+   expected_gathered_keypoints = np.reshape(
+       per_keypoint, [batch_size, num_instances, -1])
+   np.testing.assert_allclose(expected_gathered_keypoints,
+                              gathered_regressed_keypoints)
+
+ @parameterized.parameters(
+     {'candidate_ranking_mode': 'min_distance'},
+     {'candidate_ranking_mode': 'score_distance_ratio'},
+ )
+ def test_refine_keypoints(self, candidate_ranking_mode):
+   """Checks keypoint refinement without boxes for both ranking modes.
+
+   Args:
+     candidate_ranking_mode: value forwarded to cnma.refine_keypoints as
+       `candidate_ranking_mode`.
+   """
+   regressed_np = np.array(
+       [
+           # Example 0.
+           [
+               [[2.0, 2.0], [6.0, 10.0], [14.0, 7.0]],  # Instance 0.
+               [[0.0, 6.0], [3.0, 3.0], [5.0, 7.0]],  # Instance 1.
+           ],
+           # Example 1.
+           [
+               [[6.0, 2.0], [0.0, 0.0], [0.1, 0.1]],  # Instance 0.
+               [[6.0, 2.5], [5.0, 5.0], [9.0, 3.0]],  # Instance 1.
+           ],
+       ], dtype=np.float32)
+   candidates_np = np.array(
+       [
+           # Example 0.
+           [
+               [[2.0, 2.5], [6.0, 10.5], [4.0, 7.0]],  # Candidate 0.
+               [[1.0, 8.0], [0.0, 0.0], [2.0, 2.0]],  # Candidate 1.
+               [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]],  # Candidate 2.
+           ],
+           # Example 1.
+           [
+               [[6.0, 1.5], [0.1, 0.4], [0.0, 0.0]],  # Candidate 0.
+               [[1.0, 4.0], [0.0, 0.3], [0.0, 0.0]],  # Candidate 1.
+               [[0.0, 0.0], [0.1, 0.3], [0.0, 0.0]],  # Candidate 2.
+           ]
+       ], dtype=np.float32)
+   candidate_scores_np = np.array(
+       [
+           # Example 0.
+           [
+               [0.8, 0.9, 1.0],  # Candidate 0.
+               [0.6, 0.1, 0.9],  # Candidate 1.
+               [0.0, 0.0, 0.0],  # Candidate 2.
+           ],
+           # Example 1.
+           [
+               [0.7, 0.3, 0.0],  # Candidate 0.
+               [0.6, 0.1, 0.0],  # Candidate 1.
+               [0.0, 0.28, 0.0],  # Candidate 2.
+           ]
+       ], dtype=np.float32)
+   candidate_counts_np = np.array(
+       [
+           [2, 2, 2],  # Example 0.
+           [2, 3, 0],  # Example 1.
+       ], dtype=np.int32)
+   unmatched_keypoint_score = 0.1
+
+   def graph_fn():
+     regressed_keypoints = tf.constant(regressed_np, dtype=tf.float32)
+     keypoint_candidates = tf.constant(candidates_np, dtype=tf.float32)
+     keypoint_scores = tf.constant(candidate_scores_np, dtype=tf.float32)
+     num_keypoint_candidates = tf.constant(candidate_counts_np,
+                                           dtype=tf.int32)
+     refined, refined_score_values = cnma.refine_keypoints(
+         regressed_keypoints, keypoint_candidates, keypoint_scores,
+         num_keypoint_candidates, bboxes=None,
+         unmatched_keypoint_score=unmatched_keypoint_score,
+         box_scale=1.2, candidate_search_scale=0.3,
+         candidate_ranking_mode=candidate_ranking_mode)
+     return refined, refined_score_values
+
+   refined_keypoints, refined_scores = self.execute(graph_fn, [])
+
+   # The two ranking modes only disagree on keypoint 1 of instance 0 in
+   # example 1; every other expected entry is shared.
+   if candidate_ranking_mode == 'min_distance':
+     mode_keypoint, mode_score = [0.0, 0.3], 0.1
+   else:
+     mode_keypoint, mode_score = [0.1, 0.3], 0.28
+
+   expected_refined_keypoints = np.array(
+       [
+           # Example 0.
+           [
+               [[2.0, 2.5], [6.0, 10.5], [14.0, 7.0]],  # Instance 0.
+               [[0.0, 6.0], [3.0, 3.0], [4.0, 7.0]],  # Instance 1.
+           ],
+           # Example 1.
+           [
+               [[6.0, 1.5], mode_keypoint, [0.1, 0.1]],  # Instance 0.
+               [[6.0, 2.5], [5.0, 5.0], [9.0, 3.0]],  # Instance 1.
+           ],
+       ], dtype=np.float32)
+   expected_refined_scores = np.array(
+       [
+           # Example 0.
+           [
+               [0.8, 0.9, unmatched_keypoint_score],  # Instance 0.
+               [unmatched_keypoint_score,  # Instance 1.
+                unmatched_keypoint_score, 1.0],
+           ],
+           # Example 1.
+           [
+               [0.7, mode_score, unmatched_keypoint_score],  # Instance 0.
+               [unmatched_keypoint_score,  # Instance 1.
+                0.1, unmatched_keypoint_score],
+           ],
+       ], dtype=np.float32)
+
+   np.testing.assert_allclose(expected_refined_keypoints, refined_keypoints)
+   np.testing.assert_allclose(expected_refined_scores, refined_scores)
+
+ def test_refine_keypoints_with_bboxes(self):
+   """Checks keypoint refinement when per-instance boxes are supplied.
+
+   Calls cnma.refine_keypoints with `bboxes` set, box_scale=1.0 and
+   candidate_search_scale=0.3, leaving the ranking mode at its default.
+   """
+   regressed_np = np.array(
+       [
+           # Example 0.
+           [
+               [[2.0, 2.0], [6.0, 10.0], [14.0, 7.0]],  # Instance 0.
+               [[0.0, 6.0], [3.0, 3.0], [5.0, 7.0]],  # Instance 1.
+           ],
+           # Example 1.
+           [
+               [[6.0, 2.0], [0.0, 0.0], [0.1, 0.1]],  # Instance 0.
+               [[6.0, 2.5], [5.0, 5.0], [9.0, 3.0]],  # Instance 1.
+           ],
+       ], dtype=np.float32)
+   candidates_np = np.array(
+       [
+           # Example 0.
+           [
+               [[2.0, 2.5], [6.0, 10.5], [4.0, 7.0]],  # Candidate 0.
+               [[1.0, 8.0], [0.0, 0.0], [2.0, 2.0]],  # Candidate 1.
+           ],
+           # Example 1.
+           [
+               [[6.0, 1.5], [5.0, 5.0], [0.0, 0.0]],  # Candidate 0.
+               [[1.0, 4.0], [0.0, 0.3], [0.0, 0.0]],  # Candidate 1.
+           ]
+       ], dtype=np.float32)
+   candidate_scores_np = np.array(
+       [
+           # Example 0.
+           [
+               [0.8, 0.9, 1.0],  # Candidate 0.
+               [0.6, 0.1, 0.9],  # Candidate 1.
+           ],
+           # Example 1.
+           [
+               [0.7, 0.4, 0.0],  # Candidate 0.
+               [0.6, 0.1, 0.0],  # Candidate 1.
+           ]
+       ], dtype=np.float32)
+   candidate_counts_np = np.array(
+       [
+           [2, 2, 2],  # Example 0.
+           [2, 2, 0],  # Example 1.
+       ], dtype=np.int32)
+   boxes_np = np.array(
+       [
+           # Example 0.
+           [
+               [2.0, 2.0, 14.0, 10.0],  # Instance 0.
+               [0.0, 3.0, 5.0, 7.0],  # Instance 1.
+           ],
+           # Example 1.
+           [
+               [0.0, 0.0, 6.0, 2.0],  # Instance 0.
+               [5.0, 1.4, 9.0, 5.0],  # Instance 1.
+           ],
+       ], dtype=np.float32)
+   unmatched_keypoint_score = 0.1
+
+   def graph_fn():
+     regressed_keypoints = tf.constant(regressed_np, dtype=tf.float32)
+     keypoint_candidates = tf.constant(candidates_np, dtype=tf.float32)
+     keypoint_scores = tf.constant(candidate_scores_np, dtype=tf.float32)
+     num_keypoint_candidates = tf.constant(candidate_counts_np,
+                                           dtype=tf.int32)
+     bboxes = tf.constant(boxes_np, dtype=tf.float32)
+     refined, refined_score_values = cnma.refine_keypoints(
+         regressed_keypoints, keypoint_candidates, keypoint_scores,
+         num_keypoint_candidates, bboxes=bboxes,
+         unmatched_keypoint_score=unmatched_keypoint_score,
+         box_scale=1.0, candidate_search_scale=0.3)
+     return refined, refined_score_values
+
+   refined_keypoints, refined_scores = self.execute(graph_fn, [])
+
+   expected_refined_keypoints = np.array(
+       [
+           # Example 0.
+           [
+               [[2.0, 2.5], [6.0, 10.0], [14.0, 7.0]],  # Instance 0.
+               [[0.0, 6.0], [3.0, 3.0], [4.0, 7.0]],  # Instance 1.
+           ],
+           # Example 1.
+           [
+               [[6.0, 1.5], [0.0, 0.3], [0.1, 0.1]],  # Instance 0.
+               [[6.0, 1.5], [5.0, 5.0], [9.0, 3.0]],  # Instance 1.
+           ],
+       ], dtype=np.float32)
+   expected_refined_scores = np.array(
+       [
+           # Example 0.
+           [
+               [0.8, unmatched_keypoint_score,  # Instance 0.
+                unmatched_keypoint_score],
+               [unmatched_keypoint_score,  # Instance 1.
+                unmatched_keypoint_score, 1.0],
+           ],
+           # Example 1.
+           [
+               [0.7, 0.1, unmatched_keypoint_score],  # Instance 0.
+               [0.7, 0.4, unmatched_keypoint_score],  # Instance 1.
+           ],
+       ], dtype=np.float32)
+
+   np.testing.assert_allclose(expected_refined_keypoints, refined_keypoints)
+   np.testing.assert_allclose(expected_refined_scores, refined_scores)
+
+ def test_pad_to_full_keypoint_dim(self):
+   """Checks that keypoints are scattered to their slots among all keypoints."""
+   batch_size = 4
+   num_instances = 8
+   num_keypoints = 2
+   keypoint_inds = [1, 3]
+   num_total_keypoints = 5
+
+   coords_np = np.random.randn(batch_size, num_instances, num_keypoints, 2)
+   scores_np = np.random.randn(batch_size, num_instances, num_keypoints)
+
+   def graph_fn():
+     coords = tf.constant(coords_np)
+     scores = tf.constant(scores_np)
+     padded_coords, padded_scores = cnma._pad_to_full_keypoint_dim(
+         coords, scores, keypoint_inds, num_total_keypoints)
+     return padded_coords, padded_scores
+
+   kpt_coords_padded, kpt_scores_padded = self.execute(graph_fn, [])
+
+   # The keypoint axis must grow to the full keypoint count.
+   self.assertAllEqual([batch_size, num_instances, num_total_keypoints, 2],
+                       kpt_coords_padded.shape)
+   self.assertAllEqual([batch_size, num_instances, num_total_keypoints],
+                       kpt_scores_padded.shape)
+
+   # Source keypoint `src` must land at position `dst` of the padded output.
+   for src, dst in enumerate(keypoint_inds):
+     np.testing.assert_allclose(coords_np[:, :, src, :],
+                                kpt_coords_padded[:, :, dst, :])
+     np.testing.assert_allclose(scores_np[:, :, src],
+                                kpt_scores_padded[:, :, dst])
+
+ def test_pad_to_full_instance_dim(self):
+   """Checks that instances are scattered to their slots among max_instances."""
+   batch_size = 4
+   max_instances = 8
+   num_keypoints = 6
+   num_instances = 2
+   instance_inds = [1, 3]
+
+   coords_np = np.random.randn(batch_size, num_instances, num_keypoints, 2)
+   scores_np = np.random.randn(batch_size, num_instances, num_keypoints)
+
+   def graph_fn():
+     coords = tf.constant(coords_np)
+     scores = tf.constant(scores_np)
+     padded_coords, padded_scores = cnma._pad_to_full_instance_dim(
+         coords, scores, instance_inds, max_instances)
+     return padded_coords, padded_scores
+
+   kpt_coords_padded, kpt_scores_padded = self.execute(graph_fn, [])
+
+   # The instance axis must grow to max_instances.
+   self.assertAllEqual([batch_size, max_instances, num_keypoints, 2],
+                       kpt_coords_padded.shape)
+   self.assertAllEqual([batch_size, max_instances, num_keypoints],
+                       kpt_scores_padded.shape)
+
+   # Source instance `src` must land at position `dst` of the padded output.
+   for src, dst in enumerate(instance_inds):
+     np.testing.assert_allclose(coords_np[:, src, :, :],
+                                kpt_coords_padded[:, dst, :, :])
+     np.testing.assert_allclose(scores_np[:, src, :],
+                                kpt_scores_padded[:, dst, :])
+
+
+# Common parameters for setting up testing examples across tests.
+# Number of classes handed to CenterNetMetaArch as `num_classes`.
+_NUM_CLASSES = 10
+# Keypoint indices used by the fake keypoint estimation task.
+_KEYPOINT_INDICES = [0, 1, 2, 3]
+# Number of keypoints of the fake task, derived from the indices above.
+_NUM_KEYPOINTS = len(_KEYPOINT_INDICES)
+# Task name of the fake keypoint task; also its key in keypoint_params_dict.
+_TASK_NAME = 'human_pose'
+
+
+def get_fake_center_params():
+  """Builds the ObjectCenterParams namedtuple used by the tests.
+
+  Returns:
+    A cnma.ObjectCenterParams with a sigmoid classification loss, unit loss
+    weight, min_box_overlap_iou of 1.0 and at most 5 box predictions.
+  """
+  center_loss = losses.WeightedSigmoidClassificationLoss()
+  return cnma.ObjectCenterParams(
+      classification_loss=center_loss,
+      object_center_loss_weight=1.0,
+      min_box_overlap_iou=1.0,
+      max_box_predictions=5,
+      use_labeled_classes=False)
+
+
+def get_fake_od_params():
+  """Builds the ObjectDetectionParams namedtuple used by the tests.
+
+  Returns:
+    A cnma.ObjectDetectionParams with an L1 localization loss, offset loss
+    weight 1.0 and scale loss weight 0.1.
+  """
+  box_loss = losses.L1LocalizationLoss()
+  return cnma.ObjectDetectionParams(
+      localization_loss=box_loss,
+      offset_loss_weight=1.0,
+      scale_loss_weight=0.1)
+
+
+def get_fake_kp_params():
+  """Builds the KeypointEstimationParams namedtuple used by the tests.
+
+  Returns:
+    A cnma.KeypointEstimationParams for task `_TASK_NAME`, attached to class
+    id 1 and covering the keypoints in `_KEYPOINT_INDICES`.
+  """
+  # One (tiny) standard deviation per keypoint.
+  per_keypoint_std_dev = [0.00001 for _ in _KEYPOINT_INDICES]
+  heatmap_loss = losses.WeightedSigmoidClassificationLoss()
+  regression_loss = losses.L1LocalizationLoss()
+  return cnma.KeypointEstimationParams(
+      task_name=_TASK_NAME,
+      class_id=1,
+      keypoint_indices=_KEYPOINT_INDICES,
+      keypoint_std_dev=per_keypoint_std_dev,
+      classification_loss=heatmap_loss,
+      localization_loss=regression_loss,
+      keypoint_candidate_score_threshold=0.1)
+
+
+def get_fake_mask_params():
+  """Builds the MaskParams namedtuple used by the tests.
+
+  Returns:
+    A cnma.MaskParams with a softmax classification loss, unit task loss
+    weight and 4x4 output masks.
+  """
+  mask_loss = losses.WeightedSoftmaxClassificationLoss()
+  return cnma.MaskParams(
+      classification_loss=mask_loss,
+      task_loss_weight=1.0,
+      mask_height=4,
+      mask_width=4)
+
+
+def build_center_net_meta_arch(build_resnet=False):
+  """Builds the CenterNet meta architecture used by the tests.
+
+  Args:
+    build_resnet: bool. When True the model is backed by a
+      CenterNetResnetFeatureExtractor('resnet_v2_101'); otherwise a
+      DummyFeatureExtractor with stride 4 and two feature outputs is used.
+
+  Returns:
+    A cnma.CenterNetMetaArch in training mode configured with the fake
+    object center, detection, keypoint and mask parameters of this file.
+  """
+  if build_resnet:
+    feature_extractor = (
+        center_net_resnet_feature_extractor.CenterNetResnetFeatureExtractor(
+            'resnet_v2_101'))
+  else:
+    feature_extractor = DummyFeatureExtractor(
+        channel_means=(1.0, 2.0, 3.0),
+        channel_stds=(10., 20., 30.),
+        bgr_ordering=False,
+        num_feature_outputs=2,
+        stride=4)
+  # functools.partial does not validate keyword names until the partial is
+  # called, so the keyword must match resize_to_range's argument exactly.
+  image_resizer_fn = functools.partial(
+      preprocessor.resize_to_range,
+      min_dimension=128,
+      max_dimension=128,
+      pad_to_max_dimension=True)
+  return cnma.CenterNetMetaArch(
+      is_training=True,
+      add_summaries=False,
+      num_classes=_NUM_CLASSES,
+      feature_extractor=feature_extractor,
+      image_resizer_fn=image_resizer_fn,
+      object_center_params=get_fake_center_params(),
+      object_detection_params=get_fake_od_params(),
+      keypoint_params_dict={_TASK_NAME: get_fake_kp_params()},
+      mask_params=get_fake_mask_params())
+
+
+def _logit(p):
+  """Returns log(p / (1 - p)), stabilized with the float32 machine epsilon.
+
+  Args:
+    p: scalar or numpy array of probabilities.
+
+  Returns:
+    The logit of `p`; epsilon is added to numerator and denominator so that
+    p == 0 and p == 1 give finite values.
+  """
+  eps = np.finfo(np.float32).eps
+  odds = (p + eps) / (1 - p + eps)
+  return np.log(odds)
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class CenterNetMetaArchLibTest(test_case.TestCase):
+  """Tests for the module-level helper functions of the CenterNet meta arch."""
+
+  def test_get_keypoint_name(self):
+    """The keypoint name joins task name and head name with a slash."""
+    keypoint_name = cnma.get_keypoint_name('human_pose', 'keypoint_offset')
+    self.assertEqual('human_pose/keypoint_offset', keypoint_name)
+
+  def test_get_num_instances_from_weights(self):
+    """Counts non-zero weights, with a floor of one instance."""
+    all_zero = tf.constant([0.0, 0.0, 0.0], dtype=tf.float32)
+    two_nonzero = tf.constant([0.5, 0.9, 0.0], dtype=tf.float32)
+    one_nonzero = tf.constant([0.0, 0.0, 1.0], dtype=tf.float32)
+
+    def count_mixed_weights_fn():
+      # Total of three elements with non-zero values.
+      return cnma.get_num_instances_from_weights(
+          [all_zero, two_nonzero, one_nonzero])
+
+    num_instances = self.execute(count_mixed_weights_fn, [])
+    self.assertAlmostEqual(3, num_instances)
+
+    def count_zero_weights_fn():
+      # No non-zero value in the weights. Return minimum value: 1.
+      return cnma.get_num_instances_from_weights([all_zero, all_zero])
+
+    num_instances = self.execute(count_zero_weights_fn, [])
+    self.assertAlmostEqual(1, num_instances)
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
+ """Tests for the CenterNet meta architecture."""
+
+ def test_construct_prediction_heads(self):
+   """Checks each prediction head exists and has the expected channel count."""
+   model = build_center_net_meta_arch()
+   fake_feature_map = np.zeros((4, 128, 128, 8))
+
+   # (prediction head key, expected number of output channels), listed in
+   # the order the heads are exercised.
+   expected_head_channels = [
+       # "object center" head:
+       (cnma.OBJECT_CENTER, _NUM_CLASSES),
+       # "object scale" (height/width) head:
+       (cnma.BOX_SCALE, 2),
+       # "object offset" head:
+       (cnma.BOX_OFFSET, 2),
+       # "keypoint offset" head:
+       (cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_OFFSET), 2),
+       # "keypoint heatmap" head:
+       (cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_HEATMAP),
+        _NUM_KEYPOINTS),
+       # "keypoint regression" head:
+       (cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_REGRESSION),
+        2 * _NUM_KEYPOINTS),
+       # "mask" head:
+       (cnma.SEGMENTATION_HEATMAP, _NUM_CLASSES),
+   ]
+   for head_key, num_channels in expected_head_channels:
+     # Only the last head registered under each key is exercised.
+     last_head = model._prediction_head_dict[head_key][-1]
+     output = last_head(fake_feature_map)
+     self.assertEqual((4, 128, 128, num_channels), output.shape)
+
+ def test_initialize_target_assigners(self):
+   """Checks that every task gets a target assigner of the expected class."""
+   model = build_center_net_meta_arch()
+   assigner_dict = model._initialize_target_assigners(
+       stride=2,
+       min_box_overlap_iou=0.7)
+
+   # Check whether the corresponding target assigner class is initialized.
+   expected_assigner_classes = [
+       # object center target assigner:
+       (cnma.OBJECT_CENTER,
+        cn_assigner.CenterNetCenterHeatmapTargetAssigner),
+       # object detection target assigner:
+       (cnma.DETECTION_TASK, cn_assigner.CenterNetBoxTargetAssigner),
+       # keypoint estimation target assigner:
+       (_TASK_NAME, cn_assigner.CenterNetKeypointTargetAssigner),
+       # mask estimation target assigner:
+       (cnma.SEGMENTATION_TASK, cn_assigner.CenterNetMaskTargetAssigner),
+   ]
+   for task_key, assigner_class in expected_assigner_classes:
+     self.assertIsInstance(assigner_dict[task_key], assigner_class)
+
+ def test_predict(self):
+   """Test the predict function.
+
+   Feeds a zero batch of two 128x128 images and checks the output shapes;
+   the head outputs are expected at 32x32 resolution.
+   """
+   model = build_center_net_meta_arch()
+
+   def graph_fn():
+     return model.predict(tf.zeros([2, 128, 128, 3]), None)
+
+   prediction_dict = self.execute(graph_fn, [])
+
+   self.assertEqual(prediction_dict['preprocessed_inputs'].shape,
+                    (2, 128, 128, 3))
+
+   # (prediction key, expected channel count) for the first head output.
+   expected_head_channels = [
+       (cnma.OBJECT_CENTER, _NUM_CLASSES),
+       (cnma.BOX_SCALE, 2),
+       (cnma.BOX_OFFSET, 2),
+       (cnma.SEGMENTATION_HEATMAP, _NUM_CLASSES),
+   ]
+   for head_key, num_channels in expected_head_channels:
+     self.assertEqual(prediction_dict[head_key][0].shape,
+                      (2, 32, 32, num_channels))
+
+ def test_loss(self):
+   """Test the loss function.
+
+   Uses the fake groundtruth and prediction dictionaries of this file and
+   checks that every loss term stays below 0.01.
+   """
+   groundtruth_dict = get_fake_groundtruth_dict(16, 32, 4)
+   model = build_center_net_meta_arch()
+   box_fields = fields.BoxListFields
+   model.provide_groundtruth(
+       groundtruth_boxes_list=groundtruth_dict[box_fields.boxes],
+       groundtruth_weights_list=groundtruth_dict[box_fields.weights],
+       groundtruth_classes_list=groundtruth_dict[box_fields.classes],
+       groundtruth_keypoints_list=groundtruth_dict[box_fields.keypoints],
+       groundtruth_masks_list=groundtruth_dict[box_fields.masks])
+
+   prediction_dict = get_fake_prediction_dict(
+       input_height=16, input_width=32, stride=4)
+
+   def graph_fn():
+     true_image_shapes = tf.constant([[16, 24, 3], [16, 24, 3]])
+     return model.loss(prediction_dict, true_image_shapes)
+
+   loss_dict = self.execute(graph_fn, [])
+
+   # The prediction and groundtruth are curated to produce very low loss.
+   loss_names = [
+       cnma.OBJECT_CENTER,
+       cnma.BOX_SCALE,
+       cnma.BOX_OFFSET,
+       cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_HEATMAP),
+       cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_OFFSET),
+       cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_REGRESSION),
+       cnma.SEGMENTATION_HEATMAP,
+   ]
+   for loss_name in loss_names:
+     loss_key = '%s/%s' % (cnma.LOSS_KEY_PREFIX, loss_name)
+     self.assertGreater(0.01, loss_dict[loss_key])
+
+ @parameterized.parameters(
+     {'target_class_id': 1},
+     {'target_class_id': 2},
+ )
+ def test_postprocess(self, target_class_id):
+   """Test the postprocess function.
+
+   A single object center is planted at heatmap location (16, 16) for class
+   `target_class_id`. The fake keypoint task is attached to class id 1, so
+   keypoints are only expected for that class; for any other class the
+   keypoint outputs must be all zeros.
+
+   Args:
+     target_class_id: class index assigned to the planted detection.
+   """
+   model = build_center_net_meta_arch()
+   max_detection = model._center_params.max_box_predictions
+   num_keypoints = len(model._kp_params_dict[_TASK_NAME].keypoint_indices)
+
+   class_center = np.zeros((1, 32, 32, _NUM_CLASSES), dtype=np.float32)
+   height_width = np.zeros((1, 32, 32, 2), dtype=np.float32)
+   offset = np.zeros((1, 32, 32, 2), dtype=np.float32)
+   keypoint_heatmaps = np.zeros((1, 32, 32, num_keypoints), dtype=np.float32)
+   keypoint_offsets = np.zeros((1, 32, 32, 2), dtype=np.float32)
+   keypoint_regression = np.random.randn(1, 32, 32, num_keypoints * 2)
+
+   # Single object of the target class centered at (16, 16).
+   class_probs = np.zeros(_NUM_CLASSES)
+   class_probs[target_class_id] = _logit(0.75)
+   class_center[0, 16, 16] = class_probs
+   height_width[0, 16, 16] = [5, 10]
+   offset[0, 16, 16] = [.25, .5]
+   # Regressed keypoints relative to the center, as (dy, dx) pairs.
+   keypoint_regression[0, 16, 16] = [
+       -1., -1.,
+       -1., 1.,
+       1., -1.,
+       1., 1.]
+   keypoint_heatmaps[0, 14, 14, 0] = _logit(0.9)
+   keypoint_heatmaps[0, 14, 18, 1] = _logit(0.9)
+   keypoint_heatmaps[0, 18, 14, 2] = _logit(0.9)
+   keypoint_heatmaps[0, 18, 18, 3] = _logit(0.05)  # Note the low score.
+
+   segmentation_heatmap = np.zeros((1, 32, 32, _NUM_CLASSES),
+                                   dtype=np.float32)
+   segmentation_heatmap[:, 14:18, 14:18, target_class_id] = 1.0
+   segmentation_heatmap = _logit(segmentation_heatmap)
+
+   class_center = tf.constant(class_center)
+   height_width = tf.constant(height_width)
+   offset = tf.constant(offset)
+   keypoint_heatmaps = tf.constant(keypoint_heatmaps, dtype=tf.float32)
+   keypoint_offsets = tf.constant(keypoint_offsets, dtype=tf.float32)
+   keypoint_regression = tf.constant(keypoint_regression, dtype=tf.float32)
+   segmentation_heatmap = tf.constant(segmentation_heatmap, dtype=tf.float32)
+
+   prediction_dict = {
+       cnma.OBJECT_CENTER: [class_center],
+       cnma.BOX_SCALE: [height_width],
+       cnma.BOX_OFFSET: [offset],
+       cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_HEATMAP):
+           [keypoint_heatmaps],
+       cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_OFFSET):
+           [keypoint_offsets],
+       cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_REGRESSION):
+           [keypoint_regression],
+       cnma.SEGMENTATION_HEATMAP: [segmentation_heatmap],
+   }
+
+   def graph_fn():
+     detections = model.postprocess(prediction_dict,
+                                    tf.constant([[128, 128, 3]]))
+     return detections
+
+   detections = self.execute_cpu(graph_fn, [])
+
+   self.assertAllClose(detections['detection_boxes'][0, 0],
+                       np.array([55, 46, 75, 86]) / 128.0)
+   self.assertAllClose(detections['detection_scores'][0],
+                       [.75, .5, .5, .5, .5])
+   self.assertEqual(detections['detection_classes'][0, 0], target_class_id)
+   self.assertEqual(detections['num_detections'], [5])
+   self.assertAllEqual([1, max_detection, num_keypoints, 2],
+                       detections['detection_keypoints'].shape)
+   self.assertAllEqual([1, max_detection, num_keypoints],
+                       detections['detection_keypoint_scores'].shape)
+   self.assertAllEqual([1, max_detection, 4, 4],
+                       detections['detection_masks'].shape)
+
+   # There should be some section of the first mask (correspond to the only
+   # detection) with non-zero mask values.
+   self.assertGreater(np.sum(detections['detection_masks'][0, 0, :, :] > 0), 0)
+   self.assertAllEqual(
+       detections['detection_masks'][0, 1:, :, :],
+       np.zeros_like(detections['detection_masks'][0, 1:, :, :]))
+
+   if target_class_id == 1:
+     expected_kpts_for_obj_0 = np.array(
+         [[14., 14.], [14., 18.], [18., 14.], [17., 17.]]) / 32.
+     expected_kpt_scores_for_obj_0 = np.array(
+         [0.9, 0.9, 0.9, cnma.UNMATCHED_KEYPOINT_SCORE])
+     np.testing.assert_allclose(detections['detection_keypoints'][0][0],
+                                expected_kpts_for_obj_0, rtol=1e-6)
+     np.testing.assert_allclose(detections['detection_keypoint_scores'][0][0],
+                                expected_kpt_scores_for_obj_0, rtol=1e-6)
+   else:
+     # All keypoint outputs should be zeros. np.float64 is spelled out
+     # because the `np.float` alias no longer exists in current NumPy.
+     np.testing.assert_allclose(
+         detections['detection_keypoints'][0][0],
+         np.zeros([num_keypoints, 2], np.float64),
+         rtol=1e-6)
+     np.testing.assert_allclose(
+         detections['detection_keypoint_scores'][0][0],
+         np.zeros([num_keypoints], np.float64),
+         rtol=1e-6)
+
+ def test_get_instance_indices(self):
+   """Checks `_get_instance_indices` for class id 2 in batch element 1."""
+   detection_classes = tf.constant(
+       [[0, 1, 2, 0], [2, 1, 2, 2]], dtype=tf.int32)
+   detection_counts = tf.constant([1, 3], dtype=tf.int32)
+   meta_arch = build_center_net_meta_arch()
+   instance_indices = meta_arch._get_instance_indices(
+       detection_classes, detection_counts, 1, 2)
+   self.assertAllEqual(instance_indices.numpy(), [0, 2])
+
+
+def get_fake_prediction_dict(input_height, input_width, stride):
+ """Builds fake batch-of-2 predictions peaked at cell (2, 4) of image 0."""
+ output_height = input_height // stride
+ output_width = input_width // stride
+ object_center = np.zeros((2, output_height, output_width, _NUM_CLASSES),
+ dtype=np.float32)
+ # Box center on the 4 x 8 output grid used by the loss tests:
+ # y: floor((0.54 + 0.56) / 2 * 4) = 2,
+ # x: floor((0.54 + 0.56) / 2 * 8) = 4
+ object_center[0, 2, 4, 1] = 1.0
+ object_center = _logit(object_center)
+
+ # Box size in output-grid cells:
+ # height: (0.56 - 0.54) * 4 = 0.08
+ # width: (0.56 - 0.54) * 8 = 0.16
+ object_scale = np.zeros((2, output_height, output_width, 2), dtype=np.float32)
+ object_scale[0, 2, 4] = 0.08, 0.16
+
+ # Offset of the box center (0.55, 0.55) from its output-grid cell:
+ # y-offset: 0.55 * 4 - 2 = 0.2
+ # x-offset: 0.55 * 8 - 4 = 0.4
+ object_offset = np.zeros((2, output_height, output_width, 2),
+ dtype=np.float32)
+ object_offset[0, 2, 4] = 0.2, 0.4
+
+ keypoint_heatmap = np.zeros((2, output_height, output_width, _NUM_KEYPOINTS),
+ dtype=np.float32)
+ keypoint_heatmap[0, 2, 4, 1] = 1.0
+ keypoint_heatmap[0, 2, 4, 3] = 1.0
+ keypoint_heatmap = _logit(keypoint_heatmap)
+
+ keypoint_offset = np.zeros((2, output_height, output_width, 2),
+ dtype=np.float32)
+ keypoint_offset[0, 2, 4] = 0.2, 0.4
+
+ keypoint_regression = np.zeros(
+ (2, output_height, output_width, 2 * _NUM_KEYPOINTS), dtype=np.float32)
+ keypoint_regression[0, 2, 4] = 0.0, 0.0, 0.2, 0.4, 0.0, 0.0, 0.2, 0.4
+
+ mask_heatmap = np.zeros((2, output_height, output_width, _NUM_CLASSES),
+ dtype=np.float32)
+ mask_heatmap[0, 2, 4, 1] = 1.0
+ mask_heatmap = _logit(mask_heatmap)
+
+ prediction_dict = {
+ 'preprocessed_inputs':
+ tf.zeros((2, input_height, input_width, 3)),
+ cnma.OBJECT_CENTER: [
+ tf.constant(object_center),
+ tf.constant(object_center)
+ ],
+ cnma.BOX_SCALE: [
+ tf.constant(object_scale),
+ tf.constant(object_scale)
+ ],
+ cnma.BOX_OFFSET: [
+ tf.constant(object_offset),
+ tf.constant(object_offset)
+ ],
+ cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_HEATMAP): [
+ tf.constant(keypoint_heatmap),
+ tf.constant(keypoint_heatmap)
+ ],
+ cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_OFFSET): [
+ tf.constant(keypoint_offset),
+ tf.constant(keypoint_offset)
+ ],
+ cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_REGRESSION): [
+ tf.constant(keypoint_regression),
+ tf.constant(keypoint_regression)
+ ],
+ cnma.SEGMENTATION_HEATMAP: [
+ tf.constant(mask_heatmap),
+ tf.constant(mask_heatmap)
+ ]
+ }
+ return prediction_dict
+
+
+def get_fake_groundtruth_dict(input_height, input_width, stride):
+ """Builds fake groundtruth lists with one entry per image for two images."""
+ # First image: a small box with center at (0.55, 0.55).
+ boxes = [
+ tf.constant([[0.54, 0.54, 0.56, 0.56]]),
+ tf.constant([[0.0, 0.0, 0.5, 0.5]]),
+ ]
+ classes = [
+ tf.one_hot([1], depth=_NUM_CLASSES),
+ tf.one_hot([0], depth=_NUM_CLASSES),
+ ]
+ weights = [
+ tf.constant([1.]),
+ tf.constant([0.]),  # The second image's box carries zero weight.
+ ]
+ keypoints = [
+ tf.tile(
+ tf.expand_dims(
+ tf.constant([[float('nan'), 0.55,
+ float('nan'), 0.55, 0.55, 0.0]]),
+ axis=2),
+ multiples=[1, 1, 2]),
+ tf.tile(
+ tf.expand_dims(
+ tf.constant([[float('nan'), 0.55,
+ float('nan'), 0.55, 0.55, 0.0]]),
+ axis=2),
+ multiples=[1, 1, 2]),
+ ]
+ labeled_classes = [
+ tf.one_hot([1], depth=_NUM_CLASSES) + tf.one_hot([2], depth=_NUM_CLASSES),
+ tf.one_hot([0], depth=_NUM_CLASSES) + tf.one_hot([1], depth=_NUM_CLASSES),
+ ]
+ mask = np.zeros((1, input_height, input_width), dtype=np.float32)
+ mask[0, 8:8+stride, 16:16+stride] = 1  # A stride x stride patch of ones.
+ masks = [
+ tf.constant(mask),
+ tf.zeros_like(mask),
+ ]
+ groundtruth_dict = {
+ fields.BoxListFields.boxes: boxes,
+ fields.BoxListFields.weights: weights,
+ fields.BoxListFields.classes: classes,
+ fields.BoxListFields.keypoints: keypoints,
+ fields.BoxListFields.masks: masks,
+ fields.InputDataFields.groundtruth_labeled_classes: labeled_classes,
+ }
+ return groundtruth_dict
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class CenterNetMetaComputeLossTest(test_case.TestCase):
+ """Test for CenterNet loss computation related functions."""
+
+ def setUp(self):
+ self.model = build_center_net_meta_arch()
+ self.classification_loss_fn = self.model._center_params.classification_loss
+ self.localization_loss_fn = self.model._od_params.localization_loss
+ self.true_image_shapes = tf.constant([[16, 24, 3], [16, 24, 3]])
+ self.input_height = 16
+ self.input_width = 32  # Wider than the true image width (24) set above.
+ self.stride = 4
+ self.per_pixel_weights = self.get_per_pixel_weights(self.true_image_shapes,
+ self.input_height,
+ self.input_width,
+ self.stride)
+ self.prediction_dict = get_fake_prediction_dict(self.input_height,
+ self.input_width,
+ self.stride)
+ self.model._groundtruth_lists = get_fake_groundtruth_dict(
+ self.input_height, self.input_width, self.stride)
+ super(CenterNetMetaComputeLossTest, self).setUp()
+
+ def get_per_pixel_weights(self, true_image_shapes, input_height, input_width,
+ stride):
+ output_height, output_width = (input_height // stride,
+ input_width // stride)
+
+ # TODO(vighneshb) Explore whether using floor here is safe.
+ output_true_image_shapes = tf.ceil(tf.to_float(true_image_shapes) / stride)
+ per_pixel_weights = cnma.get_valid_anchor_weights_in_flattened_image(
+ output_true_image_shapes, output_height, output_width)
+ per_pixel_weights = tf.expand_dims(per_pixel_weights, 2)
+ return per_pixel_weights
+
+ def test_compute_object_center_loss(self):
+ def graph_fn():
+ loss = self.model._compute_object_center_loss(
+ object_center_predictions=self.prediction_dict[cnma.OBJECT_CENTER],
+ input_height=self.input_height,
+ input_width=self.input_width,
+ per_pixel_weights=self.per_pixel_weights)
+ return loss
+
+ loss = self.execute(graph_fn, [])
+
+ # The prediction and groundtruth are curated to produce very low loss.
+ self.assertGreater(0.01, loss)
+
+ default_value = self.model._center_params.use_only_known_classes
+ self.model._center_params = (
+ self.model._center_params._replace(use_only_known_classes=True))
+ loss = self.model._compute_object_center_loss(
+ object_center_predictions=self.prediction_dict[cnma.OBJECT_CENTER],
+ input_height=self.input_height,
+ input_width=self.input_width,
+ per_pixel_weights=self.per_pixel_weights)
+ self.model._center_params = (
+ self.model._center_params._replace(
+ use_only_known_classes=default_value))
+
+ # The loss must stay very low with use_only_known_classes enabled as well.
+ self.assertGreater(0.01, loss)
+
+ def test_compute_box_scale_and_offset_loss(self):
+ def graph_fn():
+ scale_loss, offset_loss = self.model._compute_box_scale_and_offset_loss(
+ scale_predictions=self.prediction_dict[cnma.BOX_SCALE],
+ offset_predictions=self.prediction_dict[cnma.BOX_OFFSET],
+ input_height=self.input_height,
+ input_width=self.input_width)
+ return scale_loss, offset_loss
+
+ scale_loss, offset_loss = self.execute(graph_fn, [])
+
+ # The prediction and groundtruth are curated to produce very low loss.
+ self.assertGreater(0.01, scale_loss)
+ self.assertGreater(0.01, offset_loss)
+
+ def test_compute_kp_heatmap_loss(self):
+ def graph_fn():
+ loss = self.model._compute_kp_heatmap_loss(
+ input_height=self.input_height,
+ input_width=self.input_width,
+ task_name=_TASK_NAME,
+ heatmap_predictions=self.prediction_dict[cnma.get_keypoint_name(
+ _TASK_NAME, cnma.KEYPOINT_HEATMAP)],
+ classification_loss_fn=self.classification_loss_fn,
+ per_pixel_weights=self.per_pixel_weights)
+ return loss
+
+ loss = self.execute(graph_fn, [])
+
+ # The prediction and groundtruth are curated to produce very low loss.
+ self.assertGreater(0.01, loss)
+
+ def test_compute_kp_offset_loss(self):
+ def graph_fn():
+ loss = self.model._compute_kp_offset_loss(
+ input_height=self.input_height,
+ input_width=self.input_width,
+ task_name=_TASK_NAME,
+ offset_predictions=self.prediction_dict[cnma.get_keypoint_name(
+ _TASK_NAME, cnma.KEYPOINT_OFFSET)],
+ localization_loss_fn=self.localization_loss_fn)
+ return loss
+
+ loss = self.execute(graph_fn, [])
+
+ # The prediction and groundtruth are curated to produce very low loss.
+ self.assertGreater(0.01, loss)
+
+ def test_compute_kp_regression_loss(self):
+ def graph_fn():
+ loss = self.model._compute_kp_regression_loss(
+ input_height=self.input_height,
+ input_width=self.input_width,
+ task_name=_TASK_NAME,
+ regression_predictions=self.prediction_dict[cnma.get_keypoint_name(
+ _TASK_NAME, cnma.KEYPOINT_REGRESSION,)],
+ localization_loss_fn=self.localization_loss_fn)
+ return loss
+
+ loss = self.execute(graph_fn, [])
+
+ # The prediction and groundtruth are curated to produce very low loss.
+ self.assertGreater(0.01, loss)
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class CenterNetMetaArchRestoreTest(test_case.TestCase):
+  """Tests the restore map built by `restore_from_objects`."""
+
+  def test_restore_map_resnet(self):
+    """Test restore map for a resnet backbone."""
+    meta_arch = build_center_net_meta_arch(build_resnet=True)
+    restore_map = meta_arch.restore_from_objects('classification')
+    feature_extractor = restore_map['feature_extractor']
+    self.assertIsInstance(feature_extractor, tf.keras.Model)
+
+
+class DummyFeatureExtractor(cnma.CenterNetFeatureExtractor):
+  """Feature extractor stub that emits constant all-ones feature maps."""
+
+  def __init__(self, channel_means, channel_stds, bgr_ordering,
+               num_feature_outputs, stride):
+    """Keeps the stub settings and forwards the rest to the base class."""
+    self._num_feature_outputs = num_feature_outputs
+    self._stride = stride
+    super(DummyFeatureExtractor, self).__init__(
+        channel_means=channel_means,
+        channel_stds=channel_stds,
+        bgr_ordering=bgr_ordering)
+
+  def predict(self):
+    pass
+
+  def loss(self):
+    pass
+
+  def postprocess(self):
+    pass
+
+  def call(self, inputs):
+    """Returns `num_feature_outputs` references to one all-ones feature map.
+
+    The map has 64 channels and a spatial size of input size // stride.
+    """
+    batch_size, height, width, _ = inputs.shape
+    out_shape = [batch_size, height // self._stride, width // self._stride, 64]
+    return [tf.ones(out_shape, dtype=tf.float32)] * self._num_feature_outputs
+
+  @property
+  def out_stride(self):
+    return self._stride
+
+  @property
+  def num_feature_outputs(self):
+    return self._num_feature_outputs
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class CenterNetFeatureExtractorTest(test_case.TestCase):
+  """Tests `preprocess` of the feature extractor base class."""
+
+  def test_preprocess(self):
+    """The preprocessed batch must sum to its element count."""
+    extractor = DummyFeatureExtractor(
+        channel_means=(1.0, 2.0, 3.0), channel_stds=(10., 20., 30.),
+        bgr_ordering=False, num_feature_outputs=2, stride=4)
+
+    image_batch = np.zeros((2, 32, 32, 3))
+    image_batch[:, :, :] = 11, 22, 33
+
+    def preprocess_fn():
+      return extractor.preprocess(image_batch)
+
+    preprocessed = self.execute(preprocess_fn, [])
+    self.assertAlmostEqual(preprocessed.sum(), 2 * 32 * 32 * 3)
+
+  def test_bgr_ordering(self):
+    """With bgr_ordering=True the channel order comes out reversed."""
+    extractor = DummyFeatureExtractor(
+        channel_means=(0.0, 0.0, 0.0), channel_stds=(1., 1., 1.),
+        bgr_ordering=True, num_feature_outputs=2, stride=4)
+
+    image_batch = np.zeros((2, 32, 32, 3), dtype=np.float32)
+    image_batch[:, :, :] = 1, 2, 3
+
+    def preprocess_fn():
+      return extractor.preprocess(image_batch)
+
+    preprocessed = self.execute(preprocess_fn, [])
+    unit_plane = np.ones((2, 32, 32))
+    # Input channel values (1, 2, 3) must show up at indices (2, 1, 0).
+    self.assertAllClose(preprocessed[..., 2], 1 * unit_plane)
+    self.assertAllClose(preprocessed[..., 1], 2 * unit_plane)
+    self.assertAllClose(preprocessed[..., 0], 3 * unit_plane)
+
+  def test_default_ordering(self):
+    """With bgr_ordering=False the channel order is left unchanged."""
+    extractor = DummyFeatureExtractor(
+        channel_means=(0.0, 0.0, 0.0), channel_stds=(1., 1., 1.),
+        bgr_ordering=False, num_feature_outputs=2, stride=4)
+
+    image_batch = np.zeros((2, 32, 32, 3), dtype=np.float32)
+    image_batch[:, :, :] = 1, 2, 3
+
+    def preprocess_fn():
+      return extractor.preprocess(image_batch)
+
+    preprocessed = self.execute(preprocess_fn, [])
+    unit_plane = np.ones((2, 32, 32))
+    self.assertAllClose(preprocessed[..., 0], 1 * unit_plane)
+    self.assertAllClose(preprocessed[..., 1], 2 * unit_plane)
+    self.assertAllClose(preprocessed[..., 2], 3 * unit_plane)
+
+
+if __name__ == '__main__':
+ tf.enable_v2_behavior()  # Tests here are TF2-only (see the skipIf decorators).
+ tf.test.main()
diff --git a/research/object_detection/meta_architectures/context_rcnn_lib.py b/research/object_detection/meta_architectures/context_rcnn_lib.py
new file mode 100644
index 0000000000000000000000000000000000000000..902a88c77669cd27eb36490d645740041600fcac
--- /dev/null
+++ b/research/object_detection/meta_architectures/context_rcnn_lib.py
@@ -0,0 +1,224 @@
+# Lint as: python3
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Library functions for ContextRCNN."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow.compat.v1 as tf
+import tf_slim as slim
+
+
+# Large negative value added to invalid attention weights before the softmax.
+_NEGATIVE_PADDING_VALUE = -100000
+
+
+def filter_weight_value(weights, values, valid_mask):
+  """Filters weights and values based on valid_mask.
+
+  _NEGATIVE_PADDING_VALUE is added to the invalid elements of the weights so
+  that they do not contribute to a later softmax. The invalid elements of the
+  values are set to 0.
+
+  Args:
+    weights: A float Tensor of shape [batch_size, input_size, context_size].
+    values: A float Tensor of shape [batch_size, context_size,
+      projected_dimension].
+    valid_mask: A boolean Tensor of shape [batch_size, context_size]. True means
+      valid and False means invalid.
+
+  Returns:
+    weights: A float Tensor of shape [batch_size, input_size, context_size].
+    values: A float Tensor of shape [batch_size, context_size,
+      projected_dimension].
+
+  Raises:
+    ValueError: If the batch or context dimensions of the inputs do not match.
+  """
+  weights_batch, _, weights_context = weights.shape
+  values_batch, values_context, _ = values.shape
+  mask_batch, mask_context = valid_mask.shape
+  if weights_batch != values_batch or values_batch != mask_batch:
+    raise ValueError("Please make sure the first dimension of the input"
+                     " tensors are the same.")
+
+  if weights_context != values_context:
+    raise ValueError("Please make sure the third dimension of weights matches"
+                     " the second dimension of values.")
+
+  if weights_context != mask_context:
+    raise ValueError("Please make sure the third dimension of the weights"
+                     " matches the second dimension of the valid_mask.")
+
+  # Trailing axis lets the mask broadcast over the projected dimension.
+  mask_column = valid_mask[..., tf.newaxis]
+
+  # Force the invalid weights to be very negative so they won't contribute to
+  # the softmax.
+  invalid = tf.cast(tf.math.logical_not(mask_column), weights.dtype)
+  padding = tf.transpose(invalid * _NEGATIVE_PADDING_VALUE, perm=[0, 2, 1])
+  weights += padding
+
+  # Force the invalid values to be 0.
+  values *= tf.cast(mask_column, values.dtype)
+
+  return weights, values
+
+
+def compute_valid_mask(num_valid_elements, num_elements):
+  """Computes mask of valid entries within padded context feature.
+
+  Built with broadcasting, so batch_size need not be statically known.
+
+  Args:
+    num_valid_elements: A int32 Tensor of shape [batch_size].
+    num_elements: An int32 Tensor.
+
+  Returns:
+    A boolean Tensor of the shape [batch_size, num_elements]. True means
+      valid and False means invalid.
+  """
+  element_idxs = tf.range(num_elements, dtype=tf.int32)
+  # [1, num_elements] < [batch_size, 1] broadcasts to the full mask shape.
+  return tf.less(element_idxs[tf.newaxis, ...],
+                 num_valid_elements[..., tf.newaxis])
+
+
+def project_features(features, projection_dimension, is_training, normalize):
+  """Projects features to another feature space.
+
+  The features are flattened to 2-D, passed through a fully connected layer
+  with batch normalization and relu6 activation, and reshaped back to 3-D.
+
+  Args:
+    features: A float Tensor of shape [batch_size, features_size,
+      num_features].
+    projection_dimension: A int32 Tensor.
+    is_training: A boolean Tensor (affecting batch normalization).
+    normalize: A boolean Tensor. If true, the output features will be l2
+      normalized on the last dimension.
+
+  Returns:
+    A float Tensor of shape [batch, features_size, projection_dimension].
+  """
+  # TODO(guanhangwu) Figure out a better way of specifying the batch norm
+  # params.
+  batch_norm_params = dict(
+      is_training=is_training, decay=0.97, epsilon=0.001, center=True,
+      scale=True)
+
+  batch_size, _, num_features = features.shape
+  flat_features = tf.reshape(features, [-1, num_features])
+  flat_projection = slim.fully_connected(
+      flat_features,
+      num_outputs=projection_dimension,
+      activation_fn=tf.nn.relu6,
+      normalizer_fn=slim.batch_norm,
+      normalizer_params=batch_norm_params)
+
+  projected_features = tf.reshape(
+      flat_projection, [batch_size, -1, projection_dimension])
+
+  if not normalize:
+    return projected_features
+
+  # L2-normalize along the projected (last) dimension.
+  return tf.math.l2_normalize(projected_features, axis=-1)
+
+
+def attention_block(input_features, context_features, bottleneck_dimension,
+ output_dimension, attention_temperature, valid_mask,
+ is_training):
+ """Generic attention block: input features attend over context features.
+
+ Args:
+ input_features: A float Tensor of shape [batch_size, input_size,
+ num_input_features].
+ context_features: A float Tensor of shape [batch_size, context_size,
+ num_context_features].
+ bottleneck_dimension: A int32 Tensor representing the bottleneck dimension
+ for intermediate projections.
+ output_dimension: A int32 Tensor representing the last dimension of the
+ output feature.
+ attention_temperature: A float Tensor. It controls the temperature of the
+ softmax for weights calculation. The formula for calculation as follows:
+ weights = exp(weights / temperature) / sum(exp(weights / temperature))
+ valid_mask: A boolean Tensor of shape [batch_size, context_size].
+ is_training: A boolean Tensor (affecting batch normalization).
+
+ Returns:
+ A float Tensor of shape [batch_size, input_size, output_dimension].
+ """
+
+ with tf.variable_scope("AttentionBlock"):
+ queries = project_features(
+ input_features, bottleneck_dimension, is_training, normalize=True)
+ keys = project_features(
+ context_features, bottleneck_dimension, is_training, normalize=True)
+ values = project_features(
+ context_features, bottleneck_dimension, is_training, normalize=True)
+
+ weights = tf.matmul(queries, keys, transpose_b=True)  # Query-key products.
+
+ weights, values = filter_weight_value(weights, values, valid_mask)
+
+ weights = tf.nn.softmax(weights / attention_temperature)
+
+ features = tf.matmul(weights, values)  # Weighted sum of the context values.
+ output_features = project_features(
+ features, output_dimension, is_training, normalize=False)
+ return output_features
+
+
+def compute_box_context_attention(box_features, context_features,
+                                  valid_context_size, bottleneck_dimension,
+                                  attention_temperature, is_training):
+  """Computes the attention feature from the context for a batch of boxes.
+
+  Args:
+    box_features: A float Tensor of shape [batch_size, max_num_proposals,
+      height, width, channels]. It is pooled features from first stage
+      proposals.
+    context_features: A float Tensor of shape [batch_size, context_size,
+      num_context_features].
+    valid_context_size: A int32 Tensor of shape [batch_size].
+    bottleneck_dimension: A int32 Tensor representing the bottleneck dimension
+      for intermediate projections.
+    attention_temperature: A float Tensor. It controls the temperature of the
+      softmax for weights calculation. The formula for calculation as follows:
+        weights = exp(weights / temperature) / sum(exp(weights / temperature))
+    is_training: A boolean Tensor (affecting batch normalization).
+
+  Returns:
+    A float Tensor of shape [batch_size, max_num_proposals, 1, 1, channels].
+  """
+  _, context_size, _ = context_features.shape
+  valid_mask = compute_valid_mask(valid_context_size, context_size)
+
+  # as_list() yields a plain Python int whether the static shape holds
+  # Dimension objects (TF1) or ints (TF2); `.value` only exists on the former.
+  channels = box_features.shape.as_list()[-1]
+  # Average pools over height and width dimension so that the shape of
+  # box_features becomes [batch_size, max_num_proposals, channels].
+  box_features = tf.reduce_mean(box_features, [2, 3])
+
+  output_features = attention_block(box_features, context_features,
+                                    bottleneck_dimension, channels,
+                                    attention_temperature, valid_mask,
+                                    is_training)
+
+  # Expands the dimension back to match with the original feature map.
+  return output_features[:, :, tf.newaxis, tf.newaxis, :]
diff --git a/research/object_detection/meta_architectures/context_rcnn_lib_tf1_test.py b/research/object_detection/meta_architectures/context_rcnn_lib_tf1_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0b3b848d835dcad37f6c75f05b869fbaec4facb
--- /dev/null
+++ b/research/object_detection/meta_architectures/context_rcnn_lib_tf1_test.py
@@ -0,0 +1,126 @@
+# Lint as: python3
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for context_rcnn_lib."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import unittest
+from absl.testing import parameterized
+import tensorflow.compat.v1 as tf
+
+from object_detection.meta_architectures import context_rcnn_lib
+from object_detection.utils import test_case
+from object_detection.utils import tf_version
+
+_NEGATIVE_PADDING_VALUE = -100000  # Same value as in context_rcnn_lib.
+
+
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
+class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase,
+                         tf.test.TestCase):
+  """Unit tests for the helper functions in context_rcnn_lib."""
+
+  def test_compute_valid_mask(self):
+    """Entries with index below the per-row valid count are marked valid."""
+    total_elements = tf.constant(3, tf.int32)
+    valid_counts = tf.constant((1, 2), tf.int32)
+    mask = context_rcnn_lib.compute_valid_mask(valid_counts, total_elements)
+    expected_mask = tf.constant([[1, 0, 0], [1, 1, 0]], tf.float32)
+    self.assertAllEqual(mask, expected_mask)
+
+  def test_filter_weight_value(self):
+    """Invalid context entries get padded weights and zeroed values."""
+    weights = tf.ones((2, 3, 2), tf.float32) * 4
+    values = tf.ones((2, 2, 4), tf.float32)
+    padded = _NEGATIVE_PADDING_VALUE + 4
+
+    # Only the last context entry of the second batch element is invalid.
+    valid_mask = tf.constant([[True, True], [True, False]], tf.bool)
+    filtered_weights, filtered_values = context_rcnn_lib.filter_weight_value(
+        weights, values, valid_mask)
+    expected_weights = tf.constant([[[4, 4]] * 3, [[4, padded]] * 3])
+    expected_values = tf.constant([[[1, 1, 1, 1], [1, 1, 1, 1]],
+                                   [[1, 1, 1, 1], [0, 0, 0, 0]]])
+    self.assertAllEqual(filtered_weights, expected_weights)
+    self.assertAllEqual(filtered_values, expected_values)
+
+    # Now every context entry of the second batch element is invalid.
+    valid_mask = tf.constant([[True, True], [False, False]], tf.bool)
+    filtered_weights, filtered_values = context_rcnn_lib.filter_weight_value(
+        weights, values, valid_mask)
+    expected_weights = tf.constant([[[4, 4]] * 3, [[padded, padded]] * 3])
+    expected_values = tf.constant([[[1, 1, 1, 1], [1, 1, 1, 1]],
+                                   [[0, 0, 0, 0], [0, 0, 0, 0]]])
+    self.assertAllEqual(filtered_weights, expected_weights)
+    self.assertAllEqual(filtered_values, expected_values)
+
+  @parameterized.parameters(
+      (2, True, True),
+      (2, False, True),
+      (10, True, False),
+      (10, False, False),
+  )
+  def test_project_features(self, projection_dimension, is_training, normalize):
+    """Only the last dimension changes, to `projection_dimension`."""
+    features = tf.ones([2, 3, 4], tf.float32)
+    expected_shape = [2, 3, projection_dimension]
+    projected = context_rcnn_lib.project_features(
+        features,
+        projection_dimension,
+        is_training=is_training,
+        normalize=normalize)
+    # Makes sure the shape is correct.
+    self.assertAllEqual(projected.shape, expected_shape)
+
+  @parameterized.parameters(
+      (2, 10, 1),
+      (3, 10, 2),
+      (4, 20, 3),
+      (5, 20, 4),
+      (7, 20, 5),
+  )
+  def test_attention_block(self, bottleneck_dimension, output_dimension,
+                           attention_temperature):
+    """The output's last dimension must equal `output_dimension`."""
+    input_features = tf.ones([2, 3, 4], tf.float32)
+    context_features = tf.ones([2, 2, 3], tf.float32)
+    valid_mask = tf.constant([[True, True], [False, False]], tf.bool)
+    expected_shape = [2, 3, output_dimension]
+    output_features = context_rcnn_lib.attention_block(
+        input_features, context_features, bottleneck_dimension,
+        output_dimension, attention_temperature, valid_mask, False)
+
+    # Makes sure the shape is correct.
+    self.assertAllEqual(output_features.shape, expected_shape)
+
+  @parameterized.parameters(True, False)
+  def test_compute_box_context_attention(self, is_training):
+    """Attention features must have shape [2, 3, 1, 1, 4] for these inputs."""
+    box_features = tf.ones([2, 3, 4, 4, 4], tf.float32)
+    context_features = tf.ones([2, 5, 6], tf.float32)
+    valid_context_size = tf.constant((2, 3), tf.int32)
+    attention_features = context_rcnn_lib.compute_box_context_attention(
+        box_features,
+        context_features,
+        valid_context_size,
+        10,  # bottleneck_dimension
+        1,  # attention_temperature
+        is_training)
+    self.assertAllEqual(attention_features.shape, [2, 3, 1, 1, 4])
+
+
+if __name__ == '__main__':
+ tf.test.main()  # Runs the test cases defined in this module.
diff --git a/research/object_detection/meta_architectures/context_rcnn_meta_arch.py b/research/object_detection/meta_architectures/context_rcnn_meta_arch.py
new file mode 100644
index 0000000000000000000000000000000000000000..abe30558b01218df8999b3f0f7698e57f67f8ff2
--- /dev/null
+++ b/research/object_detection/meta_architectures/context_rcnn_meta_arch.py
@@ -0,0 +1,340 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Context R-CNN meta-architecture definition.
+
+This adds the ability to use attention over contextual features within the
+Faster R-CNN object detection framework to improve object detection performance.
+See https://arxiv.org/abs/1912.03538 for more information.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import functools
+
+from object_detection.core import standard_fields as fields
+from object_detection.meta_architectures import context_rcnn_lib
+from object_detection.meta_architectures import faster_rcnn_meta_arch
+
+
+class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
+ """Context R-CNN Meta-architecture definition."""
+
+ def __init__(self,
+ is_training,
+ num_classes,
+ image_resizer_fn,
+ feature_extractor,
+ number_of_stages,
+ first_stage_anchor_generator,
+ first_stage_target_assigner,
+ first_stage_atrous_rate,
+ first_stage_box_predictor_arg_scope_fn,
+ first_stage_box_predictor_kernel_size,
+ first_stage_box_predictor_depth,
+ first_stage_minibatch_size,
+ first_stage_sampler,
+ first_stage_non_max_suppression_fn,
+ first_stage_max_proposals,
+ first_stage_localization_loss_weight,
+ first_stage_objectness_loss_weight,
+ crop_and_resize_fn,
+ initial_crop_size,
+ maxpool_kernel_size,
+ maxpool_stride,
+ second_stage_target_assigner,
+ second_stage_mask_rcnn_box_predictor,
+ second_stage_batch_size,
+ second_stage_sampler,
+ second_stage_non_max_suppression_fn,
+ second_stage_score_conversion_fn,
+ second_stage_localization_loss_weight,
+ second_stage_classification_loss_weight,
+ second_stage_classification_loss,
+ second_stage_mask_prediction_loss_weight=1.0,
+ hard_example_miner=None,
+ parallel_iterations=16,
+ add_summaries=True,
+ clip_anchors_to_image=False,
+ use_static_shapes=False,
+ resize_masks=True,
+ freeze_batchnorm=False,
+ return_raw_detections_during_predict=False,
+ output_final_box_features=False,
+ attention_bottleneck_dimension=None,
+ attention_temperature=None):
+ """ContextRCNNMetaArch Constructor.
+
+ Args:
+ is_training: A boolean indicating whether the training version of the
+ computation graph should be constructed.
+ num_classes: Number of classes. Note that num_classes *does not*
+ include the background category, so if groundtruth labels take values
+ in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
+ assigned classification targets can range from {0,... K}).
+ image_resizer_fn: A callable for image resizing. This callable
+ takes a rank-3 image tensor of shape [height, width, channels]
+ (corresponding to a single image), an optional rank-3 instance mask
+ tensor of shape [num_masks, height, width] and returns a resized rank-3
+ image tensor, a resized mask tensor if one was provided in the input. In
+ addition this callable must also return a 1-D tensor of the form
+ [height, width, channels] containing the size of the true image, as the
+ image resizer can perform zero padding. See protos/image_resizer.proto.
+ feature_extractor: A FasterRCNNFeatureExtractor object.
+ number_of_stages: An integer taking values in {1, 2, 3}. If
+ 1, the function will construct only the Region Proposal Network (RPN)
+ part of the model. If 2, the function will perform box refinement and
+ other auxiliary predictions all in the second stage. If 3, it will
+ extract features from refined boxes and perform the auxiliary
+ predictions on the non-maximum suppressed refined boxes.
+ If is_training is true and the value of number_of_stages is 3, it is
+ reduced to 2 since all the model heads are trained in parallel in second
+ stage during training.
+ first_stage_anchor_generator: An anchor_generator.AnchorGenerator object
+ (note that currently we only support
+ grid_anchor_generator.GridAnchorGenerator objects)
+ first_stage_target_assigner: Target assigner to use for first stage of
+ Faster R-CNN (RPN).
+ first_stage_atrous_rate: A single integer indicating the atrous rate for
+ the single convolution op which is applied to the `rpn_features_to_crop`
+ tensor to obtain a tensor to be used for box prediction. Some feature
+ extractors optionally allow for producing feature maps computed at
+ denser resolutions. The atrous rate is used to compensate for the
+ denser feature maps by using an effectively larger receptive field.
+ (This should typically be set to 1).
+ first_stage_box_predictor_arg_scope_fn: Either a
+ Keras layer hyperparams object or a function to construct tf-slim
+ arg_scope for conv2d, separable_conv2d and fully_connected ops. Used
+ for the RPN box predictor. If it is a keras hyperparams object the
+ RPN box predictor will be a Keras model. If it is a function to
+ construct an arg scope it will be a tf-slim box predictor.
+ first_stage_box_predictor_kernel_size: Kernel size to use for the
+ convolution op just prior to RPN box predictions.
+ first_stage_box_predictor_depth: Output depth for the convolution op
+ just prior to RPN box predictions.
+ first_stage_minibatch_size: The "batch size" to use for computing the
+ objectness and location loss of the region proposal network. This
+ "batch size" refers to the number of anchors selected as contributing
+ to the loss function for any given image within the image batch and is
+ only called "batch_size" due to terminology from the Faster R-CNN paper.
+ first_stage_sampler: Sampler to use for first stage loss (RPN loss).
+ first_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression
+ callable that takes `boxes`, `scores` and optional `clip_window`(with
+ all other inputs already set) and returns a dictionary containing
+ tensors with keys: `detection_boxes`, `detection_scores`,
+ `detection_classes`, `num_detections`. This is used to perform non max
+ suppression on the boxes predicted by the Region Proposal Network
+ (RPN).
+ See `post_processing.batch_multiclass_non_max_suppression` for the type
+ and shape of these tensors.
+ first_stage_max_proposals: Maximum number of boxes to retain after
+ performing Non-Max Suppression (NMS) on the boxes predicted by the
+ Region Proposal Network (RPN).
+ first_stage_localization_loss_weight: A float
+ first_stage_objectness_loss_weight: A float
+ crop_and_resize_fn: A differentiable resampler to use for cropping RPN
+ proposal features.
+ initial_crop_size: A single integer indicating the output size
+ (width and height are set to be the same) of the initial bilinear
+ interpolation based cropping during ROI pooling.
+ maxpool_kernel_size: A single integer indicating the kernel size of the
+ max pool op on the cropped feature map during ROI pooling.
+ maxpool_stride: A single integer indicating the stride of the max pool
+ op on the cropped feature map during ROI pooling.
+ second_stage_target_assigner: Target assigner to use for second stage of
+ Faster R-CNN. If the model is configured with multiple prediction heads,
+ this target assigner is used to generate targets for all heads (with the
+ correct `unmatched_class_label`).
+ second_stage_mask_rcnn_box_predictor: Mask R-CNN box predictor to use for
+ the second stage.
+ second_stage_batch_size: The batch size used for computing the
+ classification and refined location loss of the box classifier. This
+ "batch size" refers to the number of proposals selected as contributing
+ to the loss function for any given image within the image batch and is
+ only called "batch_size" due to terminology from the Faster R-CNN paper.
+ second_stage_sampler: Sampler to use for second stage loss (box
+ classifier loss).
+ second_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression
+ callable that takes `boxes`, `scores`, optional `clip_window` and
+ optional (kwarg) `mask` inputs (with all other inputs already set)
+ and returns a dictionary containing tensors with keys:
+ `detection_boxes`, `detection_scores`, `detection_classes`,
+ `num_detections`, and (optionally) `detection_masks`. See
+ `post_processing.batch_multiclass_non_max_suppression` for the type and
+ shape of these tensors.
+ second_stage_score_conversion_fn: Callable elementwise nonlinearity
+ (that takes tensors as inputs and returns tensors). This is usually
+ used to convert logits to probabilities.
+ second_stage_localization_loss_weight: A float indicating the scale factor
+ for second stage localization loss.
+ second_stage_classification_loss_weight: A float indicating the scale
+ factor for second stage classification loss.
+ second_stage_classification_loss: Classification loss used by the second
+ stage classifier. Either losses.WeightedSigmoidClassificationLoss or
+ losses.WeightedSoftmaxClassificationLoss.
+ second_stage_mask_prediction_loss_weight: A float indicating the scale
+ factor for second stage mask prediction loss. This is applicable only if
+ second stage box predictor is configured to predict masks.
+ hard_example_miner: A losses.HardExampleMiner object (can be None).
+ parallel_iterations: (Optional) The number of iterations allowed to run
+ in parallel for calls to tf.map_fn.
+ add_summaries: boolean (default: True) controlling whether summary ops
+ should be added to tensorflow graph.
+ clip_anchors_to_image: Normally, anchors generated for a given image size
+ are pruned during training if they lie outside the image window. This
+ option clips the anchors to be within the image instead of pruning.
+ use_static_shapes: If True, uses implementation of ops with static shape
+ guarantees.
+ resize_masks: Indicates whether the masks present in the groundtruth
+ should be resized in the model with `image_resizer_fn`
+ freeze_batchnorm: Whether to freeze batch norm parameters in the first
+ stage box predictor during training or not. When training with a small
+ batch size (e.g. 1), it is desirable to freeze batch norm update and
+ use pretrained batch norm params.
+ return_raw_detections_during_predict: Whether to return raw detection
+ boxes in the predict() method. These are decoded boxes that have not
+ been through postprocessing (i.e. NMS). Default False.
+ output_final_box_features: Whether to output final box features. If true,
+ it crops the feature map based on the final box prediction and returns
+ it in the dict as detection_features.
+ attention_bottleneck_dimension: A single integer. The bottleneck feature
+ dimension of the attention block.
+ attention_temperature: A single float. The attention temperature.
+
+ Raises:
+ ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` at
+ training time.
+ ValueError: If first_stage_anchor_generator is not of type
+ grid_anchor_generator.GridAnchorGenerator.
+ """
+ super(ContextRCNNMetaArch, self).__init__(
+ is_training,
+ num_classes,
+ image_resizer_fn,
+ feature_extractor,
+ number_of_stages,
+ first_stage_anchor_generator,
+ first_stage_target_assigner,
+ first_stage_atrous_rate,
+ first_stage_box_predictor_arg_scope_fn,
+ first_stage_box_predictor_kernel_size,
+ first_stage_box_predictor_depth,
+ first_stage_minibatch_size,
+ first_stage_sampler,
+ first_stage_non_max_suppression_fn,
+ first_stage_max_proposals,
+ first_stage_localization_loss_weight,
+ first_stage_objectness_loss_weight,
+ crop_and_resize_fn,
+ initial_crop_size,
+ maxpool_kernel_size,
+ maxpool_stride,
+ second_stage_target_assigner,
+ second_stage_mask_rcnn_box_predictor,
+ second_stage_batch_size,
+ second_stage_sampler,
+ second_stage_non_max_suppression_fn,
+ second_stage_score_conversion_fn,
+ second_stage_localization_loss_weight,
+ second_stage_classification_loss_weight,
+ second_stage_classification_loss,
+ second_stage_mask_prediction_loss_weight=(
+ second_stage_mask_prediction_loss_weight),
+ hard_example_miner=hard_example_miner,
+ parallel_iterations=parallel_iterations,
+ add_summaries=add_summaries,
+ clip_anchors_to_image=clip_anchors_to_image,
+ use_static_shapes=use_static_shapes,
+ resize_masks=resize_masks,
+ freeze_batchnorm=freeze_batchnorm,
+ return_raw_detections_during_predict=(
+ return_raw_detections_during_predict),
+ output_final_box_features=output_final_box_features)
+
+ self._context_feature_extract_fn = functools.partial(
+ context_rcnn_lib.compute_box_context_attention,
+ bottleneck_dimension=attention_bottleneck_dimension,
+ attention_temperature=attention_temperature,
+ is_training=is_training)
+
+ @staticmethod
+ def get_side_inputs(features):
+ """Overrides the get_side_inputs function in the base class.
+
+ This function returns context_features and valid_context_size, which will be
+ used in the _compute_second_stage_input_feature_maps function.
+
+ Args:
+ features: A dictionary of tensors.
+
+ Returns:
+ A dictionary of tensors holding context_features and valid_context_size.
+
+ Raises:
+ ValueError: If context_features or valid_context_size is not in the
+ features.
+ """
+ if (fields.InputDataFields.context_features not in features or
+ fields.InputDataFields.valid_context_size not in features):
+ raise ValueError(
+ "Please make sure context_features and valid_context_size are in the "
+ "features")
+
+ return {
+ fields.InputDataFields.context_features:
+ features[fields.InputDataFields.context_features],
+ fields.InputDataFields.valid_context_size:
+ features[fields.InputDataFields.valid_context_size]
+ }
+
+ def _compute_second_stage_input_feature_maps(self, features_to_crop,
+ proposal_boxes_normalized,
+ context_features,
+ valid_context_size):
+ """Crops to a set of proposals from the feature map for a batch of images.
+
+ This function overrides the one in the FasterRCNNMetaArch. Aside from
+ cropping and resizing the feature maps, which is done in the parent class,
+ it adds context attention features to the box features.
+
+ Args:
+ features_to_crop: A float32 Tensor with shape [batch_size, height, width,
+ depth]
+ proposal_boxes_normalized: A float32 Tensor with shape [batch_size,
+ num_proposals, box_code_size] containing proposal boxes in normalized
+ coordinates.
+ context_features: A float Tensor of shape [batch_size, context_size,
+ num_context_features].
+ valid_context_size: An int32 Tensor of shape [batch_size].
+
+ Returns:
+ A float32 Tensor with shape [K, new_height, new_width, depth].
+ """
+ box_features = self._crop_and_resize_fn(
+ features_to_crop, proposal_boxes_normalized,
+ [self._initial_crop_size, self._initial_crop_size])
+
+ attention_features = self._context_feature_extract_fn(
+ box_features=box_features,
+ context_features=context_features,
+ valid_context_size=valid_context_size)
+
+ # Adds box features with attention features.
+ box_features += attention_features
+
+ flattened_feature_maps = self._flatten_first_two_dimensions(box_features)
+
+ return self._maxpool_layer(flattened_feature_maps)
diff --git a/research/object_detection/meta_architectures/context_rcnn_meta_arch_tf1_test.py b/research/object_detection/meta_architectures/context_rcnn_meta_arch_tf1_test.py
index 47d7624d02f1329dd92b2cfe0d97e5522369bc6f..a5dc8cc8e12f8e2ee95465c651b3570db0cca80f 100644
--- a/research/object_detection/meta_architectures/context_rcnn_meta_arch_tf1_test.py
+++ b/research/object_detection/meta_architectures/context_rcnn_meta_arch_tf1_test.py
@@ -1,4 +1,4 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,14 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
-"""Tests for google3.third_party.tensorflow_models.object_detection.meta_architectures.context_meta_arch."""
+"""Tests for object_detection.meta_architectures.context_rcnn_meta_arch."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
-
+import unittest
from absl.testing import parameterized
import mock
import tensorflow.compat.v1 as tf
@@ -109,6 +109,7 @@ class FakeFasterRCNNKerasFeatureExtractor(
])
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
def _get_model(self, box_predictor, **common_kwargs):
diff --git a/research/object_detection/meta_architectures/faster_rcnn_meta_arch.py b/research/object_detection/meta_architectures/faster_rcnn_meta_arch.py
index 58e3664553ccb57e943f34b8d38919aab9c83309..a07ddd09a63dd32bf43e6c5523cd3f263dda365e 100644
--- a/research/object_detection/meta_architectures/faster_rcnn_meta_arch.py
+++ b/research/object_detection/meta_architectures/faster_rcnn_meta_arch.py
@@ -261,31 +261,6 @@ class FasterRCNNKerasFeatureExtractor(object):
"""Get model that extracts second stage box classifier features."""
pass
- def restore_from_classification_checkpoint_fn(
- self,
- first_stage_feature_extractor_scope,
- second_stage_feature_extractor_scope):
- """Returns a map of variables to load from a foreign checkpoint.
-
- Args:
- first_stage_feature_extractor_scope: A scope name for the first stage
- feature extractor.
- second_stage_feature_extractor_scope: A scope name for the second stage
- feature extractor.
-
- Returns:
- A dict mapping variable names (to load from a checkpoint) to variables in
- the model graph.
- """
- variables_to_restore = {}
- for variable in variables_helper.get_global_variables_safely():
- for scope_name in [first_stage_feature_extractor_scope,
- second_stage_feature_extractor_scope]:
- if variable.op.name.startswith(scope_name):
- var_name = variable.op.name.replace(scope_name + '/', '')
- variables_to_restore[var_name] = variable
- return variables_to_restore
-
class FasterRCNNMetaArch(model.DetectionModel):
"""Faster R-CNN Meta-architecture definition."""
@@ -2808,6 +2783,46 @@ class FasterRCNNMetaArch(model.DetectionModel):
variables_to_restore, include_patterns=include_patterns)
return {var.op.name: var for var in feature_extractor_variables}
+ def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
+ """Returns a map of Trackable objects to load from a foreign checkpoint.
+
+ Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
+ or Checkpoint). This enables the model to initialize based on weights from
+ another task. For example, the feature extractor variables from a
+ classification model can be used to bootstrap training of an object
+ detector. When loading from an object detection model, the checkpoint model
+ should have the same parameters as this detection model with the exception
+ of the num_classes parameter.
+
+ Note that this function is intended to be used to restore Keras-based
+ models when running Tensorflow 2, whereas restore_map (above) is intended
+ to be used to restore Slim-based models when running Tensorflow 1.x.
+
+ Args:
+ fine_tune_checkpoint_type: whether to restore from a full detection
+ checkpoint (with compatible variable names) or to restore from a
+ classification checkpoint for initialization prior to training.
+ Valid values: `detection`, `classification`. Default 'detection'.
+
+ Returns:
+ A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
+ """
+ if fine_tune_checkpoint_type == 'classification':
+ return {
+ 'feature_extractor':
+ self._feature_extractor.classification_backbone
+ }
+ elif fine_tune_checkpoint_type == 'detection':
+ fake_model = tf.train.Checkpoint(
+ _feature_extractor_for_box_classifier_features=
+ self._feature_extractor_for_box_classifier_features,
+ _feature_extractor_for_proposal_features=
+ self._feature_extractor_for_proposal_features)
+ return {'model': fake_model}
+ else:
+ raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
+ fine_tune_checkpoint_type))
+
def updates(self):
"""Returns a list of update operators for this model.
diff --git a/research/object_detection/meta_architectures/ssd_meta_arch.py b/research/object_detection/meta_architectures/ssd_meta_arch.py
index d401b0de75a6a1c04984caad12986029e3166226..d5db202a8a5effc581f7e200dc49a7811e7a3d95 100644
--- a/research/object_detection/meta_architectures/ssd_meta_arch.py
+++ b/research/object_detection/meta_architectures/ssd_meta_arch.py
@@ -250,35 +250,6 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
def call(self, inputs, **kwargs):
return self._extract_features(inputs)
- def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
- """Returns a map of variables to load from a foreign checkpoint.
-
- Args:
- feature_extractor_scope: A scope name for the feature extractor.
-
- Returns:
- A dict mapping variable names (to load from a checkpoint) to variables in
- the model graph.
- """
- variables_to_restore = {}
- if tf.executing_eagerly():
- for variable in self.variables:
- # variable.name includes ":0" at the end, but the names in the
- # checkpoint do not have the suffix ":0". So, we strip it here.
- var_name = variable.name[:-2]
- if var_name.startswith(feature_extractor_scope + '/'):
- var_name = var_name.replace(feature_extractor_scope + '/', '')
- variables_to_restore[var_name] = variable
- else:
- # b/137854499: use global_variables.
- for variable in variables_helper.get_global_variables_safely():
- var_name = variable.op.name
- if var_name.startswith(feature_extractor_scope + '/'):
- var_name = var_name.replace(feature_extractor_scope + '/', '')
- variables_to_restore[var_name] = variable
-
- return variables_to_restore
-
class SSDMetaArch(model.DetectionModel):
"""SSD Meta-architecture definition."""
@@ -1295,8 +1266,8 @@ class SSDMetaArch(model.DetectionModel):
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
load_all_detection_checkpoint_vars: whether to load all variables (when
- `fine_tune_checkpoint_type='detection'`). If False, only variables
- within the appropriate scopes are included. Default False.
+ `fine_tune_checkpoint_type` is `detection`). If False, only variables
+ within the feature extractor scope are included. Default False.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
@@ -1311,36 +1282,56 @@ class SSDMetaArch(model.DetectionModel):
elif fine_tune_checkpoint_type == 'detection':
variables_to_restore = {}
- if tf.executing_eagerly():
+ for variable in variables_helper.get_global_variables_safely():
+ var_name = variable.op.name
if load_all_detection_checkpoint_vars:
- # Grab all detection vars by name
- for variable in self.variables:
- # variable.name includes ":0" at the end, but the names in the
- # checkpoint do not have the suffix ":0". So, we strip it here.
- var_name = variable.name[:-2]
- variables_to_restore[var_name] = variable
+ variables_to_restore[var_name] = variable
else:
- # Grab just the feature extractor vars by name
- for variable in self._feature_extractor.variables:
- # variable.name includes ":0" at the end, but the names in the
- # checkpoint do not have the suffix ":0". So, we strip it here.
- var_name = variable.name[:-2]
- variables_to_restore[var_name] = variable
- else:
- for variable in variables_helper.get_global_variables_safely():
- var_name = variable.op.name
- if load_all_detection_checkpoint_vars:
+ if var_name.startswith(self._extract_features_scope):
variables_to_restore[var_name] = variable
- else:
- if var_name.startswith(self._extract_features_scope):
- variables_to_restore[var_name] = variable
-
return variables_to_restore
else:
raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
fine_tune_checkpoint_type))
+ def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
+ """Returns a map of Trackable objects to load from a foreign checkpoint.
+
+ Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
+ or Checkpoint). This enables the model to initialize based on weights from
+ another task. For example, the feature extractor variables from a
+ classification model can be used to bootstrap training of an object
+ detector. When loading from an object detection model, the checkpoint model
+ should have the same parameters as this detection model with the exception
+ of the num_classes parameter.
+
+ Note that this function is intended to be used to restore Keras-based
+ models when running Tensorflow 2, whereas restore_map (above) is intended
+ to be used to restore Slim-based models when running Tensorflow 1.x.
+
+ Args:
+ fine_tune_checkpoint_type: whether to restore from a full detection
+ checkpoint (with compatible variable names) or to restore from a
+ classification checkpoint for initialization prior to training.
+ Valid values: `detection`, `classification`. Default 'detection'.
+
+ Returns:
+ A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
+ """
+ if fine_tune_checkpoint_type == 'classification':
+ return {
+ 'feature_extractor':
+ self._feature_extractor.classification_backbone
+ }
+ elif fine_tune_checkpoint_type == 'detection':
+ fake_model = tf.train.Checkpoint(
+ _feature_extractor=self._feature_extractor)
+ return {'model': fake_model}
+ else:
+ raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
+ fine_tune_checkpoint_type))
+
def updates(self):
"""Returns a list of update operators for this model.
diff --git a/research/object_detection/metrics/calibration_evaluation_test.py b/research/object_detection/metrics/calibration_evaluation_tf1_test.py
similarity index 98%
rename from research/object_detection/metrics/calibration_evaluation_test.py
rename to research/object_detection/metrics/calibration_evaluation_tf1_test.py
index 375978d86c9f4b537e212ec3a909a3fe6016495d..0f3d6eb319f0819937c04e030c9e1937bf09db10 100644
--- a/research/object_detection/metrics/calibration_evaluation_test.py
+++ b/research/object_detection/metrics/calibration_evaluation_tf1_test.py
@@ -18,9 +18,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import unittest
import tensorflow.compat.v1 as tf
from object_detection.core import standard_fields
from object_detection.metrics import calibration_evaluation
+from object_detection.utils import tf_version
def _get_categories_list():
@@ -36,6 +38,7 @@ def _get_categories_list():
}]
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class CalibrationDetectionEvaluationTest(tf.test.TestCase):
def _get_ece(self, ece_op, update_op):
diff --git a/research/object_detection/metrics/calibration_metrics_test.py b/research/object_detection/metrics/calibration_metrics_tf1_test.py
similarity index 97%
rename from research/object_detection/metrics/calibration_metrics_test.py
rename to research/object_detection/metrics/calibration_metrics_tf1_test.py
index 54793fca09c464eec31149bccff31cbb6f83f4cf..9c1adbca20dfae80e97927d462c9cc18de6ff823 100644
--- a/research/object_detection/metrics/calibration_metrics_test.py
+++ b/research/object_detection/metrics/calibration_metrics_tf1_test.py
@@ -18,11 +18,14 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.metrics import calibration_metrics
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class CalibrationLibTest(tf.test.TestCase):
@staticmethod
diff --git a/research/object_detection/metrics/coco_evaluation.py b/research/object_detection/metrics/coco_evaluation.py
index 7a962457bd2a6690be6bae10342a62c6705db781..3ecfddb0dd4221c3e511fab628b884bc5eb514e6 100644
--- a/research/object_detection/metrics/coco_evaluation.py
+++ b/research/object_detection/metrics/coco_evaluation.py
@@ -24,6 +24,7 @@ import tensorflow.compat.v1 as tf
from object_detection.core import standard_fields
from object_detection.metrics import coco_tools
from object_detection.utils import json_utils
+from object_detection.utils import np_mask_ops
from object_detection.utils import object_detection_evaluation
@@ -1263,3 +1264,535 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
eval_metric_ops[metric_name] = (tf.py_func(
value_func_factory(metric_name), [], np.float32), update_op)
return eval_metric_ops
+
+
+class CocoPanopticSegmentationEvaluator(
+ object_detection_evaluation.DetectionEvaluator):
+ """Class to evaluate PQ (panoptic quality) metric on COCO dataset.
+
+ More details about this metric: https://arxiv.org/pdf/1801.00868.pdf.
+ """
+
+ def __init__(self,
+ categories,
+ include_metrics_per_category=False,
+ iou_threshold=0.5,
+ ioa_threshold=0.5):
+ """Constructor.
+
+ Args:
+ categories: A list of dicts, each of which has the following keys -
+ 'id': (required) an integer id uniquely identifying this category.
+ 'name': (required) string representing category name e.g., 'cat', 'dog'.
+ include_metrics_per_category: If True, include metrics for each category.
+ iou_threshold: intersection-over-union threshold for mask matching (with
+ normal groundtruths).
+ ioa_threshold: intersection-over-area threshold for mask matching with
+ "is_crowd" groundtruths.
+ """
+ super(CocoPanopticSegmentationEvaluator, self).__init__(categories)
+ self._groundtruth_masks = {}
+ self._groundtruth_class_labels = {}
+ self._groundtruth_is_crowd = {}
+ self._predicted_masks = {}
+ self._predicted_class_labels = {}
+ self._include_metrics_per_category = include_metrics_per_category
+ self._iou_threshold = iou_threshold
+ self._ioa_threshold = ioa_threshold
+
+ def clear(self):
+ """Clears the state to prepare for a fresh evaluation."""
+ self._groundtruth_masks.clear()
+ self._groundtruth_class_labels.clear()
+ self._groundtruth_is_crowd.clear()
+ self._predicted_masks.clear()
+ self._predicted_class_labels.clear()
+
+ def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
+ """Adds groundtruth for a single image to be used for evaluation.
+
+ If the image has already been added, a warning is logged, and groundtruth is
+ ignored.
+
+ Args:
+ image_id: A unique string/integer identifier for the image.
+ groundtruth_dict: A dictionary containing -
+ InputDataFields.groundtruth_classes: integer numpy array of shape
+ [num_masks] containing 1-indexed groundtruth classes for the mask.
+ InputDataFields.groundtruth_instance_masks: uint8 numpy array of shape
+ [num_masks, image_height, image_width] containing groundtruth masks.
+ The elements of the array must be in {0, 1}.
+ InputDataFields.groundtruth_is_crowd (optional): integer numpy array of
+ shape [num_masks] containing iscrowd flag for groundtruth masks.
+ """
+
+ if image_id in self._groundtruth_masks:
+ tf.logging.warning(
+ 'Ignoring groundtruth with image %s, since it has already been '
+ 'added to the ground truth database.', image_id)
+ return
+
+ self._groundtruth_masks[image_id] = groundtruth_dict[
+ standard_fields.InputDataFields.groundtruth_instance_masks]
+ self._groundtruth_class_labels[image_id] = groundtruth_dict[
+ standard_fields.InputDataFields.groundtruth_classes]
+ groundtruth_is_crowd = groundtruth_dict.get(
+ standard_fields.InputDataFields.groundtruth_is_crowd)
+ # Drop groundtruth_is_crowd if empty tensor.
+ if groundtruth_is_crowd is not None and not groundtruth_is_crowd.size > 0:
+ groundtruth_is_crowd = None
+ if groundtruth_is_crowd is not None:
+ self._groundtruth_is_crowd[image_id] = groundtruth_is_crowd
+
+ def add_single_detected_image_info(self, image_id, detections_dict):
+ """Adds detections for a single image to be used for evaluation.
+
+ If a detection has already been added for this image id, a warning is
+ logged, and the detection is skipped.
+
+ Args:
+ image_id: A unique string/integer identifier for the image.
+ detections_dict: A dictionary containing -
+ DetectionResultFields.detection_classes: integer numpy array of shape
+ [num_masks] containing 1-indexed detection classes for the masks.
+ DetectionResultFields.detection_masks: uint8 numpy array of
+ shape [num_masks, image_height, image_width] containing instance
+ masks. The elements of the array must be in {0, 1}.
+
+ Raises:
+ ValueError: If groundtruth is missing or results and groundtruth shape don't match.
+ """
+
+ if image_id not in self._groundtruth_masks:
+ raise ValueError('Missing groundtruth for image id: {}'.format(image_id))
+
+ detection_masks = detections_dict[
+ standard_fields.DetectionResultFields.detection_masks]
+ self._predicted_masks[image_id] = detection_masks
+ self._predicted_class_labels[image_id] = detections_dict[
+ standard_fields.DetectionResultFields.detection_classes]
+ groundtruth_mask_shape = self._groundtruth_masks[image_id].shape
+ if groundtruth_mask_shape[1:] != detection_masks.shape[1:]:
+ raise ValueError("The shape of results doesn't match groundtruth.")
+
+ def evaluate(self):
+ """Evaluates the detection masks and returns a dictionary of coco metrics.
+
+ Returns:
+ A dictionary holding -
+
+ 1. summary_metric:
+ 'PanopticQuality@%.2fIOU': mean panoptic quality averaged over classes at
+ the required IOU.
+ 'SegmentationQuality@%.2fIOU': mean segmentation quality averaged over
+ classes at the required IOU.
+ 'RecognitionQuality@%.2fIOU': mean recognition quality averaged over
+ classes at the required IOU.
+ 'NumValidClasses': number of valid classes. A valid class should have at
+ least one normal (is_crowd=0) groundtruth mask or one predicted mask.
+ 'NumTotalClasses': number of total classes.
+
+ 2. per_category_pq: if include_metrics_per_category is True, category
+ specific results with keys of the form:
+ 'PanopticQuality@%.2fIOU_ByCategory/category'.
+ """
+ # Evaluate and accumulate the iou/tp/fp/fn.
+ sum_tp_iou, sum_num_tp, sum_num_fp, sum_num_fn = self._evaluate_all_masks()
+ # Compute PQ metric for each category and average over all classes.
+ mask_metrics = self._compute_panoptic_metrics(sum_tp_iou, sum_num_tp,
+ sum_num_fp, sum_num_fn)
+ return mask_metrics
+
+ def get_estimator_eval_metric_ops(self, eval_dict):
+ """Returns a dictionary of eval metric ops.
+
+ Note that once value_op is called, the detections and groundtruth added via
+ update_op are cleared.
+
+ Args:
+ eval_dict: A dictionary that holds tensors for evaluating object detection
+ performance. For single-image evaluation, this dictionary may be
+ produced from eval_util.result_dict_for_single_example(). If multi-image
+ evaluation, `eval_dict` should also contain the fields
+ InputDataFields.num_groundtruth_boxes and
+ DetectionResultFields.num_detections to unpad the batched tensors.
+
+ Returns:
+ a dictionary of metric names to tuple of value_op and update_op that can
+ be used as eval metric ops in tf.estimator.EstimatorSpec. Note that all
+ update ops must be run together and similarly all value ops must be run
+ together to guarantee correct behaviour.
+ """
+
+ def update_op(image_id_batched, groundtruth_classes_batched,
+ groundtruth_instance_masks_batched,
+ groundtruth_is_crowd_batched, num_gt_masks_per_image,
+ detection_classes_batched, detection_masks_batched,
+ num_det_masks_per_image):
+ """Update op for metrics."""
+ for (image_id, groundtruth_classes, groundtruth_instance_masks,
+ groundtruth_is_crowd, num_gt_mask, detection_classes,
+ detection_masks, num_det_mask) in zip(
+ image_id_batched, groundtruth_classes_batched,
+ groundtruth_instance_masks_batched, groundtruth_is_crowd_batched,
+ num_gt_masks_per_image, detection_classes_batched,
+ detection_masks_batched, num_det_masks_per_image):
+
+ self.add_single_ground_truth_image_info(
+ image_id, {
+ 'groundtruth_classes':
+ groundtruth_classes[:num_gt_mask],
+ 'groundtruth_instance_masks':
+ groundtruth_instance_masks[:num_gt_mask],
+ 'groundtruth_is_crowd':
+ groundtruth_is_crowd[:num_gt_mask]
+ })
+ self.add_single_detected_image_info(
+ image_id, {
+ 'detection_classes': detection_classes[:num_det_mask],
+ 'detection_masks': detection_masks[:num_det_mask]
+ })
+
+ # Unpack items from the evaluation dictionary.
+ (image_id, groundtruth_classes, groundtruth_instance_masks,
+ groundtruth_is_crowd, num_gt_masks_per_image, detection_classes,
+ detection_masks, num_det_masks_per_image
+ ) = self._unpack_evaluation_dictionary_items(eval_dict)
+
+ update_op = tf.py_func(update_op, [
+ image_id, groundtruth_classes, groundtruth_instance_masks,
+ groundtruth_is_crowd, num_gt_masks_per_image, detection_classes,
+ detection_masks, num_det_masks_per_image
+ ], [])
+
+ metric_names = [
+ 'PanopticQuality@%.2fIOU' % self._iou_threshold,
+ 'SegmentationQuality@%.2fIOU' % self._iou_threshold,
+ 'RecognitionQuality@%.2fIOU' % self._iou_threshold
+ ]
+ if self._include_metrics_per_category:
+ for category_dict in self._categories:
+ metric_names.append('PanopticQuality@%.2fIOU_ByCategory/%s' %
+ (self._iou_threshold, category_dict['name']))
+
+ def first_value_func():
+ self._metrics = self.evaluate()
+ self.clear()
+ return np.float32(self._metrics[metric_names[0]])
+
+ def value_func_factory(metric_name):
+
+ def value_func():
+ return np.float32(self._metrics[metric_name])
+
+ return value_func
+
+ # Ensure that the metrics are only evaluated once.
+ first_value_op = tf.py_func(first_value_func, [], tf.float32)
+ eval_metric_ops = {metric_names[0]: (first_value_op, update_op)}
+ with tf.control_dependencies([first_value_op]):
+ for metric_name in metric_names[1:]:
+ eval_metric_ops[metric_name] = (tf.py_func(
+ value_func_factory(metric_name), [], np.float32), update_op)
+ return eval_metric_ops
+
+ def _evaluate_all_masks(self):
+ """Evaluate all masks and compute sum iou/TP/FP/FN."""
+
+ sum_num_tp = {category['id']: 0 for category in self._categories}
+ sum_num_fp = sum_num_tp.copy()
+ sum_num_fn = sum_num_tp.copy()
+ sum_tp_iou = sum_num_tp.copy()
+
+ for image_id in self._groundtruth_class_labels:
+ # Separate normal and is_crowd groundtruth
+ crowd_gt_indices = self._groundtruth_is_crowd.get(image_id)
+ (normal_gt_masks, normal_gt_classes, crowd_gt_masks,
+ crowd_gt_classes) = self._separate_normal_and_crowd_labels(
+ crowd_gt_indices, self._groundtruth_masks[image_id],
+ self._groundtruth_class_labels[image_id])
+
+ # Mask matching to normal GT.
+ predicted_masks = self._predicted_masks[image_id]
+ predicted_class_labels = self._predicted_class_labels[image_id]
+ (overlaps, pred_matched,
+ gt_matched) = self._match_predictions_to_groundtruths(
+ predicted_masks,
+ predicted_class_labels,
+ normal_gt_masks,
+ normal_gt_classes,
+ self._iou_threshold,
+ is_crowd=False,
+ with_replacement=False)
+
+ # Accumulate true positives.
+ for (class_id, is_matched, overlap) in zip(predicted_class_labels,
+ pred_matched, overlaps):
+ if is_matched:
+ sum_num_tp[class_id] += 1
+ sum_tp_iou[class_id] += overlap
+
+ # Accumulate false negatives.
+ for (class_id, is_matched) in zip(normal_gt_classes, gt_matched):
+ if not is_matched:
+ sum_num_fn[class_id] += 1
+
+ # Match remaining predictions to crowd gt.
+ remained_pred_indices = np.logical_not(pred_matched)
+ remained_pred_masks = predicted_masks[remained_pred_indices, :, :]
+ remained_pred_classes = predicted_class_labels[remained_pred_indices]
+ _, pred_matched, _ = self._match_predictions_to_groundtruths(
+ remained_pred_masks,
+ remained_pred_classes,
+ crowd_gt_masks,
+ crowd_gt_classes,
+ self._ioa_threshold,
+ is_crowd=True,
+ with_replacement=True)
+
+ # Accumulate false positives
+ for (class_id, is_matched) in zip(remained_pred_classes, pred_matched):
+ if not is_matched:
+ sum_num_fp[class_id] += 1
+ return sum_tp_iou, sum_num_tp, sum_num_fp, sum_num_fn
+
+ def _compute_panoptic_metrics(self, sum_tp_iou, sum_num_tp, sum_num_fp,
+ sum_num_fn):
+ """Compute PQ metric for each category and average over all classes.
+
+ Args:
+ sum_tp_iou: dict, summed true positive intersection-over-union (IoU) for
+ each class, keyed by class_id.
+ sum_num_tp: the total number of true positives for each class, keyed by
+ class_id.
+ sum_num_fp: the total number of false positives for each class, keyed by
+ class_id.
+ sum_num_fn: the total number of false negatives for each class, keyed by
+ class_id.
+
+ Returns:
+ mask_metrics: a dictionary containing averaged metrics over all classes,
+ and per-category metrics if required.
+ """
+ mask_metrics = {}
+ sum_pq = 0
+ sum_sq = 0
+ sum_rq = 0
+ num_valid_classes = 0
+ for category in self._categories:
+ class_id = category['id']
+ (panoptic_quality, segmentation_quality,
+ recognition_quality) = self._compute_panoptic_metrics_single_class(
+ sum_tp_iou[class_id], sum_num_tp[class_id], sum_num_fp[class_id],
+ sum_num_fn[class_id])
+ if panoptic_quality is not None:
+ sum_pq += panoptic_quality
+ sum_sq += segmentation_quality
+ sum_rq += recognition_quality
+ num_valid_classes += 1
+ if self._include_metrics_per_category:
+ mask_metrics['PanopticQuality@%.2fIOU_ByCategory/%s' %
+ (self._iou_threshold,
+ category['name'])] = panoptic_quality
+ mask_metrics['PanopticQuality@%.2fIOU' %
+ self._iou_threshold] = sum_pq / num_valid_classes
+ mask_metrics['SegmentationQuality@%.2fIOU' %
+ self._iou_threshold] = sum_sq / num_valid_classes
+ mask_metrics['RecognitionQuality@%.2fIOU' %
+ self._iou_threshold] = sum_rq / num_valid_classes
+ mask_metrics['NumValidClasses'] = num_valid_classes
+ mask_metrics['NumTotalClasses'] = len(self._categories)
+ return mask_metrics
+
+ def _compute_panoptic_metrics_single_class(self, sum_tp_iou, num_tp, num_fp,
+ num_fn):
+ """Compute panoptic metrics: panoptic/segmentation/recognition quality.
+
+ More computation details in https://arxiv.org/pdf/1801.00868.pdf.
+ Args:
+ sum_tp_iou: summed true positive intersection-over-union (IoU) for a
+ specific class.
+ num_tp: the total number of true positives for a specific class.
+ num_fp: the total number of false positives for a specific class.
+ num_fn: the total number of false negatives for a specific class.
+
+ Returns:
+ panoptic_quality: sum_tp_iou / (num_tp + 0.5*num_fp + 0.5*num_fn).
+ segmentation_quality: sum_tp_iou / num_tp.
+ recognition_quality: num_tp / (num_tp + 0.5*num_fp + 0.5*num_fn).
+ """
+ denominator = num_tp + 0.5 * num_fp + 0.5 * num_fn
+ # Calculate metric only if there is at least one GT or one prediction.
+ if denominator > 0:
+ recognition_quality = num_tp / denominator
+ if num_tp > 0:
+ segmentation_quality = sum_tp_iou / num_tp
+ else:
+ # If there is no TP for this category.
+ segmentation_quality = 0
+ panoptic_quality = segmentation_quality * recognition_quality
+ return panoptic_quality, segmentation_quality, recognition_quality
+ else:
+ return None, None, None
+
+ def _separate_normal_and_crowd_labels(self, crowd_gt_indices,
+ groundtruth_masks, groundtruth_classes):
+ """Separate normal and crowd groundtruth class_labels and masks.
+
+ Args:
+ crowd_gt_indices: None or array of shape [num_groundtruths]. If None, all
+ groundtruths are treated as normal ones.
+ groundtruth_masks: array of shape [num_groundtruths, height, width].
+ groundtruth_classes: array of shape [num_groundtruths].
+
+ Returns:
+ normal_gt_masks: array of shape [num_normal_groundtruths, height, width].
+ normal_gt_classes: array of shape [num_normal_groundtruths].
+ crowd_gt_masks: array of shape [num_crowd_groundtruths, height, width].
+ crowd_gt_classes: array of shape [num_crowd_groundtruths].
+ Raises:
+ ValueError: if the number of groundtruth classes or crowd_gt_indices
+ doesn't match the number of groundtruth masks.
+ """
+ if groundtruth_masks.shape[0] != groundtruth_classes.shape[0]:
+ raise ValueError(
+ "The number of masks doesn't match the number of labels.")
+ if crowd_gt_indices is None:
+ # All gts are treated as normal
+ crowd_gt_indices = np.zeros(groundtruth_masks.shape, dtype=np.bool)
+ else:
+ if groundtruth_masks.shape[0] != crowd_gt_indices.shape[0]:
+ raise ValueError(
+ "The number of masks doesn't match the number of is_crowd labels.")
+ crowd_gt_indices = crowd_gt_indices.astype(np.bool)
+ normal_gt_indices = np.logical_not(crowd_gt_indices)
+ if normal_gt_indices.size:
+ normal_gt_masks = groundtruth_masks[normal_gt_indices, :, :]
+ normal_gt_classes = groundtruth_classes[normal_gt_indices]
+ crowd_gt_masks = groundtruth_masks[crowd_gt_indices, :, :]
+ crowd_gt_classes = groundtruth_classes[crowd_gt_indices]
+ else:
+ # No groundtruths available, groundtruth_masks.shape = (0, h, w)
+ normal_gt_masks = groundtruth_masks
+ normal_gt_classes = groundtruth_classes
+ crowd_gt_masks = groundtruth_masks
+ crowd_gt_classes = groundtruth_classes
+ return normal_gt_masks, normal_gt_classes, crowd_gt_masks, crowd_gt_classes
+
+ def _match_predictions_to_groundtruths(self,
+ predicted_masks,
+ predicted_classes,
+ groundtruth_masks,
+ groundtruth_classes,
+ matching_threshold,
+ is_crowd=False,
+ with_replacement=False):
+ """Match the predicted masks to groundtruths.
+
+ Args:
+ predicted_masks: array of shape [num_predictions, height, width].
+ predicted_classes: array of shape [num_predictions].
+ groundtruth_masks: array of shape [num_groundtruths, height, width].
+ groundtruth_classes: array of shape [num_groundtruths].
+ matching_threshold: if the overlap between a prediction and a groundtruth
+ is at least this threshold, the prediction is true positive.
+ is_crowd: whether the groundtruths are crowd annotation or not. If True,
+ use intersection over area (IoA) as the overlapping metric; otherwise
+ use intersection over union (IoU).
+ with_replacement: whether a groundtruth can be matched to multiple
+ predictions. By default (False), only 1-1 matching is allowed, as
+ required for normal groundtruths; for crowd groundtruths, 1-to-many
+ matching must be allowed.
+
+ Returns:
+ best_overlaps: array of shape [num_predictions]. Values representing the
+ IoU or IoA with the best matched groundtruth (0 for unmatched
+ predictions).
+ pred_matched: array of shape [num_predictions]. Boolean value representing
+ whether the ith prediction is matched to a groundtruth.
+ gt_matched: array of shape [num_groundtruth]. Boolean value representing
+ whether the ith groundtruth is matched to a prediction.
+ Raises:
+ ValueError: if the shape of groundtruth/predicted masks doesn't match
+ groundtruth/predicted classes.
+ """
+ if groundtruth_masks.shape[0] != groundtruth_classes.shape[0]:
+ raise ValueError(
+ "The number of GT masks doesn't match the number of labels.")
+ if predicted_masks.shape[0] != predicted_classes.shape[0]:
+ raise ValueError(
+ "The number of predicted masks doesn't match the number of labels.")
+ gt_matched = np.zeros(groundtruth_classes.shape, dtype=np.bool)
+ pred_matched = np.zeros(predicted_classes.shape, dtype=np.bool)
+ best_overlaps = np.zeros(predicted_classes.shape)
+ for pid in range(predicted_classes.shape[0]):
+ best_overlap = 0
+ matched_gt_id = -1
+ for gid in range(groundtruth_classes.shape[0]):
+ if predicted_classes[pid] == groundtruth_classes[gid]:
+ if (not with_replacement) and gt_matched[gid]:
+ continue
+ if not is_crowd:
+ overlap = np_mask_ops.iou(predicted_masks[pid:pid + 1],
+ groundtruth_masks[gid:gid + 1])[0, 0]
+ else:
+ overlap = np_mask_ops.ioa(groundtruth_masks[gid:gid + 1],
+ predicted_masks[pid:pid + 1])[0, 0]
+ if overlap >= matching_threshold and overlap > best_overlap:
+ matched_gt_id = gid
+ best_overlap = overlap
+ if matched_gt_id >= 0:
+ gt_matched[matched_gt_id] = True
+ pred_matched[pid] = True
+ best_overlaps[pid] = best_overlap
+ return best_overlaps, pred_matched, gt_matched
+
+ def _unpack_evaluation_dictionary_items(self, eval_dict):
+ """Unpack items from the evaluation dictionary."""
+ input_data_fields = standard_fields.InputDataFields
+ detection_fields = standard_fields.DetectionResultFields
+ image_id = eval_dict[input_data_fields.key]
+ groundtruth_classes = eval_dict[input_data_fields.groundtruth_classes]
+ groundtruth_instance_masks = eval_dict[
+ input_data_fields.groundtruth_instance_masks]
+ groundtruth_is_crowd = eval_dict.get(input_data_fields.groundtruth_is_crowd,
+ None)
+ num_gt_masks_per_image = eval_dict.get(
+ input_data_fields.num_groundtruth_boxes, None)
+ detection_classes = eval_dict[detection_fields.detection_classes]
+ detection_masks = eval_dict[detection_fields.detection_masks]
+ num_det_masks_per_image = eval_dict.get(detection_fields.num_detections,
+ None)
+ if groundtruth_is_crowd is None:
+ groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)
+
+ if not image_id.shape.as_list():
+ # Apply a batch dimension to all tensors.
+ image_id = tf.expand_dims(image_id, 0)
+ groundtruth_classes = tf.expand_dims(groundtruth_classes, 0)
+ groundtruth_instance_masks = tf.expand_dims(groundtruth_instance_masks, 0)
+ groundtruth_is_crowd = tf.expand_dims(groundtruth_is_crowd, 0)
+ detection_classes = tf.expand_dims(detection_classes, 0)
+ detection_masks = tf.expand_dims(detection_masks, 0)
+
+ if num_gt_masks_per_image is None:
+ num_gt_masks_per_image = tf.shape(groundtruth_classes)[1:2]
+ else:
+ num_gt_masks_per_image = tf.expand_dims(num_gt_masks_per_image, 0)
+
+ if num_det_masks_per_image is None:
+ num_det_masks_per_image = tf.shape(detection_classes)[1:2]
+ else:
+ num_det_masks_per_image = tf.expand_dims(num_det_masks_per_image, 0)
+ else:
+ if num_gt_masks_per_image is None:
+ num_gt_masks_per_image = tf.tile(
+ tf.shape(groundtruth_classes)[1:2],
+ multiples=tf.shape(groundtruth_classes)[0:1])
+ if num_det_masks_per_image is None:
+ num_det_masks_per_image = tf.tile(
+ tf.shape(detection_classes)[1:2],
+ multiples=tf.shape(detection_classes)[0:1])
+ return (image_id, groundtruth_classes, groundtruth_instance_masks,
+ groundtruth_is_crowd, num_gt_masks_per_image, detection_classes,
+ detection_masks, num_det_masks_per_image)
diff --git a/research/object_detection/metrics/coco_evaluation_test.py b/research/object_detection/metrics/coco_evaluation_test.py
index aed6047f8c5dce427e5381398ad742c996fadc41..165c94780d93bb93bab9ab1187c7fa41b79b96b9 100644
--- a/research/object_detection/metrics/coco_evaluation_test.py
+++ b/research/object_detection/metrics/coco_evaluation_test.py
@@ -18,10 +18,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.core import standard_fields
from object_detection.metrics import coco_evaluation
+from object_detection.utils import tf_version
def _get_categories_list():
@@ -250,6 +252,7 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
})
+@unittest.skipIf(tf_version.is_tf2(), 'Only Supported in TF1.X')
class CocoEvaluationPyFuncTest(tf.test.TestCase):
def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
@@ -926,6 +929,7 @@ class CocoKeypointEvaluationTest(tf.test.TestCase):
-1.0)
+@unittest.skipIf(tf_version.is_tf2(), 'Only Supported in TF1.X')
class CocoKeypointEvaluationPyFuncTest(tf.test.TestCase):
def testGetOneMAPWithMatchingKeypoints(self):
@@ -1438,6 +1442,7 @@ class CocoMaskEvaluationTest(tf.test.TestCase):
self.assertFalse(coco_evaluator._detection_masks_list)
+@unittest.skipIf(tf_version.is_tf2(), 'Only Supported in TF1.X')
class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
def testAddEvalDict(self):
@@ -1716,5 +1721,221 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
self.assertFalse(coco_evaluator._detection_masks_list)
+def _get_panoptic_test_data():
+ # image1 contains 3 people in gt (2 normal annotations and 1 "is_crowd"
+ # annotation), and 3 people in prediction.
+ gt_masks1 = np.zeros((3, 50, 50), dtype=np.uint8)
+ result_masks1 = np.zeros((3, 50, 50), dtype=np.uint8)
+ gt_masks1[0, 10:20, 20:30] = 1
+ result_masks1[0, 10:18, 20:30] = 1
+ gt_masks1[1, 25:30, 25:35] = 1
+ result_masks1[1, 18:25, 25:30] = 1
+ gt_masks1[2, 40:50, 40:50] = 1
+ result_masks1[2, 47:50, 47:50] = 1
+ gt_class1 = np.array([1, 1, 1])
+ gt_is_crowd1 = np.array([0, 0, 1])
+ result_class1 = np.array([1, 1, 1])
+
+ # image2 contains 1 dog and 1 cat in gt, while 1 person and 1 dog in
+ # prediction.
+ gt_masks2 = np.zeros((2, 30, 40), dtype=np.uint8)
+ result_masks2 = np.zeros((2, 30, 40), dtype=np.uint8)
+ gt_masks2[0, 5:15, 20:35] = 1
+ gt_masks2[1, 20:30, 0:10] = 1
+ result_masks2[0, 20:25, 10:15] = 1
+ result_masks2[1, 6:15, 15:35] = 1
+ gt_class2 = np.array([2, 3])
+ gt_is_crowd2 = np.array([0, 0])
+ result_class2 = np.array([1, 2])
+
+ gt_class = [gt_class1, gt_class2]
+ gt_masks = [gt_masks1, gt_masks2]
+ gt_is_crowd = [gt_is_crowd1, gt_is_crowd2]
+ result_class = [result_class1, result_class2]
+ result_masks = [result_masks1, result_masks2]
+ return gt_class, gt_masks, gt_is_crowd, result_class, result_masks
+
+
+class CocoPanopticEvaluationTest(tf.test.TestCase):
+
+ def test_panoptic_quality(self):
+ pq_evaluator = coco_evaluation.CocoPanopticSegmentationEvaluator(
+ _get_categories_list(), include_metrics_per_category=True)
+ (gt_class, gt_masks, gt_is_crowd, result_class,
+ result_masks) = _get_panoptic_test_data()
+
+ for i in range(2):
+ pq_evaluator.add_single_ground_truth_image_info(
+ image_id='image%d' % i,
+ groundtruth_dict={
+ standard_fields.InputDataFields.groundtruth_classes:
+ gt_class[i],
+ standard_fields.InputDataFields.groundtruth_instance_masks:
+ gt_masks[i],
+ standard_fields.InputDataFields.groundtruth_is_crowd:
+ gt_is_crowd[i]
+ })
+
+ pq_evaluator.add_single_detected_image_info(
+ image_id='image%d' % i,
+ detections_dict={
+ standard_fields.DetectionResultFields.detection_classes:
+ result_class[i],
+ standard_fields.DetectionResultFields.detection_masks:
+ result_masks[i]
+ })
+
+ metrics = pq_evaluator.evaluate()
+ self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU_ByCategory/person'],
+ 0.32)
+ self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU_ByCategory/dog'],
+ 135.0 / 195)
+ self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU_ByCategory/cat'], 0)
+ self.assertAlmostEqual(metrics['SegmentationQuality@0.50IOU'],
+ (0.8 + 135.0 / 195) / 3)
+ self.assertAlmostEqual(metrics['RecognitionQuality@0.50IOU'], (0.4 + 1) / 3)
+ self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU'],
+ (0.32 + 135.0 / 195) / 3)
+ self.assertEqual(metrics['NumValidClasses'], 3)
+ self.assertEqual(metrics['NumTotalClasses'], 3)
+
+
+@unittest.skipIf(tf_version.is_tf2(), 'Only Supported in TF1.X')
+class CocoPanopticEvaluationPyFuncTest(tf.test.TestCase):
+
+ def testPanopticQualityNoBatch(self):
+ pq_evaluator = coco_evaluation.CocoPanopticSegmentationEvaluator(
+ _get_categories_list(), include_metrics_per_category=True)
+
+ image_id = tf.placeholder(tf.string, shape=())
+ groundtruth_classes = tf.placeholder(tf.int32, shape=(None))
+ groundtruth_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
+ groundtruth_is_crowd = tf.placeholder(tf.int32, shape=(None))
+ detection_classes = tf.placeholder(tf.int32, shape=(None))
+ detection_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
+
+ input_data_fields = standard_fields.InputDataFields
+ detection_fields = standard_fields.DetectionResultFields
+ eval_dict = {
+ input_data_fields.key: image_id,
+ input_data_fields.groundtruth_classes: groundtruth_classes,
+ input_data_fields.groundtruth_instance_masks: groundtruth_masks,
+ input_data_fields.groundtruth_is_crowd: groundtruth_is_crowd,
+ detection_fields.detection_classes: detection_classes,
+ detection_fields.detection_masks: detection_masks,
+ }
+
+ eval_metric_ops = pq_evaluator.get_estimator_eval_metric_ops(eval_dict)
+
+ _, update_op = eval_metric_ops['PanopticQuality@0.50IOU']
+ (gt_class, gt_masks, gt_is_crowd, result_class,
+ result_masks) = _get_panoptic_test_data()
+
+ with self.test_session() as sess:
+ for i in range(2):
+ sess.run(
+ update_op,
+ feed_dict={
+ image_id: 'image%d' % i,
+ groundtruth_classes: gt_class[i],
+ groundtruth_masks: gt_masks[i],
+ groundtruth_is_crowd: gt_is_crowd[i],
+ detection_classes: result_class[i],
+ detection_masks: result_masks[i]
+ })
+ metrics = {}
+ for key, (value_op, _) in eval_metric_ops.items():
+ metrics[key] = value_op
+ metrics = sess.run(metrics)
+ self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU'],
+ (0.32 + 135.0 / 195) / 3)
+
+ def testPanopticQualityBatched(self):
+ pq_evaluator = coco_evaluation.CocoPanopticSegmentationEvaluator(
+ _get_categories_list(), include_metrics_per_category=True)
+ batch_size = 2
+ image_id = tf.placeholder(tf.string, shape=(batch_size))
+ groundtruth_classes = tf.placeholder(tf.int32, shape=(batch_size, None))
+ groundtruth_masks = tf.placeholder(
+ tf.uint8, shape=(batch_size, None, None, None))
+ groundtruth_is_crowd = tf.placeholder(tf.int32, shape=(batch_size, None))
+ detection_classes = tf.placeholder(tf.int32, shape=(batch_size, None))
+ detection_masks = tf.placeholder(
+ tf.uint8, shape=(batch_size, None, None, None))
+ num_gt_masks_per_image = tf.placeholder(tf.int32, shape=(batch_size))
+ num_det_masks_per_image = tf.placeholder(tf.int32, shape=(batch_size))
+
+ input_data_fields = standard_fields.InputDataFields
+ detection_fields = standard_fields.DetectionResultFields
+ eval_dict = {
+ input_data_fields.key: image_id,
+ input_data_fields.groundtruth_classes: groundtruth_classes,
+ input_data_fields.groundtruth_instance_masks: groundtruth_masks,
+ input_data_fields.groundtruth_is_crowd: groundtruth_is_crowd,
+ input_data_fields.num_groundtruth_boxes: num_gt_masks_per_image,
+ detection_fields.detection_classes: detection_classes,
+ detection_fields.detection_masks: detection_masks,
+ detection_fields.num_detections: num_det_masks_per_image,
+ }
+
+ eval_metric_ops = pq_evaluator.get_estimator_eval_metric_ops(eval_dict)
+
+ _, update_op = eval_metric_ops['PanopticQuality@0.50IOU']
+ (gt_class, gt_masks, gt_is_crowd, result_class,
+ result_masks) = _get_panoptic_test_data()
+ with self.test_session() as sess:
+ sess.run(
+ update_op,
+ feed_dict={
+ image_id: ['image0', 'image1'],
+ groundtruth_classes:
+ np.stack([
+ gt_class[0],
+ np.pad(gt_class[1], (0, 1), mode='constant')
+ ],
+ axis=0),
+ groundtruth_masks:
+ np.stack([
+ np.pad(
+ gt_masks[0], ((0, 0), (0, 10), (0, 10)),
+ mode='constant'),
+ np.pad(
+ gt_masks[1], ((0, 1), (0, 30), (0, 20)),
+ mode='constant'),
+ ],
+ axis=0),
+ groundtruth_is_crowd:
+ np.stack([
+ gt_is_crowd[0],
+ np.pad(gt_is_crowd[1], (0, 1), mode='constant')
+ ],
+ axis=0),
+ num_gt_masks_per_image: np.array([3, 2]),
+ detection_classes:
+ np.stack([
+ result_class[0],
+ np.pad(result_class[1], (0, 1), mode='constant')
+ ],
+ axis=0),
+ detection_masks:
+ np.stack([
+ np.pad(
+ result_masks[0], ((0, 0), (0, 10), (0, 10)),
+ mode='constant'),
+ np.pad(
+ result_masks[1], ((0, 1), (0, 30), (0, 20)),
+ mode='constant'),
+ ],
+ axis=0),
+ num_det_masks_per_image: np.array([3, 2]),
+ })
+ metrics = {}
+ for key, (value_op, _) in eval_metric_ops.items():
+ metrics[key] = value_op
+ metrics = sess.run(metrics)
+ self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU'],
+ (0.32 + 135.0 / 195) / 3)
+
+
if __name__ == '__main__':
tf.test.main()
diff --git a/research/object_detection/metrics/coco_tools.py b/research/object_detection/metrics/coco_tools.py
index f2379f6537997a591d4ea661cce831b67dfc8d0f..790d5bdef23bef149e8eb1afa9cdecb9ce458e6e 100644
--- a/research/object_detection/metrics/coco_tools.py
+++ b/research/object_detection/metrics/coco_tools.py
@@ -52,6 +52,7 @@ from pycocotools import coco
from pycocotools import cocoeval
from pycocotools import mask
+import six
from six.moves import range
from six.moves import zip
import tensorflow.compat.v1 as tf
@@ -353,7 +354,9 @@ def _RleCompress(masks):
Returns:
A pycocotools Run-length encoding of the mask.
"""
- return mask.encode(np.asfortranarray(masks))
+ rle = mask.encode(np.asfortranarray(masks))
+ rle['counts'] = six.ensure_str(rle['counts'])
+ return rle
def ExportSingleImageGroundtruthToCoco(image_id,
diff --git a/research/object_detection/metrics/offline_eval_map_corloc.py b/research/object_detection/metrics/offline_eval_map_corloc.py
index 69ecaeaaed30ad0b330793a22ca730c6e923b4b8..a12b1d98493e022d302c76b0cadb514e7fc0eb60 100644
--- a/research/object_detection/metrics/offline_eval_map_corloc.py
+++ b/research/object_detection/metrics/offline_eval_map_corloc.py
@@ -36,8 +36,8 @@ import os
import re
import tensorflow.compat.v1 as tf
+from object_detection import eval_util
from object_detection.core import standard_fields
-from object_detection.legacy import evaluator
from object_detection.metrics import tf_example_parser
from object_detection.utils import config_util
from object_detection.utils import label_map_util
@@ -94,7 +94,7 @@ def read_data_and_evaluate(input_config, eval_config):
categories = label_map_util.create_categories_from_labelmap(
input_config.label_map_path)
- object_detection_evaluators = evaluator.get_evaluators(
+ object_detection_evaluators = eval_util.get_evaluators(
eval_config, categories)
# Support a single evaluator
object_detection_evaluator = object_detection_evaluators[0]
diff --git a/research/object_detection/model_lib.py b/research/object_detection/model_lib.py
index 5791251512071440f455744af6b280753cc253a1..365ea1c0261aa0b23c11460d2f5127632ed7b482 100644
--- a/research/object_detection/model_lib.py
+++ b/research/object_detection/model_lib.py
@@ -23,9 +23,9 @@ import functools
import os
import tensorflow.compat.v1 as tf
+import tensorflow.compat.v2 as tf2
import tf_slim as slim
-
from object_detection import eval_util
from object_detection import exporter as exporter_lib
from object_detection import inputs
@@ -349,7 +349,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
from tensorflow.python.keras.engine import base_layer_utils # pylint: disable=g-import-not-at-top
# Enable v2 behavior, as `mixed_bfloat16` is only supported in TF 2.0.
base_layer_utils.enable_v2_dtype_behavior()
- tf.compat.v2.keras.mixed_precision.experimental.set_policy(
+ tf2.keras.mixed_precision.experimental.set_policy(
'mixed_bfloat16')
detection_model = detection_model_fn(
is_training=is_training, add_summaries=(not use_tpu))
diff --git a/research/object_detection/model_lib_test.py b/research/object_detection/model_lib_tf1_test.py
similarity index 98%
rename from research/object_detection/model_lib_test.py
rename to research/object_detection/model_lib_tf1_test.py
index ae14ad844eece3cf893d391a2abf2ff597ed650f..7d4d81b2cb43e0faa3d84f48df91c27d0da217bc 100644
--- a/research/object_detection/model_lib_test.py
+++ b/research/object_detection/model_lib_tf1_test.py
@@ -20,19 +20,17 @@ from __future__ import print_function
import functools
import os
-
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
-from tensorflow.contrib.tpu.python.tpu import tpu_config
-from tensorflow.contrib.tpu.python.tpu import tpu_estimator
-
from object_detection import inputs
from object_detection import model_hparams
from object_detection import model_lib
from object_detection.builders import model_builder
from object_detection.core import standard_fields as fields
from object_detection.utils import config_util
+from object_detection.utils import tf_version
# Model for test. Options are:
@@ -122,6 +120,7 @@ def _make_initializable_iterator(dataset):
return iterator
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class ModelLibTest(tf.test.TestCase):
@classmethod
@@ -337,8 +336,7 @@ class ModelLibTest(tf.test.TestCase):
def test_create_tpu_estimator_and_inputs(self):
"""Tests that number of train/eval defaults to config values."""
-
- run_config = tpu_config.RunConfig()
+ run_config = tf.estimator.tpu.RunConfig()
hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false')
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
@@ -352,7 +350,7 @@ class ModelLibTest(tf.test.TestCase):
estimator = train_and_eval_dict['estimator']
train_steps = train_and_eval_dict['train_steps']
- self.assertIsInstance(estimator, tpu_estimator.TPUEstimator)
+ self.assertIsInstance(estimator, tf.estimator.tpu.TPUEstimator)
self.assertEqual(20, train_steps)
def test_create_train_and_eval_specs(self):
@@ -406,6 +404,7 @@ class ModelLibTest(tf.test.TestCase):
self.assertEqual(None, experiment.eval_steps)
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class UnbatchTensorsTest(tf.test.TestCase):
def test_unbatch_without_unpadding(self):
diff --git a/research/object_detection/model_lib_v2_test.py b/research/object_detection/model_lib_tf2_test.py
similarity index 80%
rename from research/object_detection/model_lib_v2_test.py
rename to research/object_detection/model_lib_tf2_test.py
index d2eff82f9d698cc1839b8983e1006707cbbcf921..f65273660195752227b2bcc90dceb04184a6eb62 100644
--- a/research/object_detection/model_lib_v2_test.py
+++ b/research/object_detection/model_lib_tf2_test.py
@@ -20,18 +20,19 @@ from __future__ import print_function
import os
import tempfile
-
+import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
+import tensorflow.compat.v2 as tf2
from object_detection import inputs
-from object_detection import model_hparams
from object_detection import model_lib_v2
from object_detection.builders import model_builder
from object_detection.core import model
from object_detection.protos import train_pb2
from object_detection.utils import config_util
+from object_detection.utils import tf_version
if six.PY2:
import mock # pylint: disable=g-importing-member,g-import-not-at-top
@@ -72,6 +73,7 @@ def _get_config_kwarg_overrides():
}
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class ModelLibTest(tf.test.TestCase):
@classmethod
@@ -80,24 +82,25 @@ class ModelLibTest(tf.test.TestCase):
def test_train_loop_then_eval_loop(self):
"""Tests that Estimator and input function are constructed correctly."""
- hparams = model_hparams.create_hparams(
- hparams_overrides='load_pretrained=false')
+ model_dir = tf.test.get_temp_dir()
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
+ new_pipeline_config_path = os.path.join(model_dir, 'new_pipeline.config')
+ config_util.clear_fine_tune_checkpoint(pipeline_config_path,
+ new_pipeline_config_path)
config_kwarg_overrides = _get_config_kwarg_overrides()
- model_dir = tf.test.get_temp_dir()
train_steps = 2
- model_lib_v2.train_loop(
- hparams,
- pipeline_config_path,
- model_dir=model_dir,
- train_steps=train_steps,
- checkpoint_every_n=1,
- **config_kwarg_overrides)
+ strategy = tf2.distribute.OneDeviceStrategy(device='/cpu:0')
+ with strategy.scope():
+ model_lib_v2.train_loop(
+ new_pipeline_config_path,
+ model_dir=model_dir,
+ train_steps=train_steps,
+ checkpoint_every_n=1,
+ **config_kwarg_overrides)
model_lib_v2.eval_continuously(
- hparams,
- pipeline_config_path,
+ new_pipeline_config_path,
model_dir=model_dir,
checkpoint_dir=model_dir,
train_steps=train_steps,
@@ -120,6 +123,9 @@ class SimpleModel(model.DetectionModel):
return []
def restore_map(self, *args, **kwargs):
+ pass
+
+ def restore_from_objects(self, fine_tune_checkpoint_type):
return {'model': self}
def preprocess(self, _):
@@ -139,27 +145,31 @@ class SimpleModel(model.DetectionModel):
return []
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class ModelCheckpointTest(tf.test.TestCase):
"""Test for model checkpoint related functionality."""
def test_checkpoint_max_to_keep(self):
"""Test that only the most recent checkpoints are kept."""
+ strategy = tf2.distribute.OneDeviceStrategy(device='/cpu:0')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
- mock_builder.return_value = SimpleModel()
-
- hparams = model_hparams.create_hparams(
- hparams_overrides='load_pretrained=false')
+ with strategy.scope():
+ mock_builder.return_value = SimpleModel()
+ model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
+ new_pipeline_config_path = os.path.join(model_dir, 'new_pipeline.config')
+ config_util.clear_fine_tune_checkpoint(pipeline_config_path,
+ new_pipeline_config_path)
config_kwarg_overrides = _get_config_kwarg_overrides()
- model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
- model_lib_v2.train_loop(
- hparams, pipeline_config_path, model_dir=model_dir,
- train_steps=20, checkpoint_every_n=2, checkpoint_max_to_keep=3,
- **config_kwarg_overrides
- )
+ with strategy.scope():
+ model_lib_v2.train_loop(
+ new_pipeline_config_path, model_dir=model_dir,
+ train_steps=20, checkpoint_every_n=2, checkpoint_max_to_keep=3,
+ **config_kwarg_overrides
+ )
ckpt_files = tf.io.gfile.glob(os.path.join(model_dir, 'ckpt-*.index'))
self.assertEqual(len(ckpt_files), 3,
'{} not of length 3.'.format(ckpt_files))
@@ -167,10 +177,11 @@ class ModelCheckpointTest(tf.test.TestCase):
class IncompatibleModel(SimpleModel):
- def restore_map(self, *args, **kwargs):
+ def restore_from_objects(self, *args, **kwargs):
return {'weight': self.weight}
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class CheckpointV2Test(tf.test.TestCase):
def setUp(self):
@@ -199,7 +210,6 @@ class CheckpointV2Test(tf.test.TestCase):
model_lib_v2.load_fine_tune_checkpoint(
self._model, self._ckpt_path, checkpoint_type='',
checkpoint_version=train_pb2.CheckpointVersion.V2,
- load_all_detection_checkpoint_vars=True,
input_dataset=self._train_input_fn(),
unpad_groundtruth_tensors=True)
np.testing.assert_allclose(self._model.weight.numpy(), 42)
@@ -212,8 +222,9 @@ class CheckpointV2Test(tf.test.TestCase):
model_lib_v2.load_fine_tune_checkpoint(
IncompatibleModel(), self._ckpt_path, checkpoint_type='',
checkpoint_version=train_pb2.CheckpointVersion.V2,
- load_all_detection_checkpoint_vars=True,
input_dataset=self._train_input_fn(),
unpad_groundtruth_tensors=True)
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/research/object_detection/model_lib_v2.py b/research/object_detection/model_lib_v2.py
index ab1fbdc1e13095834c99473b88ff718f77cb245a..8d266388d59cc9a254a26bc2ee6b5dbb70f30f87 100644
--- a/research/object_detection/model_lib_v2.py
+++ b/research/object_detection/model_lib_v2.py
@@ -34,7 +34,6 @@ from object_detection.protos import train_pb2
from object_detection.utils import config_util
from object_detection.utils import label_map_util
from object_detection.utils import ops
-from object_detection.utils import variables_helper
from object_detection.utils import visualization_utils as vutils
# pylint: disable=g-import-not-at-top
@@ -47,13 +46,6 @@ except ImportError:
MODEL_BUILD_UTIL_MAP = model_lib.MODEL_BUILD_UTIL_MAP
-### NOTE: This file is a wip.
-### TODO(kaftan): Explore adding unit tests for individual methods
-### TODO(kaftan): Add unit test that checks training on a single image w/
-#### groundtruth, and verfiy that loss goes to zero.
-#### Possibly have version that takes it as the whole train & eval dataset,
-#### & verify the loss output from the eval_loop method.
-### TODO(kaftan): Make sure the unit tests run in TAP presubmits or Kokoro
RESTORE_MAP_ERROR_TEMPLATE = (
'Since we are restoring a v2 style checkpoint'
@@ -277,14 +269,21 @@ def validate_tf_v2_checkpoint_restore_map(checkpoint_restore_map):
"""
for key, value in checkpoint_restore_map.items():
- if not (isinstance(key, str) and isinstance(value, tf.Module)):
+ if not (isinstance(key, str) and
+ (isinstance(value, tf.Module)
+ or isinstance(value, tf.train.Checkpoint))):
raise TypeError(RESTORE_MAP_ERROR_TEMPLATE.format(
key.__class__.__name__, value.__class__.__name__))
+def is_object_based_checkpoint(checkpoint_path):
+ """Returns true if `checkpoint_path` points to an object-based checkpoint."""
+ var_names = [var[0] for var in tf.train.list_variables(checkpoint_path)]
+ return '_CHECKPOINTABLE_OBJECT_GRAPH' in var_names
+
+
def load_fine_tune_checkpoint(
- model, checkpoint_path, checkpoint_type, checkpoint_version,
- load_all_detection_checkpoint_vars, input_dataset,
+ model, checkpoint_path, checkpoint_type, checkpoint_version, input_dataset,
unpad_groundtruth_tensors):
"""Load a fine tuning classification or detection checkpoint.
@@ -292,8 +291,7 @@ def load_fine_tune_checkpoint(
the model by computing a dummy loss. (Models might not have built their
variables before their first execution)
- It then loads a variable-name based classification or detection checkpoint
- that comes from converted TF 1.x slim model checkpoints.
+ It then loads an object-based classification or detection checkpoint.
This method updates the model in-place and does not return a value.
@@ -306,14 +304,22 @@ def load_fine_tune_checkpoint(
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`.
checkpoint_version: train_pb2.CheckpointVersion.V1 or V2 enum indicating
- whether to load checkpoints in V1 style or V2 style.
- load_all_detection_checkpoint_vars: whether to load all variables (when
- `fine_tune_checkpoint_type` is `detection`). If False, only variables
- within the feature extractor scopes are included. Default False.
+ whether to load checkpoints in V1 style or V2 style. In this binary
+ we only support V2 style (object-based) checkpoints.
input_dataset: The tf.data Dataset the model is being trained on. Needed
to get the shapes for the dummy loss computation.
unpad_groundtruth_tensors: A parameter passed to unstack_batch.
+
+ Raises:
+ IOError: if `checkpoint_path` does not point at a valid object-based
+ checkpoint
+ ValueError: if `checkpoint_version` is not train_pb2.CheckpointVersion.V2
"""
+ if not is_object_based_checkpoint(checkpoint_path):
+ raise IOError('Checkpoint is expected to be an object-based checkpoint.')
+ if checkpoint_version == train_pb2.CheckpointVersion.V1:
+ raise ValueError('Checkpoint version should be V2')
+
features, labels = iter(input_dataset).next()
@tf.function
@@ -330,35 +336,20 @@ def load_fine_tune_checkpoint(
labels)
strategy = tf.compat.v2.distribute.get_strategy()
- strategy.run(
+ strategy.experimental_run_v2(
_dummy_computation_fn, args=(
features,
labels,
))
- if checkpoint_version == train_pb2.CheckpointVersion.V1:
- var_map = model.restore_map(
- fine_tune_checkpoint_type=checkpoint_type,
- load_all_detection_checkpoint_vars=(
- load_all_detection_checkpoint_vars))
- available_var_map = variables_helper.get_variables_available_in_checkpoint(
- var_map,
- checkpoint_path,
- include_global_step=False)
- tf.train.init_from_checkpoint(checkpoint_path,
- available_var_map)
- elif checkpoint_version == train_pb2.CheckpointVersion.V2:
- restore_map = model.restore_map(
- fine_tune_checkpoint_type=checkpoint_type,
- load_all_detection_checkpoint_vars=(
- load_all_detection_checkpoint_vars))
- validate_tf_v2_checkpoint_restore_map(restore_map)
-
- ckpt = tf.train.Checkpoint(**restore_map)
- ckpt.restore(checkpoint_path).assert_existing_objects_matched()
-
-
-def _get_filepath(strategy, filepath):
+ restore_from_objects_dict = model.restore_from_objects(
+ fine_tune_checkpoint_type=checkpoint_type)
+ validate_tf_v2_checkpoint_restore_map(restore_from_objects_dict)
+ ckpt = tf.train.Checkpoint(**restore_from_objects_dict)
+ ckpt.restore(checkpoint_path).assert_existing_objects_matched()
+
+
+def get_filepath(strategy, filepath):
"""Get appropriate filepath for worker.
Args:
@@ -377,7 +368,7 @@ def _get_filepath(strategy, filepath):
return os.path.join(filepath, 'temp_worker_{:03d}'.format(task_id))
-def _clean_temporary_directories(strategy, filepath):
+def clean_temporary_directories(strategy, filepath):
"""Temporary directory clean up for MultiWorker Mirrored Strategy.
This is needed for all non-chief workers.
@@ -392,14 +383,12 @@ def _clean_temporary_directories(strategy, filepath):
def train_loop(
- hparams,
pipeline_config_path,
model_dir,
config_override=None,
train_steps=None,
use_tpu=False,
save_final_config=False,
- export_to_tpu=None,
checkpoint_every_n=1000,
checkpoint_max_to_keep=7,
**kwargs):
@@ -417,7 +406,6 @@ def train_loop(
8. Logs the training metrics as TensorBoard summaries.
Args:
- hparams: A `HParams`.
pipeline_config_path: A path to a pipeline config file.
model_dir:
The directory to save checkpoints and summaries to.
@@ -428,10 +416,6 @@ def train_loop(
use_tpu: Boolean, whether training and evaluation should run on TPU.
save_final_config: Whether to save final config (obtained after applying
overrides) to `model_dir`.
- export_to_tpu: When use_tpu and export_to_tpu are true,
- `export_savedmodel()` exports a metagraph for serving on TPU besides the
- one on CPU. If export_to_tpu is not provided, we will look for it in
- hparams too.
checkpoint_every_n:
Checkpoint every n training steps.
checkpoint_max_to_keep:
@@ -453,7 +437,7 @@ def train_loop(
'use_bfloat16': configs['train_config'].use_bfloat16 and use_tpu
})
configs = merge_external_params_with_configs(
- configs, hparams, kwargs_dict=kwargs)
+ configs, None, kwargs_dict=kwargs)
model_config = configs['model']
train_config = configs['train_config']
train_input_config = configs['train_input_config']
@@ -468,33 +452,14 @@ def train_loop(
if train_steps is None and train_config.num_steps != 0:
train_steps = train_config.num_steps
- # Read export_to_tpu from hparams if not passed.
- if export_to_tpu is None:
- export_to_tpu = hparams.get('export_to_tpu', False)
- tf.logging.info(
- 'train_loop: use_tpu %s, export_to_tpu %s', use_tpu,
- export_to_tpu)
-
if kwargs['use_bfloat16']:
tf.compat.v2.keras.mixed_precision.experimental.set_policy('mixed_bfloat16')
- # Parse the checkpoint fine tuning configs
- if hparams.load_pretrained:
- fine_tune_checkpoint_path = train_config.fine_tune_checkpoint
- else:
- fine_tune_checkpoint_path = None
- load_all_detection_checkpoint_vars = (
- train_config.load_all_detection_checkpoint_vars)
- # TODO(kaftan) (or anyone else): move this piece of config munging to
- ## utils/config_util.py
- if not train_config.fine_tune_checkpoint_type:
- # train_config.from_detection_checkpoint field is deprecated. For
- # backward compatibility, set train_config.fine_tune_checkpoint_type
- # based on train_config.from_detection_checkpoint.
- if train_config.from_detection_checkpoint:
- train_config.fine_tune_checkpoint_type = 'detection'
- else:
- train_config.fine_tune_checkpoint_type = 'classification'
+ if train_config.load_all_detection_checkpoint_vars:
+ raise ValueError('train_pb2.load_all_detection_checkpoint_vars '
+ 'unsupported in TF2')
+
+ config_util.update_fine_tune_checkpoint_type(train_config)
fine_tune_checkpoint_type = train_config.fine_tune_checkpoint_type
fine_tune_checkpoint_version = train_config.fine_tune_checkpoint_version
@@ -539,8 +504,8 @@ def train_loop(
## Train the model
# Get the appropriate filepath (temporary or not) based on whether the worker
# is the chief.
- summary_writer_filepath = _get_filepath(strategy,
- os.path.join(model_dir, 'train'))
+ summary_writer_filepath = get_filepath(strategy,
+ os.path.join(model_dir, 'train'))
summary_writer = tf.compat.v2.summary.create_file_writer(
summary_writer_filepath)
@@ -556,18 +521,18 @@ def train_loop(
with tf.compat.v2.summary.record_if(
lambda: global_step % num_steps_per_iteration == 0):
# Load a fine-tuning checkpoint.
- if fine_tune_checkpoint_path:
- load_fine_tune_checkpoint(detection_model, fine_tune_checkpoint_path,
+ if train_config.fine_tune_checkpoint:
+ load_fine_tune_checkpoint(detection_model,
+ train_config.fine_tune_checkpoint,
fine_tune_checkpoint_type,
fine_tune_checkpoint_version,
- load_all_detection_checkpoint_vars,
train_input,
unpad_groundtruth_tensors)
ckpt = tf.compat.v2.train.Checkpoint(
step=global_step, model=detection_model, optimizer=optimizer)
- manager_dir = _get_filepath(strategy, model_dir)
+ manager_dir = get_filepath(strategy, model_dir)
if not strategy.extended.should_checkpoint:
checkpoint_max_to_keep = 1
manager = tf.compat.v2.train.CheckpointManager(
@@ -597,7 +562,7 @@ def train_loop(
def _sample_and_train(strategy, train_step_fn, data_iterator):
features, labels = data_iterator.next()
- per_replica_losses = strategy.run(
+ per_replica_losses = strategy.experimental_run_v2(
train_step_fn, args=(features, labels))
# TODO(anjalisridhar): explore if it is safe to remove the
## num_replicas scaling of the loss and switch this to a ReduceOp.Mean
@@ -615,6 +580,10 @@ def train_loop(
return _sample_and_train(strategy, train_step_fn, data_iterator)
train_input_iter = iter(train_input)
+
+ if int(global_step.value()) == 0:
+ manager.save()
+
checkpointed_step = int(global_step.value())
logged_step = global_step.value()
@@ -646,8 +615,8 @@ def train_loop(
# Remove the checkpoint directories of the non-chief workers that
# MultiWorkerMirroredStrategy forces us to save during sync distributed
# training.
- _clean_temporary_directories(strategy, manager_dir)
- _clean_temporary_directories(strategy, summary_writer_filepath)
+ clean_temporary_directories(strategy, manager_dir)
+ clean_temporary_directories(strategy, summary_writer_filepath)
def eager_eval_loop(
@@ -767,28 +736,25 @@ def eager_eval_loop(
return eval_dict, losses_dict, class_agnostic
+ agnostic_categories = label_map_util.create_class_agnostic_category_index()
+ per_class_categories = label_map_util.create_category_index_from_labelmap(
+ eval_input_config.label_map_path)
+ keypoint_edges = [
+ (kp.start, kp.end) for kp in eval_config.keypoint_edge]
+
for i, (features, labels) in enumerate(eval_dataset):
eval_dict, losses_dict, class_agnostic = compute_eval_dict(features, labels)
+ if class_agnostic:
+ category_index = agnostic_categories
+ else:
+ category_index = per_class_categories
+
if i % 100 == 0:
tf.logging.info('Finished eval step %d', i)
use_original_images = fields.InputDataFields.original_image in features
- if not use_tpu and use_original_images:
- # Summary for input images.
- tf.compat.v2.summary.image(
- name='eval_input_images',
- step=global_step,
- data=eval_dict['original_image'],
- max_outputs=1)
- # Summary for prediction/groundtruth side-by-side images.
- if class_agnostic:
- category_index = label_map_util.create_class_agnostic_category_index()
- else:
- category_index = label_map_util.create_category_index_from_labelmap(
- eval_input_config.label_map_path)
- keypoint_edges = [
- (kp.start, kp.end) for kp in eval_config.keypoint_edge]
+ if use_original_images and i < eval_config.num_visualizations:
sbys_image_list = vutils.draw_side_by_side_evaluation_image(
eval_dict,
category_index=category_index,
@@ -798,10 +764,10 @@ def eager_eval_loop(
keypoint_edges=keypoint_edges or None)
sbys_images = tf.concat(sbys_image_list, axis=0)
tf.compat.v2.summary.image(
- name='eval_side_by_side',
+ name='eval_side_by_side_' + str(i),
step=global_step,
data=sbys_images,
- max_outputs=eval_config.num_visualizations)
+ max_outputs=1)
if evaluators is None:
if class_agnostic:
@@ -830,14 +796,15 @@ def eager_eval_loop(
eval_metrics[loss_key] = loss_metrics[loss_key].result()
eval_metrics = {str(k): v for k, v in eval_metrics.items()}
+ tf.logging.info('Eval metrics at step %d', global_step)
for k in eval_metrics:
tf.compat.v2.summary.scalar(k, eval_metrics[k], step=global_step)
+ tf.logging.info('\t+ %s: %f', k, eval_metrics[k])
return eval_metrics
def eval_continuously(
- hparams,
pipeline_config_path,
config_override=None,
train_steps=None,
@@ -846,7 +813,6 @@ def eval_continuously(
use_tpu=False,
override_eval_num_epochs=True,
postprocess_on_cpu=False,
- export_to_tpu=None,
model_dir=None,
checkpoint_dir=None,
wait_interval=180,
@@ -859,7 +825,6 @@ def eval_continuously(
on the evaluation data.
Args:
- hparams: A `HParams`.
pipeline_config_path: A path to a pipeline config file.
config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
override the config from `pipeline_config_path`.
@@ -875,10 +840,6 @@ def eval_continuously(
eval_input.
postprocess_on_cpu: When use_tpu and postprocess_on_cpu are true,
postprocess is scheduled on the host cpu.
- export_to_tpu: When use_tpu and export_to_tpu are true,
- `export_savedmodel()` exports a metagraph for serving on TPU besides the
- one on CPU. If export_to_tpu is not provided, we will look for it in
- hparams too.
model_dir: Directory to output resulting evaluation summaries to.
checkpoint_dir: Directory that contains the training checkpoints.
wait_interval: The mimmum number of seconds to wait before checking for a
@@ -906,7 +867,7 @@ def eval_continuously(
tf.logging.warning(
'Forced number of epochs for all eval validations to be 1.')
configs = merge_external_params_with_configs(
- configs, hparams, kwargs_dict=kwargs)
+ configs, None, kwargs_dict=kwargs)
model_config = configs['model']
train_input_config = configs['train_input_config']
eval_config = configs['eval_config']
@@ -938,12 +899,6 @@ def eval_continuously(
model=detection_model)
eval_inputs.append((eval_input_config.name, next_eval_input))
- # Read export_to_tpu from hparams if not passed.
- if export_to_tpu is None:
- export_to_tpu = hparams.get('export_to_tpu', False)
- tf.logging.info('eval_continuously: use_tpu %s, export_to_tpu %s',
- use_tpu, export_to_tpu)
-
global_step = tf.compat.v2.Variable(
0, trainable=False, dtype=tf.compat.v2.dtypes.int64)
@@ -956,7 +911,7 @@ def eval_continuously(
for eval_name, eval_input in eval_inputs:
summary_writer = tf.compat.v2.summary.create_file_writer(
- model_dir + '/eval' + eval_name)
+ os.path.join(model_dir, 'eval', eval_name))
with summary_writer.as_default():
eager_eval_loop(
detection_model,
diff --git a/research/object_detection/model_main_tf2.py b/research/object_detection/model_main_tf2.py
new file mode 100644
index 0000000000000000000000000000000000000000..715dc798cf7c15317ae316574b46b3fed9d1e614
--- /dev/null
+++ b/research/object_detection/model_main_tf2.py
@@ -0,0 +1,99 @@
+# Lint as: python3
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+r"""Creates and runs TF2 object detection models.
+
+For local training/evaluation run:
+PIPELINE_CONFIG_PATH=path/to/pipeline.config
+MODEL_DIR=/tmp/model_outputs
+NUM_TRAIN_STEPS=10000
+SAMPLE_1_OF_N_EVAL_EXAMPLES=1
+python model_main_tf2.py \
+ --model_dir=$MODEL_DIR --num_train_steps=$NUM_TRAIN_STEPS \
+ --sample_1_of_n_eval_examples=$SAMPLE_1_OF_N_EVAL_EXAMPLES \
+ --pipeline_config_path=$PIPELINE_CONFIG_PATH \
+ --alsologtostderr
+"""
+from absl import flags
+import tensorflow.compat.v2 as tf
+from object_detection import model_lib_v2
+
+flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
+ 'file.')
+flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
+flags.DEFINE_bool('eval_on_train_data', False, 'Enable evaluating on train '
+ 'data (only supported in distributed training).')
+flags.DEFINE_integer('sample_1_of_n_eval_examples', None, 'Will sample one of '
+ 'every n eval input examples, where n is provided.')
+flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
+ 'one of every n train input examples for evaluation, '
+ 'where n is provided. This is only used if '
+ '`eval_on_train_data` is True.')
+flags.DEFINE_string(
+ 'model_dir', None, 'Path to output model directory '
+ 'where event and checkpoint files will be written.')
+flags.DEFINE_string(
+ 'checkpoint_dir', None, 'Path to directory holding a checkpoint. If '
+ '`checkpoint_dir` is provided, this binary operates in eval-only mode, '
+ 'writing resulting metrics to `model_dir`.')
+
+flags.DEFINE_integer('eval_timeout', 3600, 'Number of seconds to wait for an '
+ 'evaluation checkpoint before exiting.')
+
+flags.DEFINE_bool('use_tpu', False, 'Whether the job is executing on a TPU.')
+flags.DEFINE_integer(
+ 'num_workers', 1, 'When num_workers > 1, training uses '
+ 'MultiWorkerMirroredStrategy. When num_workers = 1 it uses '
+ 'MirroredStrategy.')
+
+FLAGS = flags.FLAGS
+
+
+def main(unused_argv):
+ flags.mark_flag_as_required('model_dir')
+ flags.mark_flag_as_required('pipeline_config_path')
+ tf.config.set_soft_device_placement(True)
+
+ if FLAGS.checkpoint_dir:
+ model_lib_v2.eval_continuously(
+ pipeline_config_path=FLAGS.pipeline_config_path,
+ model_dir=FLAGS.model_dir,
+ train_steps=FLAGS.num_train_steps,
+ sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
+ sample_1_of_n_eval_on_train_examples=(
+ FLAGS.sample_1_of_n_eval_on_train_examples),
+ checkpoint_dir=FLAGS.checkpoint_dir,
+ wait_interval=300, timeout=FLAGS.eval_timeout)
+ else:
+ if FLAGS.use_tpu:
+ resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
+ tf.config.experimental_connect_to_cluster(resolver)
+ tf.tpu.experimental.initialize_tpu_system(resolver)
+ strategy = tf.distribute.experimental.TPUStrategy(resolver)
+ elif FLAGS.num_workers > 1:
+ strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
+ else:
+ strategy = tf.compat.v2.distribute.MirroredStrategy()
+
+ with strategy.scope():
+ model_lib_v2.train_loop(
+ pipeline_config_path=FLAGS.pipeline_config_path,
+ model_dir=FLAGS.model_dir,
+ train_steps=FLAGS.num_train_steps,
+ use_tpu=FLAGS.use_tpu)
+
+if __name__ == '__main__':
+ tf.compat.v1.app.run()
diff --git a/research/object_detection/models/bidirectional_feature_pyramid_generators.py b/research/object_detection/models/bidirectional_feature_pyramid_generators.py
new file mode 100644
index 0000000000000000000000000000000000000000..b53dc60ef6465c408900800216cbe066e6d18259
--- /dev/null
+++ b/research/object_detection/models/bidirectional_feature_pyramid_generators.py
@@ -0,0 +1,486 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functions to generate bidirectional feature pyramids based on image features.
+
+Provides bidirectional feature pyramid network (BiFPN) generators that can be
+used to build object detection feature extractors, as proposed by Tan et al.
+See https://arxiv.org/abs/1911.09070 for more details.
+"""
+import collections
+import functools
+from six.moves import range
+from six.moves import zip
+import tensorflow as tf
+
+from object_detection.utils import bifpn_utils
+
+
+def _create_bifpn_input_config(fpn_min_level,
+ fpn_max_level,
+ input_max_level,
+ level_scales=None):
+ """Creates a BiFPN input config for the input levels from a backbone network.
+
+ Args:
+ fpn_min_level: the minimum pyramid level (highest feature map resolution) to
+ use in the BiFPN.
+ fpn_max_level: the maximum pyramid level (lowest feature map resolution) to
+ use in the BiFPN.
+ input_max_level: the maximum pyramid level that will be provided as input to
+ the BiFPN. Accordingly, the BiFPN will compute additional pyramid levels
+ from input_max_level, up to the desired fpn_max_level.
+ level_scales: a list of pyramid level scale factors. If 'None', each level's
+ scale is set to 2^level by default, which corresponds to each successive
+ feature map scaling by a factor of 2.
+
+ Returns:
+ A list of dictionaries for each feature map expected as input to the BiFPN,
+ where each has entries for the feature map 'name' and 'scale'.
+ """
+ if not level_scales:
+ level_scales = [2**i for i in range(fpn_min_level, fpn_max_level + 1)]
+
+ bifpn_input_params = []
+ for i in range(fpn_min_level, min(fpn_max_level, input_max_level) + 1):
+ bifpn_input_params.append({
+ 'name': '0_up_lvl_{}'.format(i),
+ 'scale': level_scales[i - fpn_min_level]
+ })
+
+ return bifpn_input_params
+
+
+def _get_bifpn_output_node_names(fpn_min_level, fpn_max_level, node_config):
+ """Returns a list of BiFPN output node names, given a BiFPN node config.
+
+ Args:
+ fpn_min_level: the minimum pyramid level (highest feature map resolution)
+ used by the BiFPN.
+ fpn_max_level: the maximum pyramid level (lowest feature map resolution)
+ used by the BiFPN.
+ node_config: the BiFPN node_config, a list of dictionaries corresponding to
+ each node in the BiFPN computation graph, where each entry should have an
+ associated 'name'.
+
+ Returns:
+ A list of strings corresponding to the names of the output BiFPN nodes.
+ """
+ num_output_nodes = fpn_max_level - fpn_min_level + 1
+ return [node['name'] for node in node_config[-num_output_nodes:]]
+
+
+def _create_bifpn_node_config(bifpn_num_iterations,
+ bifpn_num_filters,
+ fpn_min_level,
+ fpn_max_level,
+ input_max_level,
+ bifpn_node_params=None,
+ level_scales=None):
+ """Creates a config specifying a bidirectional feature pyramid network.
+
+ Args:
+ bifpn_num_iterations: the number of top-down bottom-up feature computations
+ to repeat in the BiFPN.
+ bifpn_num_filters: the number of filters (channels) for every feature map
+ used in the BiFPN.
+ fpn_min_level: the minimum pyramid level (highest feature map resolution) to
+ use in the BiFPN.
+ fpn_max_level: the maximum pyramid level (lowest feature map resolution) to
+ use in the BiFPN.
+ input_max_level: the maximum pyramid level that will be provided as input to
+ the BiFPN. Accordingly, the BiFPN will compute additional pyramid levels
+ from input_max_level, up to the desired fpn_max_level.
+ bifpn_node_params: If not 'None', a dictionary of additional default BiFPN
+ node parameters that will be applied to all BiFPN nodes.
+ level_scales: a list of pyramid level scale factors. If 'None', each level's
+ scale is set to 2^level by default, which corresponds to each successive
+ feature map scaling by a factor of 2.
+
+ Returns:
+ A list of dictionaries used to define nodes in the BiFPN computation graph,
+ as proposed by EfficientDet, Tan et al (https://arxiv.org/abs/1911.09070).
+ Each node's entry has the corresponding keys:
+ name: String. The name of this node in the BiFPN. The node name follows
+ the format '{bifpn_iteration}_{dn|up}_lvl_{pyramid_level}', where 'dn'
+ or 'up' refers to whether the node is in the top-down or bottom-up
+ portion of a single BiFPN iteration.
+ scale: the scale factor for this node, by default 2^level.
+ inputs: A list of names of nodes which are inputs to this node.
+ num_channels: The number of channels for this node.
+ combine_method: String. Name of the method used to combine input
+ node feature maps, 'fast_attention' by default for nodes which have more
+ than one input. Otherwise, 'None' for nodes with only one input node.
+ input_op: A (partial) function which is called to construct the layers
+ that will be applied to this BiFPN node's inputs. This function is
+ called with the arguments:
+ input_op(name, input_scale, input_num_channels, output_scale,
+ output_num_channels, conv_hyperparams, is_training,
+ freeze_batchnorm)
+ post_combine_op: A (partial) function which is called to construct the
+ layers that will be applied to the result of the combine operation for
+ this BiFPN node. This function will be called with the arguments:
+ post_combine_op(name, conv_hyperparams, is_training, freeze_batchnorm)
+ If 'None', then no layers will be applied after the combine operation
+ for this node.
+ """
+ if not level_scales:
+ level_scales = [2**i for i in range(fpn_min_level, fpn_max_level + 1)]
+
+ default_node_params = {
+ 'num_channels':
+ bifpn_num_filters,
+ 'combine_method':
+ 'fast_attention',
+ 'input_op':
+ functools.partial(
+ _create_bifpn_resample_block, downsample_method='max_pooling'),
+ 'post_combine_op':
+ functools.partial(
+ bifpn_utils.create_conv_block,
+ num_filters=bifpn_num_filters,
+ kernel_size=3,
+ strides=1,
+ padding='SAME',
+ use_separable=True,
+ apply_batchnorm=True,
+ apply_activation=True,
+ conv_bn_act_pattern=False),
+ }
+ if bifpn_node_params:
+ default_node_params.update(bifpn_node_params)
+
+ bifpn_node_params = []
+ # Create additional base pyramid levels not provided as input to the BiFPN.
+ # Note, combine_method and post_combine_op are set to None for additional
+ # base pyramid levels because they do not combine multiple input BiFPN nodes.
+ for i in range(input_max_level + 1, fpn_max_level + 1):
+ node_params = dict(default_node_params)
+ node_params.update({
+ 'name': '0_up_lvl_{}'.format(i),
+ 'scale': level_scales[i - fpn_min_level],
+ 'inputs': ['0_up_lvl_{}'.format(i - 1)],
+ 'combine_method': None,
+ 'post_combine_op': None,
+ })
+ bifpn_node_params.append(node_params)
+
+ for i in range(bifpn_num_iterations):
+ # The first bottom-up feature pyramid (which includes the input pyramid
+ # levels from the backbone network and the additional base pyramid levels)
+ # is indexed at 0. So, the first top-down bottom-up pass of the BiFPN is
+ # indexed from 1, and repeated for bifpn_num_iterations iterations.
+ bifpn_i = i + 1
+
+ # Create top-down nodes.
+ for level_i in reversed(range(fpn_min_level, fpn_max_level)):
+ inputs = []
+ # BiFPN nodes in the top-down pass receive input from the corresponding
+ # level from the previous BiFPN iteration's bottom-up pass, except for the
+ # bottom-most (min) level node, which is computed once in the initial
+ # bottom-up pass, and is afterwards only computed in each top-down pass.
+ if level_i > fpn_min_level or bifpn_i == 1:
+ inputs.append('{}_up_lvl_{}'.format(bifpn_i - 1, level_i))
+ else:
+ inputs.append('{}_dn_lvl_{}'.format(bifpn_i - 1, level_i))
+ inputs.append(bifpn_node_params[-1]['name'])
+ node_params = dict(default_node_params)
+ node_params.update({
+ 'name': '{}_dn_lvl_{}'.format(bifpn_i, level_i),
+ 'scale': level_scales[level_i - fpn_min_level],
+ 'inputs': inputs
+ })
+ bifpn_node_params.append(node_params)
+
+ # Create bottom-up nodes.
+ for level_i in range(fpn_min_level + 1, fpn_max_level + 1):
+ # BiFPN nodes in the bottom-up pass receive input from the corresponding
+ # level from the preceding top-down pass, except for the top (max) level
+ # which does not have a corresponding node in the top-down pass.
+ inputs = ['{}_up_lvl_{}'.format(bifpn_i - 1, level_i)]
+ if level_i < fpn_max_level:
+ inputs.append('{}_dn_lvl_{}'.format(bifpn_i, level_i))
+ inputs.append(bifpn_node_params[-1]['name'])
+ node_params = dict(default_node_params)
+ node_params.update({
+ 'name': '{}_up_lvl_{}'.format(bifpn_i, level_i),
+ 'scale': level_scales[level_i - fpn_min_level],
+ 'inputs': inputs
+ })
+ bifpn_node_params.append(node_params)
+
+ return bifpn_node_params
+
+
+def _create_bifpn_resample_block(name,
+ input_scale,
+ input_num_channels,
+ output_scale,
+ output_num_channels,
+ conv_hyperparams,
+ is_training,
+ freeze_batchnorm,
+ downsample_method=None,
+ use_native_resize_op=False,
+ maybe_apply_1x1_conv=True,
+ apply_1x1_pre_sampling=True,
+ apply_1x1_post_sampling=False):
+ """Creates resample block layers for input feature maps to BiFPN nodes.
+
+ Args:
+ name: String. Name used for this block of layers.
+ input_scale: Scale factor of the input feature map.
+ input_num_channels: Number of channels in the input feature map.
+ output_scale: Scale factor of the output feature map.
+ output_num_channels: Number of channels in the output feature map.
+ conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
+ containing hyperparameters for convolution ops.
+ is_training: Indicates whether the feature generator is in training mode.
+ freeze_batchnorm: Bool. Whether to freeze batch norm parameters during
+ training or not. When training with a small batch size (e.g. 1), it is
+ desirable to freeze batch norm update and use pretrained batch norm
+ params.
+ downsample_method: String. Method to use when downsampling feature maps.
+ use_native_resize_op: Bool. Whether to use the native resize op when
+ upsampling feature maps.
+ maybe_apply_1x1_conv: Bool. If 'True', a 1x1 convolution will only be
+ applied if the input_num_channels differs from the output_num_channels.
+ apply_1x1_pre_sampling: Bool. Whether a 1x1 convolution will be applied to
+ the input feature map before the up/down-sampling operation.
+ apply_1x1_post_sampling: Bool. Whether a 1x1 convolution will be applied to
+ the input feature map after the up/down-sampling operation.
+
+ Returns:
+ A list of layers which may be applied to the input feature maps in order to
+ compute feature maps with the specified scale and number of channels.
+ """
+ # By default, 1x1 convolutions are only applied before sampling when the
+ # number of input and output channels differ.
+ if maybe_apply_1x1_conv and output_num_channels == input_num_channels:
+ apply_1x1_pre_sampling = False
+ apply_1x1_post_sampling = False
+
+ apply_bn_for_resampling = True
+ layers = []
+ if apply_1x1_pre_sampling:
+ layers.extend(
+ bifpn_utils.create_conv_block(
+ name=name + '1x1_pre_sample/',
+ num_filters=output_num_channels,
+ kernel_size=1,
+ strides=1,
+ padding='SAME',
+ use_separable=False,
+ apply_batchnorm=apply_bn_for_resampling,
+ apply_activation=False,
+ conv_hyperparams=conv_hyperparams,
+ is_training=is_training,
+ freeze_batchnorm=freeze_batchnorm))
+
+ layers.extend(
+ bifpn_utils.create_resample_feature_map_ops(input_scale, output_scale,
+ downsample_method,
+ use_native_resize_op,
+ conv_hyperparams, is_training,
+ freeze_batchnorm, name))
+
+ if apply_1x1_post_sampling:
+ layers.extend(
+ bifpn_utils.create_conv_block(
+ name=name + '1x1_post_sample/',
+ num_filters=output_num_channels,
+ kernel_size=1,
+ strides=1,
+ padding='SAME',
+ use_separable=False,
+ apply_batchnorm=apply_bn_for_resampling,
+ apply_activation=False,
+ conv_hyperparams=conv_hyperparams,
+ is_training=is_training,
+ freeze_batchnorm=freeze_batchnorm))
+
+ return layers
+
+
+def _create_bifpn_combine_op(num_inputs, name, combine_method):
+ """Creates the op used to combine the input feature maps of a BiFPN node.
+
+ Args:
+ num_inputs: The number of inputs to this combine operation.
+ name: String. The name of this combine operation.
+ combine_method: String. The method used to combine input feature maps.
+
+ Returns:
+ A function which may be called with a list of num_inputs feature maps
+ and which will return a single feature map.
+ """
+
+ combine_op = None
+ if num_inputs < 1:
+ raise ValueError('Expected at least 1 input for BiFPN combine.')
+ elif num_inputs == 1:
+ combine_op = lambda x: x[0]
+ else:
+ combine_op = bifpn_utils.BiFPNCombineLayer(
+ combine_method=combine_method, name=name)
+ return combine_op
+
+
+class KerasBiFpnFeatureMaps(tf.keras.Model):
+ """Generates Keras based BiFPN feature maps from an input feature map pyramid.
+
+ A Keras model that generates multi-scale feature maps for detection by
+ iteratively computing top-down and bottom-up feature pyramids, as in the
+ EfficientDet paper by Tan et al, see arxiv.org/abs/1911.09070 for details.
+ """
+
+ def __init__(self,
+ bifpn_num_iterations,
+ bifpn_num_filters,
+ fpn_min_level,
+ fpn_max_level,
+ input_max_level,
+ is_training,
+ conv_hyperparams,
+ freeze_batchnorm,
+ bifpn_node_params=None,
+ name=None):
+ """Constructor.
+
+ Args:
+ bifpn_num_iterations: The number of top-down bottom-up iterations.
+ bifpn_num_filters: The number of filters (channels) to be used for all
+ feature maps in this BiFPN.
+ fpn_min_level: The minimum pyramid level (highest feature map resolution)
+ to use in the BiFPN.
+ fpn_max_level: The maximum pyramid level (lowest feature map resolution)
+ to use in the BiFPN.
+ input_max_level: The maximum pyramid level that will be provided as input
+ to the BiFPN. Accordingly, the BiFPN will compute any additional pyramid
+ levels from input_max_level up to the desired fpn_max_level, with each
+ successive level downsampling by a scale factor of 2 by default.
+ is_training: Indicates whether the feature generator is in training mode.
+ conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
+ containing hyperparameters for convolution ops.
+ freeze_batchnorm: Bool. Whether to freeze batch norm parameters during
+ training or not. When training with a small batch size (e.g. 1), it is
+ desirable to freeze batch norm update and use pretrained batch norm
+ params.
+ bifpn_node_params: An optional dictionary that may be used to specify
+ default parameters for BiFPN nodes, without the need to provide a custom
+ bifpn_node_config. For example, if '{ combine_method: 'sum' }', then all
+ BiFPN nodes will combine input feature maps by summation, rather than
+ by the default fast attention method.
+ name: A string name scope to assign to the model. If 'None', Keras
+ will auto-generate one from the class name.
+ """
+ super(KerasBiFpnFeatureMaps, self).__init__(name=name)
+ bifpn_node_config = _create_bifpn_node_config(
+ bifpn_num_iterations, bifpn_num_filters, fpn_min_level, fpn_max_level,
+ input_max_level, bifpn_node_params)
+ bifpn_input_config = _create_bifpn_input_config(
+ fpn_min_level, fpn_max_level, input_max_level)
+ bifpn_output_node_names = _get_bifpn_output_node_names(
+ fpn_min_level, fpn_max_level, bifpn_node_config)
+
+ self.bifpn_node_config = bifpn_node_config
+ self.bifpn_output_node_names = bifpn_output_node_names
+ self.node_input_blocks = []
+ self.node_combine_op = []
+ self.node_post_combine_block = []
+
+ all_node_params = bifpn_input_config
+ all_node_names = [node['name'] for node in all_node_params]
+ for node_config in bifpn_node_config:
+ # Maybe transform and/or resample input feature maps.
+ input_blocks = []
+ for input_name in node_config['inputs']:
+ if input_name not in all_node_names:
+ raise ValueError(
+ 'Input feature map ({}) does not exist:'.format(input_name))
+ input_index = all_node_names.index(input_name)
+ input_params = all_node_params[input_index]
+ input_block = node_config['input_op'](
+ name='{}/input_{}/'.format(node_config['name'], input_name),
+ input_scale=input_params['scale'],
+ input_num_channels=input_params.get('num_channels', None),
+ output_scale=node_config['scale'],
+ output_num_channels=node_config['num_channels'],
+ conv_hyperparams=conv_hyperparams,
+ is_training=is_training,
+ freeze_batchnorm=freeze_batchnorm)
+ input_blocks.append((input_index, input_block))
+
+ # Combine input feature maps.
+ combine_op = _create_bifpn_combine_op(
+ num_inputs=len(input_blocks),
+ name=(node_config['name'] + '/combine'),
+ combine_method=node_config['combine_method'])
+
+ # Post-combine layers.
+ post_combine_block = []
+ if node_config['post_combine_op']:
+ post_combine_block.extend(node_config['post_combine_op'](
+ name=node_config['name'] + '/post_combine/',
+ conv_hyperparams=conv_hyperparams,
+ is_training=is_training,
+ freeze_batchnorm=freeze_batchnorm))
+
+ self.node_input_blocks.append(input_blocks)
+ self.node_combine_op.append(combine_op)
+ self.node_post_combine_block.append(post_combine_block)
+ all_node_params.append(node_config)
+ all_node_names.append(node_config['name'])
+
+ def call(self, feature_pyramid):
+ """Compute BiFPN feature maps from input feature pyramid.
+
+ Executed when calling the `.__call__` method on input.
+
+ Args:
+ feature_pyramid: list of tuples of (tensor_name, image_feature_tensor).
+
+ Returns:
+ feature_maps: an OrderedDict mapping keys (feature map names) to
+ tensors where each tensor has shape [batch, height_i, width_i, depth_i].
+ """
+ feature_maps = [el[1] for el in feature_pyramid]
+ output_feature_maps = [None for node in self.bifpn_output_node_names]
+
+ for index, node in enumerate(self.bifpn_node_config):
+ node_scope = 'node_{:02d}'.format(index)
+ with tf.name_scope(node_scope):
+ # Apply layer blocks to this node's input feature maps.
+ input_block_results = []
+ for input_index, input_block in self.node_input_blocks[index]:
+ block_result = feature_maps[input_index]
+ for layer in input_block:
+ block_result = layer(block_result)
+ input_block_results.append(block_result)
+
+ # Combine the resulting feature maps.
+ node_result = self.node_combine_op[index](input_block_results)
+
+ # Apply post-combine layer block if applicable.
+ for layer in self.node_post_combine_block[index]:
+ node_result = layer(node_result)
+
+ feature_maps.append(node_result)
+
+ if node['name'] in self.bifpn_output_node_names:
+ index = self.bifpn_output_node_names.index(node['name'])
+ output_feature_maps[index] = node_result
+
+ return collections.OrderedDict(
+ zip(self.bifpn_output_node_names, output_feature_maps))
diff --git a/research/object_detection/models/bidirectional_feature_pyramid_generators_tf2_test.py b/research/object_detection/models/bidirectional_feature_pyramid_generators_tf2_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..cbc815cc446add205a5b307cd56cf81ee60a1041
--- /dev/null
+++ b/research/object_detection/models/bidirectional_feature_pyramid_generators_tf2_test.py
@@ -0,0 +1,167 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for bidirectional feature pyramid generators."""
+import unittest
+from absl.testing import parameterized
+
+import tensorflow.compat.v1 as tf
+
+from google.protobuf import text_format
+
+from object_detection.builders import hyperparams_builder
+from object_detection.models import bidirectional_feature_pyramid_generators as bifpn_generators
+from object_detection.protos import hyperparams_pb2
+from object_detection.utils import test_case
+from object_detection.utils import test_utils
+from object_detection.utils import tf_version
+
+
+@parameterized.parameters({'bifpn_num_iterations': 2},
+ {'bifpn_num_iterations': 8})
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class BiFPNFeaturePyramidGeneratorTest(test_case.TestCase):
+
+ def _build_conv_hyperparams(self):
+ conv_hyperparams = hyperparams_pb2.Hyperparams()
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ force_use_bias: true
+ """
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
+ return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
+
+ def test_get_expected_feature_map_shapes(self, bifpn_num_iterations):
+ with test_utils.GraphContextOrNone() as g:
+ image_features = [
+ ('block3', tf.random_uniform([4, 16, 16, 256], dtype=tf.float32)),
+ ('block4', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
+ ('block5', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32))
+ ]
+ bifpn_generator = bifpn_generators.KerasBiFpnFeatureMaps(
+ bifpn_num_iterations=bifpn_num_iterations,
+ bifpn_num_filters=128,
+ fpn_min_level=3,
+ fpn_max_level=7,
+ input_max_level=5,
+ is_training=True,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ freeze_batchnorm=False)
+ def graph_fn():
+ feature_maps = bifpn_generator(image_features)
+ return feature_maps
+
+ expected_feature_map_shapes = {
+ '{}_dn_lvl_3'.format(bifpn_num_iterations): (4, 16, 16, 128),
+ '{}_up_lvl_4'.format(bifpn_num_iterations): (4, 8, 8, 128),
+ '{}_up_lvl_5'.format(bifpn_num_iterations): (4, 4, 4, 128),
+ '{}_up_lvl_6'.format(bifpn_num_iterations): (4, 2, 2, 128),
+ '{}_up_lvl_7'.format(bifpn_num_iterations): (4, 1, 1, 128)}
+ out_feature_maps = self.execute(graph_fn, [], g)
+ out_feature_map_shapes = dict(
+ (key, value.shape) for key, value in out_feature_maps.items())
+ self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
+
+ def test_get_expected_variable_names(self, bifpn_num_iterations):
+ with test_utils.GraphContextOrNone() as g:
+ image_features = [
+ ('block3', tf.random_uniform([4, 16, 16, 256], dtype=tf.float32)),
+ ('block4', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
+ ('block5', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32))
+ ]
+ bifpn_generator = bifpn_generators.KerasBiFpnFeatureMaps(
+ bifpn_num_iterations=bifpn_num_iterations,
+ bifpn_num_filters=128,
+ fpn_min_level=3,
+ fpn_max_level=7,
+ input_max_level=5,
+ is_training=True,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ freeze_batchnorm=False,
+ name='bifpn')
+ def graph_fn():
+ return bifpn_generator(image_features)
+
+ self.execute(graph_fn, [], g)
+ expected_variables = [
+ 'bifpn/node_00/0_up_lvl_6/input_0_up_lvl_5/1x1_pre_sample/conv/bias',
+ 'bifpn/node_00/0_up_lvl_6/input_0_up_lvl_5/1x1_pre_sample/conv/kernel',
+ 'bifpn/node_03/1_dn_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/bias',
+ 'bifpn/node_03/1_dn_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/kernel',
+ 'bifpn/node_04/1_dn_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/bias',
+ 'bifpn/node_04/1_dn_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/kernel',
+ 'bifpn/node_05/1_dn_lvl_3/input_0_up_lvl_3/1x1_pre_sample/conv/bias',
+ 'bifpn/node_05/1_dn_lvl_3/input_0_up_lvl_3/1x1_pre_sample/conv/kernel',
+ 'bifpn/node_06/1_up_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/bias',
+ 'bifpn/node_06/1_up_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/kernel',
+ 'bifpn/node_07/1_up_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/bias',
+ 'bifpn/node_07/1_up_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/kernel']
+ expected_node_variable_patterns = [
+ ['bifpn/node_{:02}/{}_dn_lvl_6/combine/bifpn_combine_weights',
+ 'bifpn/node_{:02}/{}_dn_lvl_6/post_combine/separable_conv/bias',
+ 'bifpn/node_{:02}/{}_dn_lvl_6/post_combine/separable_conv/depthwise_kernel',
+ 'bifpn/node_{:02}/{}_dn_lvl_6/post_combine/separable_conv/pointwise_kernel'],
+ ['bifpn/node_{:02}/{}_dn_lvl_5/combine/bifpn_combine_weights',
+ 'bifpn/node_{:02}/{}_dn_lvl_5/post_combine/separable_conv/bias',
+ 'bifpn/node_{:02}/{}_dn_lvl_5/post_combine/separable_conv/depthwise_kernel',
+ 'bifpn/node_{:02}/{}_dn_lvl_5/post_combine/separable_conv/pointwise_kernel'],
+ ['bifpn/node_{:02}/{}_dn_lvl_4/combine/bifpn_combine_weights',
+ 'bifpn/node_{:02}/{}_dn_lvl_4/post_combine/separable_conv/bias',
+ 'bifpn/node_{:02}/{}_dn_lvl_4/post_combine/separable_conv/depthwise_kernel',
+ 'bifpn/node_{:02}/{}_dn_lvl_4/post_combine/separable_conv/pointwise_kernel'],
+ ['bifpn/node_{:02}/{}_dn_lvl_3/combine/bifpn_combine_weights',
+ 'bifpn/node_{:02}/{}_dn_lvl_3/post_combine/separable_conv/bias',
+ 'bifpn/node_{:02}/{}_dn_lvl_3/post_combine/separable_conv/depthwise_kernel',
+ 'bifpn/node_{:02}/{}_dn_lvl_3/post_combine/separable_conv/pointwise_kernel'],
+ ['bifpn/node_{:02}/{}_up_lvl_4/combine/bifpn_combine_weights',
+ 'bifpn/node_{:02}/{}_up_lvl_4/post_combine/separable_conv/bias',
+ 'bifpn/node_{:02}/{}_up_lvl_4/post_combine/separable_conv/depthwise_kernel',
+ 'bifpn/node_{:02}/{}_up_lvl_4/post_combine/separable_conv/pointwise_kernel'],
+ ['bifpn/node_{:02}/{}_up_lvl_5/combine/bifpn_combine_weights',
+ 'bifpn/node_{:02}/{}_up_lvl_5/post_combine/separable_conv/bias',
+ 'bifpn/node_{:02}/{}_up_lvl_5/post_combine/separable_conv/depthwise_kernel',
+ 'bifpn/node_{:02}/{}_up_lvl_5/post_combine/separable_conv/pointwise_kernel'],
+ ['bifpn/node_{:02}/{}_up_lvl_6/combine/bifpn_combine_weights',
+ 'bifpn/node_{:02}/{}_up_lvl_6/post_combine/separable_conv/bias',
+ 'bifpn/node_{:02}/{}_up_lvl_6/post_combine/separable_conv/depthwise_kernel',
+ 'bifpn/node_{:02}/{}_up_lvl_6/post_combine/separable_conv/pointwise_kernel'],
+ ['bifpn/node_{:02}/{}_up_lvl_7/combine/bifpn_combine_weights',
+ 'bifpn/node_{:02}/{}_up_lvl_7/post_combine/separable_conv/bias',
+ 'bifpn/node_{:02}/{}_up_lvl_7/post_combine/separable_conv/depthwise_kernel',
+ 'bifpn/node_{:02}/{}_up_lvl_7/post_combine/separable_conv/pointwise_kernel']]
+
+ node_i = 2
+ for iter_i in range(1, bifpn_num_iterations+1):
+ for node_variable_patterns in expected_node_variable_patterns:
+ for pattern in node_variable_patterns:
+ expected_variables.append(pattern.format(node_i, iter_i))
+ node_i += 1
+
+ expected_variables = set(expected_variables)
+ actual_variable_set = set(
+ [var.name.split(':')[0] for var in bifpn_generator.variables])
+ self.assertSetEqual(expected_variables, actual_variable_set)
+
+# TODO(aom): Tests for create_bifpn_combine_op.
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/research/object_detection/models/center_net_hourglass_feature_extractor.py b/research/object_detection/models/center_net_hourglass_feature_extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..4761915aa5ad0023673199f2083ff355816f7bb1
--- /dev/null
+++ b/research/object_detection/models/center_net_hourglass_feature_extractor.py
@@ -0,0 +1,75 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Hourglass[1] feature extractor for CenterNet[2] meta architecture.
+
+[1]: https://arxiv.org/abs/1603.06937
+[2]: https://arxiv.org/abs/1904.07850
+"""
+
+from object_detection.meta_architectures import center_net_meta_arch
+from object_detection.models.keras_models import hourglass_network
+
+
+class CenterNetHourglassFeatureExtractor(
+ center_net_meta_arch.CenterNetFeatureExtractor):
+ """The hourglass feature extractor for CenterNet.
+
+ This class is a thin wrapper around the HourglassFeatureExtractor class
+ along with some preprocessing methods inherited from the base class.
+ """
+
+ def __init__(self, hourglass_net, channel_means=(0., 0., 0.),
+ channel_stds=(1., 1., 1.), bgr_ordering=False):
+ """Initializes the feature extractor.
+
+ Args:
+ hourglass_net: The underlying hourglass network to use.
+ channel_means: A tuple of floats, denoting the mean of each channel
+ which will be subtracted from it.
+ channel_stds: A tuple of floats, denoting the standard deviation of each
+ channel. Each channel will be divided by its standard deviation value.
+ bgr_ordering: bool, if set will change the channel ordering to be in the
+ [blue, green, red] order.
+ """
+
+ super(CenterNetHourglassFeatureExtractor, self).__init__(
+ channel_means=channel_means, channel_stds=channel_stds,
+ bgr_ordering=bgr_ordering)
+ self._network = hourglass_net
+
+ def call(self, inputs):
+ return self._network(inputs)
+
+ @property
+ def out_stride(self):
+ """The stride in the output image of the network."""
+ return 4
+
+ @property
+ def num_feature_outputs(self):
+ """The number of feature outputs returned by the feature extractor."""
+ return self._network.num_hourglasses
+
+ def get_model(self):
+ return self._network
+
+
+def hourglass_104(channel_means, channel_stds, bgr_ordering):
+ """The Hourglass-104 backbone for CenterNet."""
+
+ network = hourglass_network.hourglass_104()
+ return CenterNetHourglassFeatureExtractor(
+ network, channel_means=channel_means, channel_stds=channel_stds,
+ bgr_ordering=bgr_ordering)
diff --git a/research/object_detection/models/center_net_hourglass_feature_extractor_tf2_test.py b/research/object_detection/models/center_net_hourglass_feature_extractor_tf2_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..19d5cbe9843ff03d6d1499a02980a067dc305579
--- /dev/null
+++ b/research/object_detection/models/center_net_hourglass_feature_extractor_tf2_test.py
@@ -0,0 +1,44 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Testing hourglass feature extractor for CenterNet."""
+import unittest
+import numpy as np
+import tensorflow.compat.v1 as tf
+
+from object_detection.models import center_net_hourglass_feature_extractor as hourglass
+from object_detection.models.keras_models import hourglass_network
+from object_detection.utils import test_case
+from object_detection.utils import tf_version
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class CenterNetHourglassFeatureExtractorTest(test_case.TestCase):
+
+ def test_center_net_hourglass_feature_extractor(self):
+
+ net = hourglass_network.HourglassNetwork(
+ num_stages=4, blocks_per_stage=[2, 3, 4, 5, 6],
+ channel_dims=[4, 6, 8, 10, 12, 14], num_hourglasses=2)
+
+ model = hourglass.CenterNetHourglassFeatureExtractor(net)
+ def graph_fn():
+ return model(tf.zeros((2, 64, 64, 3), dtype=np.float32))
+ outputs = self.execute(graph_fn, [])
+ self.assertEqual(outputs[0].shape, (2, 16, 16, 6))
+ self.assertEqual(outputs[1].shape, (2, 16, 16, 6))
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/research/object_detection/models/center_net_resnet_feature_extractor.py b/research/object_detection/models/center_net_resnet_feature_extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..477fa4c50ea9e0bc62b43a75c1674acfef7a183c
--- /dev/null
+++ b/research/object_detection/models/center_net_resnet_feature_extractor.py
@@ -0,0 +1,149 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Resnetv2 based feature extractors for CenterNet[1] meta architecture.
+
+[1]: https://arxiv.org/abs/1904.07850
+"""
+
+
+import tensorflow.compat.v1 as tf
+
+from object_detection.meta_architectures.center_net_meta_arch import CenterNetFeatureExtractor
+
+
+class CenterNetResnetFeatureExtractor(CenterNetFeatureExtractor):
+ """Resnet v2 base feature extractor for the CenterNet model."""
+
+ def __init__(self, resnet_type, channel_means=(0., 0., 0.),
+ channel_stds=(1., 1., 1.), bgr_ordering=False):
+ """Initializes the feature extractor with a specific ResNet architecture.
+
+ Args:
+ resnet_type: A string specifying which kind of ResNet to use. Currently
+ only `resnet_v2_50` and `resnet_v2_101` are supported.
+ channel_means: A tuple of floats, denoting the mean of each channel
+ which will be subtracted from it.
+ channel_stds: A tuple of floats, denoting the standard deviation of each
+ channel. Each channel will be divided by its standard deviation value.
+ bgr_ordering: bool, if set will change the channel ordering to be in the
+ [blue, green, red] order.
+
+ """
+
+ super(CenterNetResnetFeatureExtractor, self).__init__(
+ channel_means=channel_means, channel_stds=channel_stds,
+ bgr_ordering=bgr_ordering)
+ if resnet_type == 'resnet_v2_101':
+ self._base_model = tf.keras.applications.ResNet101V2(weights=None)
+ output_layer = 'conv5_block3_out'
+ elif resnet_type == 'resnet_v2_50':
+ self._base_model = tf.keras.applications.ResNet50V2(weights=None)
+ output_layer = 'conv5_block3_out'
+ else:
+ raise ValueError('Unknown Resnet Model {}'.format(resnet_type))
+ output_layer = self._base_model.get_layer(output_layer)
+
+ self._resnet_model = tf.keras.models.Model(inputs=self._base_model.input,
+ outputs=output_layer.output)
+ resnet_output = self._resnet_model(self._base_model.input)
+
+ for num_filters in [256, 128, 64]:
+ # TODO(vighneshb) This section has a few differences from the paper
+ # Figure out how much of a performance impact they have.
+
+ # 1. We use a simple convolution instead of a deformable convolution
+ conv = tf.keras.layers.Conv2D(filters=num_filters, kernel_size=3,
+ strides=1, padding='same')
+ resnet_output = conv(resnet_output)
+ resnet_output = tf.keras.layers.BatchNormalization()(resnet_output)
+ resnet_output = tf.keras.layers.ReLU()(resnet_output)
+
+ # 2. We use the default initialization for the convolution layers
+ # instead of initializing it to do bilinear upsampling.
+ conv_transpose = tf.keras.layers.Conv2DTranspose(filters=num_filters,
+ kernel_size=3, strides=2,
+ padding='same')
+ resnet_output = conv_transpose(resnet_output)
+ resnet_output = tf.keras.layers.BatchNormalization()(resnet_output)
+ resnet_output = tf.keras.layers.ReLU()(resnet_output)
+
+ self._feature_extractor_model = tf.keras.models.Model(
+ inputs=self._base_model.input, outputs=resnet_output)
+
+ def preprocess(self, resized_inputs):
+ """Preprocess input images for the ResNet model.
+
+ This scales images in the range [0, 255] to the range [-1, 1]
+
+ Args:
+ resized_inputs: a [batch, height, width, channels] float32 tensor.
+
+ Returns:
+ outputs: a [batch, height, width, channels] float32 tensor.
+
+ """
+ resized_inputs = super(CenterNetResnetFeatureExtractor, self).preprocess(
+ resized_inputs)
+ return tf.keras.applications.resnet_v2.preprocess_input(resized_inputs)
+
+ def load_feature_extractor_weights(self, path):
+ self._base_model.load_weights(path)
+
+ def get_base_model(self):
+ """Get base resnet model for inspection and testing."""
+ return self._base_model
+
+ def call(self, inputs):
+ """Returns image features extracted by the backbone.
+
+ Args:
+ inputs: An image tensor of shape [batch_size, input_height,
+ input_width, 3]
+
+ Returns:
+ features_list: A list of length 1 containing a tensor of shape
+ [batch_size, input_height // 4, input_width // 4, 64] containing
+ the features extracted by the ResNet.
+ """
+ return [self._feature_extractor_model(inputs)]
+
+ @property
+ def num_feature_outputs(self):
+ return 1
+
+ @property
+ def out_stride(self):
+ return 4
+
+
+def resnet_v2_101(channel_means, channel_stds, bgr_ordering):
+ """The ResNet v2 101 feature extractor."""
+
+ return CenterNetResnetFeatureExtractor(
+ resnet_type='resnet_v2_101',
+ channel_means=channel_means,
+ channel_stds=channel_stds,
+ bgr_ordering=bgr_ordering
+ )
+
+
+def resnet_v2_50(channel_means, channel_stds, bgr_ordering):
+ """The ResNet v2 50 feature extractor."""
+
+ return CenterNetResnetFeatureExtractor(
+ resnet_type='resnet_v2_50',
+ channel_means=channel_means,
+ channel_stds=channel_stds,
+ bgr_ordering=bgr_ordering)
diff --git a/research/object_detection/models/center_net_resnet_feature_extractor_tf2_test.py b/research/object_detection/models/center_net_resnet_feature_extractor_tf2_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..3429c0442053982d3d3d9502508ede3177cbf102
--- /dev/null
+++ b/research/object_detection/models/center_net_resnet_feature_extractor_tf2_test.py
@@ -0,0 +1,54 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Testing ResNet v2 models for the CenterNet meta architecture."""
+import unittest
+import numpy as np
+import tensorflow.compat.v1 as tf
+
+from object_detection.models import center_net_resnet_feature_extractor
+from object_detection.utils import test_case
+from object_detection.utils import tf_version
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class CenterNetResnetFeatureExtractorTest(test_case.TestCase):
+
+ def test_output_size(self):
+ """Verify that shape of features returned by the backbone is correct."""
+
+ model = center_net_resnet_feature_extractor.\
+ CenterNetResnetFeatureExtractor('resnet_v2_101')
+ def graph_fn():
+ img = np.zeros((8, 224, 224, 3), dtype=np.float32)
+ processed_img = model.preprocess(img)
+ return model(processed_img)
+ outputs = self.execute(graph_fn, [])
+ self.assertEqual(outputs.shape, (8, 56, 56, 64))
+
+ def test_output_size_resnet50(self):
+ """Verify that shape of features returned by the backbone is correct."""
+
+ model = center_net_resnet_feature_extractor.\
+ CenterNetResnetFeatureExtractor('resnet_v2_50')
+ def graph_fn():
+ img = np.zeros((8, 224, 224, 3), dtype=np.float32)
+ processed_img = model.preprocess(img)
+ return model(processed_img)
+ outputs = self.execute(graph_fn, [])
+ self.assertEqual(outputs.shape, (8, 56, 56, 64))
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor.py b/research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..842e9cf1b2e5393a6bc87df3989f173d0409de70
--- /dev/null
+++ b/research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor.py
@@ -0,0 +1,176 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Resnetv1 FPN [1] based feature extractors for CenterNet[2] meta architecture.
+
+
+[1]: https://arxiv.org/abs/1612.03144.
+[2]: https://arxiv.org/abs/1904.07850.
+"""
+import tensorflow.compat.v1 as tf
+
+from object_detection.meta_architectures.center_net_meta_arch import CenterNetFeatureExtractor
+
+
+_RESNET_MODEL_OUTPUT_LAYERS = {
+ 'resnet_v1_50': ['conv2_block3_out', 'conv3_block4_out',
+ 'conv4_block6_out', 'conv5_block3_out'],
+ 'resnet_v1_101': ['conv2_block3_out', 'conv3_block4_out',
+ 'conv4_block23_out', 'conv5_block3_out'],
+}
+
+
+class CenterNetResnetV1FpnFeatureExtractor(CenterNetFeatureExtractor):
+ """Resnet v1 FPN base feature extractor for the CenterNet model.
+
+ This feature extractor uses residual skip connections and nearest neighbor
+ upsampling to produce an output feature map of stride 4, which has precise
+ localization information along with strong semantic information from the top
+ of the net. This design does not exactly follow the original FPN design,
+ specifically:
+ - Since only one output map is necessary for heatmap prediction (stride 4
+ output), the top-down feature maps can have different numbers of channels.
+ Specifically, the top-down feature maps have the following sizes:
+ [h/4, w/4, 64], [h/8, w/8, 128], [h/16, w/16, 256], [h/32, w/32, 256].
+ - No additional coarse features are used after conv5_x.
+ """
+
+ def __init__(self, resnet_type, channel_means=(0., 0., 0.),
+ channel_stds=(1., 1., 1.), bgr_ordering=False):
+ """Initializes the feature extractor with a specific ResNet architecture.
+
+ Args:
+ resnet_type: A string specifying which kind of ResNet to use. Currently
+ only `resnet_v1_50` and `resnet_v1_101` are supported.
+ channel_means: A tuple of floats, denoting the mean of each channel
+ which will be subtracted from it.
+ channel_stds: A tuple of floats, denoting the standard deviation of each
+ channel. Each channel will be divided by its standard deviation value.
+ bgr_ordering: bool, if set will change the channel ordering to be in the
+ [blue, green, red] order.
+
+ """
+
+ super(CenterNetResnetV1FpnFeatureExtractor, self).__init__(
+ channel_means=channel_means, channel_stds=channel_stds,
+ bgr_ordering=bgr_ordering)
+ if resnet_type == 'resnet_v1_50':
+ self._base_model = tf.keras.applications.ResNet50(weights=None)
+ elif resnet_type == 'resnet_v1_101':
+ self._base_model = tf.keras.applications.ResNet101(weights=None)
+ else:
+ raise ValueError('Unknown Resnet Model {}'.format(resnet_type))
+ output_layers = _RESNET_MODEL_OUTPUT_LAYERS[resnet_type]
+ outputs = [self._base_model.get_layer(output_layer_name).output
+ for output_layer_name in output_layers]
+
+ self._resnet_model = tf.keras.models.Model(inputs=self._base_model.input,
+ outputs=outputs)
+ resnet_outputs = self._resnet_model(self._base_model.input)
+
+ # Construct the top-down feature maps.
+ top_layer = resnet_outputs[-1]
+ residual_op = tf.keras.layers.Conv2D(filters=256, kernel_size=1,
+ strides=1, padding='same')
+ top_down = residual_op(top_layer)
+
+ num_filters_list = [256, 128, 64]
+ for i, num_filters in enumerate(num_filters_list):
+ level_ind = 2 - i
+ # Upsample.
+ upsample_op = tf.keras.layers.UpSampling2D(2, interpolation='nearest')
+ top_down = upsample_op(top_down)
+
+ # Residual (skip-connection) from bottom-up pathway.
+ residual_op = tf.keras.layers.Conv2D(filters=num_filters, kernel_size=1,
+ strides=1, padding='same')
+ residual = residual_op(resnet_outputs[level_ind])
+
+ # Merge.
+ top_down = top_down + residual
+ next_num_filters = num_filters_list[i+1] if i + 1 <= 2 else 64
+ conv = tf.keras.layers.Conv2D(filters=next_num_filters,
+ kernel_size=3, strides=1, padding='same')
+ top_down = conv(top_down)
+ top_down = tf.keras.layers.BatchNormalization()(top_down)
+ top_down = tf.keras.layers.ReLU()(top_down)
+
+ self._feature_extractor_model = tf.keras.models.Model(
+ inputs=self._base_model.input, outputs=top_down)
+
+ def preprocess(self, resized_inputs):
+ """Preprocess input images for the ResNet model.
+
+ This converts RGB to BGR and zero-centers each channel; it does not rescale.
+
+ Args:
+ resized_inputs: a [batch, height, width, channels] float32 tensor.
+
+ Returns:
+ outputs: a [batch, height, width, channels] float32 tensor.
+
+ """
+ resized_inputs = super(
+ CenterNetResnetV1FpnFeatureExtractor, self).preprocess(resized_inputs)
+ return tf.keras.applications.resnet.preprocess_input(resized_inputs)
+
+ def load_feature_extractor_weights(self, path):
+ self._base_model.load_weights(path)
+
+ def get_base_model(self):
+ """Get base resnet model for inspection and testing."""
+ return self._base_model
+
+ def call(self, inputs):
+ """Returns image features extracted by the backbone.
+
+ Args:
+ inputs: An image tensor of shape [batch_size, input_height,
+ input_width, 3]
+
+ Returns:
+ features_list: A list of length 1 containing a tensor of shape
+ [batch_size, input_height // 4, input_width // 4, 64] containing
+ the features extracted by the ResNet.
+ """
+ return [self._feature_extractor_model(inputs)]
+
+ @property
+ def num_feature_outputs(self):
+ return 1
+
+ @property
+ def out_stride(self):
+ return 4
+
+
+def resnet_v1_101_fpn(channel_means, channel_stds, bgr_ordering):
+ """The ResNet v1 101 FPN feature extractor."""
+
+ return CenterNetResnetV1FpnFeatureExtractor(
+ resnet_type='resnet_v1_101',
+ channel_means=channel_means,
+ channel_stds=channel_stds,
+ bgr_ordering=bgr_ordering
+ )
+
+
+def resnet_v1_50_fpn(channel_means, channel_stds, bgr_ordering):
+ """The ResNet v1 50 FPN feature extractor."""
+
+ return CenterNetResnetV1FpnFeatureExtractor(
+ resnet_type='resnet_v1_50',
+ channel_means=channel_means,
+ channel_stds=channel_stds,
+ bgr_ordering=bgr_ordering)
diff --git a/research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor_tf2_test.py b/research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor_tf2_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f1524904f0a055e48342d09febdd7bd3ec6fb3c
--- /dev/null
+++ b/research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor_tf2_test.py
@@ -0,0 +1,49 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Testing ResNet v1 FPN models for the CenterNet meta architecture."""
+import unittest
+from absl.testing import parameterized
+
+import numpy as np
+import tensorflow.compat.v1 as tf
+
+from object_detection.models import center_net_resnet_v1_fpn_feature_extractor
+from object_detection.utils import test_case
+from object_detection.utils import tf_version
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class CenterNetResnetV1FpnFeatureExtractorTest(test_case.TestCase,
+ parameterized.TestCase):
+
+ @parameterized.parameters(
+ {'resnet_type': 'resnet_v1_50'},
+ {'resnet_type': 'resnet_v1_101'},
+ )
+ def test_correct_output_size(self, resnet_type):
+ """Verify that shape of features returned by the backbone is correct."""
+
+ model = center_net_resnet_v1_fpn_feature_extractor.\
+ CenterNetResnetV1FpnFeatureExtractor(resnet_type)
+ def graph_fn():
+ img = np.zeros((8, 224, 224, 3), dtype=np.float32)
+ processed_img = model.preprocess(img)
+ return model(processed_img)
+
+ self.assertEqual(self.execute(graph_fn, []).shape, (8, 56, 56, 64))
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py b/research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_tf1_test.py
similarity index 97%
rename from research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py
rename to research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_tf1_test.py
index fd7e0454419abdaa188607d007fd52b47c4a079c..4a27e8c8d649c4cb9ae961bffafc7ad824b63b25 100644
--- a/research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py
+++ b/research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_tf1_test.py
@@ -14,13 +14,16 @@
# ==============================================================================
"""Tests for embedded_ssd_mobilenet_v1_feature_extractor."""
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import embedded_ssd_mobilenet_v1_feature_extractor
from object_detection.models import ssd_feature_extractor_test
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class EmbeddedSSDMobileNetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
diff --git a/research/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py b/research/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_tf1_test.py
similarity index 97%
rename from research/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py
rename to research/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_tf1_test.py
index 8b5351c8f5bf0e236f97aa0466aeedce7d4976df..2505fbfb3ad6e8621a3b2d05caba506b350f0f49 100644
--- a/research/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py
+++ b/research/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_tf1_test.py
@@ -14,12 +14,14 @@
# ==============================================================================
"""Tests for models.faster_rcnn_inception_resnet_v2_feature_extractor."""
-
+import unittest
import tensorflow.compat.v1 as tf
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class FasterRcnnInceptionResnetV2FeatureExtractorTest(tf.test.TestCase):
def _build_feature_extractor(self, first_stage_features_stride):
diff --git a/research/object_detection/models/faster_rcnn_inception_resnet_v2_keras_feature_extractor.py b/research/object_detection/models/faster_rcnn_inception_resnet_v2_keras_feature_extractor.py
index 9196871bd3acbdf5d2b8379b56e1a8778daf3065..f185aa01dd377c66b94ca37cc244350b2071f21c 100644
--- a/research/object_detection/models/faster_rcnn_inception_resnet_v2_keras_feature_extractor.py
+++ b/research/object_detection/models/faster_rcnn_inception_resnet_v2_keras_feature_extractor.py
@@ -59,6 +59,7 @@ class FasterRCNNInceptionResnetV2KerasFeatureExtractor(
is_training, first_stage_features_stride, batch_norm_trainable,
weight_decay)
self._variable_dict = {}
+ self.classification_backbone = None
def preprocess(self, resized_inputs):
"""Faster R-CNN with Inception Resnet v2 preprocessing.
@@ -95,19 +96,20 @@ class FasterRCNNInceptionResnetV2KerasFeatureExtractor(
And returns rpn_feature_map:
A tensor with shape [batch, height, width, depth]
"""
- with tf.name_scope(name):
- with tf.name_scope('InceptionResnetV2'):
- model = inception_resnet_v2.inception_resnet_v2(
+ if not self.classification_backbone:
+ self.classification_backbone = inception_resnet_v2.inception_resnet_v2(
self._train_batch_norm,
output_stride=self._first_stage_features_stride,
align_feature_maps=True,
weight_decay=self._weight_decay,
weights=None,
include_top=False)
- proposal_features = model.get_layer(
+ with tf.name_scope(name):
+ with tf.name_scope('InceptionResnetV2'):
+ proposal_features = self.classification_backbone.get_layer(
name='block17_20_ac').output
keras_model = tf.keras.Model(
- inputs=model.inputs,
+ inputs=self.classification_backbone.inputs,
outputs=proposal_features)
for variable in keras_model.variables:
self._variable_dict[variable.name[:-2]] = variable
@@ -132,962 +134,26 @@ class FasterRCNNInceptionResnetV2KerasFeatureExtractor(
[batch_size * self.max_num_proposals, height, width, depth]
representing box classifier features for each proposal.
"""
+ if not self.classification_backbone:
+ self.classification_backbone = inception_resnet_v2.inception_resnet_v2(
+ self._train_batch_norm,
+ output_stride=self._first_stage_features_stride,
+ align_feature_maps=True,
+ weight_decay=self._weight_decay,
+ weights=None,
+ include_top=False)
with tf.name_scope(name):
with tf.name_scope('InceptionResnetV2'):
- model = inception_resnet_v2.inception_resnet_v2(
- self._train_batch_norm,
- output_stride=16,
- align_feature_maps=False,
- weight_decay=self._weight_decay,
- weights=None,
- include_top=False)
-
- proposal_feature_maps = model.get_layer(
+ proposal_feature_maps = self.classification_backbone.get_layer(
name='block17_20_ac').output
- proposal_classifier_features = model.get_layer(
+ proposal_classifier_features = self.classification_backbone.get_layer(
name='conv_7b_ac').output
keras_model = model_util.extract_submodel(
- model=model,
+ model=self.classification_backbone,
inputs=proposal_feature_maps,
outputs=proposal_classifier_features)
for variable in keras_model.variables:
self._variable_dict[variable.name[:-2]] = variable
return keras_model
- def restore_from_classification_checkpoint_fn(
- self,
- first_stage_feature_extractor_scope,
- second_stage_feature_extractor_scope):
- """Returns a map of variables to load from a foreign checkpoint.
-
- This uses a hard-coded conversion to load into Keras from a slim-trained
- inception_resnet_v2 checkpoint.
- Note that this overrides the default implementation in
- faster_rcnn_meta_arch.FasterRCNNKerasFeatureExtractor which does not work
- for InceptionResnetV2 checkpoints.
-
- Args:
- first_stage_feature_extractor_scope: A scope name for the first stage
- feature extractor.
- second_stage_feature_extractor_scope: A scope name for the second stage
- feature extractor.
-
- Returns:
- A dict mapping variable names (to load from a checkpoint) to variables in
- the model graph.
- """
-
- keras_to_slim_name_mapping = {
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d/kernel': 'InceptionResnetV2/Conv2d_1a_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm/beta': 'InceptionResnetV2/Conv2d_1a_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm/moving_mean': 'InceptionResnetV2/Conv2d_1a_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm/moving_variance': 'InceptionResnetV2/Conv2d_1a_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_1/kernel': 'InceptionResnetV2/Conv2d_2a_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_1/beta': 'InceptionResnetV2/Conv2d_2a_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_1/moving_mean': 'InceptionResnetV2/Conv2d_2a_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_1/moving_variance': 'InceptionResnetV2/Conv2d_2a_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_2/kernel': 'InceptionResnetV2/Conv2d_2b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_2/beta': 'InceptionResnetV2/Conv2d_2b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_2/moving_mean': 'InceptionResnetV2/Conv2d_2b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_2/moving_variance': 'InceptionResnetV2/Conv2d_2b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_3/kernel': 'InceptionResnetV2/Conv2d_3b_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_3/beta': 'InceptionResnetV2/Conv2d_3b_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_3/moving_mean': 'InceptionResnetV2/Conv2d_3b_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_3/moving_variance': 'InceptionResnetV2/Conv2d_3b_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_4/kernel': 'InceptionResnetV2/Conv2d_4a_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_4/beta': 'InceptionResnetV2/Conv2d_4a_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_4/moving_mean': 'InceptionResnetV2/Conv2d_4a_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_4/moving_variance': 'InceptionResnetV2/Conv2d_4a_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_5/kernel': 'InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_5/beta': 'InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_5/moving_mean': 'InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_5/moving_variance': 'InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_6/kernel': 'InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_6/beta': 'InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_6/moving_mean': 'InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_6/moving_variance': 'InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_7/kernel': 'InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_7/beta': 'InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_7/moving_mean': 'InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_7/moving_variance': 'InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_8/kernel': 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_8/beta': 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_8/moving_mean': 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_8/moving_variance': 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_9/kernel': 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_9/beta': 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_9/moving_mean': 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_9/moving_variance': 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_10/kernel': 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_10/beta': 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_10/moving_mean': 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_10/moving_variance': 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_11/kernel': 'InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_11/beta': 'InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_11/moving_mean': 'InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_11/moving_variance': 'InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_12/kernel': 'InceptionResnetV2/Repeat/block35_1/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_12/beta': 'InceptionResnetV2/Repeat/block35_1/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_12/moving_mean': 'InceptionResnetV2/Repeat/block35_1/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_12/moving_variance': 'InceptionResnetV2/Repeat/block35_1/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_13/kernel': 'InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_13/beta': 'InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_13/moving_mean': 'InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_13/moving_variance': 'InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_14/kernel': 'InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_14/beta': 'InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_14/moving_mean': 'InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_14/moving_variance': 'InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_15/kernel': 'InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_15/beta': 'InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_15/moving_mean': 'InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_15/moving_variance': 'InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_16/kernel': 'InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_16/beta': 'InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_16/moving_mean': 'InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_16/moving_variance': 'InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_17/kernel': 'InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0c_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_17/beta': 'InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_17/moving_mean': 'InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_17/moving_variance': 'InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_1_conv/kernel': 'InceptionResnetV2/Repeat/block35_1/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_1_conv/bias': 'InceptionResnetV2/Repeat/block35_1/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_18/kernel': 'InceptionResnetV2/Repeat/block35_2/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_18/beta': 'InceptionResnetV2/Repeat/block35_2/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_18/moving_mean': 'InceptionResnetV2/Repeat/block35_2/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_18/moving_variance': 'InceptionResnetV2/Repeat/block35_2/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_19/kernel': 'InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_19/beta': 'InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_19/moving_mean': 'InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_19/moving_variance': 'InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_20/kernel': 'InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_20/beta': 'InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_20/moving_mean': 'InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_20/moving_variance': 'InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_21/kernel': 'InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_21/beta': 'InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_21/moving_mean': 'InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_21/moving_variance': 'InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_22/kernel': 'InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_22/beta': 'InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_22/moving_mean': 'InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_22/moving_variance': 'InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_23/kernel': 'InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0c_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_23/beta': 'InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_23/moving_mean': 'InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_23/moving_variance': 'InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_2_conv/kernel': 'InceptionResnetV2/Repeat/block35_2/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_2_conv/bias': 'InceptionResnetV2/Repeat/block35_2/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_24/kernel': 'InceptionResnetV2/Repeat/block35_3/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_24/beta': 'InceptionResnetV2/Repeat/block35_3/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_24/moving_mean': 'InceptionResnetV2/Repeat/block35_3/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_24/moving_variance': 'InceptionResnetV2/Repeat/block35_3/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_25/kernel': 'InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_25/beta': 'InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_25/moving_mean': 'InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_25/moving_variance': 'InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_26/kernel': 'InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_26/beta': 'InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_26/moving_mean': 'InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_26/moving_variance': 'InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_27/kernel': 'InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_27/beta': 'InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_27/moving_mean': 'InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_27/moving_variance': 'InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_28/kernel': 'InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_28/beta': 'InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_28/moving_mean': 'InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_28/moving_variance': 'InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_29/kernel': 'InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0c_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_29/beta': 'InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_29/moving_mean': 'InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_29/moving_variance': 'InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_3_conv/kernel': 'InceptionResnetV2/Repeat/block35_3/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_3_conv/bias': 'InceptionResnetV2/Repeat/block35_3/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_30/kernel': 'InceptionResnetV2/Repeat/block35_4/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_30/beta': 'InceptionResnetV2/Repeat/block35_4/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_30/moving_mean': 'InceptionResnetV2/Repeat/block35_4/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_30/moving_variance': 'InceptionResnetV2/Repeat/block35_4/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_31/kernel': 'InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_31/beta': 'InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_31/moving_mean': 'InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_31/moving_variance': 'InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_32/kernel': 'InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_32/beta': 'InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_32/moving_mean': 'InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_32/moving_variance': 'InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_33/kernel': 'InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_33/beta': 'InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_33/moving_mean': 'InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_33/moving_variance': 'InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_34/kernel': 'InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_34/beta': 'InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_34/moving_mean': 'InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_34/moving_variance': 'InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_35/kernel': 'InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0c_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_35/beta': 'InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_35/moving_mean': 'InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_35/moving_variance': 'InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_4_conv/kernel': 'InceptionResnetV2/Repeat/block35_4/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_4_conv/bias': 'InceptionResnetV2/Repeat/block35_4/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_36/kernel': 'InceptionResnetV2/Repeat/block35_5/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_36/beta': 'InceptionResnetV2/Repeat/block35_5/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_36/moving_mean': 'InceptionResnetV2/Repeat/block35_5/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_36/moving_variance': 'InceptionResnetV2/Repeat/block35_5/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_37/kernel': 'InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_37/beta': 'InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_37/moving_mean': 'InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_37/moving_variance': 'InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_38/kernel': 'InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_38/beta': 'InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_38/moving_mean': 'InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_38/moving_variance': 'InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_39/kernel': 'InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_39/beta': 'InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_39/moving_mean': 'InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_39/moving_variance': 'InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_40/kernel': 'InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_40/beta': 'InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_40/moving_mean': 'InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_40/moving_variance': 'InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_41/kernel': 'InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0c_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_41/beta': 'InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_41/moving_mean': 'InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_41/moving_variance': 'InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_5_conv/kernel': 'InceptionResnetV2/Repeat/block35_5/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_5_conv/bias': 'InceptionResnetV2/Repeat/block35_5/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_42/kernel': 'InceptionResnetV2/Repeat/block35_6/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_42/beta': 'InceptionResnetV2/Repeat/block35_6/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_42/moving_mean': 'InceptionResnetV2/Repeat/block35_6/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_42/moving_variance': 'InceptionResnetV2/Repeat/block35_6/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_43/kernel': 'InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_43/beta': 'InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_43/moving_mean': 'InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_43/moving_variance': 'InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_44/kernel': 'InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_44/beta': 'InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_44/moving_mean': 'InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_44/moving_variance': 'InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_45/kernel': 'InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_45/beta': 'InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_45/moving_mean': 'InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_45/moving_variance': 'InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_46/kernel': 'InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_46/beta': 'InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_46/moving_mean': 'InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_46/moving_variance': 'InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_47/kernel': 'InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0c_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_47/beta': 'InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_47/moving_mean': 'InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_47/moving_variance': 'InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_6_conv/kernel': 'InceptionResnetV2/Repeat/block35_6/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_6_conv/bias': 'InceptionResnetV2/Repeat/block35_6/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_48/kernel': 'InceptionResnetV2/Repeat/block35_7/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_48/beta': 'InceptionResnetV2/Repeat/block35_7/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_48/moving_mean': 'InceptionResnetV2/Repeat/block35_7/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_48/moving_variance': 'InceptionResnetV2/Repeat/block35_7/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_49/kernel': 'InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_49/beta': 'InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_49/moving_mean': 'InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_49/moving_variance': 'InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_50/kernel': 'InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_50/beta': 'InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_50/moving_mean': 'InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_50/moving_variance': 'InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_51/kernel': 'InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_51/beta': 'InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_51/moving_mean': 'InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_51/moving_variance': 'InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_52/kernel': 'InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_52/beta': 'InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_52/moving_mean': 'InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_52/moving_variance': 'InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_53/kernel': 'InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0c_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_53/beta': 'InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_53/moving_mean': 'InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_53/moving_variance': 'InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_7_conv/kernel': 'InceptionResnetV2/Repeat/block35_7/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_7_conv/bias': 'InceptionResnetV2/Repeat/block35_7/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_54/kernel': 'InceptionResnetV2/Repeat/block35_8/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_54/beta': 'InceptionResnetV2/Repeat/block35_8/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_54/moving_mean': 'InceptionResnetV2/Repeat/block35_8/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_54/moving_variance': 'InceptionResnetV2/Repeat/block35_8/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_55/kernel': 'InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_55/beta': 'InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_55/moving_mean': 'InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_55/moving_variance': 'InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_56/kernel': 'InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_56/beta': 'InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_56/moving_mean': 'InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_56/moving_variance': 'InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_57/kernel': 'InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_57/beta': 'InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_57/moving_mean': 'InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_57/moving_variance': 'InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_58/kernel': 'InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_58/beta': 'InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_58/moving_mean': 'InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_58/moving_variance': 'InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_59/kernel': 'InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0c_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_59/beta': 'InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_59/moving_mean': 'InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_59/moving_variance': 'InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_8_conv/kernel': 'InceptionResnetV2/Repeat/block35_8/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_8_conv/bias': 'InceptionResnetV2/Repeat/block35_8/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_60/kernel': 'InceptionResnetV2/Repeat/block35_9/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_60/beta': 'InceptionResnetV2/Repeat/block35_9/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_60/moving_mean': 'InceptionResnetV2/Repeat/block35_9/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_60/moving_variance': 'InceptionResnetV2/Repeat/block35_9/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_61/kernel': 'InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_61/beta': 'InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_61/moving_mean': 'InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_61/moving_variance': 'InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_62/kernel': 'InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_62/beta': 'InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_62/moving_mean': 'InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_62/moving_variance': 'InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_63/kernel': 'InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_63/beta': 'InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_63/moving_mean': 'InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_63/moving_variance': 'InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_64/kernel': 'InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_64/beta': 'InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_64/moving_mean': 'InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_64/moving_variance': 'InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_65/kernel': 'InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0c_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_65/beta': 'InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_65/moving_mean': 'InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_65/moving_variance': 'InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_9_conv/kernel': 'InceptionResnetV2/Repeat/block35_9/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_9_conv/bias': 'InceptionResnetV2/Repeat/block35_9/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_66/kernel': 'InceptionResnetV2/Repeat/block35_10/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_66/beta': 'InceptionResnetV2/Repeat/block35_10/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_66/moving_mean': 'InceptionResnetV2/Repeat/block35_10/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_66/moving_variance': 'InceptionResnetV2/Repeat/block35_10/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_67/kernel': 'InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_67/beta': 'InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_67/moving_mean': 'InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_67/moving_variance': 'InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_68/kernel': 'InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_68/beta': 'InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_68/moving_mean': 'InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_68/moving_variance': 'InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_69/kernel': 'InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_69/beta': 'InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_69/moving_mean': 'InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_69/moving_variance': 'InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_70/kernel': 'InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_70/beta': 'InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_70/moving_mean': 'InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_70/moving_variance': 'InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_71/kernel': 'InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0c_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_71/beta': 'InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_71/moving_mean': 'InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_71/moving_variance': 'InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_10_conv/kernel': 'InceptionResnetV2/Repeat/block35_10/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block35_10_conv/bias': 'InceptionResnetV2/Repeat/block35_10/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_72/kernel': 'InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_72/beta': 'InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_72/moving_mean': 'InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_72/moving_variance': 'InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_73/kernel': 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_73/beta': 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_73/moving_mean': 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_73/moving_variance': 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_74/kernel': 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_74/beta': 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_74/moving_mean': 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_74/moving_variance': 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_75/kernel': 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_75/beta': 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_75/moving_mean': 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_75/moving_variance': 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_76/kernel': 'InceptionResnetV2/Repeat_1/block17_1/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_76/beta': 'InceptionResnetV2/Repeat_1/block17_1/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_76/moving_mean': 'InceptionResnetV2/Repeat_1/block17_1/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_76/moving_variance': 'InceptionResnetV2/Repeat_1/block17_1/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_77/kernel': 'InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_77/beta': 'InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_77/moving_mean': 'InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_77/moving_variance': 'InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_78/kernel': 'InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_78/beta': 'InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_78/moving_mean': 'InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_78/moving_variance': 'InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_79/kernel': 'InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_79/beta': 'InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_79/moving_mean': 'InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_79/moving_variance': 'InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_1_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_1/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_1_conv/bias': 'InceptionResnetV2/Repeat_1/block17_1/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_80/kernel': 'InceptionResnetV2/Repeat_1/block17_2/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_80/beta': 'InceptionResnetV2/Repeat_1/block17_2/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_80/moving_mean': 'InceptionResnetV2/Repeat_1/block17_2/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_80/moving_variance': 'InceptionResnetV2/Repeat_1/block17_2/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_81/kernel': 'InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_81/beta': 'InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_81/moving_mean': 'InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_81/moving_variance': 'InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_82/kernel': 'InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_82/beta': 'InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_82/moving_mean': 'InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_82/moving_variance': 'InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_83/kernel': 'InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_83/beta': 'InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_83/moving_mean': 'InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_83/moving_variance': 'InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_2_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_2/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_2_conv/bias': 'InceptionResnetV2/Repeat_1/block17_2/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_84/kernel': 'InceptionResnetV2/Repeat_1/block17_3/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_84/beta': 'InceptionResnetV2/Repeat_1/block17_3/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_84/moving_mean': 'InceptionResnetV2/Repeat_1/block17_3/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_84/moving_variance': 'InceptionResnetV2/Repeat_1/block17_3/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_85/kernel': 'InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_85/beta': 'InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_85/moving_mean': 'InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_85/moving_variance': 'InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_86/kernel': 'InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_86/beta': 'InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_86/moving_mean': 'InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_86/moving_variance': 'InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_87/kernel': 'InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_87/beta': 'InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_87/moving_mean': 'InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_87/moving_variance': 'InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_3_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_3/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_3_conv/bias': 'InceptionResnetV2/Repeat_1/block17_3/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_88/kernel': 'InceptionResnetV2/Repeat_1/block17_4/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_88/beta': 'InceptionResnetV2/Repeat_1/block17_4/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_88/moving_mean': 'InceptionResnetV2/Repeat_1/block17_4/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_88/moving_variance': 'InceptionResnetV2/Repeat_1/block17_4/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_89/kernel': 'InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_89/beta': 'InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_89/moving_mean': 'InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_89/moving_variance': 'InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_90/kernel': 'InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_90/beta': 'InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_90/moving_mean': 'InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_90/moving_variance': 'InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_91/kernel': 'InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_91/beta': 'InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_91/moving_mean': 'InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_91/moving_variance': 'InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_4_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_4/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_4_conv/bias': 'InceptionResnetV2/Repeat_1/block17_4/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_92/kernel': 'InceptionResnetV2/Repeat_1/block17_5/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_92/beta': 'InceptionResnetV2/Repeat_1/block17_5/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_92/moving_mean': 'InceptionResnetV2/Repeat_1/block17_5/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_92/moving_variance': 'InceptionResnetV2/Repeat_1/block17_5/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_93/kernel': 'InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_93/beta': 'InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_93/moving_mean': 'InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_93/moving_variance': 'InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_94/kernel': 'InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_94/beta': 'InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_94/moving_mean': 'InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_94/moving_variance': 'InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_95/kernel': 'InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_95/beta': 'InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_95/moving_mean': 'InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_95/moving_variance': 'InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_5_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_5/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_5_conv/bias': 'InceptionResnetV2/Repeat_1/block17_5/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_96/kernel': 'InceptionResnetV2/Repeat_1/block17_6/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_96/beta': 'InceptionResnetV2/Repeat_1/block17_6/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_96/moving_mean': 'InceptionResnetV2/Repeat_1/block17_6/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_96/moving_variance': 'InceptionResnetV2/Repeat_1/block17_6/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_97/kernel': 'InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_97/beta': 'InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_97/moving_mean': 'InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_97/moving_variance': 'InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_98/kernel': 'InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_98/beta': 'InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_98/moving_mean': 'InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_98/moving_variance': 'InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_99/kernel': 'InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_99/beta': 'InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_99/moving_mean': 'InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_99/moving_variance': 'InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_6_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_6/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_6_conv/bias': 'InceptionResnetV2/Repeat_1/block17_6/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_100/kernel': 'InceptionResnetV2/Repeat_1/block17_7/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_100/beta': 'InceptionResnetV2/Repeat_1/block17_7/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_100/moving_mean': 'InceptionResnetV2/Repeat_1/block17_7/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_100/moving_variance': 'InceptionResnetV2/Repeat_1/block17_7/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_101/kernel': 'InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_101/beta': 'InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_101/moving_mean': 'InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_101/moving_variance': 'InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_102/kernel': 'InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_102/beta': 'InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_102/moving_mean': 'InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_102/moving_variance': 'InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_103/kernel': 'InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_103/beta': 'InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_103/moving_mean': 'InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_103/moving_variance': 'InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_7_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_7/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_7_conv/bias': 'InceptionResnetV2/Repeat_1/block17_7/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_104/kernel': 'InceptionResnetV2/Repeat_1/block17_8/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_104/beta': 'InceptionResnetV2/Repeat_1/block17_8/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_104/moving_mean': 'InceptionResnetV2/Repeat_1/block17_8/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_104/moving_variance': 'InceptionResnetV2/Repeat_1/block17_8/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_105/kernel': 'InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_105/beta': 'InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_105/moving_mean': 'InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_105/moving_variance': 'InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_106/kernel': 'InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_106/beta': 'InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_106/moving_mean': 'InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_106/moving_variance': 'InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_107/kernel': 'InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_107/beta': 'InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_107/moving_mean': 'InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_107/moving_variance': 'InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_8_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_8/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_8_conv/bias': 'InceptionResnetV2/Repeat_1/block17_8/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_108/kernel': 'InceptionResnetV2/Repeat_1/block17_9/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_108/beta': 'InceptionResnetV2/Repeat_1/block17_9/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_108/moving_mean': 'InceptionResnetV2/Repeat_1/block17_9/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_108/moving_variance': 'InceptionResnetV2/Repeat_1/block17_9/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_109/kernel': 'InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_109/beta': 'InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_109/moving_mean': 'InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_109/moving_variance': 'InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_110/kernel': 'InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_110/beta': 'InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_110/moving_mean': 'InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_110/moving_variance': 'InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_111/kernel': 'InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_111/beta': 'InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_111/moving_mean': 'InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_111/moving_variance': 'InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_9_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_9/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_9_conv/bias': 'InceptionResnetV2/Repeat_1/block17_9/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_112/kernel': 'InceptionResnetV2/Repeat_1/block17_10/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_112/beta': 'InceptionResnetV2/Repeat_1/block17_10/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_112/moving_mean': 'InceptionResnetV2/Repeat_1/block17_10/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_112/moving_variance': 'InceptionResnetV2/Repeat_1/block17_10/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_113/kernel': 'InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_113/beta': 'InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_113/moving_mean': 'InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_113/moving_variance': 'InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_114/kernel': 'InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_114/beta': 'InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_114/moving_mean': 'InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_114/moving_variance': 'InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_115/kernel': 'InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_115/beta': 'InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_115/moving_mean': 'InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_115/moving_variance': 'InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_10_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_10/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_10_conv/bias': 'InceptionResnetV2/Repeat_1/block17_10/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_116/kernel': 'InceptionResnetV2/Repeat_1/block17_11/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_116/beta': 'InceptionResnetV2/Repeat_1/block17_11/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_116/moving_mean': 'InceptionResnetV2/Repeat_1/block17_11/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_116/moving_variance': 'InceptionResnetV2/Repeat_1/block17_11/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_117/kernel': 'InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_117/beta': 'InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_117/moving_mean': 'InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_117/moving_variance': 'InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_118/kernel': 'InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_118/beta': 'InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_118/moving_mean': 'InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_118/moving_variance': 'InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_119/kernel': 'InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_119/beta': 'InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_119/moving_mean': 'InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_119/moving_variance': 'InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_11_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_11/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_11_conv/bias': 'InceptionResnetV2/Repeat_1/block17_11/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_120/kernel': 'InceptionResnetV2/Repeat_1/block17_12/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_120/beta': 'InceptionResnetV2/Repeat_1/block17_12/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_120/moving_mean': 'InceptionResnetV2/Repeat_1/block17_12/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_120/moving_variance': 'InceptionResnetV2/Repeat_1/block17_12/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_121/kernel': 'InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_121/beta': 'InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_121/moving_mean': 'InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_121/moving_variance': 'InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_122/kernel': 'InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_122/beta': 'InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_122/moving_mean': 'InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_122/moving_variance': 'InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_123/kernel': 'InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_123/beta': 'InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_123/moving_mean': 'InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_123/moving_variance': 'InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_12_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_12/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_12_conv/bias': 'InceptionResnetV2/Repeat_1/block17_12/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_124/kernel': 'InceptionResnetV2/Repeat_1/block17_13/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_124/beta': 'InceptionResnetV2/Repeat_1/block17_13/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_124/moving_mean': 'InceptionResnetV2/Repeat_1/block17_13/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_124/moving_variance': 'InceptionResnetV2/Repeat_1/block17_13/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_125/kernel': 'InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_125/beta': 'InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_125/moving_mean': 'InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_125/moving_variance': 'InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_126/kernel': 'InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_126/beta': 'InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_126/moving_mean': 'InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_126/moving_variance': 'InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_127/kernel': 'InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_127/beta': 'InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_127/moving_mean': 'InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_127/moving_variance': 'InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_13_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_13/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_13_conv/bias': 'InceptionResnetV2/Repeat_1/block17_13/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_128/kernel': 'InceptionResnetV2/Repeat_1/block17_14/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_128/beta': 'InceptionResnetV2/Repeat_1/block17_14/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_128/moving_mean': 'InceptionResnetV2/Repeat_1/block17_14/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_128/moving_variance': 'InceptionResnetV2/Repeat_1/block17_14/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_129/kernel': 'InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_129/beta': 'InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_129/moving_mean': 'InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_129/moving_variance': 'InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_130/kernel': 'InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_130/beta': 'InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_130/moving_mean': 'InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_130/moving_variance': 'InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_131/kernel': 'InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_131/beta': 'InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_131/moving_mean': 'InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_131/moving_variance': 'InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_14_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_14/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_14_conv/bias': 'InceptionResnetV2/Repeat_1/block17_14/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_132/kernel': 'InceptionResnetV2/Repeat_1/block17_15/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_132/beta': 'InceptionResnetV2/Repeat_1/block17_15/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_132/moving_mean': 'InceptionResnetV2/Repeat_1/block17_15/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_132/moving_variance': 'InceptionResnetV2/Repeat_1/block17_15/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_133/kernel': 'InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_133/beta': 'InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_133/moving_mean': 'InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_133/moving_variance': 'InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_134/kernel': 'InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_134/beta': 'InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_134/moving_mean': 'InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_134/moving_variance': 'InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_135/kernel': 'InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_135/beta': 'InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_135/moving_mean': 'InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_135/moving_variance': 'InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_15_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_15/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_15_conv/bias': 'InceptionResnetV2/Repeat_1/block17_15/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_136/kernel': 'InceptionResnetV2/Repeat_1/block17_16/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_136/beta': 'InceptionResnetV2/Repeat_1/block17_16/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_136/moving_mean': 'InceptionResnetV2/Repeat_1/block17_16/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_136/moving_variance': 'InceptionResnetV2/Repeat_1/block17_16/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_137/kernel': 'InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_137/beta': 'InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_137/moving_mean': 'InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_137/moving_variance': 'InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_138/kernel': 'InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_138/beta': 'InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_138/moving_mean': 'InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_138/moving_variance': 'InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_139/kernel': 'InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_139/beta': 'InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_139/moving_mean': 'InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_139/moving_variance': 'InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_16_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_16/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_16_conv/bias': 'InceptionResnetV2/Repeat_1/block17_16/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_140/kernel': 'InceptionResnetV2/Repeat_1/block17_17/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_140/beta': 'InceptionResnetV2/Repeat_1/block17_17/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_140/moving_mean': 'InceptionResnetV2/Repeat_1/block17_17/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_140/moving_variance': 'InceptionResnetV2/Repeat_1/block17_17/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_141/kernel': 'InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_141/beta': 'InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_141/moving_mean': 'InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_141/moving_variance': 'InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_142/kernel': 'InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_142/beta': 'InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_142/moving_mean': 'InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_142/moving_variance': 'InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_143/kernel': 'InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_143/beta': 'InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_143/moving_mean': 'InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_143/moving_variance': 'InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_17_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_17/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_17_conv/bias': 'InceptionResnetV2/Repeat_1/block17_17/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_144/kernel': 'InceptionResnetV2/Repeat_1/block17_18/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_144/beta': 'InceptionResnetV2/Repeat_1/block17_18/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_144/moving_mean': 'InceptionResnetV2/Repeat_1/block17_18/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_144/moving_variance': 'InceptionResnetV2/Repeat_1/block17_18/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_145/kernel': 'InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_145/beta': 'InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_145/moving_mean': 'InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_145/moving_variance': 'InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_146/kernel': 'InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_146/beta': 'InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_146/moving_mean': 'InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_146/moving_variance': 'InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_147/kernel': 'InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_147/beta': 'InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_147/moving_mean': 'InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_147/moving_variance': 'InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_18_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_18/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_18_conv/bias': 'InceptionResnetV2/Repeat_1/block17_18/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_148/kernel': 'InceptionResnetV2/Repeat_1/block17_19/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_148/beta': 'InceptionResnetV2/Repeat_1/block17_19/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_148/moving_mean': 'InceptionResnetV2/Repeat_1/block17_19/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_148/moving_variance': 'InceptionResnetV2/Repeat_1/block17_19/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_149/kernel': 'InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_149/beta': 'InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_149/moving_mean': 'InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_149/moving_variance': 'InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_150/kernel': 'InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_150/beta': 'InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_150/moving_mean': 'InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_150/moving_variance': 'InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_151/kernel': 'InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_151/beta': 'InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_151/moving_mean': 'InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_151/moving_variance': 'InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_19_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_19/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_19_conv/bias': 'InceptionResnetV2/Repeat_1/block17_19/Conv2d_1x1/biases',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_152/kernel': 'InceptionResnetV2/Repeat_1/block17_20/Branch_0/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_152/beta': 'InceptionResnetV2/Repeat_1/block17_20/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_152/moving_mean': 'InceptionResnetV2/Repeat_1/block17_20/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_152/moving_variance': 'InceptionResnetV2/Repeat_1/block17_20/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_153/kernel': 'InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0a_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_153/beta': 'InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_153/moving_mean': 'InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_153/moving_variance': 'InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_154/kernel': 'InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0b_1x7/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_154/beta': 'InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_154/moving_mean': 'InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_154/moving_variance': 'InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/conv2d_155/kernel': 'InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0c_7x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_155/beta': 'InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_155/moving_mean': 'InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',
- 'FirstStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_155/moving_variance': 'InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_20_conv/kernel': 'InceptionResnetV2/Repeat_1/block17_20/Conv2d_1x1/weights',
- 'FirstStageFeatureExtractor/InceptionResnetV2/block17_20_conv/bias': 'InceptionResnetV2/Repeat_1/block17_20/Conv2d_1x1/biases',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_359/kernel': 'InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_359/beta': 'InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_359/moving_mean': 'InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_359/moving_variance': 'InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_360/kernel': 'InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_360/beta': 'InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_360/moving_mean': 'InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_360/moving_variance': 'InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_361/kernel': 'InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_361/beta': 'InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_361/moving_mean': 'InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_361/moving_variance': 'InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_362/kernel': 'InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_362/beta': 'InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_362/moving_mean': 'InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_362/moving_variance': 'InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_363/kernel': 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_363/beta': 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_363/moving_mean': 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_363/moving_variance': 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_364/kernel': 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_364/beta': 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_364/moving_mean': 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_364/moving_variance': 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_365/kernel': 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_365/beta': 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_365/moving_mean': 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_365/moving_variance': 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_366/kernel': 'InceptionResnetV2/Repeat_2/block8_1/Branch_0/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_366/beta': 'InceptionResnetV2/Repeat_2/block8_1/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_366/moving_mean': 'InceptionResnetV2/Repeat_2/block8_1/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_366/moving_variance': 'InceptionResnetV2/Repeat_2/block8_1/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_367/kernel': 'InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0a_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_367/beta': 'InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_367/moving_mean': 'InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_367/moving_variance': 'InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_368/kernel': 'InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0b_1x3/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_368/beta': 'InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_368/moving_mean': 'InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_368/moving_variance': 'InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_369/kernel': 'InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0c_3x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_369/beta': 'InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_369/moving_mean': 'InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_369/moving_variance': 'InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_1_conv/kernel': 'InceptionResnetV2/Repeat_2/block8_1/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_1_conv/bias': 'InceptionResnetV2/Repeat_2/block8_1/Conv2d_1x1/biases',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_370/kernel': 'InceptionResnetV2/Repeat_2/block8_2/Branch_0/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_370/beta': 'InceptionResnetV2/Repeat_2/block8_2/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_370/moving_mean': 'InceptionResnetV2/Repeat_2/block8_2/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_370/moving_variance': 'InceptionResnetV2/Repeat_2/block8_2/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_371/kernel': 'InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0a_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_371/beta': 'InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_371/moving_mean': 'InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_371/moving_variance': 'InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_372/kernel': 'InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0b_1x3/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_372/beta': 'InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_372/moving_mean': 'InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_372/moving_variance': 'InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_373/kernel': 'InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0c_3x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_373/beta': 'InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_373/moving_mean': 'InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_373/moving_variance': 'InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_2_conv/kernel': 'InceptionResnetV2/Repeat_2/block8_2/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_2_conv/bias': 'InceptionResnetV2/Repeat_2/block8_2/Conv2d_1x1/biases',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_374/kernel': 'InceptionResnetV2/Repeat_2/block8_3/Branch_0/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_374/beta': 'InceptionResnetV2/Repeat_2/block8_3/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_374/moving_mean': 'InceptionResnetV2/Repeat_2/block8_3/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_374/moving_variance': 'InceptionResnetV2/Repeat_2/block8_3/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_375/kernel': 'InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0a_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_375/beta': 'InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_375/moving_mean': 'InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_375/moving_variance': 'InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_376/kernel': 'InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0b_1x3/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_376/beta': 'InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_376/moving_mean': 'InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_376/moving_variance': 'InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_377/kernel': 'InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0c_3x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_377/beta': 'InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_377/moving_mean': 'InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_377/moving_variance': 'InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_3_conv/kernel': 'InceptionResnetV2/Repeat_2/block8_3/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_3_conv/bias': 'InceptionResnetV2/Repeat_2/block8_3/Conv2d_1x1/biases',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_378/kernel': 'InceptionResnetV2/Repeat_2/block8_4/Branch_0/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_378/beta': 'InceptionResnetV2/Repeat_2/block8_4/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_378/moving_mean': 'InceptionResnetV2/Repeat_2/block8_4/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_378/moving_variance': 'InceptionResnetV2/Repeat_2/block8_4/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_379/kernel': 'InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0a_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_379/beta': 'InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_379/moving_mean': 'InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_379/moving_variance': 'InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_380/kernel': 'InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0b_1x3/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_380/beta': 'InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_380/moving_mean': 'InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_380/moving_variance': 'InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_381/kernel': 'InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0c_3x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_381/beta': 'InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_381/moving_mean': 'InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_381/moving_variance': 'InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_4_conv/kernel': 'InceptionResnetV2/Repeat_2/block8_4/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_4_conv/bias': 'InceptionResnetV2/Repeat_2/block8_4/Conv2d_1x1/biases',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_382/kernel': 'InceptionResnetV2/Repeat_2/block8_5/Branch_0/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_382/beta': 'InceptionResnetV2/Repeat_2/block8_5/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_382/moving_mean': 'InceptionResnetV2/Repeat_2/block8_5/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_382/moving_variance': 'InceptionResnetV2/Repeat_2/block8_5/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_383/kernel': 'InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0a_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_383/beta': 'InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_383/moving_mean': 'InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_383/moving_variance': 'InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_384/kernel': 'InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0b_1x3/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_384/beta': 'InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_384/moving_mean': 'InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_384/moving_variance': 'InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_385/kernel': 'InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0c_3x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_385/beta': 'InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_385/moving_mean': 'InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_385/moving_variance': 'InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_5_conv/kernel': 'InceptionResnetV2/Repeat_2/block8_5/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_5_conv/bias': 'InceptionResnetV2/Repeat_2/block8_5/Conv2d_1x1/biases',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_386/kernel': 'InceptionResnetV2/Repeat_2/block8_6/Branch_0/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_386/beta': 'InceptionResnetV2/Repeat_2/block8_6/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_386/moving_mean': 'InceptionResnetV2/Repeat_2/block8_6/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_386/moving_variance': 'InceptionResnetV2/Repeat_2/block8_6/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_387/kernel': 'InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0a_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_387/beta': 'InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_387/moving_mean': 'InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_387/moving_variance': 'InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_388/kernel': 'InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0b_1x3/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_388/beta': 'InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_388/moving_mean': 'InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_388/moving_variance': 'InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_389/kernel': 'InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0c_3x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_389/beta': 'InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_389/moving_mean': 'InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_389/moving_variance': 'InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_6_conv/kernel': 'InceptionResnetV2/Repeat_2/block8_6/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_6_conv/bias': 'InceptionResnetV2/Repeat_2/block8_6/Conv2d_1x1/biases',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_390/kernel': 'InceptionResnetV2/Repeat_2/block8_7/Branch_0/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_390/beta': 'InceptionResnetV2/Repeat_2/block8_7/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_390/moving_mean': 'InceptionResnetV2/Repeat_2/block8_7/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_390/moving_variance': 'InceptionResnetV2/Repeat_2/block8_7/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_391/kernel': 'InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0a_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_391/beta': 'InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_391/moving_mean': 'InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_391/moving_variance': 'InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_392/kernel': 'InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0b_1x3/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_392/beta': 'InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_392/moving_mean': 'InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_392/moving_variance': 'InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_393/kernel': 'InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0c_3x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_393/beta': 'InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_393/moving_mean': 'InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_393/moving_variance': 'InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_7_conv/kernel': 'InceptionResnetV2/Repeat_2/block8_7/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_7_conv/bias': 'InceptionResnetV2/Repeat_2/block8_7/Conv2d_1x1/biases',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_394/kernel': 'InceptionResnetV2/Repeat_2/block8_8/Branch_0/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_394/beta': 'InceptionResnetV2/Repeat_2/block8_8/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_394/moving_mean': 'InceptionResnetV2/Repeat_2/block8_8/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_394/moving_variance': 'InceptionResnetV2/Repeat_2/block8_8/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_395/kernel': 'InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0a_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_395/beta': 'InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_395/moving_mean': 'InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_395/moving_variance': 'InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_396/kernel': 'InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0b_1x3/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_396/beta': 'InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_396/moving_mean': 'InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_396/moving_variance': 'InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_397/kernel': 'InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0c_3x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_397/beta': 'InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_397/moving_mean': 'InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_397/moving_variance': 'InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_8_conv/kernel': 'InceptionResnetV2/Repeat_2/block8_8/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_8_conv/bias': 'InceptionResnetV2/Repeat_2/block8_8/Conv2d_1x1/biases',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_398/kernel': 'InceptionResnetV2/Repeat_2/block8_9/Branch_0/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_398/beta': 'InceptionResnetV2/Repeat_2/block8_9/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_398/moving_mean': 'InceptionResnetV2/Repeat_2/block8_9/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_398/moving_variance': 'InceptionResnetV2/Repeat_2/block8_9/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_399/kernel': 'InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0a_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_399/beta': 'InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_399/moving_mean': 'InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_399/moving_variance': 'InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_400/kernel': 'InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0b_1x3/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_400/beta': 'InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_400/moving_mean': 'InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_400/moving_variance': 'InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_401/kernel': 'InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0c_3x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_401/beta': 'InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_401/moving_mean': 'InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_401/moving_variance': 'InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_9_conv/kernel': 'InceptionResnetV2/Repeat_2/block8_9/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_9_conv/bias': 'InceptionResnetV2/Repeat_2/block8_9/Conv2d_1x1/biases',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_402/kernel': 'InceptionResnetV2/Block8/Branch_0/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_402/beta': 'InceptionResnetV2/Block8/Branch_0/Conv2d_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_402/moving_mean': 'InceptionResnetV2/Block8/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_402/moving_variance': 'InceptionResnetV2/Block8/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_403/kernel': 'InceptionResnetV2/Block8/Branch_1/Conv2d_0a_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_403/beta': 'InceptionResnetV2/Block8/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_403/moving_mean': 'InceptionResnetV2/Block8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_403/moving_variance': 'InceptionResnetV2/Block8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_404/kernel': 'InceptionResnetV2/Block8/Branch_1/Conv2d_0b_1x3/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_404/beta': 'InceptionResnetV2/Block8/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_404/moving_mean': 'InceptionResnetV2/Block8/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_404/moving_variance': 'InceptionResnetV2/Block8/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv2d_405/kernel': 'InceptionResnetV2/Block8/Branch_1/Conv2d_0c_3x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_405/beta': 'InceptionResnetV2/Block8/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_405/moving_mean': 'InceptionResnetV2/Block8/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/freezable_batch_norm_405/moving_variance': 'InceptionResnetV2/Block8/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_10_conv/kernel': 'InceptionResnetV2/Block8/Conv2d_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/block8_10_conv/bias': 'InceptionResnetV2/Block8/Conv2d_1x1/biases',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv_7b/kernel': 'InceptionResnetV2/Conv2d_7b_1x1/weights',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv_7b_bn/beta': 'InceptionResnetV2/Conv2d_7b_1x1/BatchNorm/beta',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv_7b_bn/moving_mean': 'InceptionResnetV2/Conv2d_7b_1x1/BatchNorm/moving_mean',
- 'SecondStageFeatureExtractor/InceptionResnetV2/conv_7b_bn/moving_variance': 'InceptionResnetV2/Conv2d_7b_1x1/BatchNorm/moving_variance',
- }
-
- variables_to_restore = {}
- if tf.executing_eagerly():
- for key in self._variable_dict:
- # variable.name includes ":0" at the end, but the names in the
- # checkpoint do not have the suffix ":0". So, we strip it here.
- var_name = keras_to_slim_name_mapping.get(key)
- if var_name:
- variables_to_restore[var_name] = self._variable_dict[key]
- else:
- for variable in variables_helper.get_global_variables_safely():
- var_name = keras_to_slim_name_mapping.get(variable.op.name)
- if var_name:
- variables_to_restore[var_name] = variable
- return variables_to_restore
diff --git a/research/object_detection/models/faster_rcnn_inception_resnet_v2_keras_feature_extractor_test.py b/research/object_detection/models/faster_rcnn_inception_resnet_v2_keras_feature_extractor_tf2_test.py
similarity index 67%
rename from research/object_detection/models/faster_rcnn_inception_resnet_v2_keras_feature_extractor_test.py
rename to research/object_detection/models/faster_rcnn_inception_resnet_v2_keras_feature_extractor_tf2_test.py
index c8227603ac8737a77a9c5857eb6023c97997757c..20bb50ef836aaf71448f9711f430b532d5a01b5b 100644
--- a/research/object_detection/models/faster_rcnn_inception_resnet_v2_keras_feature_extractor_test.py
+++ b/research/object_detection/models/faster_rcnn_inception_resnet_v2_keras_feature_extractor_tf2_test.py
@@ -14,12 +14,14 @@
# ==============================================================================
"""Tests for models.faster_rcnn_inception_resnet_v2_keras_feature_extractor."""
-
+import unittest
import tensorflow.compat.v1 as tf
from object_detection.models import faster_rcnn_inception_resnet_v2_keras_feature_extractor as frcnn_inc_res
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class FasterRcnnInceptionResnetV2KerasFeatureExtractorTest(tf.test.TestCase):
def _build_feature_extractor(self, first_stage_features_stride):
@@ -38,11 +40,7 @@ class FasterRcnnInceptionResnetV2KerasFeatureExtractorTest(tf.test.TestCase):
name='TestScope')(preprocessed_inputs)
features_shape = tf.shape(rpn_feature_map)
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [1, 19, 19, 1088])
+ self.assertAllEqual(features_shape.numpy(), [1, 19, 19, 1088])
def test_extract_proposal_features_stride_eight(self):
feature_extractor = self._build_feature_extractor(
@@ -53,11 +51,7 @@ class FasterRcnnInceptionResnetV2KerasFeatureExtractorTest(tf.test.TestCase):
name='TestScope')(preprocessed_inputs)
features_shape = tf.shape(rpn_feature_map)
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [1, 28, 28, 1088])
+ self.assertAllEqual(features_shape.numpy(), [1, 28, 28, 1088])
def test_extract_proposal_features_half_size_input(self):
feature_extractor = self._build_feature_extractor(
@@ -67,25 +61,7 @@ class FasterRcnnInceptionResnetV2KerasFeatureExtractorTest(tf.test.TestCase):
rpn_feature_map = feature_extractor.get_proposal_feature_extractor_model(
name='TestScope')(preprocessed_inputs)
features_shape = tf.shape(rpn_feature_map)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [1, 7, 7, 1088])
-
- def test_extract_proposal_features_dies_on_invalid_stride(self):
- with self.assertRaises(ValueError):
- self._build_feature_extractor(first_stage_features_stride=99)
-
- def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- preprocessed_inputs = tf.random_uniform(
- [224, 224, 3], maxval=255, dtype=tf.float32)
- with self.assertRaises(ValueError):
- feature_extractor.get_proposal_feature_extractor_model(
- name='TestScope')(preprocessed_inputs)
+ self.assertAllEqual(features_shape.numpy(), [1, 7, 7, 1088])
def test_extract_box_classifier_features_returns_expected_size(self):
feature_extractor = self._build_feature_extractor(
@@ -97,12 +73,7 @@ class FasterRcnnInceptionResnetV2KerasFeatureExtractorTest(tf.test.TestCase):
proposal_classifier_features = (
model(proposal_feature_maps))
features_shape = tf.shape(proposal_classifier_features)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [2, 8, 8, 1536])
+ self.assertAllEqual(features_shape.numpy(), [2, 9, 9, 1536])
if __name__ == '__main__':
diff --git a/research/object_detection/models/faster_rcnn_inception_v2_feature_extractor_test.py b/research/object_detection/models/faster_rcnn_inception_v2_feature_extractor_tf1_test.py
similarity index 97%
rename from research/object_detection/models/faster_rcnn_inception_v2_feature_extractor_test.py
rename to research/object_detection/models/faster_rcnn_inception_v2_feature_extractor_tf1_test.py
index 600c699c8fe196a30256590ddf68281b9846fc2d..f5d01145f291f7b795a917e5a96632d52b42bac5 100644
--- a/research/object_detection/models/faster_rcnn_inception_v2_feature_extractor_test.py
+++ b/research/object_detection/models/faster_rcnn_inception_v2_feature_extractor_tf1_test.py
@@ -14,13 +14,15 @@
# ==============================================================================
"""Tests for faster_rcnn_inception_v2_feature_extractor."""
-
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import faster_rcnn_inception_v2_feature_extractor as faster_rcnn_inception_v2
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class FasterRcnnInceptionV2FeatureExtractorTest(tf.test.TestCase):
def _build_feature_extractor(self, first_stage_features_stride):
diff --git a/research/object_detection/models/faster_rcnn_mobilenet_v1_feature_extractor_test.py b/research/object_detection/models/faster_rcnn_mobilenet_v1_feature_extractor_tf1_test.py
similarity index 97%
rename from research/object_detection/models/faster_rcnn_mobilenet_v1_feature_extractor_test.py
rename to research/object_detection/models/faster_rcnn_mobilenet_v1_feature_extractor_tf1_test.py
index 39d6d234d1d7fb902ebdc92e457a2ceeca1bdd3f..65a4958e4c20964b2857f95f7bc2b83d05d3cc02 100644
--- a/research/object_detection/models/faster_rcnn_mobilenet_v1_feature_extractor_test.py
+++ b/research/object_detection/models/faster_rcnn_mobilenet_v1_feature_extractor_tf1_test.py
@@ -14,13 +14,15 @@
# ==============================================================================
"""Tests for faster_rcnn_mobilenet_v1_feature_extractor."""
-
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import faster_rcnn_mobilenet_v1_feature_extractor as faster_rcnn_mobilenet_v1
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class FasterRcnnMobilenetV1FeatureExtractorTest(tf.test.TestCase):
def _build_feature_extractor(self, first_stage_features_stride):
diff --git a/research/object_detection/models/faster_rcnn_nas_feature_extractor.py b/research/object_detection/models/faster_rcnn_nas_feature_extractor.py
index b1f5e1e6ec533fc698bef7bc23c54a2788aec949..9fe17cbea856dd1ed8ca0bf1a8c25327714c5b6d 100644
--- a/research/object_detection/models/faster_rcnn_nas_feature_extractor.py
+++ b/research/object_detection/models/faster_rcnn_nas_feature_extractor.py
@@ -31,8 +31,14 @@ import tf_slim as slim
from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.utils import variables_helper
-from nets.nasnet import nasnet
-from nets.nasnet import nasnet_utils
+
+# pylint: disable=g-import-not-at-top
+try:
+ from nets.nasnet import nasnet
+ from nets.nasnet import nasnet_utils
+except: # pylint: disable=bare-except
+ pass
+# pylint: enable=g-import-not-at-top
arg_scope = slim.arg_scope
diff --git a/research/object_detection/models/faster_rcnn_nas_feature_extractor_test.py b/research/object_detection/models/faster_rcnn_nas_feature_extractor_tf1_test.py
similarity index 97%
rename from research/object_detection/models/faster_rcnn_nas_feature_extractor_test.py
rename to research/object_detection/models/faster_rcnn_nas_feature_extractor_tf1_test.py
index 4f7e5bed983fcac3718daa652daf8df35ee3d48d..a41cb0f733d613ffb050bbf4f8506579375c9d08 100644
--- a/research/object_detection/models/faster_rcnn_nas_feature_extractor_test.py
+++ b/research/object_detection/models/faster_rcnn_nas_feature_extractor_tf1_test.py
@@ -14,12 +14,14 @@
# ==============================================================================
"""Tests for models.faster_rcnn_nas_feature_extractor."""
-
+import unittest
import tensorflow.compat.v1 as tf
from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class FasterRcnnNASFeatureExtractorTest(tf.test.TestCase):
def _build_feature_extractor(self, first_stage_features_stride):
diff --git a/research/object_detection/models/faster_rcnn_pnas_feature_extractor.py b/research/object_detection/models/faster_rcnn_pnas_feature_extractor.py
index 7f4ff7e8fee8b1df08cd76857ba77406a08bd903..ec32cd309d3a3fe135cf72665631b04273e21424 100644
--- a/research/object_detection/models/faster_rcnn_pnas_feature_extractor.py
+++ b/research/object_detection/models/faster_rcnn_pnas_feature_extractor.py
@@ -30,7 +30,11 @@ import tf_slim as slim
from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.utils import variables_helper
from nets.nasnet import nasnet_utils
-from nets.nasnet import pnasnet
+
+try:
+ from nets.nasnet import pnasnet # pylint: disable=g-import-not-at-top
+except: # pylint: disable=bare-except
+ pass
arg_scope = slim.arg_scope
diff --git a/research/object_detection/models/faster_rcnn_pnas_feature_extractor_test.py b/research/object_detection/models/faster_rcnn_pnas_feature_extractor_tf1_test.py
similarity index 97%
rename from research/object_detection/models/faster_rcnn_pnas_feature_extractor_test.py
rename to research/object_detection/models/faster_rcnn_pnas_feature_extractor_tf1_test.py
index 46b822fd25b838d67db6be5b495c3e591e69f08d..16774511b4d9c6eb1c94b8304640d9bf99c47ce0 100644
--- a/research/object_detection/models/faster_rcnn_pnas_feature_extractor_test.py
+++ b/research/object_detection/models/faster_rcnn_pnas_feature_extractor_tf1_test.py
@@ -14,12 +14,14 @@
# ==============================================================================
"""Tests for models.faster_rcnn_pnas_feature_extractor."""
-
+import unittest
import tensorflow.compat.v1 as tf
from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class FasterRcnnPNASFeatureExtractorTest(tf.test.TestCase):
def _build_feature_extractor(self, first_stage_features_stride):
diff --git a/research/object_detection/models/faster_rcnn_resnet_keras_feature_extractor.py b/research/object_detection/models/faster_rcnn_resnet_keras_feature_extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6b1e25404c71be5a3b68df9ce85416ffd4e982e
--- /dev/null
+++ b/research/object_detection/models/faster_rcnn_resnet_keras_feature_extractor.py
@@ -0,0 +1,254 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Resnet based Faster R-CNN implementation in Keras.
+
+See Deep Residual Learning for Image Recognition by He et al.
+https://arxiv.org/abs/1512.03385
+"""
+
+import tensorflow.compat.v1 as tf
+
+from object_detection.meta_architectures import faster_rcnn_meta_arch
+from object_detection.models.keras_models import resnet_v1
+from object_detection.utils import model_util
+
+
+_RESNET_MODEL_CONV4_LAST_LAYERS = {
+ 'resnet_v1_50': 'conv4_block6_out',
+ 'resnet_v1_101': 'conv4_block23_out',
+ 'resnet_v1_152': 'conv4_block36_out',
+}
+
+
+class FasterRCNNResnetKerasFeatureExtractor(
+ faster_rcnn_meta_arch.FasterRCNNKerasFeatureExtractor):
+ """Faster R-CNN with Resnet feature extractor implementation."""
+
+ def __init__(self,
+ is_training,
+ resnet_v1_base_model,
+ resnet_v1_base_model_name,
+ first_stage_features_stride=16,
+ batch_norm_trainable=False,
+ weight_decay=0.0):
+ """Constructor.
+
+ Args:
+ is_training: See base class.
+ resnet_v1_base_model: base resnet v1 network to use. One of
+ the resnet_v1.resnet_v1_{50,101,152} models.
+ resnet_v1_base_model_name: resnet v1 model name; a key of _RESNET_MODEL_CONV4_LAST_LAYERS.
+ first_stage_features_stride: See base class.
+ batch_norm_trainable: See base class.
+ weight_decay: See base class.
+
+ Raises:
+ ValueError: If `first_stage_features_stride` is not 16.
+ """
+ if first_stage_features_stride != 16:
+ raise ValueError('`first_stage_features_stride` must be 16.')
+ super(FasterRCNNResnetKerasFeatureExtractor, self).__init__(
+ is_training, first_stage_features_stride, batch_norm_trainable,
+ weight_decay)
+ self.classification_backbone = None
+ self._variable_dict = {}
+ self._resnet_v1_base_model = resnet_v1_base_model
+ self._resnet_v1_base_model_name = resnet_v1_base_model_name
+
+ def preprocess(self, resized_inputs):
+ """Faster R-CNN Resnet V1 preprocessing.
+
+ VGG style channel mean subtraction as described here:
+ https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md
+ Note that if the number of channels is not equal to 3, the mean subtraction
+ will be skipped and the original resized_inputs will be returned.
+
+ Args:
+ resized_inputs: A [batch, height_in, width_in, channels] float32 tensor
+ representing a batch of images with values between 0 and 255.0.
+
+ Returns:
+ preprocessed_inputs: A [batch, height_out, width_out, channels] float32
+ tensor representing a batch of images.
+
+ """
+ if resized_inputs.shape.as_list()[3] == 3:
+ channel_means = [123.68, 116.779, 103.939]
+ return resized_inputs - [[channel_means]]
+ else:
+ return resized_inputs
+
+ def get_proposal_feature_extractor_model(self, name=None):
+ """Returns a model that extracts first stage RPN features.
+
+ Extracts features using the first half of the Resnet v1 network.
+
+ Args:
+ name: A scope name to construct all variables within.
+
+ Returns:
+ A Keras model that takes preprocessed_inputs:
+ A [batch, height, width, channels] float32 tensor
+ representing a batch of images.
+
+ And returns rpn_feature_map:
+ A tensor with shape [batch, height, width, depth]
+ """
+ if not self.classification_backbone:
+ self.classification_backbone = self._resnet_v1_base_model(
+ batchnorm_training=self._train_batch_norm,
+ conv_hyperparams=None,
+ weight_decay=self._weight_decay,
+ classes=None,
+ weights=None,
+ include_top=False
+ )
+ with tf.name_scope(name):
+ with tf.name_scope('ResnetV1'):
+
+ conv4_last_layer = _RESNET_MODEL_CONV4_LAST_LAYERS[
+ self._resnet_v1_base_model_name]
+ proposal_features = self.classification_backbone.get_layer(
+ name=conv4_last_layer).output
+ keras_model = tf.keras.Model(
+ inputs=self.classification_backbone.inputs,
+ outputs=proposal_features)
+ for variable in keras_model.variables:
+ self._variable_dict[variable.name[:-2]] = variable
+ return keras_model
+
+ def get_box_classifier_feature_extractor_model(self, name=None):
+ """Returns a model that extracts second stage box classifier features.
+
+ This function reconstructs the "second half" of the ResNet v1
+ network after the part defined in `get_proposal_feature_extractor_model`.
+
+ Args:
+ name: A scope name to construct all variables within.
+
+ Returns:
+ A Keras model that takes proposal_feature_maps:
+ A 4-D float tensor with shape
+ [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
+ representing the feature map cropped to each proposal.
+ And returns proposal_classifier_features:
+ A 4-D float tensor with shape
+ [batch_size * self.max_num_proposals, height, width, depth]
+ representing box classifier features for each proposal.
+ """
+ if not self.classification_backbone:
+ self.classification_backbone = self._resnet_v1_base_model(
+ batchnorm_training=self._train_batch_norm,
+ conv_hyperparams=None,
+ weight_decay=self._weight_decay,
+ classes=None,
+ weights=None,
+ include_top=False
+ )
+ with tf.name_scope(name):
+ with tf.name_scope('ResnetV1'):
+ conv4_last_layer = _RESNET_MODEL_CONV4_LAST_LAYERS[
+ self._resnet_v1_base_model_name]
+ proposal_feature_maps = self.classification_backbone.get_layer(
+ name=conv4_last_layer).output
+ proposal_classifier_features = self.classification_backbone.get_layer(
+ name='conv5_block3_out').output
+
+ keras_model = model_util.extract_submodel(
+ model=self.classification_backbone,
+ inputs=proposal_feature_maps,
+ outputs=proposal_classifier_features)
+ for variable in keras_model.variables:
+ self._variable_dict[variable.name[:-2]] = variable
+ return keras_model
+
+
+class FasterRCNNResnet50KerasFeatureExtractor(
+ FasterRCNNResnetKerasFeatureExtractor):
+ """Faster R-CNN with Resnet50 feature extractor implementation."""
+
+ def __init__(self,
+ is_training,
+ first_stage_features_stride=16,
+ batch_norm_trainable=False,
+ weight_decay=0.0):
+ """Constructor.
+
+ Args:
+ is_training: See base class.
+ first_stage_features_stride: See base class.
+ batch_norm_trainable: See base class.
+ weight_decay: See base class.
+ """
+ super(FasterRCNNResnet50KerasFeatureExtractor, self).__init__(
+ is_training=is_training,
+ resnet_v1_base_model=resnet_v1.resnet_v1_50,
+ resnet_v1_base_model_name='resnet_v1_50',
+ first_stage_features_stride=first_stage_features_stride,
+ batch_norm_trainable=batch_norm_trainable,
+ weight_decay=weight_decay)
+
+
+class FasterRCNNResnet101KerasFeatureExtractor(
+ FasterRCNNResnetKerasFeatureExtractor):
+ """Faster R-CNN with Resnet101 feature extractor implementation."""
+
+ def __init__(self,
+ is_training,
+ first_stage_features_stride=16,
+ batch_norm_trainable=False,
+ weight_decay=0.0):
+ """Constructor.
+
+ Args:
+ is_training: See base class.
+ first_stage_features_stride: See base class.
+ batch_norm_trainable: See base class.
+ weight_decay: See base class.
+ """
+ super(FasterRCNNResnet101KerasFeatureExtractor, self).__init__(
+ is_training=is_training,
+ resnet_v1_base_model=resnet_v1.resnet_v1_101,
+ resnet_v1_base_model_name='resnet_v1_101',
+ first_stage_features_stride=first_stage_features_stride,
+ batch_norm_trainable=batch_norm_trainable,
+ weight_decay=weight_decay)
+
+
+class FasterRCNNResnet152KerasFeatureExtractor(
+ FasterRCNNResnetKerasFeatureExtractor):
+ """Faster R-CNN with Resnet152 feature extractor implementation."""
+
+ def __init__(self,
+ is_training,
+ first_stage_features_stride=16,
+ batch_norm_trainable=False,
+ weight_decay=0.0):
+ """Constructor.
+
+ Args:
+ is_training: See base class.
+ first_stage_features_stride: See base class.
+ batch_norm_trainable: See base class.
+ weight_decay: See base class.
+ """
+ super(FasterRCNNResnet152KerasFeatureExtractor, self).__init__(
+ is_training=is_training,
+ resnet_v1_base_model=resnet_v1.resnet_v1_152,
+ resnet_v1_base_model_name='resnet_v1_152',
+ first_stage_features_stride=first_stage_features_stride,
+ batch_norm_trainable=batch_norm_trainable,
+ weight_decay=weight_decay)
diff --git a/research/object_detection/models/faster_rcnn_resnet_keras_feature_extractor_tf2_test.py b/research/object_detection/models/faster_rcnn_resnet_keras_feature_extractor_tf2_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..15e8a5fbf153cdee690be94d2d9c910070af35f0
--- /dev/null
+++ b/research/object_detection/models/faster_rcnn_resnet_keras_feature_extractor_tf2_test.py
@@ -0,0 +1,80 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for models.faster_rcnn_resnet_keras_feature_extractor."""
+import unittest
+import tensorflow.compat.v1 as tf
+
+from object_detection.models import faster_rcnn_resnet_keras_feature_extractor as frcnn_res
+from object_detection.utils import tf_version
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class FasterRcnnResnetKerasFeatureExtractorTest(tf.test.TestCase):
+ """Output-shape tests for the Keras Faster R-CNN ResNet feature extractor.
+
+ The whole class is skipped when `tf_version.is_tf1()` is true. Results are
+ read back with `.numpy()`, so the tests presumably need eager execution
+ (see the `tf.enable_v2_behavior()` call in the `__main__` block) -- confirm.
+ """
+
+ def _build_feature_extractor(self, architecture='resnet_v1_50'):
+ """Returns a FasterRCNNResnet50KerasFeatureExtractor for the tests.
+
+ The extractor is built with is_training=False, stride 16, non-trainable
+ batch norm and zero weight decay.
+
+ Args:
+ architecture: Accepted but never read by this helper.
+ """
+ # NOTE(review): `architecture` is ignored; a ResNet-50 extractor is built
+ # regardless of the value passed. Confirm whether the 101/152 variants
+ # were meant to be selectable here.
+ return frcnn_res.FasterRCNNResnet50KerasFeatureExtractor(
+ is_training=False,
+ first_stage_features_stride=16,
+ batch_norm_trainable=False,
+ weight_decay=0.0)
+
+ def test_extract_proposal_features_returns_expected_size(self):
+ """A [1, 224, 224, 3] input yields a [1, 14, 14, 1024] proposal map."""
+ feature_extractor = self._build_feature_extractor()
+ preprocessed_inputs = tf.random_uniform(
+ [1, 224, 224, 3], maxval=255, dtype=tf.float32)
+ rpn_feature_map = feature_extractor.get_proposal_feature_extractor_model(
+ name='TestScope')(preprocessed_inputs)
+ features_shape = tf.shape(rpn_feature_map)
+ self.assertAllEqual(features_shape.numpy(), [1, 14, 14, 1024])
+
+ def test_extract_proposal_features_half_size_input(self):
+ """A [1, 112, 112, 3] input yields a [1, 7, 7, 1024] proposal map."""
+ feature_extractor = self._build_feature_extractor()
+ preprocessed_inputs = tf.random_uniform(
+ [1, 112, 112, 3], maxval=255, dtype=tf.float32)
+ rpn_feature_map = feature_extractor.get_proposal_feature_extractor_model(
+ name='TestScope')(preprocessed_inputs)
+ features_shape = tf.shape(rpn_feature_map)
+ self.assertAllEqual(features_shape.numpy(), [1, 7, 7, 1024])
+
+ def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self):
+ """A rank-3 [224, 224, 3] input must raise InvalidArgumentError."""
+ feature_extractor = self._build_feature_extractor()
+ preprocessed_inputs = tf.random_uniform(
+ [224, 224, 3], maxval=255, dtype=tf.float32)
+ with self.assertRaises(tf.errors.InvalidArgumentError):
+ feature_extractor.get_proposal_feature_extractor_model(
+ name='TestScope')(preprocessed_inputs)
+
+ def test_extract_box_classifier_features_returns_expected_size(self):
+ """A [3, 7, 7, 1024] proposal batch yields [3, 4, 4, 2048] features."""
+ feature_extractor = self._build_feature_extractor()
+ proposal_feature_maps = tf.random_uniform(
+ [3, 7, 7, 1024], maxval=255, dtype=tf.float32)
+ model = feature_extractor.get_box_classifier_feature_extractor_model(
+ name='TestScope')
+ proposal_classifier_features = (
+ model(proposal_feature_maps))
+ features_shape = tf.shape(proposal_classifier_features)
+ # Note: due to a slight mismatch in slim and keras resnet definitions
+ # the output shape of the box classifier is slightly different compared to
+ # that of the slim implementation. The keras version is more `canonical`
+ # in that it more accurately reflects the original authors' implementation.
+ # TODO(jonathanhuang): make the output shape match that of the slim
+ # implementation by using atrous convolutions.
+ self.assertAllEqual(features_shape.numpy(), [3, 4, 4, 2048])
+
+
+if __name__ == '__main__':
+ # `tf` is `tensorflow.compat.v1` in this file, so TF2 behavior is switched
+ # on explicitly before the test runner starts.
+ tf.enable_v2_behavior()
+ tf.test.main()
diff --git a/research/object_detection/models/faster_rcnn_resnet_v1_feature_extractor_test.py b/research/object_detection/models/faster_rcnn_resnet_v1_feature_extractor_tf1_test.py
similarity index 98%
rename from research/object_detection/models/faster_rcnn_resnet_v1_feature_extractor_test.py
rename to research/object_detection/models/faster_rcnn_resnet_v1_feature_extractor_tf1_test.py
index 0b5055a0c6829bdcdadf6c2fb6295ce67e3e137a..3d47da04af5fb3f728379a649d64329c862eaf75 100644
--- a/research/object_detection/models/faster_rcnn_resnet_v1_feature_extractor_test.py
+++ b/research/object_detection/models/faster_rcnn_resnet_v1_feature_extractor_tf1_test.py
@@ -14,13 +14,15 @@
# ==============================================================================
"""Tests for object_detection.models.faster_rcnn_resnet_v1_feature_extractor."""
-
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as faster_rcnn_resnet_v1
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class FasterRcnnResnetV1FeatureExtractorTest(tf.test.TestCase):
def _build_feature_extractor(self,
diff --git a/research/object_detection/models/feature_map_generators_test.py b/research/object_detection/models/feature_map_generators_test.py
index 49ba09bdf0681374643c349bed5d5d319cc90a9a..951e7760bd8a42afb19f61b6c6bc1c1f744d74dd 100644
--- a/research/object_detection/models/feature_map_generators_test.py
+++ b/research/object_detection/models/feature_map_generators_test.py
@@ -14,7 +14,7 @@
# ==============================================================================
"""Tests for feature map generators."""
-
+import unittest
from absl.testing import parameterized
import numpy as np
@@ -25,6 +25,9 @@ from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models import feature_map_generators
from object_detection.protos import hyperparams_pb2
+from object_detection.utils import test_case
+from object_detection.utils import test_utils
+from object_detection.utils import tf_version
INCEPTION_V2_LAYOUT = {
'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
@@ -52,11 +55,7 @@ SSD_MOBILENET_V1_WEIGHT_SHARED_LAYOUT = {
}
-@parameterized.parameters(
- {'use_keras': False},
- {'use_keras': True},
-)
-class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
+class MultiResolutionFeatureMapGeneratorTest(test_case.TestCase):
def _build_conv_hyperparams(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
@@ -73,9 +72,9 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
- def _build_feature_map_generator(self, feature_map_layout, use_keras,
+ def _build_feature_map_generator(self, feature_map_layout,
pool_residual=False):
- if use_keras:
+ if tf_version.is_tf2():
return feature_map_generators.KerasMultiResolutionFeatureMaps(
feature_map_layout=feature_map_layout,
depth_multiplier=1,
@@ -97,17 +96,18 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
pool_residual=pool_residual)
return feature_map_generator
- def test_get_expected_feature_map_shapes_with_inception_v2(self, use_keras):
- image_features = {
- 'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
- 'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
- 'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
- }
- feature_map_generator = self._build_feature_map_generator(
- feature_map_layout=INCEPTION_V2_LAYOUT,
- use_keras=use_keras
- )
- feature_maps = feature_map_generator(image_features)
+ def test_get_expected_feature_map_shapes_with_inception_v2(self):
+ with test_utils.GraphContextOrNone() as g:
+ image_features = {
+ 'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
+ 'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
+ 'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
+ }
+ feature_map_generator = self._build_feature_map_generator(
+ feature_map_layout=INCEPTION_V2_LAYOUT)
+ def graph_fn():
+ feature_maps = feature_map_generator(image_features)
+ return feature_maps
expected_feature_map_shapes = {
'Mixed_3c': (4, 28, 28, 256),
@@ -116,29 +116,25 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
'Mixed_5c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
'Mixed_5c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
'Mixed_5c_2_Conv2d_5_3x3_s2_256': (4, 1, 1, 256)}
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- out_feature_maps = sess.run(feature_maps)
- out_feature_map_shapes = dict(
- (key, value.shape) for key, value in out_feature_maps.items())
- self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
+ out_feature_maps = self.execute(graph_fn, [], g)
+ out_feature_map_shapes = dict(
+ (key, value.shape) for key, value in out_feature_maps.items())
+ self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def test_get_expected_feature_map_shapes_with_inception_v2_use_depthwise(
- self, use_keras):
- image_features = {
- 'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
- 'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
- 'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
- }
- layout_copy = INCEPTION_V2_LAYOUT.copy()
- layout_copy['use_depthwise'] = True
- feature_map_generator = self._build_feature_map_generator(
- feature_map_layout=layout_copy,
- use_keras=use_keras
- )
- feature_maps = feature_map_generator(image_features)
+ self):
+ with test_utils.GraphContextOrNone() as g:
+ image_features = {
+ 'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
+ 'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
+ 'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
+ }
+ layout_copy = INCEPTION_V2_LAYOUT.copy()
+ layout_copy['use_depthwise'] = True
+ feature_map_generator = self._build_feature_map_generator(
+ feature_map_layout=layout_copy)
+ def graph_fn():
+ return feature_map_generator(image_features)
expected_feature_map_shapes = {
'Mixed_3c': (4, 28, 28, 256),
@@ -147,29 +143,25 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
'Mixed_5c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
'Mixed_5c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
'Mixed_5c_2_Conv2d_5_3x3_s2_256': (4, 1, 1, 256)}
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- out_feature_maps = sess.run(feature_maps)
- out_feature_map_shapes = dict(
- (key, value.shape) for key, value in out_feature_maps.items())
- self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
-
- def test_get_expected_feature_map_shapes_use_explicit_padding(
- self, use_keras):
- image_features = {
- 'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
- 'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
- 'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
- }
- layout_copy = INCEPTION_V2_LAYOUT.copy()
- layout_copy['use_explicit_padding'] = True
- feature_map_generator = self._build_feature_map_generator(
- feature_map_layout=layout_copy,
- use_keras=use_keras
- )
- feature_maps = feature_map_generator(image_features)
+ out_feature_maps = self.execute(graph_fn, [], g)
+ out_feature_map_shapes = dict(
+ (key, value.shape) for key, value in out_feature_maps.items())
+ self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
+
+ def test_get_expected_feature_map_shapes_use_explicit_padding(self):
+ with test_utils.GraphContextOrNone() as g:
+ image_features = {
+ 'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
+ 'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
+ 'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
+ }
+ layout_copy = INCEPTION_V2_LAYOUT.copy()
+ layout_copy['use_explicit_padding'] = True
+ feature_map_generator = self._build_feature_map_generator(
+ feature_map_layout=layout_copy,
+ )
+ def graph_fn():
+ return feature_map_generator(image_features)
expected_feature_map_shapes = {
'Mixed_3c': (4, 28, 28, 256),
@@ -178,27 +170,24 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
'Mixed_5c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
'Mixed_5c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
'Mixed_5c_2_Conv2d_5_3x3_s2_256': (4, 1, 1, 256)}
+ out_feature_maps = self.execute(graph_fn, [], g)
+ out_feature_map_shapes = dict(
+ (key, value.shape) for key, value in out_feature_maps.items())
+ self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
+
+ def test_get_expected_feature_map_shapes_with_inception_v3(self):
+ with test_utils.GraphContextOrNone() as g:
+ image_features = {
+ 'Mixed_5d': tf.random_uniform([4, 35, 35, 256], dtype=tf.float32),
+ 'Mixed_6e': tf.random_uniform([4, 17, 17, 576], dtype=tf.float32),
+ 'Mixed_7c': tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32)
+ }
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- out_feature_maps = sess.run(feature_maps)
- out_feature_map_shapes = dict(
- (key, value.shape) for key, value in out_feature_maps.items())
- self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
-
- def test_get_expected_feature_map_shapes_with_inception_v3(self, use_keras):
- image_features = {
- 'Mixed_5d': tf.random_uniform([4, 35, 35, 256], dtype=tf.float32),
- 'Mixed_6e': tf.random_uniform([4, 17, 17, 576], dtype=tf.float32),
- 'Mixed_7c': tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32)
- }
-
- feature_map_generator = self._build_feature_map_generator(
- feature_map_layout=INCEPTION_V3_LAYOUT,
- use_keras=use_keras
- )
- feature_maps = feature_map_generator(image_features)
+ feature_map_generator = self._build_feature_map_generator(
+ feature_map_layout=INCEPTION_V3_LAYOUT,
+ )
+ def graph_fn():
+ return feature_map_generator(image_features)
expected_feature_map_shapes = {
'Mixed_5d': (4, 35, 35, 256),
@@ -207,29 +196,26 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
'Mixed_7c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
'Mixed_7c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
'Mixed_7c_2_Conv2d_5_3x3_s2_128': (4, 1, 1, 128)}
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- out_feature_maps = sess.run(feature_maps)
- out_feature_map_shapes = dict(
- (key, value.shape) for key, value in out_feature_maps.items())
- self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
+ out_feature_maps = self.execute(graph_fn, [], g)
+ out_feature_map_shapes = dict(
+ (key, value.shape) for key, value in out_feature_maps.items())
+ self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1(
- self, use_keras):
- image_features = {
- 'Conv2d_11_pointwise': tf.random_uniform([4, 16, 16, 512],
- dtype=tf.float32),
- 'Conv2d_13_pointwise': tf.random_uniform([4, 8, 8, 1024],
- dtype=tf.float32),
- }
+ self):
+ with test_utils.GraphContextOrNone() as g:
+ image_features = {
+ 'Conv2d_11_pointwise': tf.random_uniform([4, 16, 16, 512],
+ dtype=tf.float32),
+ 'Conv2d_13_pointwise': tf.random_uniform([4, 8, 8, 1024],
+ dtype=tf.float32),
+ }
- feature_map_generator = self._build_feature_map_generator(
- feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT,
- use_keras=use_keras
- )
- feature_maps = feature_map_generator(image_features)
+ feature_map_generator = self._build_feature_map_generator(
+ feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT,
+ )
+ def graph_fn():
+ return feature_map_generator(image_features)
expected_feature_map_shapes = {
'Conv2d_11_pointwise': (4, 16, 16, 512),
@@ -237,55 +223,50 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_512': (4, 4, 4, 512),
'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 2, 2, 256),
'Conv2d_13_pointwise_2_Conv2d_4_2x2_s2_256': (4, 1, 1, 256)}
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- out_feature_maps = sess.run(feature_maps)
- out_feature_map_shapes = dict(
- (key, value.shape) for key, value in out_feature_maps.items())
- self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
+ out_feature_maps = self.execute(graph_fn, [], g)
+ out_feature_map_shapes = dict(
+ (key, value.shape) for key, value in out_feature_maps.items())
+ self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def test_feature_map_shapes_with_pool_residual_ssd_mobilenet_v1(
- self, use_keras):
- image_features = {
- 'Conv2d_13_pointwise': tf.random_uniform([4, 8, 8, 1024],
- dtype=tf.float32),
- }
+ self):
+ with test_utils.GraphContextOrNone() as g:
+ image_features = {
+ 'Conv2d_13_pointwise': tf.random_uniform([4, 8, 8, 1024],
+ dtype=tf.float32),
+ }
- feature_map_generator = self._build_feature_map_generator(
- feature_map_layout=SSD_MOBILENET_V1_WEIGHT_SHARED_LAYOUT,
- use_keras=use_keras,
- pool_residual=True
- )
- feature_maps = feature_map_generator(image_features)
+ feature_map_generator = self._build_feature_map_generator(
+ feature_map_layout=SSD_MOBILENET_V1_WEIGHT_SHARED_LAYOUT,
+ pool_residual=True
+ )
+ def graph_fn():
+ return feature_map_generator(image_features)
expected_feature_map_shapes = {
'Conv2d_13_pointwise': (4, 8, 8, 1024),
'Conv2d_13_pointwise_2_Conv2d_1_3x3_s2_256': (4, 4, 4, 256),
'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_256': (4, 2, 2, 256),
'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 1, 1, 256)}
+ out_feature_maps = self.execute(graph_fn, [], g)
+ out_feature_map_shapes = dict(
+ (key, value.shape) for key, value in out_feature_maps.items())
+ self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
+
+ def test_get_expected_variable_names_with_inception_v2(self):
+ with test_utils.GraphContextOrNone() as g:
+ image_features = {
+ 'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
+ 'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
+ 'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
+ }
+ feature_map_generator = self._build_feature_map_generator(
+ feature_map_layout=INCEPTION_V2_LAYOUT,
+ )
+ def graph_fn():
+ return feature_map_generator(image_features)
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- out_feature_maps = sess.run(feature_maps)
- out_feature_map_shapes = dict(
- (key, value.shape) for key, value in out_feature_maps.items())
- self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
-
- def test_get_expected_variable_names_with_inception_v2(self, use_keras):
- image_features = {
- 'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
- 'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
- 'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
- }
- feature_map_generator = self._build_feature_map_generator(
- feature_map_layout=INCEPTION_V2_LAYOUT,
- use_keras=use_keras
- )
- feature_maps = feature_map_generator(image_features)
-
+ self.execute(graph_fn, [], g)
expected_slim_variables = set([
'Mixed_5c_1_Conv2d_3_1x1_256/weights',
'Mixed_5c_1_Conv2d_3_1x1_256/biases',
@@ -316,32 +297,32 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
'FeatureMaps/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/bias',
])
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- sess.run(feature_maps)
+ if tf_version.is_tf2():
actual_variable_set = set(
- [var.op.name for var in tf.trainable_variables()])
- if use_keras:
- self.assertSetEqual(expected_keras_variables, actual_variable_set)
- else:
- self.assertSetEqual(expected_slim_variables, actual_variable_set)
+ [var.name.split(':')[0] for var in feature_map_generator.variables])
+ self.assertSetEqual(expected_keras_variables, actual_variable_set)
+ else:
+ with g.as_default():
+ actual_variable_set = set(
+ [var.op.name for var in tf.trainable_variables()])
+ self.assertSetEqual(expected_slim_variables, actual_variable_set)
def test_get_expected_variable_names_with_inception_v2_use_depthwise(
- self,
- use_keras):
- image_features = {
- 'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
- 'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
- 'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
- }
- layout_copy = INCEPTION_V2_LAYOUT.copy()
- layout_copy['use_depthwise'] = True
- feature_map_generator = self._build_feature_map_generator(
- feature_map_layout=layout_copy,
- use_keras=use_keras
- )
- feature_maps = feature_map_generator(image_features)
+ self):
+ with test_utils.GraphContextOrNone() as g:
+ image_features = {
+ 'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
+ 'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
+ 'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
+ }
+ layout_copy = INCEPTION_V2_LAYOUT.copy()
+ layout_copy['use_depthwise'] = True
+ feature_map_generator = self._build_feature_map_generator(
+ feature_map_layout=layout_copy,
+ )
+ def graph_fn():
+ return feature_map_generator(image_features)
+ self.execute(graph_fn, [], g)
expected_slim_variables = set([
'Mixed_5c_1_Conv2d_3_1x1_256/weights',
@@ -391,23 +372,20 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
'FeatureMaps/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/bias',
])
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- sess.run(feature_maps)
+ if tf_version.is_tf2():
actual_variable_set = set(
- [var.op.name for var in tf.trainable_variables()])
- if use_keras:
- self.assertSetEqual(expected_keras_variables, actual_variable_set)
- else:
- self.assertSetEqual(expected_slim_variables, actual_variable_set)
+ [var.name.split(':')[0] for var in feature_map_generator.variables])
+ self.assertSetEqual(expected_keras_variables, actual_variable_set)
+ else:
+ with g.as_default():
+ actual_variable_set = set(
+ [var.op.name for var in tf.trainable_variables()])
+ self.assertSetEqual(expected_slim_variables, actual_variable_set)
-@parameterized.parameters({'use_native_resize_op': True, 'use_keras': False},
- {'use_native_resize_op': False, 'use_keras': False},
- {'use_native_resize_op': True, 'use_keras': True},
- {'use_native_resize_op': False, 'use_keras': True})
-class FPNFeatureMapGeneratorTest(tf.test.TestCase, parameterized.TestCase):
+@parameterized.parameters({'use_native_resize_op': True},
+ {'use_native_resize_op': False})
+class FPNFeatureMapGeneratorTest(test_case.TestCase, parameterized.TestCase):
def _build_conv_hyperparams(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
@@ -425,10 +403,10 @@ class FPNFeatureMapGeneratorTest(tf.test.TestCase, parameterized.TestCase):
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def _build_feature_map_generator(
- self, image_features, depth, use_keras, use_bounded_activations=False,
+ self, image_features, depth, use_bounded_activations=False,
use_native_resize_op=False, use_explicit_padding=False,
use_depthwise=False):
- if use_keras:
+ if tf_version.is_tf2():
return feature_map_generators.KerasFpnTopDownFeatureMaps(
num_levels=len(image_features),
depth=depth,
@@ -454,19 +432,20 @@ class FPNFeatureMapGeneratorTest(tf.test.TestCase, parameterized.TestCase):
return feature_map_generator
def test_get_expected_feature_map_shapes(
- self, use_native_resize_op, use_keras):
- image_features = [
- ('block2', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
- ('block3', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)),
- ('block4', tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)),
- ('block5', tf.random_uniform([4, 1, 1, 256], dtype=tf.float32))
- ]
- feature_map_generator = self._build_feature_map_generator(
- image_features=image_features,
- depth=128,
- use_keras=use_keras,
- use_native_resize_op=use_native_resize_op)
- feature_maps = feature_map_generator(image_features)
+ self, use_native_resize_op):
+ with test_utils.GraphContextOrNone() as g:
+ image_features = [
+ ('block2', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
+ ('block3', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)),
+ ('block4', tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)),
+ ('block5', tf.random_uniform([4, 1, 1, 256], dtype=tf.float32))
+ ]
+ feature_map_generator = self._build_feature_map_generator(
+ image_features=image_features,
+ depth=128,
+ use_native_resize_op=use_native_resize_op)
+ def graph_fn():
+ return feature_map_generator(image_features)
expected_feature_map_shapes = {
'top_down_block2': (4, 8, 8, 128),
@@ -474,30 +453,27 @@ class FPNFeatureMapGeneratorTest(tf.test.TestCase, parameterized.TestCase):
'top_down_block4': (4, 2, 2, 128),
'top_down_block5': (4, 1, 1, 128)
}
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- out_feature_maps = sess.run(feature_maps)
- out_feature_map_shapes = {key: value.shape
- for key, value in out_feature_maps.items()}
- self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
+ out_feature_maps = self.execute(graph_fn, [], g)
+ out_feature_map_shapes = dict(
+ (key, value.shape) for key, value in out_feature_maps.items())
+ self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def test_get_expected_feature_map_shapes_with_explicit_padding(
- self, use_native_resize_op, use_keras):
- image_features = [
- ('block2', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
- ('block3', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)),
- ('block4', tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)),
- ('block5', tf.random_uniform([4, 1, 1, 256], dtype=tf.float32))
- ]
- feature_map_generator = self._build_feature_map_generator(
- image_features=image_features,
- depth=128,
- use_keras=use_keras,
- use_explicit_padding=True,
- use_native_resize_op=use_native_resize_op)
- feature_maps = feature_map_generator(image_features)
+ self, use_native_resize_op):
+ with test_utils.GraphContextOrNone() as g:
+ image_features = [
+ ('block2', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
+ ('block3', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)),
+ ('block4', tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)),
+ ('block5', tf.random_uniform([4, 1, 1, 256], dtype=tf.float32))
+ ]
+ feature_map_generator = self._build_feature_map_generator(
+ image_features=image_features,
+ depth=128,
+ use_explicit_padding=True,
+ use_native_resize_op=use_native_resize_op)
+ def graph_fn():
+ return feature_map_generator(image_features)
expected_feature_map_shapes = {
'top_down_block2': (4, 8, 8, 128),
@@ -505,19 +481,15 @@ class FPNFeatureMapGeneratorTest(tf.test.TestCase, parameterized.TestCase):
'top_down_block4': (4, 2, 2, 128),
'top_down_block5': (4, 1, 1, 128)
}
+ out_feature_maps = self.execute(graph_fn, [], g)
+ out_feature_map_shapes = dict(
+ (key, value.shape) for key, value in out_feature_maps.items())
+ self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- out_feature_maps = sess.run(feature_maps)
- out_feature_map_shapes = {key: value.shape
- for key, value in out_feature_maps.items()}
- self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
-
+ @unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
def test_use_bounded_activations_add_operations(
- self, use_native_resize_op, use_keras):
- tf_graph = tf.Graph()
- with tf_graph.as_default():
+ self, use_native_resize_op):
+ with test_utils.GraphContextOrNone() as g:
image_features = [('block2',
tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
('block3',
@@ -529,34 +501,23 @@ class FPNFeatureMapGeneratorTest(tf.test.TestCase, parameterized.TestCase):
feature_map_generator = self._build_feature_map_generator(
image_features=image_features,
depth=128,
- use_keras=use_keras,
use_bounded_activations=True,
use_native_resize_op=use_native_resize_op)
- feature_map_generator(image_features)
-
- if use_keras:
- expected_added_operations = dict.fromkeys([
- 'FeatureMaps/top_down/clip_by_value/clip_by_value',
- 'FeatureMaps/top_down/clip_by_value_1/clip_by_value',
- 'FeatureMaps/top_down/clip_by_value_2/clip_by_value',
- 'FeatureMaps/top_down/clip_by_value_3/clip_by_value',
- 'FeatureMaps/top_down/clip_by_value_4/clip_by_value',
- 'FeatureMaps/top_down/clip_by_value_5/clip_by_value',
- 'FeatureMaps/top_down/clip_by_value_6/clip_by_value',
- ])
- else:
- expected_added_operations = dict.fromkeys([
- 'top_down/clip_by_value', 'top_down/clip_by_value_1',
- 'top_down/clip_by_value_2', 'top_down/clip_by_value_3',
- 'top_down/clip_by_value_4', 'top_down/clip_by_value_5',
- 'top_down/clip_by_value_6'
- ])
-
- op_names = {op.name: None for op in tf_graph.get_operations()}
- self.assertDictContainsSubset(expected_added_operations, op_names)
+ def graph_fn():
+ return feature_map_generator(image_features)
+ self.execute(graph_fn, [], g)
+ expected_added_operations = dict.fromkeys([
+ 'top_down/clip_by_value', 'top_down/clip_by_value_1',
+ 'top_down/clip_by_value_2', 'top_down/clip_by_value_3',
+ 'top_down/clip_by_value_4', 'top_down/clip_by_value_5',
+ 'top_down/clip_by_value_6'
+ ])
+ op_names = {op.name: None for op in g.get_operations()}
+ self.assertDictContainsSubset(expected_added_operations, op_names)
+ @unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
def test_use_bounded_activations_clip_value(
- self, use_native_resize_op, use_keras):
+ self, use_native_resize_op):
tf_graph = tf.Graph()
with tf_graph.as_default():
image_features = [
@@ -568,28 +529,16 @@ class FPNFeatureMapGeneratorTest(tf.test.TestCase, parameterized.TestCase):
feature_map_generator = self._build_feature_map_generator(
image_features=image_features,
depth=128,
- use_keras=use_keras,
use_bounded_activations=True,
use_native_resize_op=use_native_resize_op)
feature_map_generator(image_features)
- if use_keras:
- expected_clip_by_value_ops = dict.fromkeys([
- 'FeatureMaps/top_down/clip_by_value/clip_by_value',
- 'FeatureMaps/top_down/clip_by_value_1/clip_by_value',
- 'FeatureMaps/top_down/clip_by_value_2/clip_by_value',
- 'FeatureMaps/top_down/clip_by_value_3/clip_by_value',
- 'FeatureMaps/top_down/clip_by_value_4/clip_by_value',
- 'FeatureMaps/top_down/clip_by_value_5/clip_by_value',
- 'FeatureMaps/top_down/clip_by_value_6/clip_by_value',
- ])
- else:
- expected_clip_by_value_ops = [
- 'top_down/clip_by_value', 'top_down/clip_by_value_1',
- 'top_down/clip_by_value_2', 'top_down/clip_by_value_3',
- 'top_down/clip_by_value_4', 'top_down/clip_by_value_5',
- 'top_down/clip_by_value_6'
- ]
+ expected_clip_by_value_ops = [
+ 'top_down/clip_by_value', 'top_down/clip_by_value_1',
+ 'top_down/clip_by_value_2', 'top_down/clip_by_value_3',
+ 'top_down/clip_by_value_4', 'top_down/clip_by_value_5',
+ 'top_down/clip_by_value_6'
+ ]
# Gathers activation tensors before and after clip_by_value operations.
activations = {}
@@ -631,20 +580,21 @@ class FPNFeatureMapGeneratorTest(tf.test.TestCase, parameterized.TestCase):
self.assertLessEqual(after_clipping_upper_bound, expected_upper_bound)
def test_get_expected_feature_map_shapes_with_depthwise(
- self, use_native_resize_op, use_keras):
- image_features = [
- ('block2', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
- ('block3', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)),
- ('block4', tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)),
- ('block5', tf.random_uniform([4, 1, 1, 256], dtype=tf.float32))
- ]
- feature_map_generator = self._build_feature_map_generator(
- image_features=image_features,
- depth=128,
- use_keras=use_keras,
- use_depthwise=True,
- use_native_resize_op=use_native_resize_op)
- feature_maps = feature_map_generator(image_features)
+ self, use_native_resize_op):
+ with test_utils.GraphContextOrNone() as g:
+ image_features = [
+ ('block2', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
+ ('block3', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)),
+ ('block4', tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)),
+ ('block5', tf.random_uniform([4, 1, 1, 256], dtype=tf.float32))
+ ]
+ feature_map_generator = self._build_feature_map_generator(
+ image_features=image_features,
+ depth=128,
+ use_depthwise=True,
+ use_native_resize_op=use_native_resize_op)
+ def graph_fn():
+ return feature_map_generator(image_features)
expected_feature_map_shapes = {
'top_down_block2': (4, 8, 8, 128),
@@ -652,30 +602,27 @@ class FPNFeatureMapGeneratorTest(tf.test.TestCase, parameterized.TestCase):
'top_down_block4': (4, 2, 2, 128),
'top_down_block5': (4, 1, 1, 128)
}
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- out_feature_maps = sess.run(feature_maps)
- out_feature_map_shapes = {key: value.shape
- for key, value in out_feature_maps.items()}
- self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
+ out_feature_maps = self.execute(graph_fn, [], g)
+ out_feature_map_shapes = dict(
+ (key, value.shape) for key, value in out_feature_maps.items())
+ self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def test_get_expected_variable_names(
- self, use_native_resize_op, use_keras):
- image_features = [
- ('block2', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
- ('block3', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)),
- ('block4', tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)),
- ('block5', tf.random_uniform([4, 1, 1, 256], dtype=tf.float32))
- ]
- feature_map_generator = self._build_feature_map_generator(
- image_features=image_features,
- depth=128,
- use_keras=use_keras,
- use_native_resize_op=use_native_resize_op)
- feature_maps = feature_map_generator(image_features)
-
+ self, use_native_resize_op):
+ with test_utils.GraphContextOrNone() as g:
+ image_features = [
+ ('block2', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
+ ('block3', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)),
+ ('block4', tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)),
+ ('block5', tf.random_uniform([4, 1, 1, 256], dtype=tf.float32))
+ ]
+ feature_map_generator = self._build_feature_map_generator(
+ image_features=image_features,
+ depth=128,
+ use_native_resize_op=use_native_resize_op)
+ def graph_fn():
+ return feature_map_generator(image_features)
+ self.execute(graph_fn, [], g)
expected_slim_variables = set([
'projection_1/weights',
'projection_1/biases',
@@ -709,33 +656,34 @@ class FPNFeatureMapGeneratorTest(tf.test.TestCase, parameterized.TestCase):
'FeatureMaps/top_down/smoothing_3_conv/kernel',
'FeatureMaps/top_down/smoothing_3_conv/bias'
])
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- sess.run(feature_maps)
+
+ if tf_version.is_tf2():
actual_variable_set = set(
- [var.op.name for var in tf.trainable_variables()])
- if use_keras:
- self.assertSetEqual(expected_keras_variables, actual_variable_set)
- else:
- self.assertSetEqual(expected_slim_variables, actual_variable_set)
+ [var.name.split(':')[0] for var in feature_map_generator.variables])
+ self.assertSetEqual(expected_keras_variables, actual_variable_set)
+ else:
+ with g.as_default():
+ actual_variable_set = set(
+ [var.op.name for var in tf.trainable_variables()])
+ self.assertSetEqual(expected_slim_variables, actual_variable_set)
def test_get_expected_variable_names_with_depthwise(
- self, use_native_resize_op, use_keras):
- image_features = [
- ('block2', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
- ('block3', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)),
- ('block4', tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)),
- ('block5', tf.random_uniform([4, 1, 1, 256], dtype=tf.float32))
- ]
- feature_map_generator = self._build_feature_map_generator(
- image_features=image_features,
- depth=128,
- use_keras=use_keras,
- use_depthwise=True,
- use_native_resize_op=use_native_resize_op)
- feature_maps = feature_map_generator(image_features)
-
+ self, use_native_resize_op):
+ with test_utils.GraphContextOrNone() as g:
+ image_features = [
+ ('block2', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
+ ('block3', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)),
+ ('block4', tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)),
+ ('block5', tf.random_uniform([4, 1, 1, 256], dtype=tf.float32))
+ ]
+ feature_map_generator = self._build_feature_map_generator(
+ image_features=image_features,
+ depth=128,
+ use_depthwise=True,
+ use_native_resize_op=use_native_resize_op)
+ def graph_fn():
+ return feature_map_generator(image_features)
+ self.execute(graph_fn, [], g)
expected_slim_variables = set([
'projection_1/weights',
'projection_1/biases',
@@ -775,16 +723,16 @@ class FPNFeatureMapGeneratorTest(tf.test.TestCase, parameterized.TestCase):
'FeatureMaps/top_down/smoothing_3_depthwise_conv/pointwise_kernel',
'FeatureMaps/top_down/smoothing_3_depthwise_conv/bias'
])
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- sess.run(feature_maps)
+
+ if tf_version.is_tf2():
actual_variable_set = set(
- [var.op.name for var in tf.trainable_variables()])
- if use_keras:
- self.assertSetEqual(expected_keras_variables, actual_variable_set)
- else:
- self.assertSetEqual(expected_slim_variables, actual_variable_set)
+ [var.name.split(':')[0] for var in feature_map_generator.variables])
+ self.assertSetEqual(expected_keras_variables, actual_variable_set)
+ else:
+ with g.as_default():
+ actual_variable_set = set(
+ [var.op.name for var in tf.trainable_variables()])
+ self.assertSetEqual(expected_slim_variables, actual_variable_set)
class GetDepthFunctionTest(tf.test.TestCase):
@@ -804,6 +752,7 @@ class GetDepthFunctionTest(tf.test.TestCase):
{'replace_pool_with_conv': False},
{'replace_pool_with_conv': True},
)
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class PoolingPyramidFeatureMapGeneratorTest(tf.test.TestCase):
def test_get_expected_feature_map_shapes(self, replace_pool_with_conv):
diff --git a/research/object_detection/models/keras_models/convert_keras_models.py b/research/object_detection/models/keras_models/convert_keras_models.py
new file mode 100644
index 0000000000000000000000000000000000000000..a34af981b37032115bf0c3e957e0f4c216504d4c
--- /dev/null
+++ b/research/object_detection/models/keras_models/convert_keras_models.py
@@ -0,0 +1,85 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Write keras weights into a tensorflow checkpoint.
+
+The imagenet weights in `keras.applications` are downloaded from github.
+This script converts them into the tensorflow checkpoint format and stores them
+on disk where they can be easily accessible during training.
+"""
+
+from __future__ import print_function
+
+import os
+
+from absl import app
+import numpy as np
+import tensorflow.compat.v1 as tf
+
+FLAGS = tf.flags.FLAGS
+
+
+tf.flags.DEFINE_string('model', 'resnet_v2_101',
+ 'The model to load. The following are supported: '
+ '"resnet_v1_50", "resnet_v1_101", "resnet_v2_50", '
+ '"resnet_v2_101"')
+tf.flags.DEFINE_string('output_path', None,
+ 'The directory to output weights in.')
+tf.flags.DEFINE_boolean('verify_weights', True,
+ ('Verify the weights are loaded correctly by making '
+ 'sure the predictions are the same before and after '
+ 'saving.'))
+
+
+def init_model(name):
+  """Creates an ImageNet-pretrained Keras ResNet for the model called `name`.
+
+  Raises ValueError (reporting `name`) when `name` is not a supported model.
+  """
+  if name == 'resnet_v1_50':
+    return tf.keras.applications.ResNet50(weights='imagenet')
+  if name == 'resnet_v1_101':
+    return tf.keras.applications.ResNet101(weights='imagenet')
+  if name == 'resnet_v2_50':
+    return tf.keras.applications.ResNet50V2(weights='imagenet')
+  if name == 'resnet_v2_101':
+    return tf.keras.applications.ResNet101V2(weights='imagenet')
+  raise ValueError('Model {} not supported'.format(name))
+
+
+def main(_):
+  """Saves the selected model's weights as a checkpoint; optionally verifies."""
+  model = init_model(FLAGS.model)
+  path = os.path.join(FLAGS.output_path, FLAGS.model)
+  tf.gfile.MakeDirs(path)
+  weights_path = os.path.join(path, 'weights')
+  ckpt = tf.train.Checkpoint(feature_extractor=model)
+  saved_path = ckpt.save(weights_path)
+
+  if FLAGS.verify_weights:
+    imgs = np.random.randn(1, 224, 224, 3).astype(np.float32)
+    keras_preds = model(imgs)
+    # `ckpt` still tracks the first model, so restore through a checkpoint
+    # bound to the fresh model and fail if any of its objects went unmatched.
+    model = init_model(FLAGS.model)
+    restored = tf.train.Checkpoint(feature_extractor=model)
+    restored.restore(saved_path).assert_existing_objects_matched()
+    loaded_weights_pred = model(imgs).numpy()
+    if not np.all(np.isclose(keras_preds, loaded_weights_pred)):
+      raise RuntimeError('The model was not saved correctly.')
+
+
+if __name__ == '__main__':
+ tf.enable_v2_behavior()
+ app.run(main)
diff --git a/research/object_detection/models/keras_models/hourglass_network.py b/research/object_detection/models/keras_models/hourglass_network.py
index d216b1669e3864083ff477a395f48c596172e356..09fb8ed4f4fb6f4b2712d8403ba1b94985ad25ad 100644
--- a/research/object_detection/models/keras_models/hourglass_network.py
+++ b/research/object_detection/models/keras_models/hourglass_network.py
@@ -43,6 +43,15 @@ def _get_padding_for_kernel_size(kernel_size):
kernel_size))
+def batchnorm():
+  """Returns a SyncBatchNormalization layer, else plain BatchNormalization."""
+  shared = dict(name='batchnorm', epsilon=1e-5, momentum=0.1)
+  try:
+    return tf.keras.layers.experimental.SyncBatchNormalization(**shared)
+  except AttributeError:  # presumably a TF build without SyncBatchNormalization
+    return tf.keras.layers.BatchNormalization(fused=BATCH_NORM_FUSED, **shared)
+
+
class ConvolutionalBlock(tf.keras.layers.Layer):
"""Block that aggregates Convolution + Norm layer + ReLU."""
@@ -73,8 +82,7 @@ class ConvolutionalBlock(tf.keras.layers.Layer):
filters=out_channels, kernel_size=kernel_size, use_bias=False,
strides=stride, padding=padding)
- self.norm = tf.keras.layers.experimental.SyncBatchNormalization(
- name='batchnorm', epsilon=1e-5, momentum=0.1)
+ self.norm = batchnorm()
if relu:
self.relu = tf.keras.layers.ReLU()
@@ -124,8 +132,7 @@ class ResidualBlock(tf.keras.layers.Layer):
self.conv = tf.keras.layers.Conv2D(
filters=out_channels, kernel_size=kernel_size, use_bias=False,
strides=1, padding=padding)
- self.norm = tf.keras.layers.experimental.SyncBatchNormalization(
- name='batchnorm', epsilon=1e-5, momentum=0.1)
+ self.norm = batchnorm()
if skip_conv:
self.skip = SkipConvolution(out_channels=out_channels,
diff --git a/research/object_detection/models/keras_models/hourglass_network_test.py b/research/object_detection/models/keras_models/hourglass_network_tf2_test.py
similarity index 96%
rename from research/object_detection/models/keras_models/hourglass_network_test.py
rename to research/object_detection/models/keras_models/hourglass_network_tf2_test.py
index 2e05eb9924b19437b91f45c4454f72df7f0b1318..d90b950ecd4102a260643391de6a4475ed959c0f 100644
--- a/research/object_detection/models/keras_models/hourglass_network_test.py
+++ b/research/object_detection/models/keras_models/hourglass_network_tf2_test.py
@@ -13,14 +13,16 @@
# limitations under the License.
# ==============================================================================
"""Testing the Hourglass network."""
-
+import unittest
from absl.testing import parameterized
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models.keras_models import hourglass_network as hourglass
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class HourglassFeatureExtractorTest(tf.test.TestCase, parameterized.TestCase):
def test_identity_layer(self):
@@ -95,5 +97,4 @@ class HourglassFeatureExtractorTest(tf.test.TestCase, parameterized.TestCase):
if __name__ == '__main__':
- tf.enable_v2_behavior()
tf.test.main()
diff --git a/research/object_detection/models/keras_models/inception_resnet_v2_test.py b/research/object_detection/models/keras_models/inception_resnet_v2_tf2_test.py
similarity index 97%
rename from research/object_detection/models/keras_models/inception_resnet_v2_test.py
rename to research/object_detection/models/keras_models/inception_resnet_v2_tf2_test.py
index 5706e679c74cc7d30617940325597489c9a89245..4cbcc54ad66985920e7739888b3542b6a1e48bca 100644
--- a/research/object_detection/models/keras_models/inception_resnet_v2_test.py
+++ b/research/object_detection/models/keras_models/inception_resnet_v2_tf2_test.py
@@ -30,13 +30,14 @@ consistent.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-
+import unittest
import numpy as np
from six.moves import zip
import tensorflow.compat.v1 as tf
from object_detection.models.keras_models import inception_resnet_v2
from object_detection.utils import test_case
+from object_detection.utils import tf_version
_KERAS_TO_SLIM_ENDPOINT_NAMES = {
'activation': 'Conv2d_1a_3x3',
@@ -100,6 +101,7 @@ _NUM_CHANNELS = 3
_BATCH_SIZE = 2
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class InceptionResnetV2Test(test_case.TestCase):
def _create_application_with_layer_outputs(
@@ -166,8 +168,7 @@ class InceptionResnetV2Test(test_case.TestCase):
model = self._create_application_with_layer_outputs(
layer_names=layer_names,
batchnorm_training=False)
- preprocessed_inputs = tf.placeholder(
- tf.float32, (4, None, None, _NUM_CHANNELS))
+ preprocessed_inputs = tf.random.uniform([4, 40, 40, _NUM_CHANNELS])
model(preprocessed_inputs)
return model.variables
diff --git a/research/object_detection/models/keras_models/mobilenet_v1_test.py b/research/object_detection/models/keras_models/mobilenet_v1_tf2_test.py
similarity index 85%
rename from research/object_detection/models/keras_models/mobilenet_v1_test.py
rename to research/object_detection/models/keras_models/mobilenet_v1_tf2_test.py
index 72cc1f144dd0a914a4aaf388b90b339c13bd65c5..7e46999d9dfd2fc4ddcd2c432f5ecc2a07f3a9eb 100644
--- a/research/object_detection/models/keras_models/mobilenet_v1_test.py
+++ b/research/object_detection/models/keras_models/mobilenet_v1_tf2_test.py
@@ -29,7 +29,7 @@ consistent.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-
+import unittest
import numpy as np
from six.moves import zip
import tensorflow.compat.v1 as tf
@@ -42,6 +42,7 @@ from object_detection.models.keras_models import model_utils
from object_detection.models.keras_models import test_utils
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
+from object_detection.utils import tf_version
_KERAS_LAYERS_TO_CHECK = [
'conv1_relu',
@@ -64,6 +65,7 @@ _NUM_CHANNELS = 3
_BATCH_SIZE = 2
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class MobilenetV1Test(test_case.TestCase):
def _build_conv_hyperparams(self):
@@ -118,19 +120,17 @@ class MobilenetV1Test(test_case.TestCase):
self, image_height, image_width, depth_multiplier,
expected_feature_map_shape, use_explicit_padding=False, min_depth=8,
layer_names=None, conv_defs=None):
- def graph_fn(image_tensor):
- model = self._create_application_with_layer_outputs(
- layer_names=layer_names,
- batchnorm_training=False,
- use_explicit_padding=use_explicit_padding,
- min_depth=min_depth,
- alpha=depth_multiplier,
- conv_defs=conv_defs)
- return model(image_tensor)
+ model = self._create_application_with_layer_outputs(
+ layer_names=layer_names,
+ batchnorm_training=False,
+ use_explicit_padding=use_explicit_padding,
+ min_depth=min_depth,
+ alpha=depth_multiplier,
+ conv_defs=conv_defs)
image_tensor = np.random.rand(_BATCH_SIZE, image_height, image_width,
_NUM_CHANNELS).astype(np.float32)
- feature_maps = self.execute(graph_fn, [image_tensor])
+ feature_maps = model(image_tensor)
for feature_map, expected_shape in zip(feature_maps,
expected_feature_map_shape):
@@ -140,36 +140,29 @@ class MobilenetV1Test(test_case.TestCase):
self, image_height, image_width, depth_multiplier,
expected_feature_map_shape, use_explicit_padding=False, min_depth=8,
layer_names=None):
- def graph_fn(image_height, image_width):
- image_tensor = tf.random_uniform([_BATCH_SIZE, image_height, image_width,
- _NUM_CHANNELS], dtype=tf.float32)
- model = self._create_application_with_layer_outputs(
- layer_names=layer_names,
- batchnorm_training=False,
- use_explicit_padding=use_explicit_padding,
- alpha=depth_multiplier)
- return model(image_tensor)
+ image_tensor = tf.random_uniform([_BATCH_SIZE, image_height, image_width,
+ _NUM_CHANNELS], dtype=tf.float32)
+ model = self._create_application_with_layer_outputs(
+ layer_names=layer_names,
+ batchnorm_training=False,
+ use_explicit_padding=use_explicit_padding,
+ alpha=depth_multiplier)
- feature_maps = self.execute_cpu(graph_fn, [
- np.array(image_height, dtype=np.int32),
- np.array(image_width, dtype=np.int32)
- ])
+ feature_maps = model(image_tensor)
for feature_map, expected_shape in zip(feature_maps,
expected_feature_map_shape):
self.assertAllEqual(feature_map.shape, expected_shape)
def _get_variables(self, depth_multiplier, layer_names=None):
- g = tf.Graph()
- with g.as_default():
- preprocessed_inputs = tf.placeholder(
- tf.float32, (4, None, None, _NUM_CHANNELS))
- model = self._create_application_with_layer_outputs(
- layer_names=layer_names,
- batchnorm_training=False, use_explicit_padding=False,
- alpha=depth_multiplier)
- model(preprocessed_inputs)
- return g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
+ tf.keras.backend.clear_session()
+ model = self._create_application_with_layer_outputs(
+ layer_names=layer_names,
+ batchnorm_training=False, use_explicit_padding=False,
+ alpha=depth_multiplier)
+ preprocessed_inputs = tf.random.uniform([2, 40, 40, 3])
+ model(preprocessed_inputs)
+ return model.variables
def test_returns_correct_shapes_128(self):
image_height = 128
diff --git a/research/object_detection/models/keras_models/mobilenet_v2_test.py b/research/object_detection/models/keras_models/mobilenet_v2_tf2_test.py
similarity index 84%
rename from research/object_detection/models/keras_models/mobilenet_v2_test.py
rename to research/object_detection/models/keras_models/mobilenet_v2_tf2_test.py
index cfdd119781dc3d73efdb457dc55949506cdaf1bb..2a53a9b63f28522197bc3daab29dab3a56dfb994 100644
--- a/research/object_detection/models/keras_models/mobilenet_v2_test.py
+++ b/research/object_detection/models/keras_models/mobilenet_v2_tf2_test.py
@@ -18,7 +18,7 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-
+import unittest
import numpy as np
from six.moves import zip
import tensorflow.compat.v1 as tf
@@ -31,6 +31,7 @@ from object_detection.models.keras_models import model_utils
from object_detection.models.keras_models import test_utils
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
+from object_detection.utils import tf_version
_layers_to_check = [
'Conv1_relu',
@@ -53,6 +54,7 @@ _layers_to_check = [
'out_relu']
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class MobilenetV2Test(test_case.TestCase):
def _build_conv_hyperparams(self):
@@ -86,6 +88,8 @@ class MobilenetV2Test(test_case.TestCase):
min_depth=None,
conv_defs=None):
"""Constructs Keras mobilenetv2 that extracts intermediate layer outputs."""
+ # Have to clear the Keras backend to ensure isolation in layer naming
+ tf.keras.backend.clear_session()
if not layer_names:
layer_names = _layers_to_check
full_model = mobilenet_v2.mobilenet_v2(
@@ -107,19 +111,17 @@ class MobilenetV2Test(test_case.TestCase):
self, batch_size, image_height, image_width, depth_multiplier,
expected_feature_map_shapes, use_explicit_padding=False, min_depth=None,
layer_names=None, conv_defs=None):
- def graph_fn(image_tensor):
- model = self._create_application_with_layer_outputs(
- layer_names=layer_names,
- batchnorm_training=False,
- use_explicit_padding=use_explicit_padding,
- min_depth=min_depth,
- alpha=depth_multiplier,
- conv_defs=conv_defs)
- return model(image_tensor)
+ model = self._create_application_with_layer_outputs(
+ layer_names=layer_names,
+ batchnorm_training=False,
+ use_explicit_padding=use_explicit_padding,
+ min_depth=min_depth,
+ alpha=depth_multiplier,
+ conv_defs=conv_defs)
image_tensor = np.random.rand(batch_size, image_height, image_width,
3).astype(np.float32)
- feature_maps = self.execute(graph_fn, [image_tensor])
+ feature_maps = model([image_tensor])
for feature_map, expected_shape in zip(feature_maps,
expected_feature_map_shapes):
@@ -129,34 +131,30 @@ class MobilenetV2Test(test_case.TestCase):
self, batch_size, image_height, image_width, depth_multiplier,
expected_feature_map_shapes, use_explicit_padding=False,
layer_names=None):
- def graph_fn(image_height, image_width):
- image_tensor = tf.random_uniform([batch_size, image_height, image_width,
- 3], dtype=tf.float32)
- model = self._create_application_with_layer_outputs(
- layer_names=layer_names,
- batchnorm_training=False, use_explicit_padding=use_explicit_padding,
- alpha=depth_multiplier)
- return model(image_tensor)
-
- feature_maps = self.execute_cpu(graph_fn, [
- np.array(image_height, dtype=np.int32),
- np.array(image_width, dtype=np.int32)
- ])
-
+ height = tf.random.uniform([], minval=image_height, maxval=image_height+1,
+ dtype=tf.int32)
+ width = tf.random.uniform([], minval=image_width, maxval=image_width+1,
+ dtype=tf.int32)
+ image_tensor = tf.random.uniform([batch_size, height, width,
+ 3], dtype=tf.float32)
+ model = self._create_application_with_layer_outputs(
+ layer_names=layer_names,
+ batchnorm_training=False, use_explicit_padding=use_explicit_padding,
+ alpha=depth_multiplier)
+ feature_maps = model(image_tensor)
for feature_map, expected_shape in zip(feature_maps,
expected_feature_map_shapes):
self.assertAllEqual(feature_map.shape, expected_shape)
def _get_variables(self, depth_multiplier, layer_names=None):
- g = tf.Graph()
- with g.as_default():
- preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
- model = self._create_application_with_layer_outputs(
- layer_names=layer_names,
- batchnorm_training=False, use_explicit_padding=False,
- alpha=depth_multiplier)
- model(preprocessed_inputs)
- return g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
+ tf.keras.backend.clear_session()
+ model = self._create_application_with_layer_outputs(
+ layer_names=layer_names,
+ batchnorm_training=False, use_explicit_padding=False,
+ alpha=depth_multiplier)
+ preprocessed_inputs = tf.random.uniform([2, 40, 40, 3])
+ model(preprocessed_inputs)
+ return model.variables
def test_returns_correct_shapes_128(self):
image_height = 128
diff --git a/research/object_detection/models/keras_models/resnet_v1_test.py b/research/object_detection/models/keras_models/resnet_v1_tf2_test.py
similarity index 97%
rename from research/object_detection/models/keras_models/resnet_v1_test.py
rename to research/object_detection/models/keras_models/resnet_v1_tf2_test.py
index 7b0c2a8e049e01030f95f7e93aa27a1538d47830..71cc5f22bd994b6432957bf5b34837f829c9b8da 100644
--- a/research/object_detection/models/keras_models/resnet_v1_test.py
+++ b/research/object_detection/models/keras_models/resnet_v1_tf2_test.py
@@ -19,7 +19,7 @@ object detection. To verify the consistency of the two models, we compare:
1. Output shape of each layer given different inputs.
2. Number of global variables.
"""
-
+import unittest
import numpy as np
from six.moves import zip
import tensorflow.compat.v1 as tf
@@ -30,6 +30,7 @@ from object_detection.builders import hyperparams_builder
from object_detection.models.keras_models import resnet_v1
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
+from object_detection.utils import tf_version
_EXPECTED_SHAPES_224_RESNET50 = {
'conv2_block3_out': (4, 56, 56, 256),
@@ -65,6 +66,7 @@ _NUM_CHANNELS = 3
_BATCH_SIZE = 4
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class ResnetV1Test(test_case.TestCase):
def _build_conv_hyperparams(self):
@@ -146,8 +148,7 @@ class ResnetV1Test(test_case.TestCase):
tf.keras.backend.clear_session()
model = self._create_application_with_layer_outputs(
model_index, batchnorm_training=False)
- preprocessed_inputs = tf.placeholder(tf.float32,
- (4, None, None, _NUM_CHANNELS))
+ preprocessed_inputs = tf.random.uniform([2, 40, 40, _NUM_CHANNELS])
model(preprocessed_inputs)
return model.variables
diff --git a/research/object_detection/models/ssd_feature_extractor_test.py b/research/object_detection/models/ssd_feature_extractor_test.py
index 913a9f6a51330a2c3a1ca60a35f04b3f98c38e18..29c43e376c6167b61a256eb0812ee4d3bcee3ed5 100644
--- a/research/object_detection/models/ssd_feature_extractor_test.py
+++ b/research/object_detection/models/ssd_feature_extractor_test.py
@@ -31,6 +31,7 @@ from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
+from object_detection.utils import test_utils
class SsdFeatureExtractorTestBase(test_case.TestCase):
@@ -89,14 +90,13 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
"""
pass
- def _extract_features(self,
- image_tensor,
- depth_multiplier,
- pad_to_multiple,
- use_explicit_padding=False,
- use_depthwise=False,
- num_layers=6,
- use_keras=False):
+ def _create_features(self,
+ depth_multiplier,
+ pad_to_multiple,
+ use_explicit_padding=False,
+ use_depthwise=False,
+ num_layers=6,
+ use_keras=False):
kwargs = {}
if use_explicit_padding:
kwargs.update({'use_explicit_padding': use_explicit_padding})
@@ -110,6 +110,12 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
depth_multiplier,
pad_to_multiple,
**kwargs)
+ return feature_extractor
+
+ def _extract_features(self,
+ image_tensor,
+ feature_extractor,
+ use_keras=False):
if use_keras:
feature_maps = feature_extractor(image_tensor)
else:
@@ -127,10 +133,8 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
num_layers=6,
use_keras=False,
use_depthwise=False):
-
- def graph_fn(image_tensor):
- return self._extract_features(
- image_tensor,
+ with test_utils.GraphContextOrNone() as g:
+ feature_extractor = self._create_features(
depth_multiplier,
pad_to_multiple,
use_explicit_padding=use_explicit_padding,
@@ -138,9 +142,15 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
use_keras=use_keras,
use_depthwise=use_depthwise)
+ def graph_fn(image_tensor):
+ return self._extract_features(
+ image_tensor,
+ feature_extractor,
+ use_keras=use_keras)
+
image_tensor = np.random.rand(batch_size, image_height, image_width,
3).astype(np.float32)
- feature_maps = self.execute(graph_fn, [image_tensor])
+ feature_maps = self.execute(graph_fn, [image_tensor], graph=g)
for feature_map, expected_shape in zip(
feature_maps, expected_feature_map_shapes):
self.assertAllEqual(feature_map.shape, expected_shape)
@@ -158,11 +168,8 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
use_keras=False,
use_depthwise=False):
- def graph_fn(image_height, image_width):
- image_tensor = tf.random_uniform([batch_size, image_height, image_width,
- 3], dtype=tf.float32)
- return self._extract_features(
- image_tensor,
+ with test_utils.GraphContextOrNone() as g:
+ feature_extractor = self._create_features(
depth_multiplier,
pad_to_multiple,
use_explicit_padding=use_explicit_padding,
@@ -170,10 +177,18 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
use_keras=use_keras,
use_depthwise=use_depthwise)
+ def graph_fn(image_height, image_width):
+ image_tensor = tf.random_uniform([batch_size, image_height, image_width,
+ 3], dtype=tf.float32)
+ return self._extract_features(
+ image_tensor,
+ feature_extractor,
+ use_keras=use_keras)
+
feature_maps = self.execute_cpu(graph_fn, [
np.array(image_height, dtype=np.int32),
np.array(image_width, dtype=np.int32)
- ])
+ ], graph=g)
for feature_map, expected_shape in zip(
feature_maps, expected_feature_map_shapes):
self.assertAllEqual(feature_map.shape, expected_shape)
@@ -186,19 +201,33 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
pad_to_multiple,
use_keras=False,
use_depthwise=False):
- preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
- feature_maps = self._extract_features(
- preprocessed_inputs,
- depth_multiplier,
- pad_to_multiple,
- use_keras=use_keras,
- use_depthwise=use_depthwise)
- test_preprocessed_image = np.random.rand(4, image_height, image_width, 3)
- with self.test_session() as sess:
- sess.run(tf.global_variables_initializer())
+
+ with test_utils.GraphContextOrNone() as g:
+ batch = 4
+ width = tf.random.uniform([], minval=image_width, maxval=image_width+1,
+ dtype=tf.int32)
+ height = tf.random.uniform([], minval=image_height, maxval=image_height+1,
+ dtype=tf.int32)
+ shape = tf.stack([batch, height, width, 3])
+ preprocessed_inputs = tf.random.uniform(shape)
+ feature_extractor = self._create_features(
+ depth_multiplier,
+ pad_to_multiple,
+ use_keras=use_keras,
+ use_depthwise=use_depthwise)
+
+ def graph_fn():
+ feature_maps = self._extract_features(
+ preprocessed_inputs,
+ feature_extractor,
+ use_keras=use_keras)
+ return feature_maps
+ if self.is_tf2():
+ with self.assertRaises(ValueError):
+ self.execute_cpu(graph_fn, [], graph=g)
+ else:
with self.assertRaises(tf.errors.InvalidArgumentError):
- sess.run(feature_maps,
- feed_dict={preprocessed_inputs: test_preprocessed_image})
+ self.execute_cpu(graph_fn, [], graph=g)
def check_feature_extractor_variables_under_scope(self,
depth_multiplier,
@@ -221,11 +250,14 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
use_depthwise=False):
g = tf.Graph()
with g.as_default():
- preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
- self._extract_features(
- preprocessed_inputs,
+ feature_extractor = self._create_features(
depth_multiplier,
pad_to_multiple,
use_keras=use_keras,
use_depthwise=use_depthwise)
+ preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
+ self._extract_features(
+ preprocessed_inputs,
+ feature_extractor,
+ use_keras=use_keras)
return g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
diff --git a/research/object_detection/models/ssd_inception_v2_feature_extractor_test.py b/research/object_detection/models/ssd_inception_v2_feature_extractor_tf1_test.py
similarity index 98%
rename from research/object_detection/models/ssd_inception_v2_feature_extractor_test.py
rename to research/object_detection/models/ssd_inception_v2_feature_extractor_tf1_test.py
index 34921609c25849e704c21df5fcaccaf92290ca5e..1e33ed70ed45cef900d9f615cba9a5f196d36e23 100644
--- a/research/object_detection/models/ssd_inception_v2_feature_extractor_test.py
+++ b/research/object_detection/models/ssd_inception_v2_feature_extractor_tf1_test.py
@@ -14,13 +14,16 @@
# ==============================================================================
"""Tests for object_detection.models.ssd_inception_v2_feature_extractor."""
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_inception_v2_feature_extractor
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class SsdInceptionV2FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
diff --git a/research/object_detection/models/ssd_inception_v3_feature_extractor_test.py b/research/object_detection/models/ssd_inception_v3_feature_extractor_tf1_test.py
similarity index 98%
rename from research/object_detection/models/ssd_inception_v3_feature_extractor_test.py
rename to research/object_detection/models/ssd_inception_v3_feature_extractor_tf1_test.py
index 1e706c1e8c505588d76f712ad07f0720e97163fb..a0cbb451586b865cc448c292231a21dc468110a4 100644
--- a/research/object_detection/models/ssd_inception_v3_feature_extractor_test.py
+++ b/research/object_detection/models/ssd_inception_v3_feature_extractor_tf1_test.py
@@ -14,13 +14,16 @@
# ==============================================================================
"""Tests for object_detection.models.ssd_inception_v3_feature_extractor."""
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_inception_v3_feature_extractor
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class SsdInceptionV3FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
diff --git a/research/object_detection/models/ssd_mobiledet_feature_extractor.py b/research/object_detection/models/ssd_mobiledet_feature_extractor.py
index 33d7e053b62352d3c72bc98a3e82f2028d4e9cee..019d7543bb7b271d6158b6b30fbb69a7db5a99a8 100644
--- a/research/object_detection/models/ssd_mobiledet_feature_extractor.py
+++ b/research/object_detection/models/ssd_mobiledet_feature_extractor.py
@@ -290,6 +290,72 @@ def mobiledet_edgetpu_backbone(h, multiplier=1.0):
return endpoints
+def mobiledet_gpu_backbone(h, multiplier=1.0):
+ """Build a MobileDet GPU backbone."""
+
+ def _scale(filters):
+ return _scale_filters(filters, multiplier)
+
+ ibn = functools.partial(_inverted_bottleneck, activation_fn=tf.nn.relu6)
+ fused = functools.partial(_fused_conv, activation_fn=tf.nn.relu6)
+ tucker = functools.partial(_tucker_conv, activation_fn=tf.nn.relu6)
+
+ endpoints = {}
+ # block 0
+ h = _conv(h, _scale(32), 3, strides=2, activation_fn=tf.nn.relu6)
+
+ # block 1
+ h = tucker(
+ h,
+ _scale(16),
+ input_rank_ratio=0.25,
+ output_rank_ratio=0.25,
+ residual=False)
+ endpoints['C1'] = h
+
+ # block 2
+ h = fused(h, _scale(32), expansion=8, strides=2, residual=False)
+ h = tucker(h, _scale(32), input_rank_ratio=0.25, output_rank_ratio=0.25)
+ h = tucker(h, _scale(32), input_rank_ratio=0.25, output_rank_ratio=0.25)
+ h = tucker(h, _scale(32), input_rank_ratio=0.25, output_rank_ratio=0.25)
+ endpoints['C2'] = h
+
+ # block 3
+ h = fused(
+ h, _scale(64), expansion=8, kernel_size=3, strides=2, residual=False)
+ h = fused(h, _scale(64), expansion=8)
+ h = fused(h, _scale(64), expansion=8)
+ h = fused(h, _scale(64), expansion=4)
+ endpoints['C3'] = h
+
+ # block 4
+ h = fused(
+ h, _scale(128), expansion=8, kernel_size=3, strides=2, residual=False)
+ h = fused(h, _scale(128), expansion=4)
+ h = fused(h, _scale(128), expansion=4)
+ h = fused(h, _scale(128), expansion=4)
+
+ # block 5
+ h = fused(
+ h, _scale(128), expansion=8, kernel_size=3, strides=1, residual=False)
+ h = fused(h, _scale(128), expansion=8)
+ h = fused(h, _scale(128), expansion=8)
+ h = fused(h, _scale(128), expansion=8)
+ endpoints['C4'] = h
+
+ # block 6
+ h = fused(
+ h, _scale(128), expansion=4, kernel_size=3, strides=2, residual=False)
+ h = fused(h, _scale(128), expansion=4)
+ h = fused(h, _scale(128), expansion=4)
+ h = fused(h, _scale(128), expansion=4)
+
+ # block 7
+ h = ibn(h, _scale(384), expansion=8, kernel_size=3, strides=1, residual=False)
+ endpoints['C5'] = h
+ return endpoints
+
+
class SSDMobileDetFeatureExtractorBase(ssd_meta_arch.SSDFeatureExtractor):
"""Base class of SSD feature extractor using MobileDet features."""
@@ -490,3 +556,31 @@ class SSDMobileDetEdgeTPUFeatureExtractor(SSDMobileDetFeatureExtractorBase):
use_depthwise=use_depthwise,
override_base_feature_extractor_hyperparams=override_base_feature_extractor_hyperparams,
scope_name=scope_name)
+
+
+class SSDMobileDetGPUFeatureExtractor(SSDMobileDetFeatureExtractorBase):
+ """MobileDet-GPU feature extractor."""
+
+ def __init__(self,
+ is_training,
+ depth_multiplier,
+ min_depth,
+ pad_to_multiple,
+ conv_hyperparams_fn,
+ reuse_weights=None,
+ use_explicit_padding=False,
+ use_depthwise=False,
+ override_base_feature_extractor_hyperparams=False,
+ scope_name='MobileDetGPU'):
+ super(SSDMobileDetGPUFeatureExtractor, self).__init__(
+ backbone_fn=mobiledet_gpu_backbone,
+ is_training=is_training,
+ depth_multiplier=depth_multiplier,
+ min_depth=min_depth,
+ pad_to_multiple=pad_to_multiple,
+ conv_hyperparams_fn=conv_hyperparams_fn,
+ reuse_weights=reuse_weights,
+ use_explicit_padding=use_explicit_padding,
+ use_depthwise=use_depthwise,
+ override_base_feature_extractor_hyperparams=override_base_feature_extractor_hyperparams,
+ scope_name=scope_name)
diff --git a/research/object_detection/models/ssd_mobiledet_feature_extractor_test.py b/research/object_detection/models/ssd_mobiledet_feature_extractor_tf1_test.py
similarity index 86%
rename from research/object_detection/models/ssd_mobiledet_feature_extractor_test.py
rename to research/object_detection/models/ssd_mobiledet_feature_extractor_tf1_test.py
index c2c1ef6925373d3a36a7166a2df3c11b548519fb..2af37554b55f68e85ddbe7587b86015e10ac65e8 100644
--- a/research/object_detection/models/ssd_mobiledet_feature_extractor_test.py
+++ b/research/object_detection/models/ssd_mobiledet_feature_extractor_tf1_test.py
@@ -13,14 +13,20 @@
# limitations under the License.
# ==============================================================================
"""Tests for ssd_mobiledet_feature_extractor."""
-
+import unittest
import tensorflow.compat.v1 as tf
-from tensorflow.contrib import quantize as contrib_quantize
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_mobiledet_feature_extractor
+from object_detection.utils import tf_version
+
+try:
+ from tensorflow.contrib import quantize as contrib_quantize # pylint: disable=g-import-not-at-top
+except: # pylint: disable=bare-except
+ pass
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class SSDMobileDetFeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
@@ -105,6 +111,19 @@ class SSDMobileDetFeatureExtractorTest(
for expected_shape, x in zip(expected_feature_map_shapes, feature_maps):
self.assertTrue(x.shape.is_compatible_with(expected_shape))
+ def test_mobiledet_gpu_returns_correct_shapes(self):
+ expected_feature_map_shapes = [(2, 40, 20, 128), (2, 20, 10, 384),
+ (2, 10, 5, 512), (2, 5, 3, 256),
+ (2, 3, 2, 256), (2, 2, 1, 128)]
+ feature_extractor = self._create_feature_extractor(
+ ssd_mobiledet_feature_extractor.SSDMobileDetGPUFeatureExtractor)
+ image = tf.random.normal((2, 640, 320, 3))
+ feature_maps = feature_extractor.extract_features(image)
+
+ self.assertEqual(len(expected_feature_map_shapes), len(feature_maps))
+ for expected_shape, x in zip(expected_feature_map_shapes, feature_maps):
+ self.assertTrue(x.shape.is_compatible_with(expected_shape))
+
def _check_quantization(self, model_fn):
checkpoint_dir = self.get_temp_dir()
diff --git a/research/object_detection/models/ssd_mobilenet_edgetpu_feature_extractor_test.py b/research/object_detection/models/ssd_mobilenet_edgetpu_feature_extractor_tf1_test.py
similarity index 94%
rename from research/object_detection/models/ssd_mobilenet_edgetpu_feature_extractor_test.py
rename to research/object_detection/models/ssd_mobilenet_edgetpu_feature_extractor_tf1_test.py
index 186122028d20c0e65f9c7285d09a19e55115888f..841fe5a148864a0d62b52fd8f6f3e0059670dd57 100644
--- a/research/object_detection/models/ssd_mobilenet_edgetpu_feature_extractor_test.py
+++ b/research/object_detection/models/ssd_mobilenet_edgetpu_feature_extractor_tf1_test.py
@@ -13,13 +13,15 @@
# limitations under the License.
# ==============================================================================
"""Tests for ssd_mobilenet_edgetpu_feature_extractor."""
-
+import unittest
import tensorflow.compat.v1 as tf
from object_detection.models import ssd_mobilenet_edgetpu_feature_extractor
from object_detection.models import ssd_mobilenet_edgetpu_feature_extractor_testbase
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class SsdMobilenetEdgeTPUFeatureExtractorTest(
ssd_mobilenet_edgetpu_feature_extractor_testbase
._SsdMobilenetEdgeTPUFeatureExtractorTestBase):
diff --git a/research/object_detection/models/ssd_mobilenet_v1_feature_extractor_test.py b/research/object_detection/models/ssd_mobilenet_v1_feature_extractor_tf1_test.py
similarity index 77%
rename from research/object_detection/models/ssd_mobilenet_v1_feature_extractor_test.py
rename to research/object_detection/models/ssd_mobilenet_v1_feature_extractor_tf1_test.py
index eaf8776afaa89c2d729a1de3cf2b65d67f859a19..2f1d4839693c891b550e04cdaff391219c4b8cf1 100644
--- a/research/object_detection/models/ssd_mobilenet_v1_feature_extractor_test.py
+++ b/research/object_detection/models/ssd_mobilenet_v1_feature_extractor_tf1_test.py
@@ -17,20 +17,16 @@
By using parameterized test decorator, this test serves for both Slim-based and
Keras-based Mobilenet V1 feature extractors in SSD.
"""
-from absl.testing import parameterized
-
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_mobilenet_v1_feature_extractor
-from object_detection.models import ssd_mobilenet_v1_keras_feature_extractor
+from object_detection.utils import tf_version
-@parameterized.parameters(
- {'use_keras': False},
- {'use_keras': True},
-)
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class SsdMobilenetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
@@ -59,31 +55,17 @@ class SsdMobilenetV1FeatureExtractorTest(
an ssd_meta_arch.SSDFeatureExtractor object.
"""
min_depth = 32
- if use_keras:
- return (ssd_mobilenet_v1_keras_feature_extractor
- .SSDMobileNetV1KerasFeatureExtractor(
- is_training=is_training,
- depth_multiplier=depth_multiplier,
- min_depth=min_depth,
- pad_to_multiple=pad_to_multiple,
- conv_hyperparams=self._build_conv_hyperparams(
- add_batch_norm=False),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- use_explicit_padding=use_explicit_padding,
- num_layers=num_layers,
- name='MobilenetV1'))
- else:
- return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
- is_training,
- depth_multiplier,
- min_depth,
- pad_to_multiple,
- self.conv_hyperparams_fn,
- use_explicit_padding=use_explicit_padding,
- num_layers=num_layers)
+ del use_keras
+ return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
+ is_training,
+ depth_multiplier,
+ min_depth,
+ pad_to_multiple,
+ self.conv_hyperparams_fn,
+ use_explicit_padding=use_explicit_padding,
+ num_layers=num_layers)
- def test_extract_features_returns_correct_shapes_128(self, use_keras):
+ def test_extract_features_returns_correct_shapes_128(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
@@ -99,7 +81,7 @@ class SsdMobilenetV1FeatureExtractorTest(
pad_to_multiple,
expected_feature_map_shape,
use_explicit_padding=False,
- use_keras=use_keras)
+ use_keras=False)
self.check_extract_features_returns_correct_shape(
2,
image_height,
@@ -108,9 +90,9 @@ class SsdMobilenetV1FeatureExtractorTest(
pad_to_multiple,
expected_feature_map_shape,
use_explicit_padding=True,
- use_keras=use_keras)
+ use_keras=False)
- def test_extract_features_returns_correct_shapes_299(self, use_keras):
+ def test_extract_features_returns_correct_shapes_299(self):
image_height = 299
image_width = 299
depth_multiplier = 1.0
@@ -126,7 +108,7 @@ class SsdMobilenetV1FeatureExtractorTest(
pad_to_multiple,
expected_feature_map_shape,
use_explicit_padding=False,
- use_keras=use_keras)
+ use_keras=False)
self.check_extract_features_returns_correct_shape(
2,
image_height,
@@ -135,9 +117,9 @@ class SsdMobilenetV1FeatureExtractorTest(
pad_to_multiple,
expected_feature_map_shape,
use_explicit_padding=True,
- use_keras=use_keras)
+ use_keras=False)
- def test_extract_features_with_dynamic_image_shape(self, use_keras):
+ def test_extract_features_with_dynamic_image_shape(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
@@ -153,7 +135,7 @@ class SsdMobilenetV1FeatureExtractorTest(
pad_to_multiple,
expected_feature_map_shape,
use_explicit_padding=False,
- use_keras=use_keras)
+ use_keras=False)
self.check_extract_features_returns_correct_shape(
2,
image_height,
@@ -162,10 +144,10 @@ class SsdMobilenetV1FeatureExtractorTest(
pad_to_multiple,
expected_feature_map_shape,
use_explicit_padding=True,
- use_keras=use_keras)
+ use_keras=False)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(
- self, use_keras):
+ self):
image_height = 299
image_width = 299
depth_multiplier = 0.5**12
@@ -181,7 +163,7 @@ class SsdMobilenetV1FeatureExtractorTest(
pad_to_multiple,
expected_feature_map_shape,
use_explicit_padding=False,
- use_keras=use_keras)
+ use_keras=False)
self.check_extract_features_returns_correct_shape(
2,
image_height,
@@ -190,10 +172,10 @@ class SsdMobilenetV1FeatureExtractorTest(
pad_to_multiple,
expected_feature_map_shape,
use_explicit_padding=True,
- use_keras=use_keras)
+ use_keras=False)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(
- self, use_keras):
+ self):
image_height = 299
image_width = 299
depth_multiplier = 1.0
@@ -209,7 +191,7 @@ class SsdMobilenetV1FeatureExtractorTest(
pad_to_multiple,
expected_feature_map_shape,
use_explicit_padding=False,
- use_keras=use_keras)
+ use_keras=False)
self.check_extract_features_returns_correct_shape(
2,
image_height,
@@ -218,10 +200,10 @@ class SsdMobilenetV1FeatureExtractorTest(
pad_to_multiple,
expected_feature_map_shape,
use_explicit_padding=True,
- use_keras=use_keras)
+ use_keras=False)
def test_extract_features_raises_error_with_invalid_image_size(
- self, use_keras):
+ self):
image_height = 32
image_width = 32
depth_multiplier = 1.0
@@ -231,34 +213,34 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width,
depth_multiplier,
pad_to_multiple,
- use_keras=use_keras)
+ use_keras=False)
- def test_preprocess_returns_correct_value_range(self, use_keras):
+ def test_preprocess_returns_correct_value_range(self):
image_height = 128
image_width = 128
depth_multiplier = 1
pad_to_multiple = 1
test_image = np.random.rand(2, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(
- depth_multiplier, pad_to_multiple, use_keras=use_keras)
+ depth_multiplier, pad_to_multiple, use_keras=False)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
- def test_variables_only_created_in_scope(self, use_keras):
+ def test_variables_only_created_in_scope(self):
depth_multiplier = 1
pad_to_multiple = 1
scope_name = 'MobilenetV1'
self.check_feature_extractor_variables_under_scope(
- depth_multiplier, pad_to_multiple, scope_name, use_keras=use_keras)
+ depth_multiplier, pad_to_multiple, scope_name, use_keras=False)
- def test_variable_count(self, use_keras):
+ def test_variable_count(self):
depth_multiplier = 1
pad_to_multiple = 1
variables = self.get_feature_extractor_variables(
- depth_multiplier, pad_to_multiple, use_keras=use_keras)
+ depth_multiplier, pad_to_multiple, use_keras=False)
self.assertEqual(len(variables), 151)
- def test_has_fused_batchnorm(self, use_keras):
+ def test_has_fused_batchnorm(self):
image_height = 40
image_width = 40
depth_multiplier = 1
@@ -266,17 +248,14 @@ class SsdMobilenetV1FeatureExtractorTest(
image_placeholder = tf.placeholder(tf.float32,
[1, image_height, image_width, 3])
feature_extractor = self._create_feature_extractor(
- depth_multiplier, pad_to_multiple, use_keras=use_keras)
+ depth_multiplier, pad_to_multiple, use_keras=False)
preprocessed_image = feature_extractor.preprocess(image_placeholder)
- if use_keras:
- _ = feature_extractor(preprocessed_image)
- else:
- _ = feature_extractor.extract_features(preprocessed_image)
+ _ = feature_extractor.extract_features(preprocessed_image)
self.assertTrue(
any('FusedBatchNorm' in op.type
for op in tf.get_default_graph().get_operations()))
- def test_extract_features_with_fewer_layers(self, use_keras):
+ def test_extract_features_with_fewer_layers(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
@@ -286,7 +265,7 @@ class SsdMobilenetV1FeatureExtractorTest(
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=False, num_layers=4,
- use_keras=use_keras)
+ use_keras=False)
if __name__ == '__main__':
diff --git a/research/object_detection/models/ssd_mobilenet_v1_feature_extractor_tf2_test.py b/research/object_detection/models/ssd_mobilenet_v1_feature_extractor_tf2_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..b60537b886909edbc7236f799c51733b8030380a
--- /dev/null
+++ b/research/object_detection/models/ssd_mobilenet_v1_feature_extractor_tf2_test.py
@@ -0,0 +1,248 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for SSD Mobilenet V1 feature extractors.
+
+This TF2-only test covers the Keras-based Mobilenet V1 feature extractor in
+SSD; the Slim-based extractor is covered by the matching TF1 test file.
+"""
+import unittest
+import numpy as np
+import tensorflow.compat.v1 as tf
+
+from object_detection.models import ssd_feature_extractor_test
+from object_detection.models import ssd_mobilenet_v1_keras_feature_extractor
+from object_detection.utils import tf_version
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class SsdMobilenetV1FeatureExtractorTest(
+ ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
+
+ def _create_feature_extractor(self,
+ depth_multiplier,
+ pad_to_multiple,
+ use_explicit_padding=False,
+ num_layers=6,
+ is_training=False,
+ use_keras=False):
+ """Constructs a new feature extractor.
+
+ Args:
+ depth_multiplier: float depth multiplier for feature extractor
+ pad_to_multiple: the nearest multiple to zero pad the input height and
+ width dimensions to.
+ use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
+ inputs so that the output dimensions are the same as if 'SAME' padding
+ were used.
+ num_layers: number of SSD layers.
+ is_training: whether the network is in training mode.
+ use_keras: ignored (deleted on entry); this TF2-only test always builds the
+ keras-based feature extractor.
+
+ Returns:
+ an ssd_meta_arch.SSDFeatureExtractor object.
+ """
+ del use_keras
+ min_depth = 32
+ return (ssd_mobilenet_v1_keras_feature_extractor
+ .SSDMobileNetV1KerasFeatureExtractor(
+ is_training=is_training,
+ depth_multiplier=depth_multiplier,
+ min_depth=min_depth,
+ pad_to_multiple=pad_to_multiple,
+ conv_hyperparams=self._build_conv_hyperparams(
+ add_batch_norm=False),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ use_explicit_padding=use_explicit_padding,
+ num_layers=num_layers,
+ name='MobilenetV1'))
+
+ def test_extract_features_returns_correct_shapes_128(self):
+ image_height = 128
+ image_width = 128
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 8, 8, 512), (2, 4, 4, 1024),
+ (2, 2, 2, 512), (2, 1, 1, 256),
+ (2, 1, 1, 256), (2, 1, 1, 128)]
+ self.check_extract_features_returns_correct_shape(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=False,
+ use_keras=True)
+ self.check_extract_features_returns_correct_shape(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=True,
+ use_keras=True)
+
+ def test_extract_features_returns_correct_shapes_299(self):
+ image_height = 299
+ image_width = 299
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 19, 19, 512), (2, 10, 10, 1024),
+ (2, 5, 5, 512), (2, 3, 3, 256),
+ (2, 2, 2, 256), (2, 1, 1, 128)]
+ self.check_extract_features_returns_correct_shape(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=False,
+ use_keras=True)
+ self.check_extract_features_returns_correct_shape(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=True,
+ use_keras=True)
+
+ def test_extract_features_with_dynamic_image_shape(self):
+ image_height = 128
+ image_width = 128
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 8, 8, 512), (2, 4, 4, 1024),
+ (2, 2, 2, 512), (2, 1, 1, 256),
+ (2, 1, 1, 256), (2, 1, 1, 128)]
+ self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=False,
+ use_keras=True)
+ self.check_extract_features_returns_correct_shape(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=True,
+ use_keras=True)
+
+ def test_extract_features_returns_correct_shapes_enforcing_min_depth(
+ self):
+ image_height = 299
+ image_width = 299
+ depth_multiplier = 0.5**12
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 19, 19, 32), (2, 10, 10, 32),
+ (2, 5, 5, 32), (2, 3, 3, 32), (2, 2, 2, 32),
+ (2, 1, 1, 32)]
+ self.check_extract_features_returns_correct_shape(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=False,
+ use_keras=True)
+ self.check_extract_features_returns_correct_shape(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=True,
+ use_keras=True)
+
+ def test_extract_features_returns_correct_shapes_with_pad_to_multiple(
+ self):
+ image_height = 299
+ image_width = 299
+ depth_multiplier = 1.0
+ pad_to_multiple = 32
+ expected_feature_map_shape = [(2, 20, 20, 512), (2, 10, 10, 1024),
+ (2, 5, 5, 512), (2, 3, 3, 256),
+ (2, 2, 2, 256), (2, 1, 1, 128)]
+ self.check_extract_features_returns_correct_shape(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=False,
+ use_keras=True)
+ self.check_extract_features_returns_correct_shape(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=True,
+ use_keras=True)
+
+ def test_extract_features_raises_error_with_invalid_image_size(
+ self):
+ image_height = 32
+ image_width = 32
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ self.check_extract_features_raises_error_with_invalid_image_size(
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ use_keras=True)
+
+ def test_preprocess_returns_correct_value_range(self):
+ image_height = 128
+ image_width = 128
+ depth_multiplier = 1
+ pad_to_multiple = 1
+ test_image = np.random.rand(2, image_height, image_width, 3)
+ feature_extractor = self._create_feature_extractor(
+ depth_multiplier, pad_to_multiple, use_keras=True)
+ preprocessed_image = feature_extractor.preprocess(test_image)
+ self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
+
+ def test_extract_features_with_fewer_layers(self):
+ image_height = 128
+ image_width = 128
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 8, 8, 512), (2, 4, 4, 1024),
+ (2, 2, 2, 512), (2, 1, 1, 256)]
+ self.check_extract_features_returns_correct_shape(
+ 2, image_height, image_width, depth_multiplier, pad_to_multiple,
+ expected_feature_map_shape, use_explicit_padding=False, num_layers=4,
+ use_keras=True)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor_test.py b/research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor_tf1_test.py
similarity index 76%
rename from research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor_test.py
rename to research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor_tf1_test.py
index 131afed8a6b4d37bd99806715f279ba9230c5ad0..449b7803d390f60747f0f4d67d8b98414a7d24eb 100644
--- a/research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor_test.py
+++ b/research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor_tf1_test.py
@@ -18,19 +18,16 @@
By using parameterized test decorator, this test serves for both Slim-based and
Keras-based Mobilenet V1 FPN feature extractors in SSD.
"""
-from absl.testing import parameterized
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_mobilenet_v1_fpn_feature_extractor
-from object_detection.models import ssd_mobilenet_v1_fpn_keras_feature_extractor
+from object_detection.utils import tf_version
-@parameterized.parameters(
- {'use_keras': False},
- {'use_keras': True},
-)
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class SsdMobilenetV1FpnFeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
@@ -52,33 +49,19 @@ class SsdMobilenetV1FpnFeatureExtractorTest(
Returns:
an ssd_meta_arch.SSDFeatureExtractor object.
"""
+ del use_keras
min_depth = 32
- if use_keras:
- return (ssd_mobilenet_v1_fpn_keras_feature_extractor.
- SSDMobileNetV1FpnKerasFeatureExtractor(
- is_training=is_training,
- depth_multiplier=depth_multiplier,
- min_depth=min_depth,
- pad_to_multiple=pad_to_multiple,
- conv_hyperparams=self._build_conv_hyperparams(
- add_batch_norm=False),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- use_explicit_padding=use_explicit_padding,
- use_depthwise=True,
- name='MobilenetV1_FPN'))
- else:
- return (ssd_mobilenet_v1_fpn_feature_extractor.
- SSDMobileNetV1FpnFeatureExtractor(
- is_training,
- depth_multiplier,
- min_depth,
- pad_to_multiple,
- self.conv_hyperparams_fn,
- use_depthwise=True,
- use_explicit_padding=use_explicit_padding))
-
- def test_extract_features_returns_correct_shapes_256(self, use_keras):
+ return (ssd_mobilenet_v1_fpn_feature_extractor.
+ SSDMobileNetV1FpnFeatureExtractor(
+ is_training,
+ depth_multiplier,
+ min_depth,
+ pad_to_multiple,
+ self.conv_hyperparams_fn,
+ use_depthwise=True,
+ use_explicit_padding=use_explicit_padding))
+
+ def test_extract_features_returns_correct_shapes_256(self):
image_height = 256
image_width = 256
depth_multiplier = 1.0
@@ -89,13 +72,13 @@ class SsdMobilenetV1FpnFeatureExtractorTest(
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=False,
- use_keras=use_keras)
+ use_keras=False)
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=True,
- use_keras=use_keras)
+ use_keras=False)
- def test_extract_features_returns_correct_shapes_384(self, use_keras):
+ def test_extract_features_returns_correct_shapes_384(self):
image_height = 320
image_width = 320
depth_multiplier = 1.0
@@ -106,13 +89,13 @@ class SsdMobilenetV1FpnFeatureExtractorTest(
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=False,
- use_keras=use_keras)
+ use_keras=False)
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=True,
- use_keras=use_keras)
+ use_keras=False)
- def test_extract_features_with_dynamic_image_shape(self, use_keras):
+ def test_extract_features_with_dynamic_image_shape(self):
image_height = 256
image_width = 256
depth_multiplier = 1.0
@@ -123,14 +106,14 @@ class SsdMobilenetV1FpnFeatureExtractorTest(
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=False,
- use_keras=use_keras)
+ use_keras=False)
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=True,
- use_keras=use_keras)
+ use_keras=False)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(
- self, use_keras):
+ self):
image_height = 299
image_width = 299
depth_multiplier = 1.0
@@ -141,14 +124,14 @@ class SsdMobilenetV1FpnFeatureExtractorTest(
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=False,
- use_keras=use_keras)
+ use_keras=False)
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=True,
- use_keras=use_keras)
+ use_keras=False)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(
- self, use_keras):
+ self):
image_height = 256
image_width = 256
depth_multiplier = 0.5**12
@@ -159,23 +142,23 @@ class SsdMobilenetV1FpnFeatureExtractorTest(
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=False,
- use_keras=use_keras)
+ use_keras=False)
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=True,
- use_keras=use_keras)
+ use_keras=False)
def test_extract_features_raises_error_with_invalid_image_size(
- self, use_keras):
+ self):
image_height = 32
image_width = 32
depth_multiplier = 1.0
pad_to_multiple = 1
self.check_extract_features_raises_error_with_invalid_image_size(
image_height, image_width, depth_multiplier, pad_to_multiple,
- use_keras=use_keras)
+ use_keras=False)
- def test_preprocess_returns_correct_value_range(self, use_keras):
+ def test_preprocess_returns_correct_value_range(self):
image_height = 256
image_width = 256
depth_multiplier = 1
@@ -183,25 +166,25 @@ class SsdMobilenetV1FpnFeatureExtractorTest(
test_image = np.random.rand(2, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple,
- use_keras=use_keras)
+ use_keras=False)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
- def test_variables_only_created_in_scope(self, use_keras):
+ def test_variables_only_created_in_scope(self):
depth_multiplier = 1
pad_to_multiple = 1
scope_name = 'MobilenetV1'
self.check_feature_extractor_variables_under_scope(
- depth_multiplier, pad_to_multiple, scope_name, use_keras=use_keras)
+ depth_multiplier, pad_to_multiple, scope_name, use_keras=False)
- def test_variable_count(self, use_keras):
+ def test_variable_count(self):
depth_multiplier = 1
pad_to_multiple = 1
variables = self.get_feature_extractor_variables(
- depth_multiplier, pad_to_multiple, use_keras=use_keras)
+ depth_multiplier, pad_to_multiple, use_keras=False)
self.assertEqual(len(variables), 153)
- def test_fused_batchnorm(self, use_keras):
+ def test_fused_batchnorm(self):
image_height = 256
image_width = 256
depth_multiplier = 1
@@ -210,12 +193,9 @@ class SsdMobilenetV1FpnFeatureExtractorTest(
[1, image_height, image_width, 3])
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple,
- use_keras=use_keras)
+ use_keras=False)
preprocessed_image = feature_extractor.preprocess(image_placeholder)
- if use_keras:
- _ = feature_extractor(preprocessed_image)
- else:
- _ = feature_extractor.extract_features(preprocessed_image)
+ _ = feature_extractor.extract_features(preprocessed_image)
self.assertTrue(
any('FusedBatchNorm' in op.type
diff --git a/research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor_tf2_test.py b/research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor_tf2_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..307cfa8b0b5594f921fee670699cc026ec16fbce
--- /dev/null
+++ b/research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor_tf2_test.py
@@ -0,0 +1,179 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for ssd_mobilenet_v1_fpn_feature_extractor.
+
+This TF2-only test exercises the Keras-based Mobilenet V1 FPN feature
+extractor in SSD; the Slim-based extractor is tested separately under TF1.
+"""
+import unittest
+import numpy as np
+import tensorflow.compat.v1 as tf
+
+from object_detection.models import ssd_feature_extractor_test
+from object_detection.models import ssd_mobilenet_v1_fpn_keras_feature_extractor
+from object_detection.utils import tf_version
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class SsdMobilenetV1FpnFeatureExtractorTest(
+ ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
+
+ def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
+ is_training=True, use_explicit_padding=False,
+ use_keras=True):
+ """Constructs a new feature extractor.
+
+ Args:
+ depth_multiplier: float depth multiplier for feature extractor
+ pad_to_multiple: the nearest multiple to zero pad the input height and
+ width dimensions to.
+ is_training: whether the network is in training mode.
+ use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
+ inputs so that the output dimensions are the same as if 'SAME' padding
+ were used.
+ use_keras: unused argument; a Keras-based feature extractor is always
+ built.
+ Returns:
+ an ssd_meta_arch.SSDFeatureExtractor object.
+ """
+ min_depth = 32
+ del use_keras
+ return (ssd_mobilenet_v1_fpn_keras_feature_extractor.
+ SSDMobileNetV1FpnKerasFeatureExtractor(
+ is_training=is_training,
+ depth_multiplier=depth_multiplier,
+ min_depth=min_depth,
+ pad_to_multiple=pad_to_multiple,
+ conv_hyperparams=self._build_conv_hyperparams(
+ add_batch_norm=False),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ use_explicit_padding=use_explicit_padding,
+ use_depthwise=True,
+ name='MobilenetV1_FPN'))
+
+ def test_extract_features_returns_correct_shapes_256(self):
+ image_height = 256
+ image_width = 256
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 32, 32, 256), (2, 16, 16, 256),
+ (2, 8, 8, 256), (2, 4, 4, 256),
+ (2, 2, 2, 256)]
+ self.check_extract_features_returns_correct_shape(
+ 2, image_height, image_width, depth_multiplier, pad_to_multiple,
+ expected_feature_map_shape, use_explicit_padding=False,
+ use_keras=True)
+ self.check_extract_features_returns_correct_shape(
+ 2, image_height, image_width, depth_multiplier, pad_to_multiple,
+ expected_feature_map_shape, use_explicit_padding=True,
+ use_keras=True)
+
+ def test_extract_features_returns_correct_shapes_384(self):
+ image_height = 320
+ image_width = 320
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 40, 40, 256), (2, 20, 20, 256),
+ (2, 10, 10, 256), (2, 5, 5, 256),
+ (2, 3, 3, 256)]
+ self.check_extract_features_returns_correct_shape(
+ 2, image_height, image_width, depth_multiplier, pad_to_multiple,
+ expected_feature_map_shape, use_explicit_padding=False,
+ use_keras=True)
+ self.check_extract_features_returns_correct_shape(
+ 2, image_height, image_width, depth_multiplier, pad_to_multiple,
+ expected_feature_map_shape, use_explicit_padding=True,
+ use_keras=True)
+
+ def test_extract_features_with_dynamic_image_shape(self):
+ image_height = 256
+ image_width = 256
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 32, 32, 256), (2, 16, 16, 256),
+ (2, 8, 8, 256), (2, 4, 4, 256),
+ (2, 2, 2, 256)]
+ self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
+ 2, image_height, image_width, depth_multiplier, pad_to_multiple,
+ expected_feature_map_shape, use_explicit_padding=False,
+ use_keras=True)
+ self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
+ 2, image_height, image_width, depth_multiplier, pad_to_multiple,
+ expected_feature_map_shape, use_explicit_padding=True,
+ use_keras=True)
+
+ def test_extract_features_returns_correct_shapes_with_pad_to_multiple(
+ self):
+ image_height = 299
+ image_width = 299
+ depth_multiplier = 1.0
+ pad_to_multiple = 32
+ expected_feature_map_shape = [(2, 40, 40, 256), (2, 20, 20, 256),
+ (2, 10, 10, 256), (2, 5, 5, 256),
+ (2, 3, 3, 256)]
+ self.check_extract_features_returns_correct_shape(
+ 2, image_height, image_width, depth_multiplier, pad_to_multiple,
+ expected_feature_map_shape, use_explicit_padding=False,
+ use_keras=True)
+ self.check_extract_features_returns_correct_shape(
+ 2, image_height, image_width, depth_multiplier, pad_to_multiple,
+ expected_feature_map_shape, use_explicit_padding=True,
+ use_keras=True)
+
+ def test_extract_features_returns_correct_shapes_enforcing_min_depth(
+ self):
+ image_height = 256
+ image_width = 256
+ depth_multiplier = 0.5**12
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 32, 32, 32), (2, 16, 16, 32),
+ (2, 8, 8, 32), (2, 4, 4, 32),
+ (2, 2, 2, 32)]
+ self.check_extract_features_returns_correct_shape(
+ 2, image_height, image_width, depth_multiplier, pad_to_multiple,
+ expected_feature_map_shape, use_explicit_padding=False,
+ use_keras=True)
+ self.check_extract_features_returns_correct_shape(
+ 2, image_height, image_width, depth_multiplier, pad_to_multiple,
+ expected_feature_map_shape, use_explicit_padding=True,
+ use_keras=True)
+
+ def test_extract_features_raises_error_with_invalid_image_size(
+ self):
+ image_height = 32
+ image_width = 32
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ self.check_extract_features_raises_error_with_invalid_image_size(
+ image_height, image_width, depth_multiplier, pad_to_multiple,
+ use_keras=True)
+
+ def test_preprocess_returns_correct_value_range(self):
+ image_height = 256
+ image_width = 256
+ depth_multiplier = 1
+ pad_to_multiple = 1
+ test_image = np.random.rand(2, image_height, image_width, 3)
+ feature_extractor = self._create_feature_extractor(depth_multiplier,
+ pad_to_multiple,
+ use_keras=True)
+ preprocessed_image = feature_extractor.preprocess(test_image)
+ self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/research/object_detection/models/ssd_mobilenet_v1_fpn_keras_feature_extractor.py b/research/object_detection/models/ssd_mobilenet_v1_fpn_keras_feature_extractor.py
index 53d3fdbd447aa723a560b71461c186906465b25d..7792931875dc122ea938f8c87633e31f4adc4336 100644
--- a/research/object_detection/models/ssd_mobilenet_v1_fpn_keras_feature_extractor.py
+++ b/research/object_detection/models/ssd_mobilenet_v1_fpn_keras_feature_extractor.py
@@ -123,7 +123,7 @@ class SSDMobileNetV1FpnKerasFeatureExtractor(
'Conv2d_3_pointwise', 'Conv2d_5_pointwise', 'Conv2d_11_pointwise',
'Conv2d_13_pointwise'
]
- self._mobilenet_v1 = None
+ self.classification_backbone = None
self._fpn_features_generator = None
self._coarse_feature_layers = []
@@ -147,7 +147,7 @@ class SSDMobileNetV1FpnKerasFeatureExtractor(
name='conv_pw_11_relu').output
conv2d_13_pointwise = full_mobilenet_v1.get_layer(
name='conv_pw_13_relu').output
- self._mobilenet_v1 = tf.keras.Model(
+ self.classification_backbone = tf.keras.Model(
inputs=full_mobilenet_v1.inputs,
outputs=[conv2d_3_pointwise, conv2d_5_pointwise,
conv2d_11_pointwise, conv2d_13_pointwise]
@@ -218,7 +218,7 @@ class SSDMobileNetV1FpnKerasFeatureExtractor(
preprocessed_inputs = shape_utils.check_min_image_dim(
33, preprocessed_inputs)
- image_features = self._mobilenet_v1(
+ image_features = self.classification_backbone(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple))
feature_block_list = []
@@ -243,3 +243,14 @@ class SSDMobileNetV1FpnKerasFeatureExtractor(
last_feature_map = layer(last_feature_map)
feature_maps.append(last_feature_map)
return feature_maps
+
+ def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
+ """Returns a map for restoring from an (object-based) checkpoint.
+
+ Args:
+ feature_extractor_scope: A scope name for the feature extractor (unused).
+
+ Returns:
+ A dict mapping 'feature_extractor' to the classification backbone model.
+ """
+ return {'feature_extractor': self.classification_backbone}
diff --git a/research/object_detection/models/ssd_mobilenet_v1_keras_feature_extractor.py b/research/object_detection/models/ssd_mobilenet_v1_keras_feature_extractor.py
index 679dc25dbd21039e8d0cbc2f3eeaa2eeac9c56c6..2f0df91540ae3598cde3d08c764b023c3c7f758e 100644
--- a/research/object_detection/models/ssd_mobilenet_v1_keras_feature_extractor.py
+++ b/research/object_detection/models/ssd_mobilenet_v1_keras_feature_extractor.py
@@ -93,7 +93,7 @@ class SSDMobileNetV1KerasFeatureExtractor(
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
}
- self._mobilenet_v1 = None
+ self.classification_backbone = None
self._feature_map_generator = None
def build(self, input_shape):
@@ -111,7 +111,7 @@ class SSDMobileNetV1KerasFeatureExtractor(
name='conv_pw_11_relu').output
conv2d_13_pointwise = full_mobilenet_v1.get_layer(
name='conv_pw_13_relu').output
- self._mobilenet_v1 = tf.keras.Model(
+ self.classification_backbone = tf.keras.Model(
inputs=full_mobilenet_v1.inputs,
outputs=[conv2d_11_pointwise, conv2d_13_pointwise])
self._feature_map_generator = (
@@ -155,7 +155,7 @@ class SSDMobileNetV1KerasFeatureExtractor(
preprocessed_inputs = shape_utils.check_min_image_dim(
33, preprocessed_inputs)
- image_features = self._mobilenet_v1(
+ image_features = self.classification_backbone(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple))
feature_maps = self._feature_map_generator({
diff --git a/research/object_detection/models/ssd_mobilenet_v1_ppn_feature_extractor_test.py b/research/object_detection/models/ssd_mobilenet_v1_ppn_feature_extractor_tf1_test.py
similarity index 98%
rename from research/object_detection/models/ssd_mobilenet_v1_ppn_feature_extractor_test.py
rename to research/object_detection/models/ssd_mobilenet_v1_ppn_feature_extractor_tf1_test.py
index c5a9cd807128e2e513d0dd8a9d9348921ff0e4d9..b5918c0dfa9a3e3819df14f9d504dd63b8febc63 100644
--- a/research/object_detection/models/ssd_mobilenet_v1_ppn_feature_extractor_test.py
+++ b/research/object_detection/models/ssd_mobilenet_v1_ppn_feature_extractor_tf1_test.py
@@ -14,13 +14,16 @@
# ==============================================================================
"""Tests for ssd_mobilenet_v1_ppn_feature_extractor."""
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_mobilenet_v1_ppn_feature_extractor
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class SsdMobilenetV1PpnFeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
diff --git a/research/object_detection/models/ssd_mobilenet_v2_feature_extractor_test.py b/research/object_detection/models/ssd_mobilenet_v2_feature_extractor_tf1_test.py
similarity index 70%
rename from research/object_detection/models/ssd_mobilenet_v2_feature_extractor_test.py
rename to research/object_detection/models/ssd_mobilenet_v2_feature_extractor_tf1_test.py
index 40eee93dbc021da82a788ff097cae580ebdd692b..96f9bc26e120f2f4396968429f474406b67894ca 100644
--- a/research/object_detection/models/ssd_mobilenet_v2_feature_extractor_test.py
+++ b/research/object_detection/models/ssd_mobilenet_v2_feature_extractor_tf1_test.py
@@ -14,20 +14,17 @@
# ==============================================================================
"""Tests for ssd_mobilenet_v2_feature_extractor."""
-from absl.testing import parameterized
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_mobilenet_v2_feature_extractor
-from object_detection.models import ssd_mobilenet_v2_keras_feature_extractor
+from object_detection.utils import tf_version
-@parameterized.parameters(
- {'use_keras': False},
- {'use_keras': True},
-)
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class SsdMobilenetV2FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
@@ -35,8 +32,7 @@ class SsdMobilenetV2FeatureExtractorTest(
depth_multiplier,
pad_to_multiple,
use_explicit_padding=False,
- num_layers=6,
- use_keras=False):
+ num_layers=6):
"""Constructs a new feature extractor.
Args:
@@ -47,36 +43,20 @@ class SsdMobilenetV2FeatureExtractorTest(
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
num_layers: number of SSD layers.
- use_keras: if True builds a keras-based feature extractor, if False builds
- a slim-based one.
Returns:
an ssd_meta_arch.SSDFeatureExtractor object.
"""
min_depth = 32
- if use_keras:
- return (ssd_mobilenet_v2_keras_feature_extractor.
- SSDMobileNetV2KerasFeatureExtractor(
- is_training=False,
- depth_multiplier=depth_multiplier,
- min_depth=min_depth,
- pad_to_multiple=pad_to_multiple,
- conv_hyperparams=self._build_conv_hyperparams(),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- use_explicit_padding=use_explicit_padding,
- num_layers=num_layers,
- name='MobilenetV2'))
- else:
- return ssd_mobilenet_v2_feature_extractor.SSDMobileNetV2FeatureExtractor(
- False,
- depth_multiplier,
- min_depth,
- pad_to_multiple,
- self.conv_hyperparams_fn,
- use_explicit_padding=use_explicit_padding,
- num_layers=num_layers)
-
- def test_extract_features_returns_correct_shapes_128(self, use_keras):
+ return ssd_mobilenet_v2_feature_extractor.SSDMobileNetV2FeatureExtractor(
+ False,
+ depth_multiplier,
+ min_depth,
+ pad_to_multiple,
+ self.conv_hyperparams_fn,
+ use_explicit_padding=use_explicit_padding,
+ num_layers=num_layers)
+
+ def test_extract_features_returns_correct_shapes_128(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
@@ -86,10 +66,10 @@ class SsdMobilenetV2FeatureExtractorTest(
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape, use_keras=use_keras)
+ expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_128_explicit_padding(
- self, use_keras):
+ self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
@@ -99,11 +79,10 @@ class SsdMobilenetV2FeatureExtractorTest(
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape, use_explicit_padding=True,
- use_keras=use_keras)
+ expected_feature_map_shape, use_explicit_padding=True)
def test_extract_features_returns_correct_shapes_with_dynamic_inputs(
- self, use_keras):
+ self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
@@ -113,9 +92,9 @@ class SsdMobilenetV2FeatureExtractorTest(
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape, use_keras=use_keras)
+ expected_feature_map_shape)
- def test_extract_features_returns_correct_shapes_299(self, use_keras):
+ def test_extract_features_returns_correct_shapes_299(self):
image_height = 299
image_width = 299
depth_multiplier = 1.0
@@ -125,10 +104,10 @@ class SsdMobilenetV2FeatureExtractorTest(
(2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape, use_keras=use_keras)
+ expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(
- self, use_keras):
+ self):
image_height = 299
image_width = 299
depth_multiplier = 0.5**12
@@ -138,10 +117,10 @@ class SsdMobilenetV2FeatureExtractorTest(
(2, 2, 2, 32), (2, 1, 1, 32)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape, use_keras=use_keras)
+ expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(
- self, use_keras):
+ self):
image_height = 299
image_width = 299
depth_multiplier = 1.0
@@ -151,45 +130,43 @@ class SsdMobilenetV2FeatureExtractorTest(
(2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape, use_keras=use_keras)
+ expected_feature_map_shape)
def test_extract_features_raises_error_with_invalid_image_size(
- self, use_keras):
+ self):
image_height = 32
image_width = 32
depth_multiplier = 1.0
pad_to_multiple = 1
self.check_extract_features_raises_error_with_invalid_image_size(
- image_height, image_width, depth_multiplier, pad_to_multiple,
- use_keras=use_keras)
+ image_height, image_width, depth_multiplier, pad_to_multiple)
- def test_preprocess_returns_correct_value_range(self, use_keras):
+ def test_preprocess_returns_correct_value_range(self):
image_height = 128
image_width = 128
depth_multiplier = 1
pad_to_multiple = 1
test_image = np.random.rand(4, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier,
- pad_to_multiple,
- use_keras=use_keras)
+ pad_to_multiple)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
- def test_variables_only_created_in_scope(self, use_keras):
+ def test_variables_only_created_in_scope(self):
depth_multiplier = 1
pad_to_multiple = 1
scope_name = 'MobilenetV2'
self.check_feature_extractor_variables_under_scope(
- depth_multiplier, pad_to_multiple, scope_name, use_keras=use_keras)
+ depth_multiplier, pad_to_multiple, scope_name)
- def test_variable_count(self, use_keras):
+ def test_variable_count(self):
depth_multiplier = 1
pad_to_multiple = 1
variables = self.get_feature_extractor_variables(
- depth_multiplier, pad_to_multiple, use_keras=use_keras)
+ depth_multiplier, pad_to_multiple)
self.assertEqual(len(variables), 292)
- def test_has_fused_batchnorm(self, use_keras):
+ def test_has_fused_batchnorm(self):
image_height = 40
image_width = 40
depth_multiplier = 1
@@ -197,17 +174,13 @@ class SsdMobilenetV2FeatureExtractorTest(
image_placeholder = tf.placeholder(tf.float32,
[1, image_height, image_width, 3])
feature_extractor = self._create_feature_extractor(depth_multiplier,
- pad_to_multiple,
- use_keras=use_keras)
+ pad_to_multiple)
preprocessed_image = feature_extractor.preprocess(image_placeholder)
- if use_keras:
- _ = feature_extractor(preprocessed_image)
- else:
- _ = feature_extractor.extract_features(preprocessed_image)
+ _ = feature_extractor.extract_features(preprocessed_image)
self.assertTrue(any('FusedBatchNorm' in op.type
for op in tf.get_default_graph().get_operations()))
- def test_extract_features_with_fewer_layers(self, use_keras):
+ def test_extract_features_with_fewer_layers(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
@@ -216,8 +189,7 @@ class SsdMobilenetV2FeatureExtractorTest(
(2, 2, 2, 512), (2, 1, 1, 256)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape, use_explicit_padding=False, num_layers=4,
- use_keras=use_keras)
+ expected_feature_map_shape, use_explicit_padding=False, num_layers=4)
if __name__ == '__main__':
diff --git a/research/object_detection/models/ssd_mobilenet_v2_feature_extractor_tf2_test.py b/research/object_detection/models/ssd_mobilenet_v2_feature_extractor_tf2_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d4cb5afcf7c978cc24e01d5806914c618cd7fd7
--- /dev/null
+++ b/research/object_detection/models/ssd_mobilenet_v2_feature_extractor_tf2_test.py
@@ -0,0 +1,192 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for ssd_mobilenet_v2_feature_extractor."""
+import unittest
+
+import numpy as np
+import tensorflow.compat.v1 as tf
+
+from object_detection.models import ssd_feature_extractor_test
+from object_detection.models import ssd_mobilenet_v2_keras_feature_extractor
+from object_detection.utils import tf_version
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class SsdMobilenetV2FeatureExtractorTest(
+ ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
+
+ def _create_feature_extractor(self,
+ depth_multiplier,
+ pad_to_multiple,
+ use_explicit_padding=False,
+ num_layers=6,
+ use_keras=False):
+ """Constructs a new feature extractor.
+
+ Args:
+ depth_multiplier: float depth multiplier for feature extractor
+ pad_to_multiple: the nearest multiple to zero pad the input height and
+ width dimensions to.
+ use_explicit_padding: use 'VALID' padding for convolutions, but prepad
+ inputs so that the output dimensions are the same as if 'SAME' padding
+ were used.
+ num_layers: number of SSD layers.
+ use_keras: unused argument.
+
+ Returns:
+ an ssd_meta_arch.SSDFeatureExtractor object.
+ """
+ del use_keras
+ min_depth = 32
+ return (ssd_mobilenet_v2_keras_feature_extractor.
+ SSDMobileNetV2KerasFeatureExtractor(
+ is_training=False,
+ depth_multiplier=depth_multiplier,
+ min_depth=min_depth,
+ pad_to_multiple=pad_to_multiple,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ use_explicit_padding=use_explicit_padding,
+ num_layers=num_layers,
+ name='MobilenetV2'))
+
+ def test_extract_features_returns_correct_shapes_128(self):
+ image_height = 128
+ image_width = 128
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1280),
+ (2, 2, 2, 512), (2, 1, 1, 256),
+ (2, 1, 1, 256), (2, 1, 1, 128)]
+ self.check_extract_features_returns_correct_shape(
+ 2, image_height, image_width, depth_multiplier, pad_to_multiple,
+ expected_feature_map_shape, use_keras=True)
+
+ def test_extract_features_returns_correct_shapes_128_explicit_padding(
+ self):
+ image_height = 128
+ image_width = 128
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1280),
+ (2, 2, 2, 512), (2, 1, 1, 256),
+ (2, 1, 1, 256), (2, 1, 1, 128)]
+ self.check_extract_features_returns_correct_shape(
+ 2, image_height, image_width, depth_multiplier, pad_to_multiple,
+ expected_feature_map_shape, use_explicit_padding=True, use_keras=True)
+
+ def test_extract_features_returns_correct_shapes_with_dynamic_inputs(
+ self):
+ image_height = 128
+ image_width = 128
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1280),
+ (2, 2, 2, 512), (2, 1, 1, 256),
+ (2, 1, 1, 256), (2, 1, 1, 128)]
+ self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
+ 2, image_height, image_width, depth_multiplier, pad_to_multiple,
+ expected_feature_map_shape, use_keras=True)
+
+ def test_extract_features_returns_correct_shapes_299(self):
+ image_height = 299
+ image_width = 299
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 19, 19, 576), (2, 10, 10, 1280),
+ (2, 5, 5, 512), (2, 3, 3, 256),
+ (2, 2, 2, 256), (2, 1, 1, 128)]
+ self.check_extract_features_returns_correct_shape(
+ 2, image_height, image_width, depth_multiplier, pad_to_multiple,
+ expected_feature_map_shape, use_keras=True)
+
+ def test_extract_features_returns_correct_shapes_enforcing_min_depth(
+ self):
+ image_height = 299
+ image_width = 299
+ depth_multiplier = 0.5**12
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 19, 19, 192), (2, 10, 10, 32),
+ (2, 5, 5, 32), (2, 3, 3, 32),
+ (2, 2, 2, 32), (2, 1, 1, 32)]
+ self.check_extract_features_returns_correct_shape(
+ 2, image_height, image_width, depth_multiplier, pad_to_multiple,
+ expected_feature_map_shape, use_keras=True)
+
+ def test_extract_features_returns_correct_shapes_with_pad_to_multiple(
+ self):
+ image_height = 299
+ image_width = 299
+ depth_multiplier = 1.0
+ pad_to_multiple = 32
+ expected_feature_map_shape = [(2, 20, 20, 576), (2, 10, 10, 1280),
+ (2, 5, 5, 512), (2, 3, 3, 256),
+ (2, 2, 2, 256), (2, 1, 1, 128)]
+ self.check_extract_features_returns_correct_shape(
+ 2, image_height, image_width, depth_multiplier, pad_to_multiple,
+ expected_feature_map_shape, use_keras=True)
+
+ def test_extract_features_raises_error_with_invalid_image_size(
+ self):
+ image_height = 32
+ image_width = 32
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ self.check_extract_features_raises_error_with_invalid_image_size(
+ image_height, image_width, depth_multiplier, pad_to_multiple,
+ use_keras=True)
+
+ def test_preprocess_returns_correct_value_range(self):
+ image_height = 128
+ image_width = 128
+ depth_multiplier = 1
+ pad_to_multiple = 1
+ test_image = np.random.rand(4, image_height, image_width, 3)
+ feature_extractor = self._create_feature_extractor(depth_multiplier,
+ pad_to_multiple)
+ preprocessed_image = feature_extractor.preprocess(test_image)
+ self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
+
+ def test_variables_only_created_in_scope(self):
+ depth_multiplier = 1
+ pad_to_multiple = 1
+ scope_name = 'MobilenetV2'
+ self.check_feature_extractor_variables_under_scope(
+ depth_multiplier, pad_to_multiple, scope_name, use_keras=True)
+
+ def test_variable_count(self):
+ depth_multiplier = 1
+ pad_to_multiple = 1
+ variables = self.get_feature_extractor_variables(
+ depth_multiplier, pad_to_multiple, use_keras=True)
+ self.assertEqual(len(variables), 292)
+
+ def test_extract_features_with_fewer_layers(self):
+ image_height = 128
+ image_width = 128
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1280),
+ (2, 2, 2, 512), (2, 1, 1, 256)]
+ self.check_extract_features_returns_correct_shape(
+ 2, image_height, image_width, depth_multiplier, pad_to_multiple,
+ expected_feature_map_shape, use_explicit_padding=False, num_layers=4,
+ use_keras=True)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/research/object_detection/models/ssd_mobilenet_v2_fpn_feature_extractor_test.py b/research/object_detection/models/ssd_mobilenet_v2_fpn_feature_extractor_tf1_test.py
similarity index 70%
rename from research/object_detection/models/ssd_mobilenet_v2_fpn_feature_extractor_test.py
rename to research/object_detection/models/ssd_mobilenet_v2_fpn_feature_extractor_tf1_test.py
index f5bb42b68cf9afe0af64ebce81f4f4e12f48e277..9cdbed5fbe160baefb0afd41477748b9374e191f 100644
--- a/research/object_detection/models/ssd_mobilenet_v2_fpn_feature_extractor_test.py
+++ b/research/object_detection/models/ssd_mobilenet_v2_fpn_feature_extractor_tf1_test.py
@@ -18,31 +18,23 @@
By using parameterized test decorator, this test serves for both Slim-based and
Keras-based Mobilenet V2 FPN feature extractors in SSD.
"""
+import unittest
from absl.testing import parameterized
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_mobilenet_v2_fpn_feature_extractor
-from object_detection.models import ssd_mobilenet_v2_fpn_keras_feature_extractor
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
@parameterized.parameters(
{
- 'use_depthwise': False,
- 'use_keras': True
+ 'use_depthwise': False
},
{
- 'use_depthwise': True,
- 'use_keras': True
- },
- {
- 'use_depthwise': False,
- 'use_keras': False
- },
- {
- 'use_depthwise': True,
- 'use_keras': False
+ 'use_depthwise': True
},
)
class SsdMobilenetV2FpnFeatureExtractorTest(
@@ -71,34 +63,20 @@ class SsdMobilenetV2FpnFeatureExtractorTest(
Returns:
an ssd_meta_arch.SSDFeatureExtractor object.
"""
+ del use_keras
min_depth = 32
- if use_keras:
- return (ssd_mobilenet_v2_fpn_keras_feature_extractor
- .SSDMobileNetV2FpnKerasFeatureExtractor(
- is_training=is_training,
- depth_multiplier=depth_multiplier,
- min_depth=min_depth,
- pad_to_multiple=pad_to_multiple,
- conv_hyperparams=self._build_conv_hyperparams(
- add_batch_norm=False),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- use_explicit_padding=use_explicit_padding,
- use_depthwise=use_depthwise,
- name='MobilenetV2_FPN'))
- else:
- return (ssd_mobilenet_v2_fpn_feature_extractor
- .SSDMobileNetV2FpnFeatureExtractor(
- is_training,
- depth_multiplier,
- min_depth,
- pad_to_multiple,
- self.conv_hyperparams_fn,
- use_depthwise=use_depthwise,
- use_explicit_padding=use_explicit_padding))
+ return (ssd_mobilenet_v2_fpn_feature_extractor
+ .SSDMobileNetV2FpnFeatureExtractor(
+ is_training,
+ depth_multiplier,
+ min_depth,
+ pad_to_multiple,
+ self.conv_hyperparams_fn,
+ use_depthwise=use_depthwise,
+ use_explicit_padding=use_explicit_padding))
- def test_extract_features_returns_correct_shapes_256(self, use_keras,
- use_depthwise):
+ def test_extract_features_returns_correct_shapes_256(self, use_depthwise):
+ use_keras = False
image_height = 256
image_width = 256
depth_multiplier = 1.0
@@ -127,8 +105,8 @@ class SsdMobilenetV2FpnFeatureExtractorTest(
use_keras=use_keras,
use_depthwise=use_depthwise)
- def test_extract_features_returns_correct_shapes_384(self, use_keras,
- use_depthwise):
+ def test_extract_features_returns_correct_shapes_384(self, use_depthwise):
+ use_keras = False
image_height = 320
image_width = 320
depth_multiplier = 1.0
@@ -157,8 +135,9 @@ class SsdMobilenetV2FpnFeatureExtractorTest(
use_keras=use_keras,
use_depthwise=use_depthwise)
- def test_extract_features_with_dynamic_image_shape(self, use_keras,
+ def test_extract_features_with_dynamic_image_shape(self,
use_depthwise):
+ use_keras = False
image_height = 256
image_width = 256
depth_multiplier = 1.0
@@ -188,7 +167,8 @@ class SsdMobilenetV2FpnFeatureExtractorTest(
use_depthwise=use_depthwise)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(
- self, use_keras, use_depthwise):
+ self, use_depthwise):
+ use_keras = False
image_height = 299
image_width = 299
depth_multiplier = 1.0
@@ -218,7 +198,8 @@ class SsdMobilenetV2FpnFeatureExtractorTest(
use_depthwise=use_depthwise)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(
- self, use_keras, use_depthwise):
+ self, use_depthwise):
+ use_keras = False
image_height = 256
image_width = 256
depth_multiplier = 0.5**12
@@ -248,7 +229,8 @@ class SsdMobilenetV2FpnFeatureExtractorTest(
use_depthwise=use_depthwise)
def test_extract_features_raises_error_with_invalid_image_size(
- self, use_keras, use_depthwise):
+ self, use_depthwise):
+ use_keras = False
image_height = 32
image_width = 32
depth_multiplier = 1.0
@@ -261,8 +243,9 @@ class SsdMobilenetV2FpnFeatureExtractorTest(
use_keras=use_keras,
use_depthwise=use_depthwise)
- def test_preprocess_returns_correct_value_range(self, use_keras,
+ def test_preprocess_returns_correct_value_range(self,
use_depthwise):
+ use_keras = False
image_height = 256
image_width = 256
depth_multiplier = 1
@@ -276,7 +259,8 @@ class SsdMobilenetV2FpnFeatureExtractorTest(
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
- def test_variables_only_created_in_scope(self, use_keras, use_depthwise):
+ def test_variables_only_created_in_scope(self, use_depthwise):
+ use_keras = False
depth_multiplier = 1
pad_to_multiple = 1
scope_name = 'MobilenetV2'
@@ -287,7 +271,8 @@ class SsdMobilenetV2FpnFeatureExtractorTest(
use_keras=use_keras,
use_depthwise=use_depthwise)
- def test_fused_batchnorm(self, use_keras, use_depthwise):
+ def test_fused_batchnorm(self, use_depthwise):
+ use_keras = False
image_height = 256
image_width = 256
depth_multiplier = 1
@@ -300,15 +285,13 @@ class SsdMobilenetV2FpnFeatureExtractorTest(
use_keras=use_keras,
use_depthwise=use_depthwise)
preprocessed_image = feature_extractor.preprocess(image_placeholder)
- if use_keras:
- _ = feature_extractor(preprocessed_image)
- else:
- _ = feature_extractor.extract_features(preprocessed_image)
+ _ = feature_extractor.extract_features(preprocessed_image)
self.assertTrue(
any('FusedBatchNorm' in op.type
for op in tf.get_default_graph().get_operations()))
- def test_variable_count(self, use_keras, use_depthwise):
+ def test_variable_count(self, use_depthwise):
+ use_keras = False
depth_multiplier = 1
pad_to_multiple = 1
variables = self.get_feature_extractor_variables(
@@ -321,8 +304,9 @@ class SsdMobilenetV2FpnFeatureExtractorTest(
expected_variables_len = 278
self.assertEqual(len(variables), expected_variables_len)
- def test_get_expected_feature_map_variable_names(self, use_keras,
+ def test_get_expected_feature_map_variable_names(self,
use_depthwise):
+ use_keras = False
depth_multiplier = 1.0
pad_to_multiple = 1
@@ -360,44 +344,6 @@ class SsdMobilenetV2FpnFeatureExtractorTest(
'MobilenetV2/fpn/projection_2/weights',
'MobilenetV2/fpn/projection_3/weights',
])
- keras_expected_feature_maps_variables = set([
- # Keras Mobilenet V2 feature maps
- 'MobilenetV2_FPN/block_4_depthwise/depthwise_kernel',
- 'MobilenetV2_FPN/block_7_depthwise/depthwise_kernel',
- 'MobilenetV2_FPN/block_14_depthwise/depthwise_kernel',
- 'MobilenetV2_FPN/Conv_1/kernel',
- # FPN layers
- 'MobilenetV2_FPN/bottom_up_Conv2d_20_conv/kernel',
- 'MobilenetV2_FPN/bottom_up_Conv2d_21_conv/kernel',
- 'MobilenetV2_FPN/FeatureMaps/top_down/smoothing_1_conv/kernel',
- 'MobilenetV2_FPN/FeatureMaps/top_down/smoothing_2_conv/kernel',
- 'MobilenetV2_FPN/FeatureMaps/top_down/projection_1/kernel',
- 'MobilenetV2_FPN/FeatureMaps/top_down/projection_2/kernel',
- 'MobilenetV2_FPN/FeatureMaps/top_down/projection_3/kernel'
- ])
- keras_expected_feature_maps_variables_with_depthwise = set([
- # Keras Mobilenet V2 feature maps
- 'MobilenetV2_FPN/block_4_depthwise/depthwise_kernel',
- 'MobilenetV2_FPN/block_7_depthwise/depthwise_kernel',
- 'MobilenetV2_FPN/block_14_depthwise/depthwise_kernel',
- 'MobilenetV2_FPN/Conv_1/kernel',
- # FPN layers
- 'MobilenetV2_FPN/bottom_up_Conv2d_20_depthwise_conv/depthwise_kernel',
- 'MobilenetV2_FPN/bottom_up_Conv2d_20_depthwise_conv/pointwise_kernel',
- 'MobilenetV2_FPN/bottom_up_Conv2d_21_depthwise_conv/depthwise_kernel',
- 'MobilenetV2_FPN/bottom_up_Conv2d_21_depthwise_conv/pointwise_kernel',
- ('MobilenetV2_FPN/FeatureMaps/top_down/smoothing_1_depthwise_conv/'
- 'depthwise_kernel'),
- ('MobilenetV2_FPN/FeatureMaps/top_down/smoothing_1_depthwise_conv/'
- 'pointwise_kernel'),
- ('MobilenetV2_FPN/FeatureMaps/top_down/smoothing_2_depthwise_conv/'
- 'depthwise_kernel'),
- ('MobilenetV2_FPN/FeatureMaps/top_down/smoothing_2_depthwise_conv/'
- 'pointwise_kernel'),
- 'MobilenetV2_FPN/FeatureMaps/top_down/projection_1/kernel',
- 'MobilenetV2_FPN/FeatureMaps/top_down/projection_2/kernel',
- 'MobilenetV2_FPN/FeatureMaps/top_down/projection_3/kernel'
- ])
g = tf.Graph()
with g.as_default():
@@ -407,18 +353,12 @@ class SsdMobilenetV2FpnFeatureExtractorTest(
pad_to_multiple,
use_keras=use_keras,
use_depthwise=use_depthwise)
- if use_keras:
- _ = feature_extractor(preprocessed_inputs)
- expected_feature_maps_variables = keras_expected_feature_maps_variables
- if use_depthwise:
- expected_feature_maps_variables = (
- keras_expected_feature_maps_variables_with_depthwise)
- else:
- _ = feature_extractor.extract_features(preprocessed_inputs)
- expected_feature_maps_variables = slim_expected_feature_maps_variables
- if use_depthwise:
- expected_feature_maps_variables = (
- slim_expected_feature_maps_variables_with_depthwise)
+
+ _ = feature_extractor.extract_features(preprocessed_inputs)
+ expected_feature_maps_variables = slim_expected_feature_maps_variables
+ if use_depthwise:
+ expected_feature_maps_variables = (
+ slim_expected_feature_maps_variables_with_depthwise)
actual_variable_set = set([
var.op.name for var in g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
])
diff --git a/research/object_detection/models/ssd_mobilenet_v2_fpn_feature_extractor_tf2_test.py b/research/object_detection/models/ssd_mobilenet_v2_fpn_feature_extractor_tf2_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..44522ac94494430cb109e084689cc6a1a1dbeddb
--- /dev/null
+++ b/research/object_detection/models/ssd_mobilenet_v2_fpn_feature_extractor_tf2_test.py
@@ -0,0 +1,269 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for ssd_mobilenet_v2_fpn_feature_extractor.
+
+By using parameterized test decorator, this test covers the Keras-based
+Mobilenet V2 FPN feature extractor in SSD, with and without depthwise convs.
+"""
+import unittest
+from absl.testing import parameterized
+import numpy as np
+import tensorflow.compat.v1 as tf
+
+from object_detection.models import ssd_feature_extractor_test
+from object_detection.models import ssd_mobilenet_v2_fpn_keras_feature_extractor
+from object_detection.utils import tf_version
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+@parameterized.parameters(
+ {
+ 'use_depthwise': False,
+ },
+ {
+ 'use_depthwise': True,
+ },
+)
+class SsdMobilenetV2FpnFeatureExtractorTest(
+ ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
+
+ def _create_feature_extractor(self,
+ depth_multiplier,
+ pad_to_multiple,
+ is_training=True,
+ use_explicit_padding=False,
+ use_keras=False,
+ use_depthwise=False):
+ """Constructs a new feature extractor.
+
+ Args:
+ depth_multiplier: float depth multiplier for feature extractor
+ pad_to_multiple: the nearest multiple to zero pad the input height and
+ width dimensions to.
+ is_training: whether the network is in training mode.
+ use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
+ inputs so that the output dimensions are the same as if 'SAME' padding
+ were used.
+ use_keras: unused; the argument is deleted immediately and a keras-based
+ feature extractor is always built.
+ use_depthwise: Whether to use depthwise convolutions.
+ Returns:
+ an ssd_meta_arch.SSDFeatureExtractor object.
+ """
+ del use_keras
+ min_depth = 32
+ return (ssd_mobilenet_v2_fpn_keras_feature_extractor
+ .SSDMobileNetV2FpnKerasFeatureExtractor(
+ is_training=is_training,
+ depth_multiplier=depth_multiplier,
+ min_depth=min_depth,
+ pad_to_multiple=pad_to_multiple,
+ conv_hyperparams=self._build_conv_hyperparams(
+ add_batch_norm=False),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ use_explicit_padding=use_explicit_padding,
+ use_depthwise=use_depthwise,
+ name='MobilenetV2_FPN'))
+
+ def test_extract_features_returns_correct_shapes_256(self,
+ use_depthwise):
+ use_keras = True
+ image_height = 256
+ image_width = 256
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 32, 32, 256), (2, 16, 16, 256),
+ (2, 8, 8, 256), (2, 4, 4, 256),
+ (2, 2, 2, 256)]
+ self.check_extract_features_returns_correct_shape(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=False,
+ use_keras=use_keras,
+ use_depthwise=use_depthwise)
+ self.check_extract_features_returns_correct_shape(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=True,
+ use_keras=use_keras,
+ use_depthwise=use_depthwise)
+
+ def test_extract_features_returns_correct_shapes_384(self,
+ use_depthwise):
+ use_keras = True
+ image_height = 320
+ image_width = 320
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 40, 40, 256), (2, 20, 20, 256),
+ (2, 10, 10, 256), (2, 5, 5, 256),
+ (2, 3, 3, 256)]
+ self.check_extract_features_returns_correct_shape(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=False,
+ use_keras=use_keras,
+ use_depthwise=use_depthwise)
+ self.check_extract_features_returns_correct_shape(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=True,
+ use_keras=use_keras,
+ use_depthwise=use_depthwise)
+
+ def test_extract_features_with_dynamic_image_shape(self,
+ use_depthwise):
+ use_keras = True
+ image_height = 256
+ image_width = 256
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 32, 32, 256), (2, 16, 16, 256),
+ (2, 8, 8, 256), (2, 4, 4, 256),
+ (2, 2, 2, 256)]
+ self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=False,
+ use_keras=use_keras,
+ use_depthwise=use_depthwise)
+ self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=True,
+ use_keras=use_keras,
+ use_depthwise=use_depthwise)
+
+ def test_extract_features_returns_correct_shapes_with_pad_to_multiple(
+ self, use_depthwise):
+ use_keras = True
+ image_height = 299
+ image_width = 299
+ depth_multiplier = 1.0
+ pad_to_multiple = 32
+ expected_feature_map_shape = [(2, 40, 40, 256), (2, 20, 20, 256),
+ (2, 10, 10, 256), (2, 5, 5, 256),
+ (2, 3, 3, 256)]
+ self.check_extract_features_returns_correct_shape(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=False,
+ use_keras=use_keras,
+ use_depthwise=use_depthwise)
+ self.check_extract_features_returns_correct_shape(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=True,
+ use_keras=use_keras,
+ use_depthwise=use_depthwise)
+
+ def test_extract_features_returns_correct_shapes_enforcing_min_depth(
+ self, use_depthwise):
+ use_keras = True
+ image_height = 256
+ image_width = 256
+ depth_multiplier = 0.5**12
+ pad_to_multiple = 1
+ expected_feature_map_shape = [(2, 32, 32, 32), (2, 16, 16, 32),
+ (2, 8, 8, 32), (2, 4, 4, 32),
+ (2, 2, 2, 32)]
+ self.check_extract_features_returns_correct_shape(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=False,
+ use_keras=use_keras,
+ use_depthwise=use_depthwise)
+ self.check_extract_features_returns_correct_shape(
+ 2,
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ expected_feature_map_shape,
+ use_explicit_padding=True,
+ use_keras=use_keras,
+ use_depthwise=use_depthwise)
+
+ def test_extract_features_raises_error_with_invalid_image_size(
+ self, use_depthwise=False):
+ use_keras = True
+ image_height = 32
+ image_width = 32
+ depth_multiplier = 1.0
+ pad_to_multiple = 1
+ self.check_extract_features_raises_error_with_invalid_image_size(
+ image_height,
+ image_width,
+ depth_multiplier,
+ pad_to_multiple,
+ use_keras=use_keras,
+ use_depthwise=use_depthwise)
+
+ def test_preprocess_returns_correct_value_range(self,
+ use_depthwise):
+ use_keras = True
+ image_height = 256
+ image_width = 256
+ depth_multiplier = 1
+ pad_to_multiple = 1
+ test_image = np.random.rand(2, image_height, image_width, 3)
+ feature_extractor = self._create_feature_extractor(
+ depth_multiplier,
+ pad_to_multiple,
+ use_keras=use_keras,
+ use_depthwise=use_depthwise)
+ preprocessed_image = feature_extractor.preprocess(test_image)
+ self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/research/object_detection/models/ssd_mobilenet_v2_fpn_keras_feature_extractor.py b/research/object_detection/models/ssd_mobilenet_v2_fpn_keras_feature_extractor.py
index f01bec9c5b53026113dc74324a739eb13fa48d3d..0834ea6b9db2d853b06392b48b594a7c9a5f301b 100644
--- a/research/object_detection/models/ssd_mobilenet_v2_fpn_keras_feature_extractor.py
+++ b/research/object_detection/models/ssd_mobilenet_v2_fpn_keras_feature_extractor.py
@@ -123,7 +123,7 @@ class SSDMobileNetV2FpnKerasFeatureExtractor(
self._conv_defs = _create_modified_mobilenet_config()
self._use_native_resize_op = use_native_resize_op
self._feature_blocks = ['layer_4', 'layer_7', 'layer_14', 'layer_19']
- self._mobilenet_v2 = None
+ self.classification_backbone = None
self._fpn_features_generator = None
self._coarse_feature_layers = []
@@ -147,7 +147,7 @@ class SSDMobileNetV2FpnKerasFeatureExtractor(
outputs.append(full_mobilenet_v2.get_layer(output_layer_name).output)
layer_19 = full_mobilenet_v2.get_layer(name='out_relu').output
outputs.append(layer_19)
- self._mobilenet_v2 = tf.keras.Model(
+ self.classification_backbone = tf.keras.Model(
inputs=full_mobilenet_v2.inputs,
outputs=outputs)
# pylint:disable=g-long-lambda
@@ -216,7 +216,7 @@ class SSDMobileNetV2FpnKerasFeatureExtractor(
preprocessed_inputs = shape_utils.check_min_image_dim(
33, preprocessed_inputs)
- image_features = self._mobilenet_v2(
+ image_features = self.classification_backbone(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple))
feature_block_list = []
diff --git a/research/object_detection/models/ssd_mobilenet_v2_keras_feature_extractor.py b/research/object_detection/models/ssd_mobilenet_v2_keras_feature_extractor.py
index e9260cd7af7fb251e8da191cee5dd984a19aec31..0f79fc271d55edbc0e61384948bd816fa6f9cd3b 100644
--- a/research/object_detection/models/ssd_mobilenet_v2_keras_feature_extractor.py
+++ b/research/object_detection/models/ssd_mobilenet_v2_keras_feature_extractor.py
@@ -97,7 +97,7 @@ class SSDMobileNetV2KerasFeatureExtractor(
'use_explicit_padding': self._use_explicit_padding,
}
- self.mobilenet_v2 = None
+ self.classification_backbone = None
self.feature_map_generator = None
def build(self, input_shape):
@@ -114,7 +114,7 @@ class SSDMobileNetV2KerasFeatureExtractor(
conv2d_11_pointwise = full_mobilenet_v2.get_layer(
name='block_13_expand_relu').output
conv2d_13_pointwise = full_mobilenet_v2.get_layer(name='out_relu').output
- self.mobilenet_v2 = tf.keras.Model(
+ self.classification_backbone = tf.keras.Model(
inputs=full_mobilenet_v2.inputs,
outputs=[conv2d_11_pointwise, conv2d_13_pointwise])
self.feature_map_generator = (
@@ -158,7 +158,7 @@ class SSDMobileNetV2KerasFeatureExtractor(
preprocessed_inputs = shape_utils.check_min_image_dim(
33, preprocessed_inputs)
- image_features = self.mobilenet_v2(
+ image_features = self.classification_backbone(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple))
feature_maps = self.feature_map_generator({
diff --git a/research/object_detection/models/ssd_mobilenet_v2_mnasfpn_feature_extractor_test.py b/research/object_detection/models/ssd_mobilenet_v2_mnasfpn_feature_extractor_tf1_test.py
similarity index 96%
rename from research/object_detection/models/ssd_mobilenet_v2_mnasfpn_feature_extractor_test.py
rename to research/object_detection/models/ssd_mobilenet_v2_mnasfpn_feature_extractor_tf1_test.py
index dd9aae976665dc535c07d49de6eeb9292b6b1dd0..032433128de057c97a422c97e96d16bd2942f62b 100644
--- a/research/object_detection/models/ssd_mobilenet_v2_mnasfpn_feature_extractor_test.py
+++ b/research/object_detection/models/ssd_mobilenet_v2_mnasfpn_feature_extractor_tf1_test.py
@@ -14,13 +14,16 @@
# limitations under the License.
# ==============================================================================
"""Tests for ssd_mobilenet_v2_nas_fpn_feature_extractor."""
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_mobilenet_v2_mnasfpn_feature_extractor as mnasfpn_feature_extractor
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class SsdMobilenetV2MnasFPNFeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
diff --git a/research/object_detection/models/ssd_mobilenet_v3_feature_extractor_test.py b/research/object_detection/models/ssd_mobilenet_v3_feature_extractor_tf1_test.py
similarity index 95%
rename from research/object_detection/models/ssd_mobilenet_v3_feature_extractor_test.py
rename to research/object_detection/models/ssd_mobilenet_v3_feature_extractor_tf1_test.py
index 38621744daa19ebc25e07e4a933694ae9e3d7e76..43c02490a7358820404380d20aa1d2190fce01a1 100644
--- a/research/object_detection/models/ssd_mobilenet_v3_feature_extractor_test.py
+++ b/research/object_detection/models/ssd_mobilenet_v3_feature_extractor_tf1_test.py
@@ -13,17 +13,15 @@
# limitations under the License.
# ==============================================================================
"""Tests for ssd_mobilenet_v3_feature_extractor."""
-
+import unittest
import tensorflow.compat.v1 as tf
-import tf_slim as slim
from object_detection.models import ssd_mobilenet_v3_feature_extractor
from object_detection.models import ssd_mobilenet_v3_feature_extractor_testbase
+from object_detection.utils import tf_version
-slim = slim
-
-
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class SsdMobilenetV3LargeFeatureExtractorTest(
ssd_mobilenet_v3_feature_extractor_testbase
._SsdMobilenetV3FeatureExtractorTestBase):
@@ -63,6 +61,7 @@ class SsdMobilenetV3LargeFeatureExtractorTest(
use_explicit_padding=use_explicit_padding))
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class SsdMobilenetV3SmallFeatureExtractorTest(
ssd_mobilenet_v3_feature_extractor_testbase
._SsdMobilenetV3FeatureExtractorTestBase):
diff --git a/research/object_detection/models/ssd_pnasnet_feature_extractor.py b/research/object_detection/models/ssd_pnasnet_feature_extractor.py
index 802c839484eaabf746f848930da50af44f8a1f00..48f1dee3b4f6aceffd87b995bebb06a88b25c4ca 100644
--- a/research/object_detection/models/ssd_pnasnet_feature_extractor.py
+++ b/research/object_detection/models/ssd_pnasnet_feature_extractor.py
@@ -27,7 +27,10 @@ from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops
from object_detection.utils import variables_helper
-from nets.nasnet import pnasnet
+try:
+ from nets.nasnet import pnasnet # pylint: disable=g-import-not-at-top
+except: # pylint: disable=bare-except
+ pass
def pnasnet_large_arg_scope_for_detection(is_batch_norm_training=False):
diff --git a/research/object_detection/models/ssd_pnasnet_feature_extractor_test.py b/research/object_detection/models/ssd_pnasnet_feature_extractor_tf1_test.py
similarity index 97%
rename from research/object_detection/models/ssd_pnasnet_feature_extractor_test.py
rename to research/object_detection/models/ssd_pnasnet_feature_extractor_tf1_test.py
index 1f2fb0f836b2f050906caed9a202c0d613d57375..d5f5bff92d9f7da6fbf8243dd3dc1dff0bc9e628 100644
--- a/research/object_detection/models/ssd_pnasnet_feature_extractor_test.py
+++ b/research/object_detection/models/ssd_pnasnet_feature_extractor_tf1_test.py
@@ -14,13 +14,16 @@
# ==============================================================================
"""Tests for ssd_pnas_feature_extractor."""
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_pnasnet_feature_extractor
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class SsdPnasNetFeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
diff --git a/research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_test.py b/research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_test.py
deleted file mode 100644
index ddd4b0811a0e9c6527451dfaa149992efa86e4c0..0000000000000000000000000000000000000000
--- a/research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_test.py
+++ /dev/null
@@ -1,126 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for ssd resnet v1 FPN feature extractors."""
-import tensorflow.compat.v1 as tf
-
-from object_detection.models import ssd_resnet_v1_fpn_feature_extractor
-from object_detection.models import ssd_resnet_v1_fpn_feature_extractor_testbase
-from object_detection.models import ssd_resnet_v1_fpn_keras_feature_extractor
-
-
-class SSDResnet50V1FeatureExtractorTest(
- ssd_resnet_v1_fpn_feature_extractor_testbase.
- SSDResnetFPNFeatureExtractorTestBase):
- """SSDResnet50v1Fpn feature extractor test."""
-
- def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
- use_explicit_padding=False, min_depth=32,
- use_keras=False):
- is_training = True
- if use_keras:
- return (ssd_resnet_v1_fpn_keras_feature_extractor.
- SSDResNet50V1FpnKerasFeatureExtractor(
- is_training=is_training,
- depth_multiplier=depth_multiplier,
- min_depth=min_depth,
- pad_to_multiple=pad_to_multiple,
- conv_hyperparams=self._build_conv_hyperparams(
- add_batch_norm=False),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- name='ResNet50V1_FPN'))
- else:
- return (
- ssd_resnet_v1_fpn_feature_extractor.SSDResnet50V1FpnFeatureExtractor(
- is_training, depth_multiplier, min_depth, pad_to_multiple,
- self.conv_hyperparams_fn,
- use_explicit_padding=use_explicit_padding))
-
- def _resnet_scope_name(self, use_keras=False):
- if use_keras:
- return 'ResNet50V1_FPN'
- return 'resnet_v1_50'
-
-
-class SSDResnet101V1FeatureExtractorTest(
- ssd_resnet_v1_fpn_feature_extractor_testbase.
- SSDResnetFPNFeatureExtractorTestBase):
- """SSDResnet101v1Fpn feature extractor test."""
-
- def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
- use_explicit_padding=False, min_depth=32,
- use_keras=False):
- is_training = True
- if use_keras:
- return (ssd_resnet_v1_fpn_keras_feature_extractor.
- SSDResNet101V1FpnKerasFeatureExtractor(
- is_training=is_training,
- depth_multiplier=depth_multiplier,
- min_depth=min_depth,
- pad_to_multiple=pad_to_multiple,
- conv_hyperparams=self._build_conv_hyperparams(
- add_batch_norm=False),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- name='ResNet101V1_FPN'))
- else:
- return (
- ssd_resnet_v1_fpn_feature_extractor.SSDResnet101V1FpnFeatureExtractor(
- is_training, depth_multiplier, min_depth, pad_to_multiple,
- self.conv_hyperparams_fn,
- use_explicit_padding=use_explicit_padding))
-
- def _resnet_scope_name(self, use_keras):
- if use_keras:
- return 'ResNet101V1_FPN'
- return 'resnet_v1_101'
-
-
-class SSDResnet152V1FeatureExtractorTest(
- ssd_resnet_v1_fpn_feature_extractor_testbase.
- SSDResnetFPNFeatureExtractorTestBase):
- """SSDResnet152v1Fpn feature extractor test."""
-
- def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
- use_explicit_padding=False, min_depth=32,
- use_keras=False):
- is_training = True
- if use_keras:
- return (ssd_resnet_v1_fpn_keras_feature_extractor.
- SSDResNet152V1FpnKerasFeatureExtractor(
- is_training=is_training,
- depth_multiplier=depth_multiplier,
- min_depth=min_depth,
- pad_to_multiple=pad_to_multiple,
- conv_hyperparams=self._build_conv_hyperparams(
- add_batch_norm=False),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- name='ResNet152V1_FPN'))
- else:
- return (
- ssd_resnet_v1_fpn_feature_extractor.SSDResnet152V1FpnFeatureExtractor(
- is_training, depth_multiplier, min_depth, pad_to_multiple,
- self.conv_hyperparams_fn,
- use_explicit_padding=use_explicit_padding))
-
- def _resnet_scope_name(self, use_keras):
- if use_keras:
- return 'ResNet152V1_FPN'
- return 'resnet_v1_152'
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_testbase.py b/research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_testbase.py
index c3854444dadb3ef8bf76ff65bd9013d382648848..1ccad530ed5f34da2bd903c23b1d974f86a9d933 100644
--- a/research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_testbase.py
+++ b/research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_testbase.py
@@ -19,24 +19,20 @@ from __future__ import division
from __future__ import print_function
import abc
-from absl.testing import parameterized
import numpy as np
from six.moves import zip
import tensorflow.compat.v1 as tf
from object_detection.models import ssd_feature_extractor_test
+from object_detection.utils import test_utils
-@parameterized.parameters(
- {'use_keras': False},
- {'use_keras': True},
-)
class SSDResnetFPNFeatureExtractorTestBase(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
"""Helper test class for SSD Resnet v1 FPN feature extractors."""
@abc.abstractmethod
- def _resnet_scope_name(self, use_keras):
+ def _resnet_scope_name(self):
pass
@abc.abstractmethod
@@ -52,7 +48,7 @@ class SSDResnetFPNFeatureExtractorTestBase(
use_keras=False):
pass
- def test_extract_features_returns_correct_shapes_256(self, use_keras):
+ def test_extract_features_returns_correct_shapes_256(self):
image_height = 256
image_width = 256
depth_multiplier = 1.0
@@ -62,10 +58,10 @@ class SSDResnetFPNFeatureExtractorTestBase(
(2, 2, 2, 256)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
+ expected_feature_map_shape, use_keras=self.is_tf2())
def test_extract_features_returns_correct_shapes_with_dynamic_inputs(
- self, use_keras):
+ self):
image_height = 256
image_width = 256
depth_multiplier = 1.0
@@ -75,10 +71,10 @@ class SSDResnetFPNFeatureExtractorTestBase(
(2, 2, 2, 256)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape, use_keras=use_keras)
+ expected_feature_map_shape, use_keras=self.is_tf2())
def test_extract_features_returns_correct_shapes_with_depth_multiplier(
- self, use_keras):
+ self):
image_height = 256
image_width = 256
depth_multiplier = 0.5
@@ -91,10 +87,10 @@ class SSDResnetFPNFeatureExtractorTestBase(
(2, 2, 2, expected_num_channels)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape, use_keras=use_keras)
+ expected_feature_map_shape, use_keras=self.is_tf2())
def test_extract_features_returns_correct_shapes_with_min_depth(
- self, use_keras):
+ self):
image_height = 256
image_width = 256
depth_multiplier = 1.0
@@ -106,23 +102,24 @@ class SSDResnetFPNFeatureExtractorTestBase(
(2, 4, 4, min_depth),
(2, 2, 2, min_depth)]
- def graph_fn(image_tensor):
+ with test_utils.GraphContextOrNone() as g:
+ image_tensor = tf.random.uniform([2, image_height, image_width, 3])
feature_extractor = self._create_feature_extractor(
depth_multiplier, pad_to_multiple, min_depth=min_depth,
- use_keras=use_keras)
- if use_keras:
+ use_keras=self.is_tf2())
+
+ def graph_fn():
+ if self.is_tf2():
return feature_extractor(image_tensor)
return feature_extractor.extract_features(image_tensor)
- image_tensor = np.random.rand(2, image_height, image_width,
- 3).astype(np.float32)
- feature_maps = self.execute(graph_fn, [image_tensor])
+ feature_maps = self.execute(graph_fn, [], graph=g)
for feature_map, expected_shape in zip(feature_maps,
expected_feature_map_shape):
self.assertAllEqual(feature_map.shape, expected_shape)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(
- self, use_keras):
+ self):
image_height = 254
image_width = 254
depth_multiplier = 1.0
@@ -133,55 +130,62 @@ class SSDResnetFPNFeatureExtractorTestBase(
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape, use_keras=use_keras)
+ expected_feature_map_shape, use_keras=self.is_tf2())
def test_extract_features_raises_error_with_invalid_image_size(
- self, use_keras):
+ self):
image_height = 32
image_width = 32
depth_multiplier = 1.0
pad_to_multiple = 1
self.check_extract_features_raises_error_with_invalid_image_size(
image_height, image_width, depth_multiplier, pad_to_multiple,
- use_keras=use_keras)
+ use_keras=self.is_tf2())
- def test_preprocess_returns_correct_value_range(self, use_keras):
+ def test_preprocess_returns_correct_value_range(self):
image_height = 128
image_width = 128
depth_multiplier = 1
pad_to_multiple = 1
- test_image = tf.constant(np.random.rand(4, image_height, image_width, 3))
- feature_extractor = self._create_feature_extractor(depth_multiplier,
- pad_to_multiple,
- use_keras=use_keras)
- preprocessed_image = feature_extractor.preprocess(test_image)
- with self.test_session() as sess:
- test_image_out, preprocessed_image_out = sess.run(
- [test_image, preprocessed_image])
- self.assertAllClose(preprocessed_image_out,
- test_image_out - [[123.68, 116.779, 103.939]])
-
- def test_variables_only_created_in_scope(self, use_keras):
+ test_image_np = np.random.rand(4, image_height, image_width, 3)
+ with test_utils.GraphContextOrNone() as g:
+ test_image = tf.constant(test_image_np)
+ feature_extractor = self._create_feature_extractor(
+ depth_multiplier, pad_to_multiple, use_keras=self.is_tf2())
+
+ def graph_fn():
+ preprocessed_image = feature_extractor.preprocess(test_image)
+ return preprocessed_image
+
+ preprocessed_image_out = self.execute(graph_fn, [], graph=g)
+ self.assertAllClose(preprocessed_image_out,
+ test_image_np - [[123.68, 116.779, 103.939]])
+
+ def test_variables_only_created_in_scope(self):
+ if self.is_tf2():
+ self.skipTest('test_variables_only_created_in_scope is only tf1')
depth_multiplier = 1
pad_to_multiple = 1
- scope_name = self._resnet_scope_name(use_keras)
+ scope_name = self._resnet_scope_name()
self.check_feature_extractor_variables_under_scope(
depth_multiplier,
pad_to_multiple,
scope_name,
- use_keras=use_keras)
+ use_keras=self.is_tf2())
- def test_variable_count(self, use_keras):
+ def test_variable_count(self):
+ if self.is_tf2():
+ self.skipTest('test_variable_count is only tf1')
depth_multiplier = 1
pad_to_multiple = 1
variables = self.get_feature_extractor_variables(
depth_multiplier,
pad_to_multiple,
- use_keras=use_keras)
+ use_keras=self.is_tf2())
# The number of expected variables in resnet_v1_50, resnet_v1_101,
# and resnet_v1_152 is 279, 534, and 789 respectively.
expected_variables_len = 279
- scope_name = self._resnet_scope_name(use_keras)
+ scope_name = self._resnet_scope_name()
if scope_name in ('ResNet101V1_FPN', 'resnet_v1_101'):
expected_variables_len = 534
elif scope_name in ('ResNet152V1_FPN', 'resnet_v1_152'):
diff --git a/research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_tf1_test.py b/research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_tf1_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..58952ff9486d6be3f077c9e21788ce8409806d18
--- /dev/null
+++ b/research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_tf1_test.py
@@ -0,0 +1,85 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for ssd resnet v1 FPN feature extractors."""
+import unittest
+import tensorflow.compat.v1 as tf
+
+from object_detection.models import ssd_resnet_v1_fpn_feature_extractor
+from object_detection.models import ssd_resnet_v1_fpn_feature_extractor_testbase
+from object_detection.utils import tf_version
+
+
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
+class SSDResnet50V1FeatureExtractorTest(
+ ssd_resnet_v1_fpn_feature_extractor_testbase.
+ SSDResnetFPNFeatureExtractorTestBase):
+ """SSDResnet50v1Fpn feature extractor test."""
+
+ def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
+ use_explicit_padding=False, min_depth=32,
+ use_keras=False):
+ is_training = True
+ return (
+ ssd_resnet_v1_fpn_feature_extractor.SSDResnet50V1FpnFeatureExtractor(
+ is_training, depth_multiplier, min_depth, pad_to_multiple,
+ self.conv_hyperparams_fn,
+ use_explicit_padding=use_explicit_padding))
+
+ def _resnet_scope_name(self):
+ return 'resnet_v1_50'
+
+
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
+class SSDResnet101V1FeatureExtractorTest(
+ ssd_resnet_v1_fpn_feature_extractor_testbase.
+ SSDResnetFPNFeatureExtractorTestBase):
+ """SSDResnet101v1Fpn feature extractor test."""
+
+ def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
+ use_explicit_padding=False, min_depth=32,
+ use_keras=False):
+ is_training = True
+ return (
+ ssd_resnet_v1_fpn_feature_extractor.SSDResnet101V1FpnFeatureExtractor(
+ is_training, depth_multiplier, min_depth, pad_to_multiple,
+ self.conv_hyperparams_fn,
+ use_explicit_padding=use_explicit_padding))
+
+ def _resnet_scope_name(self):
+ return 'resnet_v1_101'
+
+
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
+class SSDResnet152V1FeatureExtractorTest(
+ ssd_resnet_v1_fpn_feature_extractor_testbase.
+ SSDResnetFPNFeatureExtractorTestBase):
+ """SSDResnet152v1Fpn feature extractor test."""
+
+ def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
+ use_explicit_padding=False, min_depth=32,
+ use_keras=False):
+ is_training = True
+ return (
+ ssd_resnet_v1_fpn_feature_extractor.SSDResnet152V1FpnFeatureExtractor(
+ is_training, depth_multiplier, min_depth, pad_to_multiple,
+ self.conv_hyperparams_fn,
+ use_explicit_padding=use_explicit_padding))
+
+ def _resnet_scope_name(self):
+ return 'resnet_v1_152'
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_tf2_test.py b/research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_tf2_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..27c54ddd08ffa866dad4975c9bed7c629e8c46ac
--- /dev/null
+++ b/research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_tf2_test.py
@@ -0,0 +1,103 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for ssd resnet v1 FPN feature extractors."""
+import unittest
+import tensorflow.compat.v1 as tf
+
+from object_detection.models import ssd_resnet_v1_fpn_feature_extractor_testbase
+from object_detection.models import ssd_resnet_v1_fpn_keras_feature_extractor
+from object_detection.utils import tf_version
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class SSDResnet50V1FeatureExtractorTest(
+ ssd_resnet_v1_fpn_feature_extractor_testbase.
+ SSDResnetFPNFeatureExtractorTestBase):
+ """SSDResnet50v1Fpn feature extractor test."""
+
+ def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
+ use_explicit_padding=False, min_depth=32,
+ use_keras=True):
+ is_training = True
+ return (ssd_resnet_v1_fpn_keras_feature_extractor.
+ SSDResNet50V1FpnKerasFeatureExtractor(
+ is_training=is_training,
+ depth_multiplier=depth_multiplier,
+ min_depth=min_depth,
+ pad_to_multiple=pad_to_multiple,
+ conv_hyperparams=self._build_conv_hyperparams(
+ add_batch_norm=False),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ name='ResNet50V1_FPN'))
+
+ def _resnet_scope_name(self):
+ return 'ResNet50V1_FPN'
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class SSDResnet101V1FeatureExtractorTest(
+ ssd_resnet_v1_fpn_feature_extractor_testbase.
+ SSDResnetFPNFeatureExtractorTestBase):
+ """SSDResnet101v1Fpn feature extractor test."""
+
+ def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
+ use_explicit_padding=False, min_depth=32,
+ use_keras=False):
+ is_training = True
+ return (ssd_resnet_v1_fpn_keras_feature_extractor.
+ SSDResNet101V1FpnKerasFeatureExtractor(
+ is_training=is_training,
+ depth_multiplier=depth_multiplier,
+ min_depth=min_depth,
+ pad_to_multiple=pad_to_multiple,
+ conv_hyperparams=self._build_conv_hyperparams(
+ add_batch_norm=False),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ name='ResNet101V1_FPN'))
+
+ def _resnet_scope_name(self):
+ return 'ResNet101V1_FPN'
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class SSDResnet152V1FeatureExtractorTest(
+ ssd_resnet_v1_fpn_feature_extractor_testbase.
+ SSDResnetFPNFeatureExtractorTestBase):
+ """SSDResnet152v1Fpn feature extractor test."""
+
+ def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
+ use_explicit_padding=False, min_depth=32,
+ use_keras=False):
+ is_training = True
+ return (ssd_resnet_v1_fpn_keras_feature_extractor.
+ SSDResNet152V1FpnKerasFeatureExtractor(
+ is_training=is_training,
+ depth_multiplier=depth_multiplier,
+ min_depth=min_depth,
+ pad_to_multiple=pad_to_multiple,
+ conv_hyperparams=self._build_conv_hyperparams(
+ add_batch_norm=False),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ name='ResNet152V1_FPN'))
+
+ def _resnet_scope_name(self):
+ return 'ResNet152V1_FPN'
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/research/object_detection/models/ssd_resnet_v1_fpn_keras_feature_extractor.py b/research/object_detection/models/ssd_resnet_v1_fpn_keras_feature_extractor.py
index 6de9ae3e5b8f24885d139b2b277b09ccd1782169..0ac929cc6349a21b541f20adb624ad157d4f4a63 100644
--- a/research/object_detection/models/ssd_resnet_v1_fpn_keras_feature_extractor.py
+++ b/research/object_detection/models/ssd_resnet_v1_fpn_keras_feature_extractor.py
@@ -246,17 +246,6 @@ class SSDResNetV1FpnKerasFeatureExtractor(
feature_maps.append(last_feature_map)
return feature_maps
- def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
- """Returns a map for restoring from an (object-based) checkpoint.
-
- Args:
- feature_extractor_scope: A scope name for the feature extractor (unused).
-
- Returns:
- A dict mapping keys to Keras models
- """
- return {'feature_extractor': self.classification_backbone}
-
class SSDResNet50V1FpnKerasFeatureExtractor(
SSDResNetV1FpnKerasFeatureExtractor):
diff --git a/research/object_detection/models/ssd_resnet_v1_ppn_feature_extractor_test.py b/research/object_detection/models/ssd_resnet_v1_ppn_feature_extractor_tf1_test.py
similarity index 92%
rename from research/object_detection/models/ssd_resnet_v1_ppn_feature_extractor_test.py
rename to research/object_detection/models/ssd_resnet_v1_ppn_feature_extractor_tf1_test.py
index bfcb74cf9619764f6ecbd9399f8607f4e6439e76..bb95cb53f3905ef9288ade7600005c1ba9372be5 100644
--- a/research/object_detection/models/ssd_resnet_v1_ppn_feature_extractor_test.py
+++ b/research/object_detection/models/ssd_resnet_v1_ppn_feature_extractor_tf1_test.py
@@ -13,12 +13,15 @@
# limitations under the License.
# ==============================================================================
"""Tests for ssd resnet v1 feature extractors."""
+import unittest
import tensorflow.compat.v1 as tf
from object_detection.models import ssd_resnet_v1_ppn_feature_extractor
from object_detection.models import ssd_resnet_v1_ppn_feature_extractor_testbase
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class SSDResnet50V1PpnFeatureExtractorTest(
ssd_resnet_v1_ppn_feature_extractor_testbase.
SSDResnetPpnFeatureExtractorTestBase):
@@ -40,6 +43,7 @@ class SSDResnet50V1PpnFeatureExtractorTest(
return 'resnet_v1_50'
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class SSDResnet101V1PpnFeatureExtractorTest(
ssd_resnet_v1_ppn_feature_extractor_testbase.
SSDResnetPpnFeatureExtractorTestBase):
@@ -62,6 +66,7 @@ class SSDResnet101V1PpnFeatureExtractorTest(
return 'resnet_v1_101'
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class SSDResnet152V1PpnFeatureExtractorTest(
ssd_resnet_v1_ppn_feature_extractor_testbase.
SSDResnetPpnFeatureExtractorTestBase):
diff --git a/research/object_detection/predictors/convolutional_box_predictor_test.py b/research/object_detection/predictors/convolutional_box_predictor_tf1_test.py
similarity index 99%
rename from research/object_detection/predictors/convolutional_box_predictor_test.py
rename to research/object_detection/predictors/convolutional_box_predictor_tf1_test.py
index eb608e1e74a87fdaabf16bbae745819c05bdf155..3236615dfb60bc848ec271fc5173b9c4169feb93 100644
--- a/research/object_detection/predictors/convolutional_box_predictor_test.py
+++ b/research/object_detection/predictors/convolutional_box_predictor_tf1_test.py
@@ -19,7 +19,7 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-
+import unittest
from absl.testing import parameterized
import numpy as np
from six.moves import range
@@ -35,8 +35,10 @@ from object_detection.predictors.heads import class_head
from object_detection.predictors.heads import mask_head
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class ConvolutionalBoxPredictorTest(test_case.TestCase):
def _build_arg_scope_with_conv_hyperparams(self):
@@ -281,6 +283,7 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
self.assertEqual(bad_dangling_ops, [])
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
def _build_arg_scope_with_conv_hyperparams(self):
diff --git a/research/object_detection/predictors/convolutional_keras_box_predictor.py b/research/object_detection/predictors/convolutional_keras_box_predictor.py
index 630c680398baa4a60c945a0bd2d874ea0f8c1783..fc72fb04c2d47301b1ac5fc185ca98c6b00073c0 100644
--- a/research/object_detection/predictors/convolutional_keras_box_predictor.py
+++ b/research/object_detection/predictors/convolutional_keras_box_predictor.py
@@ -314,7 +314,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
self, inserted_layer_counter, target_channel):
projection_layers = []
if inserted_layer_counter >= 0:
- use_bias = False if self._apply_batch_norm else True
+ use_bias = False if (self._apply_batch_norm and not
+ self._conv_hyperparams.force_use_bias()) else True
projection_layers.append(keras.Conv2D(
target_channel, [1, 1], strides=1, padding='SAME',
name='ProjectionLayer/conv2d_{}'.format(inserted_layer_counter),
@@ -331,7 +332,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
conv_layers = []
batch_norm_layers = []
activation_layers = []
- use_bias = False if self._apply_batch_norm else True
+ use_bias = False if (self._apply_batch_norm and not
+ self._conv_hyperparams.force_use_bias()) else True
for additional_conv_layer_idx in range(self._num_layers_before_predictor):
layer_name = '{}/conv2d_{}'.format(
tower_name_scope, additional_conv_layer_idx)
@@ -363,7 +365,9 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
training=(self._is_training and not self._freeze_batchnorm),
name='{}/conv2d_{}/BatchNorm/feature_{}'.format(
tower_name_scope, additional_conv_layer_idx, feature_index)))
- activation_layers.append(tf.keras.layers.Lambda(tf.nn.relu6))
+ activation_layers.append(self._conv_hyperparams.build_activation_layer(
+ name='{}/conv2d_{}/activation_{}'.format(
+ tower_name_scope, additional_conv_layer_idx, feature_index)))
# Set conv layers as the shared conv layers for different feature maps with
# the same tower_name_scope.
diff --git a/research/object_detection/predictors/convolutional_keras_box_predictor_test.py b/research/object_detection/predictors/convolutional_keras_box_predictor_tf2_test.py
similarity index 64%
rename from research/object_detection/predictors/convolutional_keras_box_predictor_test.py
rename to research/object_detection/predictors/convolutional_keras_box_predictor_tf2_test.py
index 5db7e962f88624e1a5663e7e2a881c0afbe290f2..180a6e94643a80ac04ee12dfacb5bc6d04e09ec8 100644
--- a/research/object_detection/predictors/convolutional_keras_box_predictor_test.py
+++ b/research/object_detection/predictors/convolutional_keras_box_predictor_tf2_test.py
@@ -14,6 +14,7 @@
# ==============================================================================
"""Tests for object_detection.predictors.convolutional_keras_box_predictor."""
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
@@ -26,8 +27,10 @@ from object_detection.predictors.heads import keras_class_head
from object_detection.predictors.heads import keras_mask_head
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class ConvolutionalKerasBoxPredictorTest(test_case.TestCase):
def _build_conv_hyperparams(self):
@@ -47,23 +50,23 @@ class ConvolutionalKerasBoxPredictorTest(test_case.TestCase):
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def test_get_boxes_for_five_aspect_ratios_per_location(self):
+ conv_box_predictor = (
+ box_predictor_builder.build_convolutional_keras_box_predictor(
+ is_training=False,
+ num_classes=0,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ num_predictions_per_location_list=[5],
+ min_depth=0,
+ max_depth=32,
+ num_layers_before_predictor=1,
+ use_dropout=True,
+ dropout_keep_prob=0.8,
+ kernel_size=1,
+ box_code_size=4
+ ))
def graph_fn(image_features):
- conv_box_predictor = (
- box_predictor_builder.build_convolutional_keras_box_predictor(
- is_training=False,
- num_classes=0,
- conv_hyperparams=self._build_conv_hyperparams(),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- num_predictions_per_location_list=[5],
- min_depth=0,
- max_depth=32,
- num_layers_before_predictor=1,
- use_dropout=True,
- dropout_keep_prob=0.8,
- kernel_size=1,
- box_code_size=4
- ))
box_predictions = conv_box_predictor([image_features])
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
@@ -78,23 +81,23 @@ class ConvolutionalKerasBoxPredictorTest(test_case.TestCase):
self.assertAllEqual(objectness_predictions.shape, [4, 320, 1])
def test_get_boxes_for_one_aspect_ratio_per_location(self):
+ conv_box_predictor = (
+ box_predictor_builder.build_convolutional_keras_box_predictor(
+ is_training=False,
+ num_classes=0,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ num_predictions_per_location_list=[1],
+ min_depth=0,
+ max_depth=32,
+ num_layers_before_predictor=1,
+ use_dropout=True,
+ dropout_keep_prob=0.8,
+ kernel_size=1,
+ box_code_size=4
+ ))
def graph_fn(image_features):
- conv_box_predictor = (
- box_predictor_builder.build_convolutional_keras_box_predictor(
- is_training=False,
- num_classes=0,
- conv_hyperparams=self._build_conv_hyperparams(),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- num_predictions_per_location_list=[1],
- min_depth=0,
- max_depth=32,
- num_layers_before_predictor=1,
- use_dropout=True,
- dropout_keep_prob=0.8,
- kernel_size=1,
- box_code_size=4
- ))
box_predictions = conv_box_predictor([image_features])
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
@@ -111,23 +114,23 @@ class ConvolutionalKerasBoxPredictorTest(test_case.TestCase):
self):
num_classes_without_background = 6
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
+ conv_box_predictor = (
+ box_predictor_builder.build_convolutional_keras_box_predictor(
+ is_training=False,
+ num_classes=num_classes_without_background,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ num_predictions_per_location_list=[5],
+ min_depth=0,
+ max_depth=32,
+ num_layers_before_predictor=1,
+ use_dropout=True,
+ dropout_keep_prob=0.8,
+ kernel_size=1,
+ box_code_size=4
+ ))
def graph_fn(image_features):
- conv_box_predictor = (
- box_predictor_builder.build_convolutional_keras_box_predictor(
- is_training=False,
- num_classes=num_classes_without_background,
- conv_hyperparams=self._build_conv_hyperparams(),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- num_predictions_per_location_list=[5],
- min_depth=0,
- max_depth=32,
- num_layers_before_predictor=1,
- use_dropout=True,
- dropout_keep_prob=0.8,
- kernel_size=1,
- box_code_size=4
- ))
box_predictions = conv_box_predictor([image_features])
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
@@ -144,7 +147,7 @@ class ConvolutionalKerasBoxPredictorTest(test_case.TestCase):
def test_get_predictions_with_feature_maps_of_dynamic_shape(
self):
- image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
+ tf.keras.backend.clear_session()
conv_box_predictor = (
box_predictor_builder.build_convolutional_keras_box_predictor(
is_training=False,
@@ -161,28 +164,25 @@ class ConvolutionalKerasBoxPredictorTest(test_case.TestCase):
kernel_size=1,
box_code_size=4
))
- box_predictions = conv_box_predictor([image_features])
- box_encodings = tf.concat(
- box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
- objectness_predictions = tf.concat(
- box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
- axis=1)
- init_op = tf.global_variables_initializer()
-
+ variables = []
+ def graph_fn(image_features):
+ box_predictions = conv_box_predictor([image_features])
+ variables.extend(list(conv_box_predictor.variables))
+ box_encodings = tf.concat(
+ box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
+ objectness_predictions = tf.concat(
+ box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
+ axis=1)
+ return box_encodings, objectness_predictions
resolution = 32
expected_num_anchors = resolution*resolution*5
- with self.test_session() as sess:
- sess.run(init_op)
- (box_encodings_shape,
- objectness_predictions_shape) = sess.run(
- [tf.shape(box_encodings), tf.shape(objectness_predictions)],
- feed_dict={image_features:
- np.random.rand(4, resolution, resolution, 64)})
- actual_variable_set = set(
- [var.op.name for var in tf.trainable_variables()])
- self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4])
- self.assertAllEqual(objectness_predictions_shape,
- [4, expected_num_anchors, 1])
+ box_encodings, objectness_predictions = self.execute(
+ graph_fn, [np.random.rand(4, resolution, resolution, 64)])
+
+ actual_variable_set = set([var.name.split(':')[0] for var in variables])
+ self.assertAllEqual(box_encodings.shape, [4, expected_num_anchors, 1, 4])
+ self.assertAllEqual(objectness_predictions.shape,
+ [4, expected_num_anchors, 1])
expected_variable_set = set([
'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/bias',
'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/kernel',
@@ -195,7 +195,7 @@ class ConvolutionalKerasBoxPredictorTest(test_case.TestCase):
['box_encodings', 'class_predictions_with_background'])
def test_use_depthwise_convolution(self):
- image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
+ tf.keras.backend.clear_session()
conv_box_predictor = (
box_predictor_builder.build_convolutional_keras_box_predictor(
is_training=False,
@@ -213,27 +213,25 @@ class ConvolutionalKerasBoxPredictorTest(test_case.TestCase):
box_code_size=4,
use_depthwise=True
))
- box_predictions = conv_box_predictor([image_features])
- box_encodings = tf.concat(
- box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
- objectness_predictions = tf.concat(
- box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
- axis=1)
- init_op = tf.global_variables_initializer()
+ variables = []
+ def graph_fn(image_features):
+ box_predictions = conv_box_predictor([image_features])
+ variables.extend(list(conv_box_predictor.variables))
+ box_encodings = tf.concat(
+ box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
+ objectness_predictions = tf.concat(
+ box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
+ axis=1)
+ return box_encodings, objectness_predictions
resolution = 32
expected_num_anchors = resolution*resolution*5
- with self.test_session() as sess:
- sess.run(init_op)
- (box_encodings_shape,
- objectness_predictions_shape) = sess.run(
- [tf.shape(box_encodings), tf.shape(objectness_predictions)],
- feed_dict={image_features:
- np.random.rand(4, resolution, resolution, 64)})
- actual_variable_set = set(
- [var.op.name for var in tf.trainable_variables()])
- self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4])
- self.assertAllEqual(objectness_predictions_shape,
+ box_encodings, objectness_predictions = self.execute(
+ graph_fn, [np.random.rand(4, resolution, resolution, 64)])
+
+ actual_variable_set = set([var.name.split(':')[0] for var in variables])
+ self.assertAllEqual(box_encodings.shape, [4, expected_num_anchors, 1, 4])
+ self.assertAllEqual(objectness_predictions.shape,
[4, expected_num_anchors, 1])
expected_variable_set = set([
'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/bias',
@@ -259,6 +257,7 @@ class ConvolutionalKerasBoxPredictorTest(test_case.TestCase):
['box_encodings', 'class_predictions_with_background'])
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
def _build_conv_hyperparams(self, add_batch_norm=True):
@@ -288,19 +287,20 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
# pylint: disable=line-too-long
def test_get_boxes_for_five_aspect_ratios_per_location(self):
+ conv_box_predictor = (
+ box_predictor_builder
+ .build_weight_shared_convolutional_keras_box_predictor(
+ is_training=False,
+ num_classes=0,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ num_predictions_per_location_list=[5],
+ depth=32,
+ num_layers_before_predictor=1,
+ box_code_size=4))
def graph_fn(image_features):
- conv_box_predictor = (
- box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor(
- is_training=False,
- num_classes=0,
- conv_hyperparams=self._build_conv_hyperparams(),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- num_predictions_per_location_list=[5],
- depth=32,
- num_layers_before_predictor=1,
- box_code_size=4))
box_predictions = conv_box_predictor([image_features])
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
@@ -314,20 +314,21 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
self.assertAllEqual(objectness_predictions.shape, [4, 320, 1])
def test_bias_predictions_to_background_with_sigmoid_score_conversion(self):
+ conv_box_predictor = (
+ box_predictor_builder
+ .build_weight_shared_convolutional_keras_box_predictor(
+ is_training=True,
+ num_classes=2,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ num_predictions_per_location_list=[5],
+ depth=32,
+ num_layers_before_predictor=1,
+ class_prediction_bias_init=-4.6,
+ box_code_size=4))
def graph_fn(image_features):
- conv_box_predictor = (
- box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor(
- is_training=True,
- num_classes=2,
- conv_hyperparams=self._build_conv_hyperparams(),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- num_predictions_per_location_list=[5],
- depth=32,
- num_layers_before_predictor=1,
- class_prediction_bias_init=-4.6,
- box_code_size=4))
box_predictions = conv_box_predictor([image_features])
class_predictions = tf.concat(box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
@@ -339,20 +340,21 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
def test_get_multi_class_predictions_for_five_aspect_ratios_per_location(
self):
-
num_classes_without_background = 6
+ conv_box_predictor = (
+ box_predictor_builder
+ .build_weight_shared_convolutional_keras_box_predictor(
+ is_training=False,
+ num_classes=num_classes_without_background,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ num_predictions_per_location_list=[5],
+ depth=32,
+ num_layers_before_predictor=1,
+ box_code_size=4))
+
def graph_fn(image_features):
- conv_box_predictor = (
- box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor(
- is_training=False,
- num_classes=num_classes_without_background,
- conv_hyperparams=self._build_conv_hyperparams(),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- num_predictions_per_location_list=[5],
- depth=32,
- num_layers_before_predictor=1,
- box_code_size=4))
box_predictions = conv_box_predictor([image_features])
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
@@ -369,20 +371,21 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
def test_get_multi_class_predictions_from_two_feature_maps(
self):
-
num_classes_without_background = 6
+ conv_box_predictor = (
+ box_predictor_builder
+ .build_weight_shared_convolutional_keras_box_predictor(
+ is_training=False,
+ num_classes=num_classes_without_background,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ num_predictions_per_location_list=[5, 5],
+ depth=32,
+ num_layers_before_predictor=1,
+ box_code_size=4))
+
def graph_fn(image_features1, image_features2):
- conv_box_predictor = (
- box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor(
- is_training=False,
- num_classes=num_classes_without_background,
- conv_hyperparams=self._build_conv_hyperparams(),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- num_predictions_per_location_list=[5, 5],
- depth=32,
- num_layers_before_predictor=1,
- box_code_size=4))
box_predictions = conv_box_predictor([image_features1, image_features2])
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
@@ -401,20 +404,21 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
def test_get_multi_class_predictions_from_feature_maps_of_different_depth(
self):
-
num_classes_without_background = 6
+ conv_box_predictor = (
+ box_predictor_builder
+ .build_weight_shared_convolutional_keras_box_predictor(
+ is_training=False,
+ num_classes=num_classes_without_background,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ num_predictions_per_location_list=[5, 5, 5],
+ depth=32,
+ num_layers_before_predictor=1,
+ box_code_size=4))
+
def graph_fn(image_features1, image_features2, image_features3):
- conv_box_predictor = (
- box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor(
- is_training=False,
- num_classes=num_classes_without_background,
- conv_hyperparams=self._build_conv_hyperparams(),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- num_predictions_per_location_list=[5, 5, 5],
- depth=32,
- num_layers_before_predictor=1,
- box_code_size=4))
box_predictions = conv_box_predictor(
[image_features1, image_features2, image_features3])
box_encodings = tf.concat(
@@ -435,20 +439,25 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
def test_predictions_multiple_feature_maps_share_weights_separate_batchnorm(
self):
+ tf.keras.backend.clear_session()
num_classes_without_background = 6
+ conv_box_predictor = (
+ box_predictor_builder
+ .build_weight_shared_convolutional_keras_box_predictor(
+ is_training=False,
+ num_classes=num_classes_without_background,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ num_predictions_per_location_list=[5, 5],
+ depth=32,
+ num_layers_before_predictor=2,
+ box_code_size=4))
+ variables = []
+
def graph_fn(image_features1, image_features2):
- conv_box_predictor = (
- box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor(
- is_training=False,
- num_classes=num_classes_without_background,
- conv_hyperparams=self._build_conv_hyperparams(),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- num_predictions_per_location_list=[5, 5],
- depth=32,
- num_layers_before_predictor=2,
- box_code_size=4))
box_predictions = conv_box_predictor([image_features1, image_features2])
+ variables.extend(list(conv_box_predictor.variables))
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
class_predictions_with_background = tf.concat(
@@ -456,25 +465,41 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
axis=1)
return (box_encodings, class_predictions_with_background)
- with self.test_session(graph=tf.Graph()):
- graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32),
- tf.random_uniform([4, 16, 16, 3], dtype=tf.float32))
- actual_variable_set = set(
- [var.op.name for var in tf.trainable_variables()])
+ self.execute(graph_fn, [
+ np.random.rand(4, 32, 32, 3).astype(np.float32),
+ np.random.rand(4, 16, 16, 3).astype(np.float32)
+ ])
+ actual_variable_set = set([var.name.split(':')[0] for var in variables])
expected_variable_set = set([
# Box prediction tower
('WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/kernel'),
('WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/BatchNorm/feature_0/beta'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'BoxPredictionTower/conv2d_0/BatchNorm/feature_0/moving_mean'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'BoxPredictionTower/conv2d_0/BatchNorm/feature_0/moving_variance'),
('WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/BatchNorm/feature_1/beta'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'BoxPredictionTower/conv2d_0/BatchNorm/feature_1/moving_mean'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'BoxPredictionTower/conv2d_0/BatchNorm/feature_1/moving_variance'),
('WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_1/kernel'),
('WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_1/BatchNorm/feature_0/beta'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'BoxPredictionTower/conv2d_1/BatchNorm/feature_0/moving_mean'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'BoxPredictionTower/conv2d_1/BatchNorm/feature_0/moving_variance'),
('WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_1/BatchNorm/feature_1/beta'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'BoxPredictionTower/conv2d_1/BatchNorm/feature_1/moving_mean'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'BoxPredictionTower/conv2d_1/BatchNorm/feature_1/moving_variance'),
# Box prediction head
('WeightSharedConvolutionalBoxPredictor/'
'WeightSharedConvolutionalBoxHead/BoxPredictor/kernel'),
@@ -485,14 +510,30 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
'ClassPredictionTower/conv2d_0/kernel'),
('WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/BatchNorm/feature_0/beta'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'ClassPredictionTower/conv2d_0/BatchNorm/feature_0/moving_mean'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'ClassPredictionTower/conv2d_0/BatchNorm/feature_0/moving_variance'),
('WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/BatchNorm/feature_1/beta'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'ClassPredictionTower/conv2d_0/BatchNorm/feature_1/moving_mean'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'ClassPredictionTower/conv2d_0/BatchNorm/feature_1/moving_variance'),
('WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/kernel'),
('WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/BatchNorm/feature_0/beta'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'ClassPredictionTower/conv2d_1/BatchNorm/feature_0/moving_mean'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'ClassPredictionTower/conv2d_1/BatchNorm/feature_0/moving_variance'),
('WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/BatchNorm/feature_1/beta'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'ClassPredictionTower/conv2d_1/BatchNorm/feature_1/moving_mean'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'ClassPredictionTower/conv2d_1/BatchNorm/feature_1/moving_variance'),
# Class prediction head
('WeightSharedConvolutionalBoxPredictor/'
'WeightSharedConvolutionalClassHead/ClassPredictor/kernel'),
@@ -502,21 +543,26 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
def test_predictions_multiple_feature_maps_share_weights_without_batchnorm(
self):
+ tf.keras.backend.clear_session()
num_classes_without_background = 6
+ conv_box_predictor = (
+ box_predictor_builder
+ .build_weight_shared_convolutional_keras_box_predictor(
+ is_training=False,
+ num_classes=num_classes_without_background,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ num_predictions_per_location_list=[5, 5],
+ depth=32,
+ num_layers_before_predictor=2,
+ box_code_size=4,
+ apply_batch_norm=False))
+ variables = []
+
def graph_fn(image_features1, image_features2):
- conv_box_predictor = (
- box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor(
- is_training=False,
- num_classes=num_classes_without_background,
- conv_hyperparams=self._build_conv_hyperparams(),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- num_predictions_per_location_list=[5, 5],
- depth=32,
- num_layers_before_predictor=2,
- box_code_size=4,
- apply_batch_norm=False))
box_predictions = conv_box_predictor([image_features1, image_features2])
+ variables.extend(list(conv_box_predictor.variables))
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
class_predictions_with_background = tf.concat(
@@ -524,11 +570,11 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
axis=1)
return (box_encodings, class_predictions_with_background)
- with self.test_session(graph=tf.Graph()):
- graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32),
- tf.random_uniform([4, 16, 16, 3], dtype=tf.float32))
- actual_variable_set = set(
- [var.op.name for var in tf.trainable_variables()])
+ self.execute(graph_fn, [
+ np.random.rand(4, 32, 32, 3).astype(np.float32),
+ np.random.rand(4, 16, 16, 3).astype(np.float32)
+ ])
+ actual_variable_set = set([var.name.split(':')[0] for var in variables])
expected_variable_set = set([
# Box prediction tower
('WeightSharedConvolutionalBoxPredictor/'
@@ -562,23 +608,27 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
def test_predictions_multiple_feature_maps_share_weights_with_depthwise(
self):
+ tf.keras.backend.clear_session()
num_classes_without_background = 6
+ conv_box_predictor = (
+ box_predictor_builder
+ .build_weight_shared_convolutional_keras_box_predictor(
+ is_training=False,
+ num_classes=num_classes_without_background,
+ conv_hyperparams=self._build_conv_hyperparams(add_batch_norm=False),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ num_predictions_per_location_list=[5, 5],
+ depth=32,
+ num_layers_before_predictor=2,
+ box_code_size=4,
+ apply_batch_norm=False,
+ use_depthwise=True))
+ variables = []
+
def graph_fn(image_features1, image_features2):
- conv_box_predictor = (
- box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor(
- is_training=False,
- num_classes=num_classes_without_background,
- conv_hyperparams=self._build_conv_hyperparams(
- add_batch_norm=False),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- num_predictions_per_location_list=[5, 5],
- depth=32,
- num_layers_before_predictor=2,
- box_code_size=4,
- apply_batch_norm=False,
- use_depthwise=True))
box_predictions = conv_box_predictor([image_features1, image_features2])
+ variables.extend(list(conv_box_predictor.variables))
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
class_predictions_with_background = tf.concat(
@@ -586,11 +636,11 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
axis=1)
return (box_encodings, class_predictions_with_background)
- with self.test_session(graph=tf.Graph()):
- graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32),
- tf.random_uniform([4, 16, 16, 3], dtype=tf.float32))
- actual_variable_set = set(
- [var.op.name for var in tf.trainable_variables()])
+ self.execute(graph_fn, [
+ np.random.rand(4, 32, 32, 3).astype(np.float32),
+ np.random.rand(4, 16, 16, 3).astype(np.float32)
+ ])
+ actual_variable_set = set([var.name.split(':')[0] for var in variables])
expected_variable_set = set([
# Box prediction tower
('WeightSharedConvolutionalBoxPredictor/'
@@ -635,23 +685,27 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
self.assertEqual(expected_variable_set, actual_variable_set)
def test_no_batchnorm_params_when_batchnorm_is_not_configured(self):
+ tf.keras.backend.clear_session()
num_classes_without_background = 6
+ conv_box_predictor = (
+ box_predictor_builder
+ .build_weight_shared_convolutional_keras_box_predictor(
+ is_training=False,
+ num_classes=num_classes_without_background,
+ conv_hyperparams=self._build_conv_hyperparams(add_batch_norm=False),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ num_predictions_per_location_list=[5, 5],
+ depth=32,
+ num_layers_before_predictor=2,
+ box_code_size=4,
+ apply_batch_norm=False))
+ variables = []
+
def graph_fn(image_features1, image_features2):
- conv_box_predictor = (
- box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor(
- is_training=False,
- num_classes=num_classes_without_background,
- conv_hyperparams=self._build_conv_hyperparams(
- add_batch_norm=False),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- num_predictions_per_location_list=[5, 5],
- depth=32,
- num_layers_before_predictor=2,
- box_code_size=4,
- apply_batch_norm=False))
box_predictions = conv_box_predictor(
[image_features1, image_features2])
+ variables.extend(list(conv_box_predictor.variables))
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
class_predictions_with_background = tf.concat(
@@ -659,11 +713,11 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
axis=1)
return (box_encodings, class_predictions_with_background)
- with self.test_session(graph=tf.Graph()):
- graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32),
- tf.random_uniform([4, 16, 16, 3], dtype=tf.float32))
- actual_variable_set = set(
- [var.op.name for var in tf.trainable_variables()])
+ self.execute(graph_fn, [
+ np.random.rand(4, 32, 32, 3).astype(np.float32),
+ np.random.rand(4, 16, 16, 3).astype(np.float32)
+ ])
+ actual_variable_set = set([var.name.split(':')[0] for var in variables])
expected_variable_set = set([
# Box prediction tower
('WeightSharedConvolutionalBoxPredictor/'
@@ -697,22 +751,27 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
def test_predictions_share_weights_share_tower_separate_batchnorm(
self):
+ tf.keras.backend.clear_session()
num_classes_without_background = 6
+ conv_box_predictor = (
+ box_predictor_builder
+ .build_weight_shared_convolutional_keras_box_predictor(
+ is_training=False,
+ num_classes=num_classes_without_background,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ num_predictions_per_location_list=[5, 5],
+ depth=32,
+ num_layers_before_predictor=2,
+ box_code_size=4,
+ share_prediction_tower=True))
+ variables = []
+
def graph_fn(image_features1, image_features2):
- conv_box_predictor = (
- box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor(
- is_training=False,
- num_classes=num_classes_without_background,
- conv_hyperparams=self._build_conv_hyperparams(),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- num_predictions_per_location_list=[5, 5],
- depth=32,
- num_layers_before_predictor=2,
- box_code_size=4,
- share_prediction_tower=True))
box_predictions = conv_box_predictor(
[image_features1, image_features2])
+ variables.extend(list(conv_box_predictor.variables))
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
class_predictions_with_background = tf.concat(
@@ -720,11 +779,11 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
axis=1)
return (box_encodings, class_predictions_with_background)
- with self.test_session(graph=tf.Graph()):
- graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32),
- tf.random_uniform([4, 16, 16, 3], dtype=tf.float32))
- actual_variable_set = set(
- [var.op.name for var in tf.trainable_variables()])
+ self.execute(graph_fn, [
+ np.random.rand(4, 32, 32, 3).astype(np.float32),
+ np.random.rand(4, 16, 16, 3).astype(np.float32)
+ ])
+ actual_variable_set = set([var.name.split(':')[0] for var in variables])
expected_variable_set = set([
# Shared prediction tower
('WeightSharedConvolutionalBoxPredictor/'
@@ -733,12 +792,28 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
'PredictionTower/conv2d_0/BatchNorm/feature_0/beta'),
('WeightSharedConvolutionalBoxPredictor/'
'PredictionTower/conv2d_0/BatchNorm/feature_1/beta'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'PredictionTower/conv2d_0/BatchNorm/feature_0/moving_mean'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'PredictionTower/conv2d_0/BatchNorm/feature_1/moving_mean'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'PredictionTower/conv2d_0/BatchNorm/feature_0/moving_variance'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'PredictionTower/conv2d_0/BatchNorm/feature_1/moving_variance'),
('WeightSharedConvolutionalBoxPredictor/'
'PredictionTower/conv2d_1/kernel'),
('WeightSharedConvolutionalBoxPredictor/'
'PredictionTower/conv2d_1/BatchNorm/feature_0/beta'),
('WeightSharedConvolutionalBoxPredictor/'
'PredictionTower/conv2d_1/BatchNorm/feature_1/beta'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'PredictionTower/conv2d_1/BatchNorm/feature_0/moving_mean'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'PredictionTower/conv2d_1/BatchNorm/feature_1/moving_mean'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'PredictionTower/conv2d_1/BatchNorm/feature_0/moving_variance'),
+ ('WeightSharedConvolutionalBoxPredictor/'
+ 'PredictionTower/conv2d_1/BatchNorm/feature_1/moving_variance'),
# Box prediction head
('WeightSharedConvolutionalBoxPredictor/'
'WeightSharedConvolutionalBoxHead/BoxPredictor/kernel'),
@@ -753,24 +828,28 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
def test_predictions_share_weights_share_tower_without_batchnorm(
self):
+ tf.keras.backend.clear_session()
num_classes_without_background = 6
+ conv_box_predictor = (
+ box_predictor_builder
+ .build_weight_shared_convolutional_keras_box_predictor(
+ is_training=False,
+ num_classes=num_classes_without_background,
+ conv_hyperparams=self._build_conv_hyperparams(add_batch_norm=False),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ num_predictions_per_location_list=[5, 5],
+ depth=32,
+ num_layers_before_predictor=2,
+ box_code_size=4,
+ share_prediction_tower=True,
+ apply_batch_norm=False))
+ variables = []
+
def graph_fn(image_features1, image_features2):
- conv_box_predictor = (
- box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor(
- is_training=False,
- num_classes=num_classes_without_background,
- conv_hyperparams=self._build_conv_hyperparams(
- add_batch_norm=False),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- num_predictions_per_location_list=[5, 5],
- depth=32,
- num_layers_before_predictor=2,
- box_code_size=4,
- share_prediction_tower=True,
- apply_batch_norm=False))
box_predictions = conv_box_predictor(
[image_features1, image_features2])
+ variables.extend(list(conv_box_predictor.variables))
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
class_predictions_with_background = tf.concat(
@@ -778,11 +857,11 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
axis=1)
return (box_encodings, class_predictions_with_background)
- with self.test_session(graph=tf.Graph()):
- graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32),
- tf.random_uniform([4, 16, 16, 3], dtype=tf.float32))
- actual_variable_set = set(
- [var.op.name for var in tf.trainable_variables()])
+ self.execute(graph_fn, [
+ np.random.rand(4, 32, 32, 3).astype(np.float32),
+ np.random.rand(4, 16, 16, 3).astype(np.float32)
+ ])
+ actual_variable_set = set([var.name.split(':')[0] for var in variables])
expected_variable_set = set([
# Shared prediction tower
('WeightSharedConvolutionalBoxPredictor/'
@@ -806,40 +885,6 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
self.assertEqual(expected_variable_set, actual_variable_set)
- def test_get_predictions_with_feature_maps_of_dynamic_shape(
- self):
- image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
- conv_box_predictor = (
- box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor(
- is_training=False,
- num_classes=0,
- conv_hyperparams=self._build_conv_hyperparams(),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- num_predictions_per_location_list=[5],
- depth=32,
- num_layers_before_predictor=1,
- box_code_size=4))
- box_predictions = conv_box_predictor([image_features])
- box_encodings = tf.concat(box_predictions[box_predictor.BOX_ENCODINGS],
- axis=1)
- objectness_predictions = tf.concat(box_predictions[
- box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
- init_op = tf.global_variables_initializer()
-
- resolution = 32
- expected_num_anchors = resolution*resolution*5
- with self.test_session() as sess:
- sess.run(init_op)
- (box_encodings_shape,
- objectness_predictions_shape) = sess.run(
- [tf.shape(box_encodings), tf.shape(objectness_predictions)],
- feed_dict={image_features:
- np.random.rand(4, resolution, resolution, 64)})
- self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 4])
- self.assertAllEqual(objectness_predictions_shape,
- [4, expected_num_anchors, 1])
-
def test_other_heads_predictions(self):
box_code_size = 4
num_classes_without_background = 3
@@ -847,37 +892,36 @@ class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase):
mask_height = 5
mask_width = 5
num_predictions_per_location = 5
-
+ box_prediction_head = keras_box_head.WeightSharedConvolutionalBoxHead(
+ box_code_size=box_code_size,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ num_predictions_per_location=num_predictions_per_location)
+ class_prediction_head = keras_class_head.WeightSharedConvolutionalClassHead(
+ num_class_slots=num_classes_without_background + 1,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ num_predictions_per_location=num_predictions_per_location)
+ other_heads = {
+ other_head_name:
+ keras_mask_head.WeightSharedConvolutionalMaskHead(
+ num_classes=num_classes_without_background,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ num_predictions_per_location=num_predictions_per_location,
+ mask_height=mask_height,
+ mask_width=mask_width)
+ }
+
+ conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
+ is_training=False,
+ num_classes=num_classes_without_background,
+ box_prediction_head=box_prediction_head,
+ class_prediction_head=class_prediction_head,
+ other_heads=other_heads,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ freeze_batchnorm=False,
+ inplace_batchnorm_update=False,
+ depth=32,
+ num_layers_before_predictor=2)
def graph_fn(image_features):
- box_prediction_head = keras_box_head.WeightSharedConvolutionalBoxHead(
- box_code_size=box_code_size,
- conv_hyperparams=self._build_conv_hyperparams(),
- num_predictions_per_location=num_predictions_per_location)
- class_prediction_head = keras_class_head.WeightSharedConvolutionalClassHead(
- num_class_slots=num_classes_without_background + 1,
- conv_hyperparams=self._build_conv_hyperparams(),
- num_predictions_per_location=num_predictions_per_location)
- other_heads = {
- other_head_name:
- keras_mask_head.WeightSharedConvolutionalMaskHead(
- num_classes=num_classes_without_background,
- conv_hyperparams=self._build_conv_hyperparams(),
- num_predictions_per_location=num_predictions_per_location,
- mask_height=mask_height,
- mask_width=mask_width)
- }
-
- conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
- is_training=False,
- num_classes=num_classes_without_background,
- box_prediction_head=box_prediction_head,
- class_prediction_head=class_prediction_head,
- other_heads=other_heads,
- conv_hyperparams=self._build_conv_hyperparams(),
- freeze_batchnorm=False,
- inplace_batchnorm_update=False,
- depth=32,
- num_layers_before_predictor=2)
box_predictions = conv_box_predictor([image_features])
for key, value in box_predictions.items():
box_predictions[key] = tf.concat(value, axis=1)
diff --git a/research/object_detection/predictors/heads/box_head_test.py b/research/object_detection/predictors/heads/box_head_tf1_test.py
similarity index 94%
rename from research/object_detection/predictors/heads/box_head_test.py
rename to research/object_detection/predictors/heads/box_head_tf1_test.py
index dd69115e8ce997bf8ce9d4c3f90a3cc060763456..ab534a2bd029abed5f39e232d023a27dd2e9a361 100644
--- a/research/object_detection/predictors/heads/box_head_test.py
+++ b/research/object_detection/predictors/heads/box_head_tf1_test.py
@@ -14,6 +14,7 @@
# ==============================================================================
"""Tests for object_detection.predictors.heads.box_head."""
+import unittest
import tensorflow.compat.v1 as tf
from google.protobuf import text_format
@@ -21,8 +22,10 @@ from object_detection.builders import hyperparams_builder
from object_detection.predictors.heads import box_head
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class MaskRCNNBoxHeadTest(test_case.TestCase):
def _build_arg_scope_with_hyperparams(self,
@@ -59,6 +62,7 @@ class MaskRCNNBoxHeadTest(test_case.TestCase):
self.assertAllEqual([64, 1, 20, 4], prediction.get_shape().as_list())
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class ConvolutionalBoxPredictorTest(test_case.TestCase):
def _build_arg_scope_with_hyperparams(
@@ -92,6 +96,7 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
self.assertAllEqual([64, 323, 1, 4], box_encodings.get_shape().as_list())
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
def _build_arg_scope_with_hyperparams(
diff --git a/research/object_detection/predictors/heads/class_head_test.py b/research/object_detection/predictors/heads/class_head_tf1_test.py
similarity index 96%
rename from research/object_detection/predictors/heads/class_head_test.py
rename to research/object_detection/predictors/heads/class_head_tf1_test.py
index eaadcdc39f4bd147b8d141eb99afd42f6cc3da36..3dc8fb120cb9a4c19ff2d595d31dc3645f6e06d0 100644
--- a/research/object_detection/predictors/heads/class_head_test.py
+++ b/research/object_detection/predictors/heads/class_head_tf1_test.py
@@ -14,6 +14,7 @@
# ==============================================================================
"""Tests for object_detection.predictors.heads.class_head."""
+import unittest
import tensorflow.compat.v1 as tf
from google.protobuf import text_format
@@ -21,8 +22,10 @@ from object_detection.builders import hyperparams_builder
from object_detection.predictors.heads import class_head
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class MaskRCNNClassHeadTest(test_case.TestCase):
def _build_arg_scope_with_hyperparams(self,
@@ -81,6 +84,7 @@ class MaskRCNNClassHeadTest(test_case.TestCase):
self.assertSetEqual(expected_var_names, actual_variable_set)
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class ConvolutionalClassPredictorTest(test_case.TestCase):
def _build_arg_scope_with_hyperparams(
@@ -140,6 +144,7 @@ class ConvolutionalClassPredictorTest(test_case.TestCase):
self.assertSetEqual(expected_var_names, actual_variable_set)
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class WeightSharedConvolutionalClassPredictorTest(test_case.TestCase):
def _build_arg_scope_with_hyperparams(
diff --git a/research/object_detection/predictors/heads/keras_box_head_test.py b/research/object_detection/predictors/heads/keras_box_head_tf2_test.py
similarity index 67%
rename from research/object_detection/predictors/heads/keras_box_head_test.py
rename to research/object_detection/predictors/heads/keras_box_head_tf2_test.py
index 1dcf7ce36bd5938b53a1366ea70b86d16602f18f..e9e8b8dcc3aa07ce6917a881c42cf51db7318576 100644
--- a/research/object_detection/predictors/heads/keras_box_head_test.py
+++ b/research/object_detection/predictors/heads/keras_box_head_tf2_test.py
@@ -14,6 +14,7 @@
# ==============================================================================
"""Tests for object_detection.predictors.heads.box_head."""
+import unittest
import tensorflow.compat.v1 as tf
from google.protobuf import text_format
@@ -21,8 +22,10 @@ from object_detection.builders import hyperparams_builder
from object_detection.predictors.heads import keras_box_head
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class ConvolutionalKerasBoxHeadTest(test_case.TestCase):
def _build_conv_hyperparams(self):
@@ -51,10 +54,13 @@ class ConvolutionalKerasBoxHeadTest(test_case.TestCase):
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=False)
- image_feature = tf.random_uniform(
- [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- box_encodings = box_prediction_head(image_feature)
- self.assertAllEqual([64, 323, 1, 4], box_encodings.get_shape().as_list())
+ def graph_fn():
+ image_feature = tf.random_uniform(
+ [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ box_encodings = box_prediction_head(image_feature)
+ return box_encodings
+ box_encodings = self.execute(graph_fn, [])
+ self.assertAllEqual([64, 323, 1, 4], box_encodings.shape)
def test_prediction_size_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
@@ -66,12 +72,16 @@ class ConvolutionalKerasBoxHeadTest(test_case.TestCase):
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=True)
- image_feature = tf.random_uniform(
- [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- box_encodings = box_prediction_head(image_feature)
- self.assertAllEqual([64, 323, 1, 4], box_encodings.get_shape().as_list())
+ def graph_fn():
+ image_feature = tf.random_uniform(
+ [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ box_encodings = box_prediction_head(image_feature)
+ return box_encodings
+ box_encodings = self.execute(graph_fn, [])
+ self.assertAllEqual([64, 323, 1, 4], box_encodings.shape)
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class MaskRCNNKerasBoxHeadTest(test_case.TestCase):
def _build_fc_hyperparams(
@@ -102,12 +112,16 @@ class MaskRCNNKerasBoxHeadTest(test_case.TestCase):
dropout_keep_prob=0.5,
box_code_size=4,
share_box_across_classes=False)
- roi_pooled_features = tf.random_uniform(
- [64, 7, 7, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- prediction = box_prediction_head(roi_pooled_features)
- self.assertAllEqual([64, 1, 20, 4], prediction.get_shape().as_list())
+ def graph_fn():
+ roi_pooled_features = tf.random_uniform(
+ [64, 7, 7, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ prediction = box_prediction_head(roi_pooled_features)
+ return prediction
+ prediction = self.execute(graph_fn, [])
+ self.assertAllEqual([64, 1, 20, 4], prediction.shape)
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class WeightSharedConvolutionalKerasBoxHead(test_case.TestCase):
def _build_conv_hyperparams(self):
@@ -133,10 +147,13 @@ class WeightSharedConvolutionalKerasBoxHead(test_case.TestCase):
conv_hyperparams=conv_hyperparams,
num_predictions_per_location=1,
use_depthwise=False)
- image_feature = tf.random_uniform(
- [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- box_encodings = box_prediction_head(image_feature)
- self.assertAllEqual([64, 323, 4], box_encodings.get_shape().as_list())
+ def graph_fn():
+ image_feature = tf.random_uniform(
+ [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ box_encodings = box_prediction_head(image_feature)
+ return box_encodings
+ box_encodings = self.execute(graph_fn, [])
+ self.assertAllEqual([64, 323, 4], box_encodings.shape)
def test_prediction_size_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
@@ -145,40 +162,38 @@ class WeightSharedConvolutionalKerasBoxHead(test_case.TestCase):
conv_hyperparams=conv_hyperparams,
num_predictions_per_location=1,
use_depthwise=True)
- image_feature = tf.random_uniform(
- [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- box_encodings = box_prediction_head(image_feature)
- self.assertAllEqual([64, 323, 4], box_encodings.get_shape().as_list())
-
- def test_variable_count_depth_wise_true(self):
- g = tf.Graph()
- with g.as_default():
- conv_hyperparams = self._build_conv_hyperparams()
- box_prediction_head = keras_box_head.WeightSharedConvolutionalBoxHead(
- box_code_size=4,
- conv_hyperparams=conv_hyperparams,
- num_predictions_per_location=1,
- use_depthwise=True)
+ def graph_fn():
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- _ = box_prediction_head(image_feature)
- variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
- self.assertEqual(len(variables), 3)
+ box_encodings = box_prediction_head(image_feature)
+ return box_encodings
+ box_encodings = self.execute(graph_fn, [])
+ self.assertAllEqual([64, 323, 4], box_encodings.shape)
+
+ def test_variable_count_depth_wise_true(self):
+ conv_hyperparams = self._build_conv_hyperparams()
+ box_prediction_head = keras_box_head.WeightSharedConvolutionalBoxHead(
+ box_code_size=4,
+ conv_hyperparams=conv_hyperparams,
+ num_predictions_per_location=1,
+ use_depthwise=True)
+ image_feature = tf.random_uniform(
+ [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ box_prediction_head(image_feature)
+ self.assertEqual(len(box_prediction_head.variables), 3)
def test_variable_count_depth_wise_False(self):
- g = tf.Graph()
- with g.as_default():
- conv_hyperparams = self._build_conv_hyperparams()
- box_prediction_head = keras_box_head.WeightSharedConvolutionalBoxHead(
- box_code_size=4,
- conv_hyperparams=conv_hyperparams,
- num_predictions_per_location=1,
- use_depthwise=False)
- image_feature = tf.random_uniform(
- [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- _ = box_prediction_head(image_feature)
- variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
- self.assertEqual(len(variables), 2)
+ conv_hyperparams = self._build_conv_hyperparams()
+ box_prediction_head = keras_box_head.WeightSharedConvolutionalBoxHead(
+ box_code_size=4,
+ conv_hyperparams=conv_hyperparams,
+ num_predictions_per_location=1,
+ use_depthwise=False)
+ image_feature = tf.random_uniform(
+ [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ box_prediction_head(image_feature)
+ self.assertEqual(len(box_prediction_head.variables), 2)
+
if __name__ == '__main__':
tf.test.main()
diff --git a/research/object_detection/predictors/heads/keras_class_head_test.py b/research/object_detection/predictors/heads/keras_class_head_tf2_test.py
similarity index 66%
rename from research/object_detection/predictors/heads/keras_class_head_test.py
rename to research/object_detection/predictors/heads/keras_class_head_tf2_test.py
index 4a25efc3eed4aa592646de89dc630326691041eb..aa890ce522defb6ec4c97965846e8f20529bc24b 100644
--- a/research/object_detection/predictors/heads/keras_class_head_test.py
+++ b/research/object_detection/predictors/heads/keras_class_head_tf2_test.py
@@ -14,6 +14,7 @@
# ==============================================================================
"""Tests for object_detection.predictors.heads.class_head."""
+import unittest
import tensorflow.compat.v1 as tf
from google.protobuf import text_format
@@ -21,8 +22,10 @@ from object_detection.builders import hyperparams_builder
from object_detection.predictors.heads import keras_class_head
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class ConvolutionalKerasClassPredictorTest(test_case.TestCase):
def _build_conv_hyperparams(self):
@@ -53,11 +56,13 @@ class ConvolutionalKerasClassPredictorTest(test_case.TestCase):
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=False)
- image_feature = tf.random_uniform(
- [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- class_predictions = class_prediction_head(image_feature,)
- self.assertAllEqual([64, 323, 20],
- class_predictions.get_shape().as_list())
+ def graph_fn():
+ image_feature = tf.random_uniform(
+ [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ class_predictions = class_prediction_head(image_feature,)
+ return class_predictions
+ class_predictions = self.execute(graph_fn, [])
+ self.assertAllEqual([64, 323, 20], class_predictions.shape)
def test_prediction_size_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
@@ -71,13 +76,16 @@ class ConvolutionalKerasClassPredictorTest(test_case.TestCase):
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=True)
- image_feature = tf.random_uniform(
- [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- class_predictions = class_prediction_head(image_feature,)
- self.assertAllEqual([64, 323, 20],
- class_predictions.get_shape().as_list())
+ def graph_fn():
+ image_feature = tf.random_uniform(
+ [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ class_predictions = class_prediction_head(image_feature,)
+ return class_predictions
+ class_predictions = self.execute(graph_fn, [])
+ self.assertAllEqual([64, 323, 20], class_predictions.shape)
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class MaskRCNNClassHeadTest(test_case.TestCase):
def _build_fc_hyperparams(self,
@@ -106,12 +114,16 @@ class MaskRCNNClassHeadTest(test_case.TestCase):
freeze_batchnorm=False,
use_dropout=True,
dropout_keep_prob=0.5)
- roi_pooled_features = tf.random_uniform(
- [64, 7, 7, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- prediction = class_prediction_head(roi_pooled_features)
- self.assertAllEqual([64, 1, 20], prediction.get_shape().as_list())
+ def graph_fn():
+ roi_pooled_features = tf.random_uniform(
+ [64, 7, 7, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ prediction = class_prediction_head(roi_pooled_features)
+ return prediction
+ prediction = self.execute(graph_fn, [])
+ self.assertAllEqual([64, 1, 20], prediction.shape)
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class WeightSharedConvolutionalKerasClassPredictorTest(test_case.TestCase):
def _build_conv_hyperparams(self):
@@ -137,10 +149,13 @@ class WeightSharedConvolutionalKerasClassPredictorTest(test_case.TestCase):
conv_hyperparams=conv_hyperparams,
num_predictions_per_location=1,
use_depthwise=False)
- image_feature = tf.random_uniform(
- [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- class_predictions = class_prediction_head(image_feature)
- self.assertAllEqual([64, 323, 20], class_predictions.get_shape().as_list())
+ def graph_fn():
+ image_feature = tf.random_uniform(
+ [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ class_predictions = class_prediction_head(image_feature)
+ return class_predictions
+ class_predictions = self.execute(graph_fn, [])
+ self.assertAllEqual([64, 323, 20], class_predictions.shape)
def test_prediction_size_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
@@ -149,42 +164,39 @@ class WeightSharedConvolutionalKerasClassPredictorTest(test_case.TestCase):
conv_hyperparams=conv_hyperparams,
num_predictions_per_location=1,
use_depthwise=True)
- image_feature = tf.random_uniform(
- [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- class_predictions = class_prediction_head(image_feature)
- self.assertAllEqual([64, 323, 20], class_predictions.get_shape().as_list())
-
- def test_variable_count_depth_wise_true(self):
- g = tf.Graph()
- with g.as_default():
- conv_hyperparams = self._build_conv_hyperparams()
- class_prediction_head = (
- keras_class_head.WeightSharedConvolutionalClassHead(
- num_class_slots=20,
- conv_hyperparams=conv_hyperparams,
- num_predictions_per_location=1,
- use_depthwise=True))
+ def graph_fn():
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- _ = class_prediction_head(image_feature)
- variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
- self.assertEqual(len(variables), 3)
+ class_predictions = class_prediction_head(image_feature)
+ return class_predictions
+ class_predictions = self.execute(graph_fn, [])
+ self.assertAllEqual([64, 323, 20], class_predictions.shape)
+
+ def test_variable_count_depth_wise_true(self):
+ conv_hyperparams = self._build_conv_hyperparams()
+ class_prediction_head = (
+ keras_class_head.WeightSharedConvolutionalClassHead(
+ num_class_slots=20,
+ conv_hyperparams=conv_hyperparams,
+ num_predictions_per_location=1,
+ use_depthwise=True))
+ image_feature = tf.random_uniform(
+ [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ class_prediction_head(image_feature)
+ self.assertEqual(len(class_prediction_head.variables), 3)
def test_variable_count_depth_wise_False(self):
- g = tf.Graph()
- with g.as_default():
- conv_hyperparams = self._build_conv_hyperparams()
- class_prediction_head = (
- keras_class_head.WeightSharedConvolutionalClassHead(
- num_class_slots=20,
- conv_hyperparams=conv_hyperparams,
- num_predictions_per_location=1,
- use_depthwise=False))
- image_feature = tf.random_uniform(
- [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- _ = class_prediction_head(image_feature)
- variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
- self.assertEqual(len(variables), 2)
+ conv_hyperparams = self._build_conv_hyperparams()
+ class_prediction_head = (
+ keras_class_head.WeightSharedConvolutionalClassHead(
+ num_class_slots=20,
+ conv_hyperparams=conv_hyperparams,
+ num_predictions_per_location=1,
+ use_depthwise=False))
+ image_feature = tf.random_uniform(
+ [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ class_prediction_head(image_feature)
+ self.assertEqual(len(class_prediction_head.variables), 2)
if __name__ == '__main__':
diff --git a/research/object_detection/predictors/heads/keras_mask_head_test.py b/research/object_detection/predictors/heads/keras_mask_head_tf2_test.py
similarity index 67%
rename from research/object_detection/predictors/heads/keras_mask_head_test.py
rename to research/object_detection/predictors/heads/keras_mask_head_tf2_test.py
index 4cdce7a1c5bb629631d6c1aada0eefe14f9c81a5..5465be06fe1fe5150c8c4c3583bfcd3be5c5d079 100644
--- a/research/object_detection/predictors/heads/keras_mask_head_test.py
+++ b/research/object_detection/predictors/heads/keras_mask_head_tf2_test.py
@@ -14,6 +14,7 @@
# ==============================================================================
"""Tests for object_detection.predictors.heads.mask_head."""
+import unittest
import tensorflow.compat.v1 as tf
from google.protobuf import text_format
@@ -21,8 +22,10 @@ from object_detection.builders import hyperparams_builder
from object_detection.predictors.heads import keras_mask_head
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class ConvolutionalMaskPredictorTest(test_case.TestCase):
def _build_conv_hyperparams(self):
@@ -55,11 +58,13 @@ class ConvolutionalMaskPredictorTest(test_case.TestCase):
use_depthwise=False,
mask_height=7,
mask_width=7)
- image_feature = tf.random_uniform(
- [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- mask_predictions = mask_prediction_head(image_feature)
- self.assertAllEqual([64, 323, 20, 7, 7],
- mask_predictions.get_shape().as_list())
+ def graph_fn():
+ image_feature = tf.random_uniform(
+ [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ mask_predictions = mask_prediction_head(image_feature)
+ return mask_predictions
+ mask_predictions = self.execute(graph_fn, [])
+ self.assertAllEqual([64, 323, 20, 7, 7], mask_predictions.shape)
def test_prediction_size_use_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
@@ -75,11 +80,13 @@ class ConvolutionalMaskPredictorTest(test_case.TestCase):
use_depthwise=True,
mask_height=7,
mask_width=7)
- image_feature = tf.random_uniform(
- [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- mask_predictions = mask_prediction_head(image_feature)
- self.assertAllEqual([64, 323, 20, 7, 7],
- mask_predictions.get_shape().as_list())
+ def graph_fn():
+ image_feature = tf.random_uniform(
+ [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ mask_predictions = mask_prediction_head(image_feature)
+ return mask_predictions
+ mask_predictions = self.execute(graph_fn, [])
+ self.assertAllEqual([64, 323, 20, 7, 7], mask_predictions.shape)
def test_class_agnostic_prediction_size_use_depthwise_false(self):
conv_hyperparams = self._build_conv_hyperparams()
@@ -96,11 +103,13 @@ class ConvolutionalMaskPredictorTest(test_case.TestCase):
mask_height=7,
mask_width=7,
masks_are_class_agnostic=True)
- image_feature = tf.random_uniform(
- [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- mask_predictions = mask_prediction_head(image_feature)
- self.assertAllEqual([64, 323, 1, 7, 7],
- mask_predictions.get_shape().as_list())
+ def graph_fn():
+ image_feature = tf.random_uniform(
+ [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ mask_predictions = mask_prediction_head(image_feature)
+ return mask_predictions
+ mask_predictions = self.execute(graph_fn, [])
+ self.assertAllEqual([64, 323, 1, 7, 7], mask_predictions.shape)
def test_class_agnostic_prediction_size_use_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
@@ -117,13 +126,16 @@ class ConvolutionalMaskPredictorTest(test_case.TestCase):
mask_height=7,
mask_width=7,
masks_are_class_agnostic=True)
- image_feature = tf.random_uniform(
- [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- mask_predictions = mask_prediction_head(image_feature)
- self.assertAllEqual([64, 323, 1, 7, 7],
- mask_predictions.get_shape().as_list())
+ def graph_fn():
+ image_feature = tf.random_uniform(
+ [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ mask_predictions = mask_prediction_head(image_feature)
+ return mask_predictions
+ mask_predictions = self.execute(graph_fn, [])
+ self.assertAllEqual([64, 323, 1, 7, 7], mask_predictions.shape)
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class MaskRCNNMaskHeadTest(test_case.TestCase):
def _build_conv_hyperparams(self,
@@ -155,10 +167,13 @@ class MaskRCNNMaskHeadTest(test_case.TestCase):
mask_prediction_num_conv_layers=2,
mask_prediction_conv_depth=256,
masks_are_class_agnostic=False)
- roi_pooled_features = tf.random_uniform(
- [64, 7, 7, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- prediction = mask_prediction_head(roi_pooled_features)
- self.assertAllEqual([64, 1, 20, 14, 14], prediction.get_shape().as_list())
+ def graph_fn():
+ roi_pooled_features = tf.random_uniform(
+ [64, 7, 7, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ prediction = mask_prediction_head(roi_pooled_features)
+ return prediction
+ prediction = self.execute(graph_fn, [])
+ self.assertAllEqual([64, 1, 20, 14, 14], prediction.shape)
def test_prediction_size_with_convolve_then_upsample(self):
mask_prediction_head = keras_mask_head.MaskRCNNMaskHead(
@@ -172,12 +187,16 @@ class MaskRCNNMaskHeadTest(test_case.TestCase):
mask_prediction_conv_depth=256,
masks_are_class_agnostic=True,
convolve_then_upsample=True)
- roi_pooled_features = tf.random_uniform(
- [64, 14, 14, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- prediction = mask_prediction_head(roi_pooled_features)
- self.assertAllEqual([64, 1, 1, 28, 28], prediction.get_shape().as_list())
+ def graph_fn():
+ roi_pooled_features = tf.random_uniform(
+ [64, 14, 14, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ prediction = mask_prediction_head(roi_pooled_features)
+ return prediction
+ prediction = self.execute(graph_fn, [])
+ self.assertAllEqual([64, 1, 1, 28, 28], prediction.shape)
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class WeightSharedConvolutionalMaskPredictorTest(test_case.TestCase):
def _build_conv_hyperparams(self):
@@ -204,11 +223,13 @@ class WeightSharedConvolutionalMaskPredictorTest(test_case.TestCase):
conv_hyperparams=self._build_conv_hyperparams(),
mask_height=7,
mask_width=7))
- image_feature = tf.random_uniform(
- [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- mask_predictions = mask_prediction_head(image_feature)
- self.assertAllEqual([64, 323, 20, 7, 7],
- mask_predictions.get_shape().as_list())
+ def graph_fn():
+ image_feature = tf.random_uniform(
+ [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ mask_predictions = mask_prediction_head(image_feature)
+ return mask_predictions
+ mask_predictions = self.execute(graph_fn, [])
+ self.assertAllEqual([64, 323, 20, 7, 7], mask_predictions.shape)
def test_class_agnostic_prediction_size(self):
mask_prediction_head = (
@@ -219,11 +240,13 @@ class WeightSharedConvolutionalMaskPredictorTest(test_case.TestCase):
mask_height=7,
mask_width=7,
masks_are_class_agnostic=True))
- image_feature = tf.random_uniform(
- [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
- mask_predictions = mask_prediction_head(image_feature)
- self.assertAllEqual([64, 323, 1, 7, 7],
- mask_predictions.get_shape().as_list())
+ def graph_fn():
+ image_feature = tf.random_uniform(
+ [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ mask_predictions = mask_prediction_head(image_feature)
+ return mask_predictions
+ mask_predictions = self.execute(graph_fn, [])
+ self.assertAllEqual([64, 323, 1, 7, 7], mask_predictions.shape)
if __name__ == '__main__':
tf.test.main()
diff --git a/research/object_detection/predictors/heads/keypoint_head_test.py b/research/object_detection/predictors/heads/keypoint_head_tf1_test.py
similarity index 94%
rename from research/object_detection/predictors/heads/keypoint_head_test.py
rename to research/object_detection/predictors/heads/keypoint_head_tf1_test.py
index 0dc4c6f7307000051850d156a3974f8e51415b76..828174989133fd2ec6552ad848985719bdae35a5 100644
--- a/research/object_detection/predictors/heads/keypoint_head_test.py
+++ b/research/object_detection/predictors/heads/keypoint_head_tf1_test.py
@@ -14,6 +14,7 @@
# ==============================================================================
"""Tests for object_detection.predictors.heads.keypoint_head."""
+import unittest
import tensorflow.compat.v1 as tf
from google.protobuf import text_format
@@ -21,8 +22,10 @@ from object_detection.builders import hyperparams_builder
from object_detection.predictors.heads import keypoint_head
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class MaskRCNNKeypointHeadTest(test_case.TestCase):
def _build_arg_scope_with_hyperparams(self,
diff --git a/research/object_detection/predictors/heads/mask_head_test.py b/research/object_detection/predictors/heads/mask_head_tf1_test.py
similarity index 96%
rename from research/object_detection/predictors/heads/mask_head_test.py
rename to research/object_detection/predictors/heads/mask_head_tf1_test.py
index d3bd6819d3cb5499962028153964ee853eb147a5..152394836135abeaa68f32dd48275a89347d4059 100644
--- a/research/object_detection/predictors/heads/mask_head_test.py
+++ b/research/object_detection/predictors/heads/mask_head_tf1_test.py
@@ -14,6 +14,7 @@
# ==============================================================================
"""Tests for object_detection.predictors.heads.mask_head."""
+import unittest
import tensorflow.compat.v1 as tf
from google.protobuf import text_format
@@ -21,8 +22,10 @@ from object_detection.builders import hyperparams_builder
from object_detection.predictors.heads import mask_head
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class MaskRCNNMaskHeadTest(test_case.TestCase):
def _build_arg_scope_with_hyperparams(self,
@@ -75,6 +78,7 @@ class MaskRCNNMaskHeadTest(test_case.TestCase):
self.assertAllEqual([64, 1, 1, 28, 28], prediction.get_shape().as_list())
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class ConvolutionalMaskPredictorTest(test_case.TestCase):
def _build_arg_scope_with_hyperparams(
@@ -131,6 +135,7 @@ class ConvolutionalMaskPredictorTest(test_case.TestCase):
mask_predictions.get_shape().as_list())
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class WeightSharedConvolutionalMaskPredictorTest(test_case.TestCase):
def _build_arg_scope_with_hyperparams(
diff --git a/research/object_detection/predictors/mask_rcnn_box_predictor_test.py b/research/object_detection/predictors/mask_rcnn_box_predictor_tf1_test.py
similarity index 97%
rename from research/object_detection/predictors/mask_rcnn_box_predictor_test.py
rename to research/object_detection/predictors/mask_rcnn_box_predictor_tf1_test.py
index 4733e7a5f4951e808d5f34f0d98f402b98cf9904..d9a4bcbbf004dedc670956baf05615358e33e1a1 100644
--- a/research/object_detection/predictors/mask_rcnn_box_predictor_test.py
+++ b/research/object_detection/predictors/mask_rcnn_box_predictor_tf1_test.py
@@ -14,6 +14,7 @@
# ==============================================================================
"""Tests for object_detection.predictors.mask_rcnn_box_predictor."""
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
@@ -23,8 +24,10 @@ from object_detection.builders import hyperparams_builder
from object_detection.predictors import mask_rcnn_box_predictor as box_predictor
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class MaskRCNNBoxPredictorTest(test_case.TestCase):
def _build_arg_scope_with_hyperparams(self,
diff --git a/research/object_detection/predictors/mask_rcnn_keras_box_predictor_test.py b/research/object_detection/predictors/mask_rcnn_keras_box_predictor_tf2_test.py
similarity index 76%
rename from research/object_detection/predictors/mask_rcnn_keras_box_predictor_test.py
rename to research/object_detection/predictors/mask_rcnn_keras_box_predictor_tf2_test.py
index fbffe44e29180ae5a11106b7fa88679e638529fe..a92db9e90fb8299ff449bb614886a9a5542033c3 100644
--- a/research/object_detection/predictors/mask_rcnn_keras_box_predictor_test.py
+++ b/research/object_detection/predictors/mask_rcnn_keras_box_predictor_tf2_test.py
@@ -14,6 +14,7 @@
# ==============================================================================
"""Tests for object_detection.predictors.mask_rcnn_box_predictor."""
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
@@ -23,8 +24,10 @@ from object_detection.builders import hyperparams_builder
from object_detection.predictors import mask_rcnn_keras_box_predictor as box_predictor
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class MaskRCNNKerasBoxPredictorTest(test_case.TestCase):
def _build_hyperparams(self,
@@ -46,17 +49,17 @@ class MaskRCNNKerasBoxPredictorTest(test_case.TestCase):
return hyperparams_builder.KerasLayerHyperparams(hyperparams)
def test_get_boxes_with_five_classes(self):
+ mask_box_predictor = (
+ box_predictor_builder.build_mask_rcnn_keras_box_predictor(
+ is_training=False,
+ num_classes=5,
+ fc_hyperparams=self._build_hyperparams(),
+ freeze_batchnorm=False,
+ use_dropout=False,
+ dropout_keep_prob=0.5,
+ box_code_size=4,
+ ))
def graph_fn(image_features):
- mask_box_predictor = (
- box_predictor_builder.build_mask_rcnn_keras_box_predictor(
- is_training=False,
- num_classes=5,
- fc_hyperparams=self._build_hyperparams(),
- freeze_batchnorm=False,
- use_dropout=False,
- dropout_keep_prob=0.5,
- box_code_size=4,
- ))
box_predictions = mask_box_predictor(
[image_features],
prediction_stage=2)
@@ -70,18 +73,19 @@ class MaskRCNNKerasBoxPredictorTest(test_case.TestCase):
self.assertAllEqual(class_predictions_with_background.shape, [2, 1, 6])
def test_get_boxes_with_five_classes_share_box_across_classes(self):
+ mask_box_predictor = (
+ box_predictor_builder.build_mask_rcnn_keras_box_predictor(
+ is_training=False,
+ num_classes=5,
+ fc_hyperparams=self._build_hyperparams(),
+ freeze_batchnorm=False,
+ use_dropout=False,
+ dropout_keep_prob=0.5,
+ box_code_size=4,
+ share_box_across_classes=True
+ ))
def graph_fn(image_features):
- mask_box_predictor = (
- box_predictor_builder.build_mask_rcnn_keras_box_predictor(
- is_training=False,
- num_classes=5,
- fc_hyperparams=self._build_hyperparams(),
- freeze_batchnorm=False,
- use_dropout=False,
- dropout_keep_prob=0.5,
- box_code_size=4,
- share_box_across_classes=True
- ))
+
box_predictions = mask_box_predictor(
[image_features],
prediction_stage=2)
@@ -95,19 +99,19 @@ class MaskRCNNKerasBoxPredictorTest(test_case.TestCase):
self.assertAllEqual(class_predictions_with_background.shape, [2, 1, 6])
def test_get_instance_masks(self):
+ mask_box_predictor = (
+ box_predictor_builder.build_mask_rcnn_keras_box_predictor(
+ is_training=False,
+ num_classes=5,
+ fc_hyperparams=self._build_hyperparams(),
+ freeze_batchnorm=False,
+ use_dropout=False,
+ dropout_keep_prob=0.5,
+ box_code_size=4,
+ conv_hyperparams=self._build_hyperparams(
+ op_type=hyperparams_pb2.Hyperparams.CONV),
+ predict_instance_masks=True))
def graph_fn(image_features):
- mask_box_predictor = (
- box_predictor_builder.build_mask_rcnn_keras_box_predictor(
- is_training=False,
- num_classes=5,
- fc_hyperparams=self._build_hyperparams(),
- freeze_batchnorm=False,
- use_dropout=False,
- dropout_keep_prob=0.5,
- box_code_size=4,
- conv_hyperparams=self._build_hyperparams(
- op_type=hyperparams_pb2.Hyperparams.CONV),
- predict_instance_masks=True))
box_predictions = mask_box_predictor(
[image_features],
prediction_stage=3)
diff --git a/research/object_detection/predictors/rfcn_box_predictor_test.py b/research/object_detection/predictors/rfcn_box_predictor_tf1_test.py
similarity index 95%
rename from research/object_detection/predictors/rfcn_box_predictor_test.py
rename to research/object_detection/predictors/rfcn_box_predictor_tf1_test.py
index 7a484c0855742b8d09622eb9c2b2fd6a6b7cede4..555c4b2adeaef6142884adbc5c4e1087084fd884 100644
--- a/research/object_detection/predictors/rfcn_box_predictor_test.py
+++ b/research/object_detection/predictors/rfcn_box_predictor_tf1_test.py
@@ -14,6 +14,7 @@
# ==============================================================================
"""Tests for object_detection.predictors.rfcn_box_predictor."""
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
@@ -22,8 +23,10 @@ from object_detection.builders import hyperparams_builder
from object_detection.predictors import rfcn_box_predictor as box_predictor
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class RfcnBoxPredictorTest(test_case.TestCase):
def _build_arg_scope_with_conv_hyperparams(self):
diff --git a/research/object_detection/predictors/rfcn_keras_box_predictor_test.py b/research/object_detection/predictors/rfcn_keras_box_predictor_tf2_test.py
similarity index 85%
rename from research/object_detection/predictors/rfcn_keras_box_predictor_test.py
rename to research/object_detection/predictors/rfcn_keras_box_predictor_tf2_test.py
index d8cc01e4b4bb0ca7faa1c6145dcc4013c42f7d01..f845068e35b37a9b0d77873fb5adbf59c78450ae 100644
--- a/research/object_detection/predictors/rfcn_keras_box_predictor_test.py
+++ b/research/object_detection/predictors/rfcn_keras_box_predictor_tf2_test.py
@@ -14,6 +14,7 @@
# ==============================================================================
"""Tests for object_detection.predictors.rfcn_box_predictor."""
+import unittest
import numpy as np
import tensorflow.compat.v1 as tf
@@ -22,8 +23,10 @@ from object_detection.builders import hyperparams_builder
from object_detection.predictors import rfcn_keras_box_predictor as box_predictor
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class RfcnKerasBoxPredictorTest(test_case.TestCase):
def _build_conv_hyperparams(self):
@@ -42,18 +45,17 @@ class RfcnKerasBoxPredictorTest(test_case.TestCase):
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def test_get_correct_box_encoding_and_class_prediction_shapes(self):
-
+ rfcn_box_predictor = box_predictor.RfcnKerasBoxPredictor(
+ is_training=False,
+ num_classes=2,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ freeze_batchnorm=False,
+ num_spatial_bins=[3, 3],
+ depth=4,
+ crop_size=[12, 12],
+ box_code_size=4)
def graph_fn(image_features, proposal_boxes):
- rfcn_box_predictor = box_predictor.RfcnKerasBoxPredictor(
- is_training=False,
- num_classes=2,
- conv_hyperparams=self._build_conv_hyperparams(),
- freeze_batchnorm=False,
- num_spatial_bins=[3, 3],
- depth=4,
- crop_size=[12, 12],
- box_code_size=4
- )
+
box_predictions = rfcn_box_predictor(
[image_features],
proposal_boxes=proposal_boxes)
diff --git a/research/object_detection/protos/center_net.proto b/research/object_detection/protos/center_net.proto
new file mode 100644
index 0000000000000000000000000000000000000000..5047c000f3d4ba22d42127b54e61fbb8726429f8
--- /dev/null
+++ b/research/object_detection/protos/center_net.proto
@@ -0,0 +1,203 @@
+syntax = "proto2";
+
+package object_detection.protos;
+
+import "object_detection/protos/image_resizer.proto";
+import "object_detection/protos/losses.proto";
+
+// Configuration for the CenterNet meta architecture from the "Objects as
+// Points" paper [1]
+// [1]: https://arxiv.org/abs/1904.07850
+
+message CenterNet {
+ // Number of classes to predict.
+ optional int32 num_classes = 1;
+
+ // Feature extractor config.
+ optional CenterNetFeatureExtractor feature_extractor = 2;
+
+ // Image resizer for preprocessing the input image.
+ optional ImageResizer image_resizer = 3;
+
+ // Parameters which are related to object detection task.
+ message ObjectDetection {
+ // The original fields are moved to ObjectCenterParams or deleted.
+ reserved 2, 5, 6, 7;
+
+ // Weight of the task loss. The total loss of the model will be the
+ // summation of task losses weighted by the weights.
+ optional float task_loss_weight = 1 [default = 1.0];
+
+ // Weight for the offset localization loss.
+ optional float offset_loss_weight = 3 [default = 1.0];
+
+ // Weight for the height/width localization loss.
+ optional float scale_loss_weight = 4 [default = 0.1];
+
+ // Localization loss configuration for object scale and offset losses.
+ optional LocalizationLoss localization_loss = 8;
+ }
+ optional ObjectDetection object_detection_task = 4;
+
+ // Parameters related to object center prediction. This is required for both
+ // object detection and keypoint estimation tasks.
+ message ObjectCenterParams {
+ // Weight for the object center loss.
+ optional float object_center_loss_weight = 1 [default = 1.0];
+
+ // Classification loss configuration for object center loss.
+ optional ClassificationLoss classification_loss = 2;
+
+ // The initial bias value of the convolution kernel of the class heatmap
+ // prediction head. -2.19 corresponds to predicting foreground with
+ // a probability of 0.1. See "Focal Loss for Dense Object Detection"
+ // at https://arxiv.org/abs/1708.02002.
+ optional float heatmap_bias_init = 3 [default = -2.19];
+
+ // The minimum IOU overlap boxes need to have to not be penalized.
+ optional float min_box_overlap_iou = 4 [default = 0.7];
+
+ // Maximum number of boxes to predict.
+ optional int32 max_box_predictions = 5 [default = 100];
+
+ // If set, loss is only computed for the labeled classes.
+ optional bool use_labeled_classes = 6 [default = false];
+ }
+ optional ObjectCenterParams object_center_params = 5;
+
+ // Path of the file that contains the label map along with the keypoint
+ // information, including the keypoint indices, corresponding labels, and the
+ // corresponding class. The file should be the same one as used in the input
+ // pipeline. Note that a plain text of StringIntLabelMap proto is expected in
+ // this file.
+ // It is required only if the keypoint estimation task is specified.
+ optional string keypoint_label_map_path = 6;
+
+ // Parameters which are related to keypoint estimation task.
+ message KeypointEstimation {
+ // Name of the task, e.g. "human pose". Note that the task name should be
+ // unique to each keypoint task.
+ optional string task_name = 1;
+
+ // Weight of the task loss. The total loss of the model will be the
+ // summation of task losses weighted by the weights.
+ optional float task_loss_weight = 2 [default = 1.0];
+
+ // Loss configuration for keypoint heatmap, offset, regression losses. Note
+ // that the localization loss is used for offset/regression losses and
+ // classification loss is used for heatmap loss.
+ optional Loss loss = 3;
+
+ // The name of the class that contains the keypoints for this task. This is
+ // used to retrieve the corresponding keypoint indices from the label map.
+ // Note that this corresponds to the "name" field, not "display_name".
+ optional string keypoint_class_name = 4;
+
+ // The standard deviation of the Gaussian kernel used to generate the
+ // keypoint heatmap. The unit is the pixel in the output image. It is to
+ // provide the flexibility of using different sizes of Gaussian kernel for
+ // each keypoint class. Note that if provided, the keypoint standard
+ // deviations will be overridden by the specified values here, otherwise,
+ // the default value 5.0 will be used.
+ // TODO(yuhuic): Update the default value once we found the best value.
+ map<string, float> keypoint_label_to_std = 5;
+
+ // Loss weights corresponding to different heads.
+ optional float keypoint_regression_loss_weight = 6 [default = 1.0];
+ optional float keypoint_heatmap_loss_weight = 7 [default = 1.0];
+ optional float keypoint_offset_loss_weight = 8 [default = 1.0];
+
+ // The initial bias value of the convolution kernel of the keypoint heatmap
+ // prediction head. -2.19 corresponds to predicting foreground with
+ // a probability of 0.1. See "Focal Loss for Dense Object Detection"
+ // at https://arxiv.org/abs/1708.02002.
+ optional float heatmap_bias_init = 9 [default = -2.19];
+
+ // The heatmap score threshold for a keypoint to become a valid candidate.
+ optional float keypoint_candidate_score_threshold = 10 [default = 0.1];
+
+ // The maximum number of candidates to retrieve for each keypoint.
+ optional int32 num_candidates_per_keypoint = 11 [default = 100];
+
+ // Max pool kernel size to use to pull off peak score locations in a
+ // neighborhood (independently for each keypoint type).
+ optional int32 peak_max_pool_kernel_size = 12 [default = 3];
+
+ // The default score to use for regressed keypoints that are not
+ // successfully snapped to a nearby candidate.
+ optional float unmatched_keypoint_score = 13 [default = 0.1];
+
+ // The multiplier to expand the bounding boxes (either the provided boxes or
+ // those which tightly cover the regressed keypoints). Note that new
+ // expanded box for an instance becomes the feasible search window for all
+ // associated keypoints.
+ optional float box_scale = 14 [default = 1.2];
+
+ // The scale parameter that multiplies the largest dimension of a bounding
+ // box. The resulting distance becomes a search radius for candidates in the
+ // vicinity of each regressed keypoint.
+ optional float candidate_search_scale = 15 [default = 0.3];
+
+ // One of ['min_distance', 'score_distance_ratio'] indicating how to select
+ // the keypoint candidate.
+ optional string candidate_ranking_mode = 16 [default = "min_distance"];
+
+ // The radius (in the unit of output pixel) around heatmap peak to assign
+ // the offset targets. If set 0, then the offset target will only be
+ // assigned to the heatmap peak (same behavior as the original paper).
+ optional int32 offset_peak_radius = 17 [default = 0];
+
+ // Indicates whether to assign offsets for each keypoint channel
+ // separately. If set False, the output offset target has the shape
+ // [batch_size, out_height, out_width, 2] (same behavior as the original
+ // paper). If set True, the output offset target has the shape [batch_size,
+ // out_height, out_width, 2 * num_keypoints] (recommended when the
+ // offset_peak_radius is not zero).
+ optional bool per_keypoint_offset = 18 [default = false];
+ }
+ repeated KeypointEstimation keypoint_estimation_task = 7;
+
+ // Parameters which are related to mask estimation task.
+ // Note: Currently, CenterNet supports a weak instance segmentation, where
+ // semantic segmentation masks are estimated, and then cropped based on
+ // bounding box detections. Therefore, it is possible for the same image
+ // pixel to be assigned to multiple instances.
+ message MaskEstimation {
+ // Weight of the task loss. The total loss of the model will be the
+ // summation of task losses weighted by the weights.
+ optional float task_loss_weight = 1 [default = 1.0];
+
+ // Classification loss configuration for segmentation loss.
+ optional ClassificationLoss classification_loss = 2;
+
+ // Each instance mask (one per detection) is cropped and resized (bilinear
+ // resampling) from the predicted segmentation feature map. After
+ // resampling, the masks are binarized with the provided score threshold.
+ optional int32 mask_height = 4 [default = 256];
+ optional int32 mask_width = 5 [default = 256];
+ optional float score_threshold = 6 [default = 0.5];
+
+ // The initial bias value of the convolution kernel of the class heatmap
+ // prediction head. -2.19 corresponds to predicting foreground with
+ // a probability of 0.1.
+ optional float heatmap_bias_init = 3 [default = -2.19];
+ }
+ optional MaskEstimation mask_estimation_task = 8;
+}
+
+message CenterNetFeatureExtractor {
+ optional string type = 1;
+
+ // Channel means to be subtracted from each image channel. If not specified,
+ // we use a default value of 0.
+ repeated float channel_means = 2;
+
+ // Channel standard deviations. Each channel will be normalized by dividing
+ // it by its standard deviation. If not specified, we use a default value
+ // of 1.
+ repeated float channel_stds = 3;
+
+ // If set, will change channel order to be [blue, green, red]. This can be
+ // useful to be compatible with some pre-trained feature extractors.
+ optional bool bgr_ordering = 4 [default = false];
+}
diff --git a/research/object_detection/protos/faster_rcnn.proto b/research/object_detection/protos/faster_rcnn.proto
index 7e06fbcf41986e4272d09c57ae68fd2e50034034..486cc77ea8b156fb54500b0bbf7a01d4b17ac7b6 100644
--- a/research/object_detection/protos/faster_rcnn.proto
+++ b/research/object_detection/protos/faster_rcnn.proto
@@ -188,7 +188,7 @@ message Context {
// Next id: 4
// The maximum number of contextual features per-image, used for padding
- optional int32 max_num_context_features = 1 [default = 8500];
+ optional int32 max_num_context_features = 1 [default = 2000];
// The bottleneck feature dimension of the attention block.
optional int32 attention_bottleneck_dimension = 2 [default = 2048];
diff --git a/research/object_detection/protos/hyperparams.proto b/research/object_detection/protos/hyperparams.proto
index 2b1053877613c3ced70515a559014b6463175f9e..e2fee247ca1303dfdbb9bdb69f187b7520c4e89c 100644
--- a/research/object_detection/protos/hyperparams.proto
+++ b/research/object_detection/protos/hyperparams.proto
@@ -52,6 +52,12 @@ message Hyperparams {
// Whether depthwise convolutions should be regularized. If this parameter is
// NOT set then the conv hyperparams will default to the parent scope.
optional bool regularize_depthwise = 6 [default = false];
+
+ // By default, use_bias is set to False if batch_norm is not None and
+ // batch_norm.center is True. When force_use_bias is set to True, this
+ // behavior will be overridden, and use_bias will be set to True, regardless
+ // of batch norm parameters. Note, this only applies to KerasLayerHyperparams.
+ optional bool force_use_bias = 8 [default = false];
}
// Proto with one-of field for regularizers.
diff --git a/research/object_detection/protos/input_reader.proto b/research/object_detection/protos/input_reader.proto
index 2d9deda117d69031b9acca8047c8a3bc1db2c4a4..27d022532dc14fffc2b8078a500933d44ae5bf68 100644
--- a/research/object_detection/protos/input_reader.proto
+++ b/research/object_detection/protos/input_reader.proto
@@ -31,7 +31,7 @@ enum InputType {
TF_SEQUENCE_EXAMPLE = 2; // TfSequenceExample Input
}
-// Next id: 31
+// Next id: 32
message InputReader {
// Name of input reader. Typically used to describe the dataset that is read
// by this input reader.
@@ -119,6 +119,10 @@ message InputReader {
// Type of instance mask.
optional InstanceMaskType mask_type = 10 [default = NUMERICAL_MASKS];
+ // Whether to load DensePose data. If set, must also set load_instance_masks
+ // to true.
+ optional bool load_dense_pose = 31 [default = false];
+
// Whether to use the display name when decoding examples. This is only used
// when mapping class text strings to integers.
optional bool use_display_name = 17 [default = false];
diff --git a/research/object_detection/protos/model.proto b/research/object_detection/protos/model.proto
index 9333f2df149162b830950a991a18a0155941ca30..4fb6aed0b790e1f5b621a0d0c7c788cb80876c15 100644
--- a/research/object_detection/protos/model.proto
+++ b/research/object_detection/protos/model.proto
@@ -2,6 +2,7 @@ syntax = "proto2";
package object_detection.protos;
+import "object_detection/protos/center_net.proto";
import "object_detection/protos/faster_rcnn.proto";
import "object_detection/protos/ssd.proto";
@@ -17,6 +18,7 @@ message DetectionModel {
// value to a function that builds your model.
ExperimentalModel experimental_model = 3;
+ CenterNet center_net = 4;
}
}
diff --git a/research/object_detection/protos/preprocessor.proto b/research/object_detection/protos/preprocessor.proto
index aa83939f334b3bbd80e54c2ac7f367cb9cbf8869..3201df2bd08d551fda346a54282c866aa9890c28 100644
--- a/research/object_detection/protos/preprocessor.proto
+++ b/research/object_detection/protos/preprocessor.proto
@@ -57,7 +57,8 @@ message NormalizeImage {
optional float target_maxval = 4 [default=1];
}
-// Randomly horizontally flips the image and detections 50% of the time.
+// Randomly horizontally flips the image and detections with the specified
+// probability, default to 50% of the time.
message RandomHorizontalFlip {
// Specifies a mapping from the original keypoint indices to horizontally
// flipped indices. This is used in the event that keypoints are specified,
@@ -71,10 +72,15 @@ message RandomHorizontalFlip {
// keypoint_flip_permutation: 3
// keypoint_flip_permutation: 5
// keypoint_flip_permutation: 4
+ // If nothing is specified the order of keypoints will be maintained.
repeated int32 keypoint_flip_permutation = 1;
+
+ // The probability of running this augmentation for each image.
+ optional float probability = 2 [default=0.5];
}
-// Randomly vertically flips the image and detections 50% of the time.
+// Randomly vertically flips the image and detections with the specified
+// probability, default to 50% of the time.
message RandomVerticalFlip {
// Specifies a mapping from the original keypoint indices to vertically
// flipped indices. This is used in the event that keypoints are specified,
@@ -89,11 +95,23 @@ message RandomVerticalFlip {
// keypoint_flip_permutation: 5
// keypoint_flip_permutation: 4
repeated int32 keypoint_flip_permutation = 1;
+
+ // The probability of running this augmentation for each image.
+ optional float probability = 2 [default=0.5];
}
// Randomly rotates the image and detections by 90 degrees counter-clockwise
-// 50% of the time.
-message RandomRotation90 {}
+// with the specified probability, default to 50% of the time.
+message RandomRotation90 {
+ // Specifies a mapping from the original keypoint indices to their indices
+ // after a 90-degree counter-clockwise rotation. This is used in the event
+ // that keypoints are specified, in which case when the image is rotated the
+ // keypoints might need to be permuted.
+ repeated int32 keypoint_rot_permutation = 1;
+
+ // The probability of running this augmentation for each image.
+ optional float probability = 2 [default=0.5];
+}
// Randomly scales the values of all pixels in the image by some constant value
// between [minval, maxval], then clip the value to a range between [0, 1.0].
@@ -457,7 +475,6 @@ message SSDRandomCropPadFixedAspectRatio {
// Converts class logits to softmax optionally scaling the values by temperature
// first.
message ConvertClassLogitsToSoftmax {
-
// Scale to use on logits before applying softmax.
optional float temperature = 1 [default=1.0];
}
@@ -472,12 +489,10 @@ message RandomSelfConcatImage {
// Apply an Autoaugment policy to the image and bounding boxes.
message AutoAugmentImage {
-
// What AutoAugment policy to apply to the Image
optional string policy_name = 1 [default="v0"];
}
-
// Randomly drops ground truth boxes for a label with some probability.
message DropLabelProbabilistically {
// The label that should be dropped. This corresponds to one of the entries
@@ -487,7 +502,6 @@ message DropLabelProbabilistically {
optional float drop_probability = 2 [default = 1.0];
}
-
//Remap a set of labels to a new label.
message RemapLabels {
// Labels to be remapped.
diff --git a/research/object_detection/protos/train.proto b/research/object_detection/protos/train.proto
index 0da8b2ede150f645e44e3d8f8a5ba995703c8712..62d326cdf67c7329ddaa22250a4f2734a4f43066 100644
--- a/research/object_detection/protos/train.proto
+++ b/research/object_detection/protos/train.proto
@@ -59,7 +59,8 @@ message TrainConfig {
// Whether to load all checkpoint vars that match model variable names and
// sizes. This option is only available if `from_detection_checkpoint` is
- // True.
+ // True. This option is *not* supported for TF2 --- setting it to true
+ // will raise an error.
optional bool load_all_detection_checkpoint_vars = 19 [default = false];
// Number of steps to train the DetectionModel for. If 0, will train the model
diff --git a/research/object_detection/samples/configs/context_rcnn_resnet101_snapshot_serengeti.config b/research/object_detection/samples/configs/context_rcnn_resnet101_snapshot_serengeti.config
new file mode 100644
index 0000000000000000000000000000000000000000..8167731c7b72b5598391c65794f9f8c265903a4c
--- /dev/null
+++ b/research/object_detection/samples/configs/context_rcnn_resnet101_snapshot_serengeti.config
@@ -0,0 +1,164 @@
+# Context R-CNN configuration for Snapshot Serengeti Dataset, with sequence
+# example input data with context_features.
+# This model uses attention into contextual features within the Faster R-CNN
+# object detection framework to improve object detection performance.
+# See https://arxiv.org/abs/1912.03538 for more information.
+# Search for "PATH_TO_BE_CONFIGURED" to find the fields that should be
+# configured.
+
+model {
+ faster_rcnn {
+ num_classes: 48
+ image_resizer {
+ fixed_shape_resizer {
+ height: 640
+ width: 640
+ }
+ }
+ feature_extractor {
+ type: "faster_rcnn_resnet101"
+ first_stage_features_stride: 16
+ batch_norm_trainable: true
+ }
+ first_stage_anchor_generator {
+ grid_anchor_generator {
+ height_stride: 16
+ width_stride: 16
+ scales: 0.25
+ scales: 0.5
+ scales: 1.0
+ scales: 2.0
+ aspect_ratios: 0.5
+ aspect_ratios: 1.0
+ aspect_ratios: 2.0
+ }
+ }
+ first_stage_box_predictor_conv_hyperparams {
+ op: CONV
+ regularizer {
+ l2_regularizer {
+ weight: 0.0
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ stddev: 0.00999999977648
+ }
+ }
+ }
+ first_stage_nms_score_threshold: 0.0
+ first_stage_nms_iou_threshold: 0.699999988079
+ first_stage_max_proposals: 300
+ first_stage_localization_loss_weight: 2.0
+ first_stage_objectness_loss_weight: 1.0
+ initial_crop_size: 14
+ maxpool_kernel_size: 2
+ maxpool_stride: 2
+ second_stage_box_predictor {
+ mask_rcnn_box_predictor {
+ fc_hyperparams {
+ op: FC
+ regularizer {
+ l2_regularizer {
+ weight: 0.0
+ }
+ }
+ initializer {
+ variance_scaling_initializer {
+ factor: 1.0
+ uniform: true
+ mode: FAN_AVG
+ }
+ }
+ }
+ use_dropout: false
+ dropout_keep_probability: 1.0
+ share_box_across_classes: true
+ }
+ }
+ second_stage_post_processing {
+ batch_non_max_suppression {
+ score_threshold: 0.0
+ iou_threshold: 0.600000023842
+ max_detections_per_class: 100
+ max_total_detections: 300
+ }
+ score_converter: SOFTMAX
+ }
+ second_stage_localization_loss_weight: 2.0
+ second_stage_classification_loss_weight: 1.0
+ use_matmul_crop_and_resize: true
+ clip_anchors_to_image: true
+ use_matmul_gather_in_matcher: true
+ use_static_balanced_label_sampler: true
+ use_static_shapes: true
+ context_config {
+ max_num_context_features: 2000
+ context_feature_length: 2057
+ }
+ }
+}
+train_config {
+ batch_size: 8
+ data_augmentation_options {
+ random_horizontal_flip {
+ }
+ }
+ sync_replicas: true
+ optimizer {
+ momentum_optimizer {
+ learning_rate {
+ manual_step_learning_rate {
+ initial_learning_rate: 0.0
+ schedule {
+ step: 400000
+ learning_rate: 0.002
+ }
+ schedule {
+ step: 500000
+ learning_rate: 0.0002
+ }
+ schedule {
+ step: 600000
+ learning_rate: 0.00002
+ }
+ warmup: true
+ }
+ }
+ momentum_optimizer_value: 0.9
+ }
+ use_moving_average: false
+ }
+ gradient_clipping_by_norm: 10.0
+ fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/faster_rcnn_resnet101_coco_2018_08_14/model.ckpt"
+ from_detection_checkpoint: true
+ num_steps: 5000000
+ replicas_to_aggregate: 8
+ max_number_of_boxes: 100
+ unpad_groundtruth_tensors: false
+ use_bfloat16: true
+}
+train_input_reader {
+ label_map_path: "PATH_TO_BE_CONFIGURED/ss_label_map.pbtxt"
+ tf_record_input_reader {
+ input_path: "PATH_TO_BE_CONFIGURED/snapshot_serengeti_train-?????-of-?????"
+ }
+ load_context_features: true
+ input_type: TF_SEQUENCE_EXAMPLE
+}
+eval_config {
+ max_evals: 50
+ metrics_set: "coco_detection_metrics"
+ use_moving_averages: false
+ batch_size: 1
+}
+eval_input_reader {
+ label_map_path: "PATH_TO_BE_CONFIGURED/ss_label_map.pbtxt"
+ shuffle: false
+ num_epochs: 1
+ tf_record_input_reader {
+ input_path: "PATH_TO_BE_CONFIGURED/snapshot_serengeti_val-?????-of-?????"
+ }
+ load_context_features: true
+ input_type: TF_SEQUENCE_EXAMPLE
+}
diff --git a/research/object_detection/samples/configs/context_rcnn_resnet101_snapshot_serengeti_sync.config b/research/object_detection/samples/configs/context_rcnn_resnet101_snapshot_serengeti_sync.config
new file mode 100644
index 0000000000000000000000000000000000000000..b96dea467ed600c648219595f33c4b147a0c3215
--- /dev/null
+++ b/research/object_detection/samples/configs/context_rcnn_resnet101_snapshot_serengeti_sync.config
@@ -0,0 +1,166 @@
+# Context R-CNN configuration for Snapshot Serengeti Dataset, with sequence
+# example input data with context_features.
+# This model uses attention into contextual features within the Faster R-CNN
+# object detection framework to improve object detection performance.
+# See https://arxiv.org/abs/1912.03538 for more information.
+# Search for "PATH_TO_BE_CONFIGURED" to find the fields that should be
+# configured.
+
+# This config is TPU compatible.
+
+model {
+ faster_rcnn {
+ num_classes: 48
+ image_resizer {
+ fixed_shape_resizer {
+ height: 640
+ width: 640
+ }
+ }
+ feature_extractor {
+ type: "faster_rcnn_resnet101"
+ first_stage_features_stride: 16
+ batch_norm_trainable: true
+ }
+ first_stage_anchor_generator {
+ grid_anchor_generator {
+ height_stride: 16
+ width_stride: 16
+ scales: 0.25
+ scales: 0.5
+ scales: 1.0
+ scales: 2.0
+ aspect_ratios: 0.5
+ aspect_ratios: 1.0
+ aspect_ratios: 2.0
+ }
+ }
+ first_stage_box_predictor_conv_hyperparams {
+ op: CONV
+ regularizer {
+ l2_regularizer {
+ weight: 0.0
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ stddev: 0.00999999977648
+ }
+ }
+ }
+ first_stage_nms_score_threshold: 0.0
+ first_stage_nms_iou_threshold: 0.699999988079
+ first_stage_max_proposals: 300
+ first_stage_localization_loss_weight: 2.0
+ first_stage_objectness_loss_weight: 1.0
+ initial_crop_size: 14
+ maxpool_kernel_size: 2
+ maxpool_stride: 2
+ second_stage_box_predictor {
+ mask_rcnn_box_predictor {
+ fc_hyperparams {
+ op: FC
+ regularizer {
+ l2_regularizer {
+ weight: 0.0
+ }
+ }
+ initializer {
+ variance_scaling_initializer {
+ factor: 1.0
+ uniform: true
+ mode: FAN_AVG
+ }
+ }
+ }
+ use_dropout: false
+ dropout_keep_probability: 1.0
+ share_box_across_classes: true
+ }
+ }
+ second_stage_post_processing {
+ batch_non_max_suppression {
+ score_threshold: 0.0
+ iou_threshold: 0.600000023842
+ max_detections_per_class: 100
+ max_total_detections: 300
+ }
+ score_converter: SOFTMAX
+ }
+ second_stage_localization_loss_weight: 2.0
+ second_stage_classification_loss_weight: 1.0
+ use_matmul_crop_and_resize: true
+ clip_anchors_to_image: true
+ use_matmul_gather_in_matcher: true
+ use_static_balanced_label_sampler: true
+ use_static_shapes: true
+ context_config {
+ max_num_context_features: 2000
+ context_feature_length: 2057
+ }
+ }
+}
+train_config {
+ batch_size: 64
+ data_augmentation_options {
+ random_horizontal_flip {
+ }
+ }
+ sync_replicas: true
+ optimizer {
+ momentum_optimizer {
+ learning_rate {
+ manual_step_learning_rate {
+ initial_learning_rate: 0.0
+ schedule {
+ step: 2000
+ learning_rate: 0.00200000009499
+ }
+ schedule {
+ step: 200000
+ learning_rate: 0.000199999994948
+ }
+ schedule {
+ step: 300000
+ learning_rate: 1.99999994948e-05
+ }
+ warmup: true
+ }
+ }
+ momentum_optimizer_value: 0.899999976158
+ }
+ use_moving_average: false
+ }
+ gradient_clipping_by_norm: 10.0
+ fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/faster_rcnn_resnet101_coco_2018_08_14/model.ckpt"
+ from_detection_checkpoint: true
+ num_steps: 500000
+ replicas_to_aggregate: 8
+ max_number_of_boxes: 100
+ unpad_groundtruth_tensors: false
+ use_bfloat16: true
+}
+train_input_reader {
+ label_map_path: "PATH_TO_BE_CONFIGURED/ss_label_map.pbtxt"
+ tf_record_input_reader {
+ input_path: "PATH_TO_BE_CONFIGURED/snapshot_serengeti_train-?????-of-?????"
+ }
+ load_context_features: true
+ input_type: TF_SEQUENCE_EXAMPLE
+}
+eval_config {
+ max_evals: 50
+ metrics_set: "coco_detection_metrics"
+ use_moving_averages: false
+ batch_size: 4
+}
+eval_input_reader {
+ label_map_path: "PATH_TO_BE_CONFIGURED/ss_label_map.pbtxt"
+ shuffle: false
+ num_epochs: 1
+ tf_record_input_reader {
+ input_path: "PATH_TO_BE_CONFIGURED/snapshot_serengeti_val-?????-of-?????"
+ }
+ load_context_features: true
+ input_type: TF_SEQUENCE_EXAMPLE
+}
diff --git a/research/object_detection/samples/configs/ssdlite_mobiledet_gpu_320x320_coco_sync_4x4.config b/research/object_detection/samples/configs/ssdlite_mobiledet_gpu_320x320_coco_sync_4x4.config
new file mode 100644
index 0000000000000000000000000000000000000000..634eb9e49a08d80e8735778d05da99f0c25cbdc2
--- /dev/null
+++ b/research/object_detection/samples/configs/ssdlite_mobiledet_gpu_320x320_coco_sync_4x4.config
@@ -0,0 +1,204 @@
+# SSDLite with MobileDet-GPU feature extractor.
+# Reference: Xiong & Liu et al., https://arxiv.org/abs/2004.14525
+# Trained on COCO, initialized from scratch.
+#
+# 5.07B MulAdds, 13.11M Parameters.
+# Latencies are 11.0ms (fp32), 3.2ms (fp16) and 2.3ms (int8) on Jetson Xavier,
+# optimized using TensorRT 7.1.
+# Achieves 28.7 mAP on COCO14 minival dataset.
+# Achieves 27.5 mAP on COCO17 val dataset.
+#
+# This config is TPU compatible.
+
+model {
+ ssd {
+ inplace_batchnorm_update: true
+ freeze_batchnorm: false
+ num_classes: 90
+ box_coder {
+ faster_rcnn_box_coder {
+ y_scale: 10.0
+ x_scale: 10.0
+ height_scale: 5.0
+ width_scale: 5.0
+ }
+ }
+ matcher {
+ argmax_matcher {
+ matched_threshold: 0.5
+ unmatched_threshold: 0.5
+ ignore_thresholds: false
+ negatives_lower_than_unmatched: true
+ force_match_for_each_row: true
+ use_matmul_gather: true
+ }
+ }
+ similarity_calculator {
+ iou_similarity {
+ }
+ }
+ encode_background_as_zeros: true
+ anchor_generator {
+ ssd_anchor_generator {
+ num_layers: 6
+ min_scale: 0.2
+ max_scale: 0.95
+ aspect_ratios: 1.0
+ aspect_ratios: 2.0
+ aspect_ratios: 0.5
+ aspect_ratios: 3.0
+ aspect_ratios: 0.3333
+ }
+ }
+ image_resizer {
+ fixed_shape_resizer {
+ height: 320
+ width: 320
+ }
+ }
+ box_predictor {
+ convolutional_box_predictor {
+ min_depth: 0
+ max_depth: 0
+ num_layers_before_predictor: 0
+ use_dropout: false
+ dropout_keep_probability: 0.8
+ kernel_size: 3
+ use_depthwise: true
+ box_code_size: 4
+ apply_sigmoid_to_scores: false
+ class_prediction_bias_init: -4.6
+ conv_hyperparams {
+ activation: RELU_6,
+ regularizer {
+ l2_regularizer {
+ weight: 0.00004
+ }
+ }
+ initializer {
+ random_normal_initializer {
+ stddev: 0.03
+ mean: 0.0
+ }
+ }
+ batch_norm {
+ train: true,
+ scale: true,
+ center: true,
+ decay: 0.97,
+ epsilon: 0.001,
+ }
+ }
+ }
+ }
+ feature_extractor {
+ type: 'ssd_mobiledet_gpu'
+ min_depth: 16
+ depth_multiplier: 1.0
+ use_depthwise: true
+ conv_hyperparams {
+ activation: RELU_6,
+ regularizer {
+ l2_regularizer {
+ weight: 0.00004
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ stddev: 0.03
+ mean: 0.0
+ }
+ }
+ batch_norm {
+ train: true,
+ scale: true,
+ center: true,
+ decay: 0.97,
+ epsilon: 0.001,
+ }
+ }
+ override_base_feature_extractor_hyperparams: false
+ }
+ loss {
+ classification_loss {
+ weighted_sigmoid_focal {
+ alpha: 0.75,
+ gamma: 2.0
+ }
+ }
+ localization_loss {
+ weighted_smooth_l1 {
+ delta: 1.0
+ }
+ }
+ classification_weight: 1.0
+ localization_weight: 1.0
+ }
+ normalize_loss_by_num_matches: true
+ normalize_loc_loss_by_codesize: true
+ post_processing {
+ batch_non_max_suppression {
+ score_threshold: 1e-8
+ iou_threshold: 0.6
+ max_detections_per_class: 100
+ max_total_detections: 100
+ use_static_shapes: true
+ }
+ score_converter: SIGMOID
+ }
+ }
+}
+
+train_config: {
+ batch_size: 512
+ sync_replicas: true
+ startup_delay_steps: 0
+ replicas_to_aggregate: 32
+ num_steps: 400000
+ data_augmentation_options {
+ random_horizontal_flip {
+ }
+ }
+ data_augmentation_options {
+ ssd_random_crop {
+ }
+ }
+ optimizer {
+ momentum_optimizer: {
+ learning_rate: {
+ cosine_decay_learning_rate {
+ learning_rate_base: 0.8
+ total_steps: 400000
+ warmup_learning_rate: 0.13333
+ warmup_steps: 2000
+ }
+ }
+ momentum_optimizer_value: 0.9
+ }
+ use_moving_average: false
+ }
+ max_number_of_boxes: 100
+ unpad_groundtruth_tensors: false
+}
+
+train_input_reader: {
+ label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
+ tf_record_input_reader {
+ input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100"
+ }
+}
+
+eval_config: {
+ metrics_set: "coco_detection_metrics"
+ use_moving_averages: false
+ num_examples: 8000
+}
+
+eval_input_reader: {
+ label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
+ shuffle: false
+ num_epochs: 1
+ tf_record_input_reader {
+ input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
+ }
+}
diff --git a/research/object_detection/test_images/snapshot_serengeti/README.md b/research/object_detection/test_images/snapshot_serengeti/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..bec44871e23e0970eddad571af55f7f12dd8f8c9
--- /dev/null
+++ b/research/object_detection/test_images/snapshot_serengeti/README.md
@@ -0,0 +1,17 @@
+# Citation and license
+
+The images and metadata in this folder come from the Snapshot Serengeti dataset,
+and were accessed via [LILA.science](http://lila.science/datasets/snapshot-serengeti).
+The images and species-level labels are described in more detail in the
+associated manuscript:
+
+```
+Swanson AB, Kosmala M, Lintott CJ, Simpson RJ, Smith A, Packer C (2015)
+Snapshot Serengeti, high-frequency annotated camera trap images of 40 mammalian
+species in an African savanna. Scientific Data 2: 150026. (DOI) (bibtex)
+```
+
+Please cite this manuscript if you use this dataset.
+
+This data set is released under the
+[Community Data License Agreement (permissive variant)](https://cdla.io/permissive-1-0/).
diff --git a/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0038.jpeg b/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0038.jpeg
new file mode 100644
index 0000000000000000000000000000000000000000..a843d7618c5968b367babc3f6778b2a1efbecd1c
Binary files /dev/null and b/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0038.jpeg differ
diff --git a/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0039.jpeg b/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0039.jpeg
new file mode 100644
index 0000000000000000000000000000000000000000..5bc16924b51f57832ccf3bac1b401467c24d2842
Binary files /dev/null and b/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0039.jpeg differ
diff --git a/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0040.jpeg b/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0040.jpeg
new file mode 100644
index 0000000000000000000000000000000000000000..2ab245ae8da7a700a30913e07251d32c55d6cfa8
Binary files /dev/null and b/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0040.jpeg differ
diff --git a/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0041.jpeg b/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0041.jpeg
new file mode 100644
index 0000000000000000000000000000000000000000..7159d71611ed80831cd489dbf1b17abff8db1508
Binary files /dev/null and b/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0041.jpeg differ
diff --git a/research/object_detection/test_images/snapshot_serengeti/context_rcnn_demo_metadata.json b/research/object_detection/test_images/snapshot_serengeti/context_rcnn_demo_metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..110793e2fbaa231106c527eac410618ba55a1a93
--- /dev/null
+++ b/research/object_detection/test_images/snapshot_serengeti/context_rcnn_demo_metadata.json
@@ -0,0 +1 @@
+{"images": [{"file_name": "models/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0038.jpeg", "frame_num": 0, "seq_num_frames": 2, "id": "S1/E03/E03_R3/S1_E03_R3_PICT0038", "height": 1536, "season": "S1", "date_captured": "2010-08-07 01:04:14", "width": 2048, "seq_id": "ASG0003041", "location": "E03"}, {"file_name": "models/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0039.jpeg", "frame_num": 1, "seq_num_frames": 2, "id": "S1/E03/E03_R3/S1_E03_R3_PICT0039", "height": 1536, "season": "S1", "date_captured": "2010-08-07 01:04:14", "width": 2048, "seq_id": "ASG0003041", "location": "E03"}, {"file_name": "models/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0040.jpeg", "frame_num": 0, "seq_num_frames": 2, "id": "S1/E03/E03_R3/S1_E03_R3_PICT0040", "height": 1536, "season": "S1", "date_captured": "2010-08-07 02:53:46", "width": 2048, "seq_id": "ASG0003042", "location": "E03"}, {"file_name": "models/research/object_detection/test_images/snapshot_serengeti/S1_E03_R3_PICT0041.jpeg", "frame_num": 1, "seq_num_frames": 2, "id": "S1/E03/E03_R3/S1_E03_R3_PICT0041", "height": 1536, "season": "S1", "date_captured": "2010-08-07 02:53:46", "width": 2048, "seq_id": "ASG0003042", "location": "E03"}], "categories": [{"name": "empty", "id": 0}, {"name": "human", "id": 1}, {"name": "gazelleGrants", "id": 2}, {"name": "reedbuck", "id": 3}, {"name": "dikDik", "id": 4}, {"name": "zebra", "id": 5}, {"name": "porcupine", "id": 6}, {"name": "gazelleThomsons", "id": 7}, {"name": "hyenaSpotted", "id": 8}, {"name": "warthog", "id": 9}, {"name": "impala", "id": 10}, {"name": "elephant", "id": 11}, {"name": "giraffe", "id": 12}, {"name": "mongoose", "id": 13}, {"name": "buffalo", "id": 14}, {"name": "hartebeest", "id": 15}, {"name": "guineaFowl", "id": 16}, {"name": "wildebeest", "id": 17}, {"name": "leopard", "id": 18}, {"name": "ostrich", "id": 19}, {"name": "lionFemale", "id": 20}, {"name": "koriBustard", "id": 21}, 
{"name": "otherBird", "id": 22}, {"name": "batEaredFox", "id": 23}, {"name": "bushbuck", "id": 24}, {"name": "jackal", "id": 25}, {"name": "cheetah", "id": 26}, {"name": "eland", "id": 27}, {"name": "aardwolf", "id": 28}, {"name": "hippopotamus", "id": 29}, {"name": "hyenaStriped", "id": 30}, {"name": "aardvark", "id": 31}, {"name": "hare", "id": 32}, {"name": "baboon", "id": 33}, {"name": "vervetMonkey", "id": 34}, {"name": "waterbuck", "id": 35}, {"name": "secretaryBird", "id": 36}, {"name": "serval", "id": 37}, {"name": "lionMale", "id": 38}, {"name": "topi", "id": 39}, {"name": "honeyBadger", "id": 40}, {"name": "rodents", "id": 41}, {"name": "wildcat", "id": 42}, {"name": "civet", "id": 43}, {"name": "genet", "id": 44}, {"name": "caracal", "id": 45}, {"name": "rhinoceros", "id": 46}, {"name": "reptiles", "id": 47}, {"name": "zorilla", "id": 48}], "annotations": [{"category_id": 29, "image_id": "S1/E03/E03_R3/S1_E03_R3_PICT0038", "bbox": [614.9233639240294, 476.2385201454182, 685.5741333961523, 374.18740868568574], "id": "0154T1541168895361"}, {"category_id": 29, "image_id": "S1/E03/E03_R3/S1_E03_R3_PICT0039", "bbox": [382.03749418258434, 471.005129814144, 756.2249028682752, 397.73766517639683], "id": "Lxtry1541168934504"}, {"category_id": 29, "image_id": "S1/E03/E03_R3/S1_E03_R3_PICT0040", "bbox": [786.9475708007834, 461.0229187011687, 749.0524291992166, 385.0301413536], "id": "Xmyih1541168739115"}, {"category_id": 29, "image_id": "S1/E03/E03_R3/S1_E03_R3_PICT0041", "bbox": [573.8866577148518, 453.0573425292903, 845.0, 398.9770812988263], "id": "ZllAa1541168769217"}]}
\ No newline at end of file
diff --git a/research/object_detection/tpu_exporters/export_saved_model_tpu_lib_test.py b/research/object_detection/tpu_exporters/export_saved_model_tpu_lib_tf1_test.py
similarity index 95%
rename from research/object_detection/tpu_exporters/export_saved_model_tpu_lib_test.py
rename to research/object_detection/tpu_exporters/export_saved_model_tpu_lib_tf1_test.py
index 4bbffed3655a764bedc88c2cf1f9f0b7f483b116..653535aa3b75576fa73662c3fc3ea2d257908107 100644
--- a/research/object_detection/tpu_exporters/export_saved_model_tpu_lib_test.py
+++ b/research/object_detection/tpu_exporters/export_saved_model_tpu_lib_tf1_test.py
@@ -19,12 +19,14 @@ from __future__ import division
from __future__ import print_function
import os
+import unittest
from absl.testing import parameterized
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.tpu_exporters import export_saved_model_tpu_lib
+from object_detection.utils import tf_version
flags = tf.app.flags
FLAGS = flags.FLAGS
@@ -35,6 +37,7 @@ def get_path(path_suffix):
path_suffix)
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class ExportSavedModelTPUTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(
diff --git a/research/object_detection/utils/bifpn_utils.py b/research/object_detection/utils/bifpn_utils.py
index b4b2443553850cd5ce4ac9f921f614552cd4364b..d14cb841e3eb848bec6e4bf4257ee70e768b5108 100644
--- a/research/object_detection/utils/bifpn_utils.py
+++ b/research/object_detection/utils/bifpn_utils.py
@@ -26,7 +26,8 @@ from object_detection.utils import shape_utils
def create_conv_block(name, num_filters, kernel_size, strides, padding,
use_separable, apply_batchnorm, apply_activation,
- conv_hyperparams, is_training, freeze_batchnorm):
+ conv_hyperparams, is_training, freeze_batchnorm,
+ conv_bn_act_pattern=True):
"""Create Keras layers for regular or separable convolutions.
Args:
@@ -50,6 +51,9 @@ def create_conv_block(name, num_filters, kernel_size, strides, padding,
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
+ conv_bn_act_pattern: Bool. By default, when True, the layers returned by
+ this function are in the order [conv, batchnorm, activation]. Otherwise,
+ when False, the order of the layers is [activation, conv, batchnorm].
Returns:
A list of keras layers, including (regular or seperable) convolution, and
@@ -73,7 +77,7 @@ def create_conv_block(name, num_filters, kernel_size, strides, padding,
depth_multiplier=1,
padding=padding,
strides=strides,
- name=name + '_separable_conv',
+ name=name + 'separable_conv',
**kwargs))
else:
layers.append(
@@ -82,18 +86,22 @@ def create_conv_block(name, num_filters, kernel_size, strides, padding,
kernel_size=kernel_size,
padding=padding,
strides=strides,
- name=name + '_conv',
+ name=name + 'conv',
**conv_hyperparams.params()))
if apply_batchnorm:
layers.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
- name=name + '_batchnorm'))
+ name=name + 'batchnorm'))
if apply_activation:
- layers.append(
- conv_hyperparams.build_activation_layer(name=name + '_activation'))
+ activation_layer = conv_hyperparams.build_activation_layer(
+ name=name + 'activation')
+ if conv_bn_act_pattern:
+ layers.append(activation_layer)
+ else:
+ layers = [activation_layer] + layers
return layers
@@ -133,28 +141,28 @@ def create_downsample_feature_map_ops(scale, downsample_method,
pool_size=kernel_size,
strides=stride,
padding=padding,
- name=name + '_downsample_max_x{}'.format(stride)))
+ name=name + 'downsample_max_x{}'.format(stride)))
elif downsample_method == 'avg_pooling':
layers.append(
tf.keras.layers.AveragePooling2D(
pool_size=kernel_size,
strides=stride,
padding=padding,
- name=name + '_downsample_avg_x{}'.format(stride)))
+ name=name + 'downsample_avg_x{}'.format(stride)))
elif downsample_method == 'depthwise_conv':
layers.append(
tf.keras.layers.DepthwiseConv2D(
kernel_size=kernel_size,
strides=stride,
padding=padding,
- name=name + '_downsample_depthwise_x{}'.format(stride)))
+ name=name + 'downsample_depthwise_x{}'.format(stride)))
layers.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
- name=name + '_downsample_batchnorm'))
+ name=name + 'downsample_batchnorm'))
layers.append(
conv_hyperparams.build_activation_layer(name=name +
- '_downsample_activation'))
+ 'downsample_activation'))
else:
raise ValueError('Unknown downsample method: {}'.format(downsample_method))
diff --git a/research/object_detection/utils/config_util.py b/research/object_detection/utils/config_util.py
index 71185a5a601272cb7801050c4ccca3bcb4b1be99..662d42e1305538534e1cb6671086e4faa6cdf00c 100644
--- a/research/object_detection/utils/config_util.py
+++ b/research/object_detection/utils/config_util.py
@@ -142,6 +142,35 @@ def get_configs_from_pipeline_file(pipeline_config_path, config_override=None):
return create_configs_from_pipeline_proto(pipeline_config)
+def clear_fine_tune_checkpoint(pipeline_config_path,
+ new_pipeline_config_path):
+ """Clears fine_tune_checkpoint and writes a new pipeline config file."""
+ configs = get_configs_from_pipeline_file(pipeline_config_path)
+ configs["train_config"].fine_tune_checkpoint = ""
+ configs["train_config"].load_all_detection_checkpoint_vars = False
+ pipeline_proto = create_pipeline_proto_from_configs(configs)
+ with tf.gfile.Open(new_pipeline_config_path, "wb") as f:
+ f.write(text_format.MessageToString(pipeline_proto))
+
+
+def update_fine_tune_checkpoint_type(train_config):
+ """Set `fine_tune_checkpoint_type` using `from_detection_checkpoint`.
+
+ `train_config.from_detection_checkpoint` field is deprecated. For backward
+ compatibility, this function sets `train_config.fine_tune_checkpoint_type`
+ based on `train_config.from_detection_checkpoint`.
+
+ Args:
+ train_config: train_pb2.TrainConfig proto object.
+
+ """
+ if not train_config.fine_tune_checkpoint_type:
+ if train_config.from_detection_checkpoint:
+ train_config.fine_tune_checkpoint_type = "detection"
+ else:
+ train_config.fine_tune_checkpoint_type = "classification"
+
+
def create_configs_from_pipeline_proto(pipeline_config):
"""Creates a configs dictionary from pipeline_pb2.TrainEvalPipelineConfig.
diff --git a/research/object_detection/utils/config_util_test.py b/research/object_detection/utils/config_util_test.py
index cd5f87d8d0a40e92cd20533b3eec3e5294e00640..f36970c11078b222710427b46ffd502be608c109 100644
--- a/research/object_detection/utils/config_util_test.py
+++ b/research/object_detection/utils/config_util_test.py
@@ -19,7 +19,7 @@ from __future__ import division
from __future__ import print_function
import os
-
+import unittest
from six.moves import range
import tensorflow.compat.v1 as tf
@@ -32,6 +32,7 @@ from object_detection.protos import model_pb2
from object_detection.protos import pipeline_pb2
from object_detection.protos import train_pb2
from object_detection.utils import config_util
+from object_detection.utils import tf_version
# pylint: disable=g-import-not-at-top
try:
@@ -282,18 +283,22 @@ class ConfigUtilTest(tf.test.TestCase):
self.assertAlmostEqual(hparams.learning_rate * warmup_scale_factor,
cosine_lr.warmup_learning_rate)
+ @unittest.skipIf(tf_version.is_tf2(), "Skipping TF1.X only test.")
def testRMSPropWithNewLearingRate(self):
"""Tests new learning rates for RMSProp Optimizer."""
self._assertOptimizerWithNewLearningRate("rms_prop_optimizer")
+ @unittest.skipIf(tf_version.is_tf2(), "Skipping TF1.X only test.")
def testMomentumOptimizerWithNewLearningRate(self):
"""Tests new learning rates for Momentum Optimizer."""
self._assertOptimizerWithNewLearningRate("momentum_optimizer")
+ @unittest.skipIf(tf_version.is_tf2(), "Skipping TF1.X only test.")
def testAdamOptimizerWithNewLearningRate(self):
"""Tests new learning rates for Adam Optimizer."""
self._assertOptimizerWithNewLearningRate("adam_optimizer")
+ @unittest.skipIf(tf_version.is_tf2(), "Skipping TF1.X only test.")
def testGenericConfigOverride(self):
"""Tests generic config overrides for all top-level configs."""
# Set one parameter for each of the top-level pipeline configs:
@@ -329,6 +334,7 @@ class ConfigUtilTest(tf.test.TestCase):
self.assertEqual(2,
configs["graph_rewriter_config"].quantization.weight_bits)
+ @unittest.skipIf(tf_version.is_tf2(), "Skipping TF1.X only test.")
def testNewBatchSize(self):
"""Tests that batch size is updated appropriately."""
original_batch_size = 2
@@ -344,6 +350,7 @@ class ConfigUtilTest(tf.test.TestCase):
new_batch_size = configs["train_config"].batch_size
self.assertEqual(16, new_batch_size)
+ @unittest.skipIf(tf_version.is_tf2(), "Skipping TF1.X only test.")
def testNewBatchSizeWithClipping(self):
"""Tests that batch size is clipped to 1 from below."""
original_batch_size = 2
@@ -359,6 +366,7 @@ class ConfigUtilTest(tf.test.TestCase):
new_batch_size = configs["train_config"].batch_size
self.assertEqual(1, new_batch_size) # Clipped to 1.0.
+ @unittest.skipIf(tf_version.is_tf2(), "Skipping TF1.X only test.")
def testOverwriteBatchSizeWithKeyValue(self):
"""Tests that batch size is overwritten based on key/value."""
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
@@ -369,6 +377,7 @@ class ConfigUtilTest(tf.test.TestCase):
new_batch_size = configs["train_config"].batch_size
self.assertEqual(10, new_batch_size)
+ @unittest.skipIf(tf_version.is_tf2(), "Skipping TF1.X only test.")
def testKeyValueOverrideBadKey(self):
"""Tests that overwriting with a bad key causes an exception."""
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
@@ -377,6 +386,7 @@ class ConfigUtilTest(tf.test.TestCase):
with self.assertRaises(ValueError):
config_util.merge_external_params_with_configs(configs, hparams)
+ @unittest.skipIf(tf_version.is_tf2(), "Skipping TF1.X only test.")
def testOverwriteBatchSizeWithBadValueType(self):
"""Tests that overwriting with a bad valuye type causes an exception."""
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
@@ -387,6 +397,7 @@ class ConfigUtilTest(tf.test.TestCase):
with self.assertRaises(TypeError):
config_util.merge_external_params_with_configs(configs, hparams)
+ @unittest.skipIf(tf_version.is_tf2(), "Skipping TF1.X only test.")
def testNewMomentumOptimizerValue(self):
"""Tests that new momentum value is updated appropriately."""
original_momentum_value = 0.4
@@ -404,6 +415,7 @@ class ConfigUtilTest(tf.test.TestCase):
new_momentum_value = optimizer_config.momentum_optimizer_value
self.assertAlmostEqual(1.0, new_momentum_value) # Clipped to 1.0.
+ @unittest.skipIf(tf_version.is_tf2(), "Skipping TF1.X only test.")
def testNewClassificationLocalizationWeightRatio(self):
"""Tests that the loss weight ratio is updated appropriately."""
original_localization_weight = 0.1
@@ -426,6 +438,7 @@ class ConfigUtilTest(tf.test.TestCase):
self.assertAlmostEqual(1.0, loss.localization_weight)
self.assertAlmostEqual(new_weight_ratio, loss.classification_weight)
+ @unittest.skipIf(tf_version.is_tf2(), "Skipping TF1.X only test.")
def testNewFocalLossParameters(self):
"""Tests that the loss weight ratio is updated appropriately."""
original_alpha = 1.0
diff --git a/research/object_detection/utils/model_util.py b/research/object_detection/utils/model_util.py
index 6a46265c3d596230a9073c5b5b8f4bb622f346bf..bc5cfe48231bccc034147f6a06a4d5f09cca0159 100644
--- a/research/object_detection/utils/model_util.py
+++ b/research/object_detection/utils/model_util.py
@@ -54,8 +54,8 @@ def extract_submodel(model, inputs, outputs, name=None):
for layer in model.layers:
layer_output = layer.output
layer_inputs = layer.input
- output_to_layer[layer_output.ref()] = layer
- output_to_layer_input[layer_output.ref()] = layer_inputs
+ output_to_layer[layer_output.experimental_ref()] = layer
+ output_to_layer_input[layer_output.experimental_ref()] = layer_inputs
model_inputs_dict = {}
memoized_results = {}
@@ -63,21 +63,22 @@ def extract_submodel(model, inputs, outputs, name=None):
# Relies on recursion, very low limit in python
def _recurse_in_model(tensor):
"""Walk the existing model recursively to copy a submodel."""
- if tensor.ref() in memoized_results:
- return memoized_results[tensor.ref()]
- if (tensor.ref() == inputs.ref()) or (
+ if tensor.experimental_ref() in memoized_results:
+ return memoized_results[tensor.experimental_ref()]
+ if (tensor.experimental_ref() == inputs.experimental_ref()) or (
isinstance(inputs, list) and tensor in inputs):
- if tensor.ref() not in model_inputs_dict:
- model_inputs_dict[tensor.ref()] = tf.keras.layers.Input(tensor=tensor)
- out = model_inputs_dict[tensor.ref()]
+ if tensor.experimental_ref() not in model_inputs_dict:
+ model_inputs_dict[tensor.experimental_ref()] = tf.keras.layers.Input(
+ tensor=tensor)
+ out = model_inputs_dict[tensor.experimental_ref()]
else:
- cur_inputs = output_to_layer_input[tensor.ref()]
- cur_layer = output_to_layer[tensor.ref()]
+ cur_inputs = output_to_layer_input[tensor.experimental_ref()]
+ cur_layer = output_to_layer[tensor.experimental_ref()]
if isinstance(cur_inputs, list):
out = cur_layer([_recurse_in_model(inp) for inp in cur_inputs])
else:
out = cur_layer(_recurse_in_model(cur_inputs))
- memoized_results[tensor.ref()] = out
+ memoized_results[tensor.experimental_ref()] = out
return out
if isinstance(outputs, list):
@@ -86,8 +87,10 @@ def extract_submodel(model, inputs, outputs, name=None):
model_outputs = _recurse_in_model(outputs)
if isinstance(inputs, list):
- model_inputs = [model_inputs_dict[tensor.ref()] for tensor in inputs]
+ model_inputs = [
+ model_inputs_dict[tensor.experimental_ref()] for tensor in inputs
+ ]
else:
- model_inputs = model_inputs_dict[inputs.ref()]
+ model_inputs = model_inputs_dict[inputs.experimental_ref()]
return tf.keras.Model(inputs=model_inputs, outputs=model_outputs, name=name)
diff --git a/research/object_detection/utils/model_util_test.py b/research/object_detection/utils/model_util_tf2_test.py
similarity index 94%
rename from research/object_detection/utils/model_util_test.py
rename to research/object_detection/utils/model_util_tf2_test.py
index c505464c7831e812e813e085b3dc59231f66802c..77b1d01725e4dfd51176d676168657612d5826bc 100644
--- a/research/object_detection/utils/model_util_test.py
+++ b/research/object_detection/utils/model_util_tf2_test.py
@@ -19,11 +19,14 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import unittest
import tensorflow.compat.v1 as tf
from object_detection.utils import model_util
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class ExtractSubmodelUtilTest(tf.test.TestCase):
def test_simple_model(self):
diff --git a/research/object_detection/utils/object_detection_evaluation_test.py b/research/object_detection/utils/object_detection_evaluation_test.py
index 5b2b5c8011fa57a18f6262b1ad895df1e9fb16a3..ff399ed4bad4d4872eb135685789678237f0e0b1 100644
--- a/research/object_detection/utils/object_detection_evaluation_test.py
+++ b/research/object_detection/utils/object_detection_evaluation_test.py
@@ -18,6 +18,8 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+
+import unittest
from absl.testing import parameterized
import numpy as np
import six
@@ -26,6 +28,7 @@ import tensorflow.compat.v1 as tf
from object_detection import eval_util
from object_detection.core import standard_fields
from object_detection.utils import object_detection_evaluation
+from object_detection.utils import tf_version
class OpenImagesV2EvaluationTest(tf.test.TestCase):
@@ -970,6 +973,8 @@ class ObjectDetectionEvaluationTest(tf.test.TestCase):
self.assertAlmostEqual(copy_mean_corloc, mean_corloc)
+@unittest.skipIf(tf_version.is_tf2(), 'Eval Metrics ops are supported in TF1.X '
+ 'only.')
class ObjectDetectionEvaluatorTest(tf.test.TestCase, parameterized.TestCase):
def setUp(self):
diff --git a/research/object_detection/utils/ops.py b/research/object_detection/utils/ops.py
index f59881580947acb010c3bc1fd308d7ea2965eeeb..0cd83d38d5a679d420bfbb3c81f0964c662f98af 100644
--- a/research/object_detection/utils/ops.py
+++ b/research/object_detection/utils/ops.py
@@ -268,7 +268,7 @@ def padded_one_hot_encoding(indices, depth, left_pad):
on_value=1, off_value=0), tf.float32)
return tf.pad(one_hot, [[0, 0], [left_pad, 0]], mode='CONSTANT')
result = tf.cond(tf.greater(tf.size(indices), 0), one_hot_and_pad,
- lambda: tf.zeros((depth + left_pad, 0)))
+ lambda: tf.zeros((tf.size(indices), depth + left_pad)))
return tf.reshape(result, [-1, depth + left_pad])
diff --git a/research/object_detection/utils/ops_test.py b/research/object_detection/utils/ops_test.py
index a7a6f8df32a4f9f881b0002d4e3d00d4830913fc..d4da7b1071b899002faa830a07686541c69d99bd 100644
--- a/research/object_detection/utils/ops_test.py
+++ b/research/object_detection/utils/ops_test.py
@@ -196,8 +196,7 @@ class OpsTestPaddedOneHotEncoding(test_case.TestCase):
[0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 1]], np.float32)
- # Executing on CPU only because output shape is not constant.
- out_one_hot_tensor = self.execute_cpu(graph_fn, [])
+ out_one_hot_tensor = self.execute(graph_fn, [])
self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
atol=1e-10)
@@ -212,8 +211,7 @@ class OpsTestPaddedOneHotEncoding(test_case.TestCase):
[0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 1]], np.float32)
- # Executing on CPU only because output shape is not constant.
- out_one_hot_tensor = self.execute_cpu(graph_fn, [])
+ out_one_hot_tensor = self.execute(graph_fn, [])
self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
atol=1e-10)
@@ -229,8 +227,7 @@ class OpsTestPaddedOneHotEncoding(test_case.TestCase):
[0, 0, 0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1]], np.float32)
- # executing on CPU only because output shape is not constant.
- out_one_hot_tensor = self.execute_cpu(graph_fn, [])
+ out_one_hot_tensor = self.execute(graph_fn, [])
self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
atol=1e-10)
@@ -246,8 +243,7 @@ class OpsTestPaddedOneHotEncoding(test_case.TestCase):
return one_hot_tensor
expected_tensor = np.zeros((0, depth + pad))
- # executing on CPU only because output shape is not constant.
- out_one_hot_tensor = self.execute_cpu(graph_fn, [])
+ out_one_hot_tensor = self.execute(graph_fn, [])
self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
atol=1e-10)
diff --git a/research/object_detection/utils/target_assigner_utils.py b/research/object_detection/utils/target_assigner_utils.py
index ca7918f3ddc6a6046771bda1f8039a3035b57725..0aa26a47ed75ac918a82aaee184fa2bb0dfa7127 100644
--- a/research/object_detection/utils/target_assigner_utils.py
+++ b/research/object_detection/utils/target_assigner_utils.py
@@ -118,12 +118,17 @@ def compute_floor_offsets_with_indices(y_source,
they were put on the grids) to target coordinates. Note that the input
coordinates should be the "absolute" coordinates in terms of the output image
dimensions as opposed to the normalized coordinates (i.e. values in [0, 1]).
+ If the input y and x sources have a second dimension (representing the
+ neighboring pixels), then the offsets are computed from each of the
+ neighboring pixels to their corresponding target (first dimension).
Args:
- y_source: A tensor with shape [num_points] representing the absolute
- y-coordinates (in the output image space) of the source points.
- x_source: A tensor with shape [num_points] representing the absolute
- x-coordinates (in the output image space) of the source points.
+ y_source: A tensor with shape [num_points] (or [num_points, num_neighbors])
+ representing the absolute y-coordinates (in the output image space) of the
+ source points.
+ x_source: A tensor with shape [num_points] (or [num_points, num_neighbors])
+ representing the absolute x-coordinates (in the output image space) of the
+ source points.
y_target: A tensor with shape [num_points] representing the absolute
y-coordinates (in the output image space) of the target points. If not
provided, then y_source is used as the targets.
@@ -133,18 +138,33 @@ def compute_floor_offsets_with_indices(y_source,
Returns:
A tuple of two tensors:
- offsets: A tensor with shape [num_points, 2] representing the offsets of
- each input point.
- indices: A tensor with shape [num_points, 2] representing the indices of
- where the offsets should be retrieved in the output image dimension
- space.
+ offsets: A tensor with shape [num_points, 2] (or
+ [num_points, num_neighbors, 2]) representing the offsets of each input
+ point.
+ indices: A tensor with shape [num_points, 2] (or
+ [num_points, num_neighbors, 2]) representing the indices of where the
+ offsets should be retrieved in the output image dimension space.
+
+ Raises:
+ ValueError: source and target shapes have unexpected values.
"""
y_source_floored = tf.floor(y_source)
x_source_floored = tf.floor(x_source)
- if y_target is None:
+
+ source_shape = shape_utils.combined_static_and_dynamic_shape(y_source)
+ if y_target is None and x_target is None:
y_target = y_source
- if x_target is None:
x_target = x_source
+ else:
+ target_shape = shape_utils.combined_static_and_dynamic_shape(y_target)
+ if len(source_shape) == 2 and len(target_shape) == 1:
+ _, num_neighbors = source_shape
+ y_target = tf.tile(
+ tf.expand_dims(y_target, -1), multiples=[1, num_neighbors])
+ x_target = tf.tile(
+ tf.expand_dims(x_target, -1), multiples=[1, num_neighbors])
+ elif source_shape != target_shape:
+ raise ValueError('Inconsistent source and target shape.')
y_offset = y_target - y_source_floored
x_offset = x_target - x_source_floored
@@ -152,9 +172,8 @@ def compute_floor_offsets_with_indices(y_source,
y_source_indices = tf.cast(y_source_floored, tf.int32)
x_source_indices = tf.cast(x_source_floored, tf.int32)
- indices = tf.stack([y_source_indices, x_source_indices], axis=1)
- offsets = tf.stack([y_offset, x_offset], axis=1)
-
+ indices = tf.stack([y_source_indices, x_source_indices], axis=-1)
+ offsets = tf.stack([y_offset, x_offset], axis=-1)
return offsets, indices
@@ -231,6 +250,12 @@ def blackout_pixel_weights_by_box_regions(height, width, boxes, blackout):
A float tensor with shape [height, width] where all values within the
regions of the blackout boxes are 0.0 and 1.0 else where.
"""
+ num_instances, _ = shape_utils.combined_static_and_dynamic_shape(boxes)
+ # If no annotation instance is provided, return all ones (instead of
+ # unexpected values) to avoid NaN loss value.
+ if num_instances == 0:
+ return tf.ones([height, width], dtype=tf.float32)
+
(y_grid, x_grid) = image_shape_to_grids(height, width)
y_grid = tf.expand_dims(y_grid, axis=0)
x_grid = tf.expand_dims(x_grid, axis=0)
@@ -257,3 +282,72 @@ def blackout_pixel_weights_by_box_regions(height, width, boxes, blackout):
out_boxes = tf.reduce_max(selected_in_boxes, axis=0)
out_boxes = tf.ones_like(out_boxes) - out_boxes
return out_boxes
+
+
+def _get_yx_indices_offset_by_radius(radius):
+ """Gets the y and x index offsets that are within the radius."""
+ y_offsets = []
+ x_offsets = []
+ for y_offset in range(-radius, radius + 1, 1):
+ for x_offset in range(-radius, radius + 1, 1):
+ if x_offset ** 2 + y_offset ** 2 <= radius ** 2:
+ y_offsets.append(y_offset)
+ x_offsets.append(x_offset)
+ return (tf.constant(y_offsets, dtype=tf.float32),
+ tf.constant(x_offsets, dtype=tf.float32))
+
+
+def get_surrounding_grids(height, width, y_coordinates, x_coordinates, radius):
+ """Gets the indices of the surrounding pixels of the input y, x coordinates.
+
+ This function returns the pixel indices corresponding to the (floor of the)
+ input coordinates and their surrounding pixels within the radius. If the
+ radius is set to 0, then only the pixels that correspond to the floor of the
+ coordinates will be returned. If the radius is larger than 0, then all of the
+ pixels within the radius of the "floor pixels" will also be returned. For
+ example, if the input coordinate is [2.1, 3.5] and radius is 1, then the five
+ pixel indices will be returned: [2, 3], [1, 3], [2, 2], [2, 4], [3, 3]. Also,
+ if the surrounding pixels are outside of valid image region, then the returned
+ pixel indices will be [0, 0] and its corresponding "valid" value will be
+ False.
+
+ Args:
+ height: int, the height of the output image.
+ width: int, the width of the output image.
+ y_coordinates: A tensor with shape [num_points] representing the absolute
+ y-coordinates (in the output image space) of the points.
+ x_coordinates: A tensor with shape [num_points] representing the absolute
+ x-coordinates (in the output image space) of the points.
+ radius: int, the radius of the neighboring pixels to be considered and
+ returned. If set to 0, then only the pixel indices corresponding to the
+ floor of the input coordinates will be returned.
+
+ Returns:
+ A tuple of three tensors:
+ y_indices: A [num_points, num_neighbors] float tensor representing the
+ pixel y indices corresponding to the input points within radius. The
+ "num_neighbors" is determined by the size of the radius.
+ x_indices: A [num_points, num_neighbors] float tensor representing the
+ pixel x indices corresponding to the input points within radius. The
+ "num_neighbors" is determined by the size of the radius.
+ valid: A [num_points, num_neighbors] boolean tensor representing whether
+ each returned index is in valid image region or not.
+ """
+ # Floored y, x: [num_points, 1].
+ y_center = tf.expand_dims(tf.math.floor(y_coordinates), axis=-1)
+ x_center = tf.expand_dims(tf.math.floor(x_coordinates), axis=-1)
+ y_offsets, x_offsets = _get_yx_indices_offset_by_radius(radius)
+ # Indices offsets: [1, num_neighbors].
+ y_offsets = tf.expand_dims(y_offsets, axis=0)
+ x_offsets = tf.expand_dims(x_offsets, axis=0)
+
+ # Floor + offsets: [num_points, num_neighbors].
+ y_output = y_center + y_offsets
+ x_output = x_center + x_offsets
+ default_output = tf.zeros_like(y_output)
+ valid = tf.logical_and(
+ tf.logical_and(x_output >= 0, x_output < width),
+ tf.logical_and(y_output >= 0, y_output < height))
+ y_output = tf.where(valid, y_output, default_output)
+ x_output = tf.where(valid, x_output, default_output)
+ return (y_output, x_output, valid)
diff --git a/research/object_detection/utils/target_assigner_utils_test.py b/research/object_detection/utils/target_assigner_utils_test.py
index b895cca012128078641cf8e70d20c201f3a51a4d..f663445324d7ee648130018b522fdcbaaeb74d54 100644
--- a/research/object_detection/utils/target_assigner_utils_test.py
+++ b/research/object_detection/utils/target_assigner_utils_test.py
@@ -87,8 +87,32 @@ class TargetUtilTest(test_case.TestCase):
np.testing.assert_array_almost_equal(offsets,
np.array([[1.1, -0.8], [0.1, 0.5]]))
- np.testing.assert_array_almost_equal(indices,
- np.array([[1, 2], [0, 4]]))
+ np.testing.assert_array_almost_equal(indices, np.array([[1, 2], [0, 4]]))
+
+ def test_compute_floor_offsets_with_indices_multisources(self):
+
+ def graph_fn():
+ y_source = tf.constant([[1.0, 0.0], [2.0, 3.0]], dtype=tf.float32)
+ x_source = tf.constant([[2.0, 4.0], [3.0, 3.0]], dtype=tf.float32)
+ y_target = tf.constant([2.1, 0.1], dtype=tf.float32)
+ x_target = tf.constant([1.2, 4.5], dtype=tf.float32)
+ (offsets, indices) = ta_utils.compute_floor_offsets_with_indices(
+ y_source, x_source, y_target, x_target)
+ return offsets, indices
+
+ offsets, indices = self.execute(graph_fn, [])
+ # Offset from the first source to target.
+ np.testing.assert_array_almost_equal(offsets[:, 0, :],
+ np.array([[1.1, -0.8], [-1.9, 1.5]]))
+ # Offset from the second source to target.
+ np.testing.assert_array_almost_equal(offsets[:, 1, :],
+ np.array([[2.1, -2.8], [-2.9, 1.5]]))
+ # Indices from the first source to target.
+ np.testing.assert_array_almost_equal(indices[:, 0, :],
+ np.array([[1, 2], [2, 3]]))
+ # Indices from the second source to target.
+ np.testing.assert_array_almost_equal(indices[:, 1, :],
+ np.array([[0, 4], [3, 3]]))
def test_get_valid_keypoints_mask(self):
@@ -174,6 +198,44 @@ class TargetUtilTest(test_case.TestCase):
# 20 * 10 - 6 * 6 - 3 * 7 = 143.0
self.assertAlmostEqual(np.sum(output), 143.0)
+ def test_blackout_pixel_weights_by_box_regions_zero_instance(self):
+ def graph_fn():
+ boxes = tf.zeros([0, 4], dtype=tf.float32)
+ blackout = tf.zeros([0], dtype=tf.bool)
+ blackout_pixel_weights_by_box_regions = tf.function(
+ ta_utils.blackout_pixel_weights_by_box_regions)
+ output = blackout_pixel_weights_by_box_regions(10, 20, boxes, blackout)
+ return output
+
+ output = self.execute(graph_fn, [])
+ # The output should be all 1s since there's no annotation provided.
+ np.testing.assert_array_equal(output, np.ones([10, 20], dtype=np.float32))
+
+ def test_get_surrounding_grids(self):
+
+ def graph_fn():
+ y_coordinates = tf.constant([0.5], dtype=tf.float32)
+ x_coordinates = tf.constant([4.5], dtype=tf.float32)
+ output = ta_utils.get_surrounding_grids(
+ height=3,
+ width=5,
+ y_coordinates=y_coordinates,
+ x_coordinates=x_coordinates,
+ radius=1)
+ return output
+
+ y_indices, x_indices, valid = self.execute(graph_fn, [])
+
+ # Five neighboring indices: [-1, 4] (out of bounds), [0, 3], [0, 4],
+ # [0, 5] (out of bounds), [1, 4].
+ np.testing.assert_array_almost_equal(
+ y_indices,
+ np.array([[0.0, 0.0, 0.0, 0.0, 1.0]]))
+ np.testing.assert_array_almost_equal(
+ x_indices,
+ np.array([[0.0, 3.0, 4.0, 0.0, 4.0]]))
+ self.assertAllEqual(valid, [[False, True, True, False, True]])
+
if __name__ == '__main__':
tf.test.main()
diff --git a/research/object_detection/utils/test_utils.py b/research/object_detection/utils/test_utils.py
index f7e92c0bfda9099580a5761d6dcc526608b2ffd0..666a29adbad262054e039c30fb9deb52e66ac665 100644
--- a/research/object_detection/utils/test_utils.py
+++ b/research/object_detection/utils/test_utils.py
@@ -271,3 +271,19 @@ class GraphContextOrNone(object):
return False
else:
return self.graph.__exit__(ttype, value, traceback)
+
+
+def image_with_dynamic_shape(height, width, channels):
+ """Returns a single image with dynamic shape."""
+ h = tf.random.uniform([], minval=height, maxval=height+1, dtype=tf.int32)
+ w = tf.random.uniform([], minval=width, maxval=width+1, dtype=tf.int32)
+ image = tf.random.uniform([h, w, channels])
+ return image
+
+
+def keypoints_with_dynamic_shape(num_instances, num_keypoints, num_coordinates):
+ """Returns keypoints with dynamic shape."""
+ n = tf.random.uniform([], minval=num_instances, maxval=num_instances+1,
+ dtype=tf.int32)
+ keypoints = tf.random.uniform([n, num_keypoints, num_coordinates])
+ return keypoints
diff --git a/research/object_detection/utils/variables_helper.py b/research/object_detection/utils/variables_helper.py
index 327f3b679a2fb6abe48da4894ae7ea9ab4f70afa..17b63eb70bfdda9156d51bbfb15281c206ab9ed4 100644
--- a/research/object_detection/utils/variables_helper.py
+++ b/research/object_detection/utils/variables_helper.py
@@ -47,8 +47,6 @@ def filter_variables(variables, filter_regex_list, invert=False):
Returns:
a list of filtered variables.
"""
- if tf.executing_eagerly():
- raise ValueError('Accessing variables is not supported in eager mode.')
kept_vars = []
variables_to_ignore_patterns = list([fre for fre in filter_regex_list if fre])
for var in variables:
@@ -74,8 +72,6 @@ def multiply_gradients_matching_regex(grads_and_vars, regex_list, multiplier):
Returns:
grads_and_vars: A list of gradient to variable pairs (tuples).
"""
- if tf.executing_eagerly():
- raise ValueError('Accessing variables is not supported in eager mode.')
variables = [pair[1] for pair in grads_and_vars]
matching_vars = filter_variables(variables, regex_list, invert=True)
for var in matching_vars:
@@ -97,8 +93,6 @@ def freeze_gradients_matching_regex(grads_and_vars, regex_list):
grads_and_vars: A list of gradient to variable pairs (tuples) that do not
contain the variables and gradients matching the regex.
"""
- if tf.executing_eagerly():
- raise ValueError('Accessing variables is not supported in eager mode.')
variables = [pair[1] for pair in grads_and_vars]
matching_vars = filter_variables(variables, regex_list, invert=True)
kept_grads_and_vars = [pair for pair in grads_and_vars
@@ -129,8 +123,6 @@ def get_variables_available_in_checkpoint(variables,
Raises:
ValueError: if `variables` is not a list or dict.
"""
- if tf.executing_eagerly():
- raise ValueError('Accessing variables is not supported in eager mode.')
if isinstance(variables, list):
variable_names_map = {}
for variable in variables:
@@ -178,8 +170,6 @@ def get_global_variables_safely():
Returns:
The result of tf.global_variables()
"""
- if tf.executing_eagerly():
- raise ValueError('Accessing variables is not supported in eager mode.')
with tf.init_scope():
if tf.executing_eagerly():
raise ValueError("Global variables collection is not tracked when "
diff --git a/research/object_detection/utils/variables_helper_test.py b/research/object_detection/utils/variables_helper_tf1_test.py
similarity index 96%
rename from research/object_detection/utils/variables_helper_test.py
rename to research/object_detection/utils/variables_helper_tf1_test.py
index 44e72d0d1a588507378166083de4ab4c61e83304..a8bd43ed9ab5a5b57b72733877b93fd39662a885 100644
--- a/research/object_detection/utils/variables_helper_test.py
+++ b/research/object_detection/utils/variables_helper_tf1_test.py
@@ -20,13 +20,15 @@ from __future__ import division
from __future__ import print_function
import os
-
+import unittest
import tensorflow.compat.v1 as tf
from object_detection.utils import test_case
+from object_detection.utils import tf_version
from object_detection.utils import variables_helper
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class FilterVariablesTest(test_case.TestCase):
def _create_variables(self):
@@ -68,6 +70,7 @@ class FilterVariablesTest(test_case.TestCase):
self.assertCountEqual(out_variables, [variables[1], variables[3]])
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class MultiplyGradientsMatchingRegexTest(tf.test.TestCase):
def _create_grads_and_vars(self):
@@ -107,6 +110,7 @@ class MultiplyGradientsMatchingRegexTest(tf.test.TestCase):
self.assertCountEqual(output, exp_output)
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class FreezeGradientsMatchingRegexTest(test_case.TestCase):
def _create_grads_and_vars(self):
@@ -132,6 +136,7 @@ class FreezeGradientsMatchingRegexTest(test_case.TestCase):
self.assertCountEqual(output, exp_output)
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class GetVariablesAvailableInCheckpointTest(test_case.TestCase):
def test_return_all_variables_from_checkpoint(self):
diff --git a/research/sentiment_analysis/sentiment_main.py b/research/sentiment_analysis/sentiment_main.py
index a749d25f3098e071b630f07bac81450b40ade9ba..8b9ba5f921eef72377b480669bd81087fd1b160a 100644
--- a/research/sentiment_analysis/sentiment_main.py
+++ b/research/sentiment_analysis/sentiment_main.py
@@ -10,17 +10,20 @@ from __future__ import division
from __future__ import print_function
import argparse
+import os
import tensorflow as tf
from data import dataset
import sentiment_model
+
+
_DROPOUT_RATE = 0.95
def run_model(dataset_name, emb_dim, voc_size, sen_len,
- hid_dim, batch_size, epochs):
+ hid_dim, batch_size, epochs, model_save_dir):
"""Run training loop and an evaluation at the end.
Args:
@@ -48,9 +51,23 @@ def run_model(dataset_name, emb_dim, voc_size, sen_len,
x_train, y_train, x_test, y_test = dataset.load(
dataset_name, voc_size, sen_len)
+ if not os.path.exists(model_save_dir):
+ os.makedirs(model_save_dir)
+
+ filepath=model_save_dir+"/model-{epoch:02d}.hdf5"
+
+ checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='val_accuracy',
+ verbose=1,save_best_only=True,
+ save_weights_only=True,mode='auto')
+
+
model.fit(x_train, y_train, batch_size=batch_size,
- validation_split=0.4, epochs=epochs)
+ validation_split=0.4, epochs=epochs, callbacks=[checkpoint_callback])
+
score = model.evaluate(x_test, y_test, batch_size=batch_size)
+
+ model.save(os.path.join(model_save_dir, "full-model.h5"))
+
tf.logging.info("Score: {}".format(score))
if __name__ == "__main__":
@@ -85,8 +102,14 @@ if __name__ == "__main__":
help="The number of epochs for training.",
type=int, default=55)
+ parser.add_argument("-f", "--folder",
+ help="folder/dir to save trained model",
+ type=str, default=None)
args = parser.parse_args()
+ if args.folder is None:
+ parser.error("-f argument folder/dir to save is None,provide path to save model.")
+
run_model(args.dataset, args.embedding_dim, args.vocabulary_size,
args.sentence_length, args.hidden_dim,
- args.batch_size, args.epochs)
+ args.batch_size, args.epochs, args.folder)