diff --git a/research/mlperf_object_detection/Mask_RCNN/README.md b/research/mlperf_object_detection/Mask_RCNN/README.md
deleted file mode 100644
index 16bdc70c4bf9152560215bdd8a435940d20051df..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/README.md
+++ /dev/null
@@ -1 +0,0 @@
-Mask R-CNN implementation adapted from models/research/object_detection/
diff --git a/research/mlperf_object_detection/Mask_RCNN/configs/e2e_mask_rcnn_R-50-C4_atrous.config b/research/mlperf_object_detection/Mask_RCNN/configs/e2e_mask_rcnn_R-50-C4_atrous.config
deleted file mode 100644
index 6085c7838582260fad17858ab4f65bd08e6ea14f..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/configs/e2e_mask_rcnn_R-50-C4_atrous.config
+++ /dev/null
@@ -1,170 +0,0 @@
-# Mask R-CNN with Resnet-50 (v1), Atrous version
-# Configured for MSCOCO Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- faster_rcnn {
- num_classes: 81
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 800
- max_dimension: 1365
- }
- }
- number_of_stages: 3
- feature_extractor {
- type: 'faster_rcnn_resnet50'
- first_stage_features_stride: 8
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.125, 0.25, 0.5, 1.0, 2.0] # base size=256**2 => anchor sizes=32 64 128 256 512
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 8
- width_stride: 8
- }
- }
- first_stage_atrous_rate: 2
- first_stage_box_predictor_conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- first_stage_nms_score_threshold: 0.0
- first_stage_nms_iou_threshold: 0.7
- first_stage_max_proposals: 512
- first_stage_localization_loss_weight: 2.0
- first_stage_objectness_loss_weight: 1.0
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_batch_size: 512
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- use_dropout: false
- dropout_keep_probability: 1.0
- predict_instance_masks: true
- mask_height: 14
- mask_width: 14
- mask_prediction_conv_depth: 0
- mask_prediction_num_conv_layers: 3 # matches the Mask R-CNN mask heads
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.0
- iou_threshold: 0.6
- max_detections_per_class: 2000
- max_total_detections: 2000
- }
- score_converter: SOFTMAX
- }
- second_stage_localization_loss_weight: 2.0
- second_stage_classification_loss_weight: 1.0
- second_stage_mask_prediction_loss_weight: 4.0
- }
-}
-
-train_config: {
- batch_size: 4
- optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.01
- schedule {
- step: 120000
- learning_rate: .001
- }
- schedule {
- step: 160000
- learning_rate: .0001
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
- }
- gradient_clipping_by_norm: 10.0
- #fine_tune_checkpoint: "/home/mehdisharif/data/coco/resnet_v1_50.ckpt"
- #from_detection_checkpoint: True
- # Note: The line below caps the training process at 20M steps. The learning
- # rate schedule above decays at 120K and 160K steps, so training continues
- # long after the final decay; lower this value to stop training earlier.
- num_steps: 20000000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "/home/mehdisharif/data/coco/output2017/coco_train.record"
- }
- label_map_path: "/home/mehdisharif/data/coco/output2017/mscoco_label_map.pbtxt"
- load_instance_masks: true
- mask_type: PNG_MASKS
-}
-
-eval_config: {
- metrics_set: ['coco_detection_metrics', 'coco_mask_metrics']
- num_examples: 50
- # Note: The line below limits the evaluation process to a single
- # evaluation. Remove it to evaluate indefinitely.
- max_evals: 1
- num_visualizations: 50
- eval_interval_secs: 120
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "/home/mehdisharif/data/coco/output2017/coco_val.record"
- }
- label_map_path: "/home/mehdisharif/data/coco/output2017/mscoco_label_map.pbtxt"
- load_instance_masks: true
- mask_type: PNG_MASKS
- shuffle: false
- num_readers: 1
-}
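Both deleted configs are `pipeline_pb2.TrainEvalPipelineConfig` text protos that the run loop below parses through the Object Detection API's `config_util` helper. A minimal sketch of loading this config and inspecting a field (the relative path is hypothetical):

```python
from object_detection.utils import config_util

# Hypothetical path to the config above.
configs = config_util.get_configs_from_pipeline_file(
    'configs/e2e_mask_rcnn_R-50-C4_atrous.config')

model_config = configs['model']              # the faster_rcnn {...} block
train_config = configs['train_config']      # batch size, optimizer, num_steps
print(model_config.faster_rcnn.num_classes)  # 81
```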
diff --git a/research/mlperf_object_detection/Mask_RCNN/configs/mask_rcnn_resnet50_atrous_coco.config b/research/mlperf_object_detection/Mask_RCNN/configs/mask_rcnn_resnet50_atrous_coco.config
deleted file mode 100644
index 17c8bd5e7c1400938f14eb068bb6ff60f445f109..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/configs/mask_rcnn_resnet50_atrous_coco.config
+++ /dev/null
@@ -1,169 +0,0 @@
-# Mask R-CNN with Resnet-50 (v1), Atrous version
-# Configured for MSCOCO Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- faster_rcnn {
- num_classes: 90
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 800
- max_dimension: 1365
- }
- }
- number_of_stages: 3
- feature_extractor {
- type: 'faster_rcnn_resnet50'
- first_stage_features_stride: 8
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 8
- width_stride: 8
- }
- }
- first_stage_atrous_rate: 2
- first_stage_box_predictor_conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- first_stage_nms_score_threshold: 0.0
- first_stage_nms_iou_threshold: 0.7
- first_stage_max_proposals: 300
- first_stage_localization_loss_weight: 2.0
- first_stage_objectness_loss_weight: 1.0
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- use_dropout: false
- dropout_keep_probability: 1.0
- predict_instance_masks: true
- mask_height: 33
- mask_width: 33
- mask_prediction_conv_depth: 0
- mask_prediction_num_conv_layers: 4
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.0
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- second_stage_localization_loss_weight: 2.0
- second_stage_classification_loss_weight: 1.0
- second_stage_mask_prediction_loss_weight: 4.0
- }
-}
-
-train_config: {
- batch_size: 2
- optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.0003
- schedule {
- step: 900000
- learning_rate: .00003
- }
- schedule {
- step: 1200000
- learning_rate: .000003
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
- }
- gradient_clipping_by_norm: 10.0
- #fine_tune_checkpoint: ""
- from_detection_checkpoint: false
- # Note: Uncommenting the line below limits the training process to 200K
- # steps, which effectively bypasses the learning rate schedule above (the
- # first decay only happens at 900K steps, so the learning rate would never
- # decay). Leave it commented out to train indefinitely.
- #num_steps: 200000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/coco_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
- load_instance_masks: true
- mask_type: PNG_MASKS
-}
-
-eval_config: {
- metrics_set: ['coco_detection_metrics', 'coco_mask_metrics']
- num_examples: 50
- # Note: The line below limits the evaluation process to a single
- # evaluation. Remove it to evaluate indefinitely.
- max_evals: 1
- num_visualizations: 50
- eval_interval_secs: 120
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/coco_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
- load_instance_masks: true
- mask_type: PNG_MASKS
- shuffle: true
- num_readers: 1
-}
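Unlike the first config, this one keeps its input paths as `PATH_TO_BE_CONFIGURED` placeholders. A small sketch, assuming a hypothetical data root, of filling them in before training:

```python
import tensorflow as tf

CONFIG_PATH = 'configs/mask_rcnn_resnet50_atrous_coco.config'
DATA_ROOT = '/data/coco/output2017'  # hypothetical location of records/label map

# Read the text proto, substitute the placeholder, and write it back.
with tf.gfile.GFile(CONFIG_PATH) as f:
    text = f.read()
with tf.gfile.GFile(CONFIG_PATH, 'w') as f:
    f.write(text.replace('PATH_TO_BE_CONFIGURED', DATA_ROOT))
```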
diff --git a/research/mlperf_object_detection/Mask_RCNN/mask_rcnn_run_loop.py b/research/mlperf_object_detection/Mask_RCNN/mask_rcnn_run_loop.py
deleted file mode 100644
index 70c9185c94d25b39c2f778ed6c6a769d8aa3108c..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/mask_rcnn_run_loop.py
+++ /dev/null
@@ -1,243 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Training and evaluation for Mask_RCNN.
-
- This module repeatedly runs 1 training epoch and then evaluation
- ##add explanation for all the options!!!!!!!
-"""
-
-import functools
-import json
-import os
-
-from object_detection import evaluator
-from object_detection import trainer
-from object_detection.builders import dataset_builder
-from object_detection.builders import graph_rewriter_builder
-from object_detection.builders import model_builder
-from object_detection.utils import config_util
-from object_detection.utils import dataset_util
-from object_detection.utils import label_map_util
-
-import tensorflow as tf
-
-tf.logging.set_verbosity(tf.logging.INFO)
-
-flags = tf.app.flags
-flags.DEFINE_string('master', '', 'Name of the TensorFlow master to use.')
-flags.DEFINE_integer('task', 0, 'task id')
-flags.DEFINE_integer('num_clones', 1, 'Number of clones to deploy per worker.')
-flags.DEFINE_boolean('clone_on_cpu', False,
- 'Force clones to be deployed on CPU. Note that even if '
- 'set to False (allowing ops to run on gpu), some ops may '
- 'still be run on the CPU if they have no GPU kernel.')
-flags.DEFINE_integer('worker_replicas', 1, 'Number of worker+trainer '
- 'replicas.')
-flags.DEFINE_integer('parameter_server_tasks', 0,
- 'Number of parameter server tasks. If 0, no '
- 'parameter server is used.')
-flags.DEFINE_string('train_dir', '',
- 'Directory to save the checkpoints and training summaries.')
-
-flags.DEFINE_string('pipeline_config_path', '',
- 'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
- 'file. If provided, other configs are ignored')
-
-flags.DEFINE_boolean('eval_training_data', False,
- 'If training data should be evaluated for this job.')
-
-flags.DEFINE_string('eval_dir', '',
- 'Directory to write eval summaries to.')
-
-flags.DEFINE_boolean('run_once', False, 'Option to only run a single pass of '
- 'evaluation. Overrides the `max_evals`'
- ' parameter in the provided config.')
-flags.DEFINE_float('box_min_ap', -1, 'Option to run until the box average '
- 'precision reaches this number.')
-flags.DEFINE_float('mask_min_ap', -1, 'Option to run until the mask average '
- 'precision reaches this number.')
-flags.DEFINE_integer('epochs_between_evals', 1, 'Number of training steps to '
- 'run between evaluations.')
-FLAGS = flags.FLAGS
-
-
-def stopping_criteria_met(eval_metrics, mask_min_ap, box_min_ap):
- """Returns true if both of the min precision criteria are met in the given
- evaluation metrics.
-
- Args:
- eval_metrics: dict of metrics names as keys and their corresponding values,
- containing "DetectionMasks_Precision/mAP", and
- "DetectionBoxes_Precision/mAP" fields.
- mask_min_ap: minimum desired mask average precision, will be ignored if -1
- box_min_ap: minimum desired box average precision, will be ignored if -1
-
- Returns:
- True if every criterion that is not -1 is met and at least one of the two
- criteria is set, False otherwise.
- """
- assert mask_min_ap == -1 or 0 < mask_min_ap < 1
- assert box_min_ap == -1 or 0 < box_min_ap < 1
- try:
- mask_mAP_reached = eval_metrics['DetectionMasks_Precision/mAP']
- box_mAP_reached = eval_metrics['DetectionBoxes_Precision/mAP']
- except KeyError as err:
- raise Exception('eval_metrics dict does not contain the mAP field') from err
-
- return ((mask_min_ap == -1 or mask_mAP_reached > mask_min_ap) and
- (box_min_ap == -1 or box_mAP_reached > box_min_ap) and
- (mask_min_ap != -1 or box_min_ap != -1))
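For example, with hypothetical metric values near the MLPerf quality targets, the helper only returns True once every configured threshold is exceeded:

```python
# Hypothetical values; the keys match those produced by evaluator.evaluate().
metrics = {'DetectionBoxes_Precision/mAP': 0.380,
           'DetectionMasks_Precision/mAP': 0.339}

stopping_criteria_met(metrics, mask_min_ap=0.336, box_min_ap=0.377)  # True
stopping_criteria_met(metrics, mask_min_ap=0.344, box_min_ap=0.377)  # False
stopping_criteria_met(metrics, mask_min_ap=-1, box_min_ap=-1)        # False: nothing set
```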
-
-
-def main(_):
- assert FLAGS.train_dir, '`train_dir` is missing.'
- assert FLAGS.pipeline_config_path, '`pipeline_config_path` is missing'
- assert FLAGS.eval_dir, '`eval_dir` is missing.'
-
- configs = config_util.get_configs_from_pipeline_file(
- FLAGS.pipeline_config_path)
- if FLAGS.task == 0:
- tf.gfile.MakeDirs(FLAGS.train_dir)
- tf.gfile.Copy(FLAGS.pipeline_config_path,
- os.path.join(FLAGS.train_dir, 'pipeline.config'),
- overwrite=True)
-
- tf.gfile.MakeDirs(FLAGS.eval_dir)
- tf.gfile.Copy(FLAGS.pipeline_config_path,
- os.path.join(FLAGS.eval_dir, 'pipeline.config'),
- overwrite=True)
-
- model_config = configs['model']
-
- train_config = configs['train_config']
- train_input_config = configs['train_input_config']
-
- eval_config = configs['eval_config']
- if FLAGS.eval_training_data:
- eval_input_config = configs['train_input_config']
- else:
- eval_input_config = configs['eval_input_config']
-
- # Run evaluation after every `epochs_between_evals` training steps. The
- # total number of training steps is taken from the config when provided.
- if train_config.num_steps:
- total_num_epochs = train_config.num_steps
- else:
- # TODO(mehdi): make it run indefinitely instead of using a large default.
- total_num_epochs = 20000000
- train_config.num_steps = FLAGS.epochs_between_evals
- total_training_cycle = total_num_epochs // train_config.num_steps
-
- train_model_fn = functools.partial(model_builder.build,
- model_config=model_config,
- is_training=True)
- eval_model_fn = functools.partial(model_builder.build,
- model_config=model_config,
- is_training=False)
-
- def get_next(config):
- return dataset_util.make_initializable_iterator(
- dataset_builder.build(config)).get_next()
-
- # functions to create a tensor input dictionary for both training & evaluation
- train_input_dict_fn = functools.partial(get_next, train_input_config)
- eval_input_dict_fn = functools.partial(get_next, eval_input_config)
-
- # If not explicitly specified in the constructor and the TF_CONFIG
- # environment variable is present, load cluster_spec from TF_CONFIG.
- env = json.loads(os.environ.get('TF_CONFIG', '{}'))
- cluster_data = env.get('cluster', None)
- cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None
- task_data = env.get('task', {'type': 'master', 'index': 0})
- task_info = type('TaskSpec', (object,), task_data)
-
- # Parameters for a single worker.
- parameter_server_tasks = 0
- worker_replicas = 1
- worker_job_name = 'lonely_worker'
- task = 0
- is_chief = True
- master = ''
-
- if cluster_data and 'worker' in cluster_data:
- # The total number of worker replicas includes the "worker"s and the "master".
- worker_replicas = len(cluster_data['worker']) + 1
- if cluster_data and 'ps' in cluster_data:
- parameter_server_tasks = len(cluster_data['ps'])
-
- if worker_replicas > 1 and parameter_server_tasks < 1:
- raise ValueError('At least 1 ps task is needed for distributed training.')
-
- if worker_replicas >= 1 and parameter_server_tasks > 0:
- # Set up distributed training.
- server = tf.train.Server(tf.train.ClusterSpec(cluster), protocol='grpc',
- job_name=task_info.type,
- task_index=task_info.index)
- if task_info.type == 'ps':
- server.join()
- return
-
- worker_job_name = '%s/task:%d' % (task_info.type, task_info.index)
- task = task_info.index
- is_chief = (task_info.type == 'master')
- master = server.target
-
- label_map = label_map_util.load_labelmap(eval_input_config.label_map_path)
- max_num_classes = max([item.id for item in label_map.item])
- categories = label_map_util.convert_label_map_to_categories(label_map,
- max_num_classes)
-
- if FLAGS.run_once:
- eval_config.max_evals = 1
-
- train_graph_rewriter_fn = eval_graph_rewriter_fn = None
- if 'graph_rewriter_config' in configs:
- train_graph_rewriter_fn = graph_rewriter_builder.build(
- configs['graph_rewriter_config'], is_training=True)
- eval_graph_rewriter_fn = graph_rewriter_builder.build(
- configs['graph_rewriter_config'], is_training=False)
-
- def train():
- return trainer.train(create_tensor_dict_fn=train_input_dict_fn,
- create_model_fn=train_model_fn,
- train_config=train_config, master=master, task=task,
- num_clones=FLAGS.num_clones,
- worker_replicas=worker_replicas,
- clone_on_cpu=FLAGS.clone_on_cpu,
- ps_tasks=parameter_server_tasks,
- worker_job_name=worker_job_name,
- is_chief=is_chief, train_dir=FLAGS.train_dir,
- graph_hook_fn=train_graph_rewriter_fn)
-
- def evaluate():
- return evaluator.evaluate(eval_input_dict_fn, eval_model_fn, eval_config,
- categories, FLAGS.train_dir, FLAGS.eval_dir,
- graph_hook_fn=eval_graph_rewriter_fn)
-
- for cycle_index in range(total_training_cycle):
- tf.logging.info('Starting a training cycle: %d/%d',
- cycle_index, total_training_cycle)
- train()
- tf.logging.info('Starting to evaluate.')
- eval_metrics = evaluate()
- if stopping_criteria_met(eval_metrics, FLAGS.mask_min_ap, FLAGS.box_min_ap):
- tf.logging.info('Stopping criteria met. Training stopped')
- break
-
-
-if __name__ == '__main__':
- tf.app.run()
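The cluster handling in `main` is driven entirely by the `TF_CONFIG` environment variable. A sketch of the JSON shape it expects, with hypothetical hostnames (one master, one extra worker, and one parameter server, so `worker_replicas` becomes 2 and `parameter_server_tasks` becomes 1):

```python
import json
import os

# Hypothetical cluster; the run loop reads this at startup.
os.environ['TF_CONFIG'] = json.dumps({
    'cluster': {
        'master': ['host0:2222'],
        'worker': ['host1:2222'],
        'ps': ['host2:2222'],
    },
    # This process acts as the single non-master worker.
    'task': {'type': 'worker', 'index': 0},
})
```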
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/CONTRIBUTING.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/CONTRIBUTING.md
deleted file mode 100644
index e3d87e3ce90fb4dd22b00a2c5368bf17c3610661..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/CONTRIBUTING.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# Contributing to the Tensorflow Object Detection API
-
-Patches to Tensorflow Object Detection API are welcome!
-
-We require contributors to fill out either the individual or corporate
-Contributor License Agreement (CLA).
-
- * If you are an individual writing original source code and you're sure you own the intellectual property, then you'll need to sign an [individual CLA](http://code.google.com/legal/individual-cla-v1.0.html).
- * If you work for a company that wants to allow you to contribute your work, then you'll need to sign a [corporate CLA](http://code.google.com/legal/corporate-cla-v1.0.html).
-
-Please follow the
-[Tensorflow contributing guidelines](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md)
-when submitting pull requests.
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/README.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/README.md
deleted file mode 100644
index 52bf3565ede8269b90bd148f86c2bb73b4fc112f..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/README.md
+++ /dev/null
@@ -1,190 +0,0 @@
-
-# Tensorflow Object Detection API
-Creating accurate machine learning models capable of localizing and identifying
-multiple objects in a single image remains a core challenge in computer vision.
-The TensorFlow Object Detection API is an open source framework built on top of
-TensorFlow that makes it easy to construct, train and deploy object detection
-models. At Google we’ve certainly found this codebase to be useful for our
-computer vision needs, and we hope that you will as well.
-
-
-
-Contributions to the codebase are welcome and we would love to hear back from
-you if you find this API useful. Finally if you use the Tensorflow Object
-Detection API for a research publication, please consider citing:
-
-```
-"Speed/accuracy trade-offs for modern convolutional object detectors."
-Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I, Wojna Z,
-Song Y, Guadarrama S, Murphy K, CVPR 2017
-```
-\[[link](https://arxiv.org/abs/1611.10012)\]\[[bibtex](
-https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAWUIIlnPZ_L9jxvPwcC49kDlELtaeIyU-&scisf=4&ct=citation&cd=-1&hl=en&scfhb=1)\]
-
-
-
-
-
-## Maintainers
-
-* Jonathan Huang, github: [jch1](https://github.com/jch1)
-* Vivek Rathod, github: [tombstone](https://github.com/tombstone)
-* Ronny Votel, github: [ronnyvotel](https://github.com/ronnyvotel)
-* Derek Chow, github: [derekjchow](https://github.com/derekjchow)
-* Chen Sun, github: [jesu9](https://github.com/jesu9)
-* Menglong Zhu, github: [dreamdragon](https://github.com/dreamdragon)
-* Alireza Fathi, github: [afathi3](https://github.com/afathi3)
-* Zhichao Lu, github: [pkulzc](https://github.com/pkulzc)
-
-
-## Table of contents
-
-Quick Start:
-
- * Quick Start: Jupyter notebook for off-the-shelf inference
- * Quick Start: Training a pet detector
-
-Setup:
-
- * Installation
- * Configuring an object detection pipeline
- * Preparing inputs
-
-Running:
-
- * Running locally
- * Running on the cloud
-
-Extras:
-
- * Tensorflow detection model zoo
- * Exporting a trained model for inference
- * Defining your own model architecture
- * Bringing in your own dataset
- * Supported object detection evaluation protocols
- * Inference and evaluation on the Open Images dataset
- * Run an instance segmentation model
-
-## Getting Help
-
-To get help with issues you may encounter using the Tensorflow Object Detection
-API, create a new question on [StackOverflow](https://stackoverflow.com/) with
-the tags "tensorflow" and "object-detection".
-
-Please report bugs (actually broken code, not usage questions) to the
-tensorflow/models GitHub
-[issue tracker](https://github.com/tensorflow/models/issues), prefixing the
-issue name with "object_detection".
-
-Please check the [FAQ](g3doc/faq.md) for frequently asked questions before
-reporting an issue.
-
-
-## Release information
-
-### April 30, 2018
-
-We have released a Faster R-CNN detector with ResNet-101 feature extractor trained on [AVA](https://research.google.com/ava/) v2.1.
-Compared with other commonly used object detectors, it changes the action classification loss function to per-class Sigmoid loss to handle boxes with multiple labels.
-The model is trained on the training split of AVA v2.1 for 1.5M iterations, achieving a mean AP of 11.25% over 60 classes on the validation split of AVA v2.1.
-For more details please refer to this [paper](https://arxiv.org/abs/1705.08421).
-
-Thanks to contributors: Chen Sun, David Ross
-
-### April 2, 2018
-
-Supercharge your mobile phones with the next generation mobile object detector!
-We are adding support for MobileNet V2 with SSDLite presented in
-[MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381).
-This model is 35% faster than Mobilenet V1 SSD on a Google Pixel phone CPU (200ms vs. 270ms) at the same accuracy.
-Along with the model definition, we are also releasing a model checkpoint trained on the COCO dataset.
-
-Thanks to contributors: Menglong Zhu, Mark Sandler, Zhichao Lu, Vivek Rathod, Jonathan Huang
-
-### February 9, 2018
-
-We now support instance segmentation!! In this API update we support a number of instance segmentation models similar to those discussed in the [Mask R-CNN paper](https://arxiv.org/abs/1703.06870). For further details refer to
-[our slides](http://presentations.cocodataset.org/Places17-GMRI.pdf) from the 2017 Coco + Places Workshop.
-Refer to the section on [Running an Instance Segmentation Model](g3doc/instance_segmentation.md) for instructions on how to configure a model
-that predicts masks in addition to object bounding boxes.
-
-Thanks to contributors: Alireza Fathi, Zhichao Lu, Vivek Rathod, Ronny Votel, Jonathan Huang
-
-### November 17, 2017
-
-As a part of the Open Images V3 release we have released:
-
-* An implementation of the Open Images evaluation metric and the [protocol](g3doc/evaluation_protocols.md#open-images).
-* Additional tools to separate inference of detection and evaluation (see [this tutorial](g3doc/oid_inference_and_evaluation.md)).
-* A new detection model trained on the Open Images V2 data release (see [Open Images model](g3doc/detection_model_zoo.md#open-images-models)).
-
-See more information on the [Open Images website](https://github.com/openimages/dataset)!
-
-Thanks to contributors: Stefan Popov, Alina Kuznetsova
-
-### November 6, 2017
-
-We have re-released faster versions of our (pre-trained) models in the
-model zoo. In addition to what
-was available before, we are also adding Faster R-CNN models trained on COCO
-with Inception V2 and Resnet-50 feature extractors, as well as a Faster R-CNN
-with Resnet-101 model trained on the KITTI dataset.
-
-Thanks to contributors: Jonathan Huang, Vivek Rathod, Derek Chow,
-Tal Remez, Chen Sun.
-
-### October 31, 2017
-
-We have released a new state-of-the-art model for object detection using
-the Faster-RCNN with the
-[NASNet-A image featurization](https://arxiv.org/abs/1707.07012). This
-model achieves mAP of 43.1% on the test-dev validation dataset for COCO,
-improving on the best available model in the zoo by 6% in terms
-of absolute mAP.
-
-Thanks to contributors: Barret Zoph, Vijay Vasudevan, Jonathon Shlens, Quoc Le
-
-### August 11, 2017
-
-We have released an update to the [Android Detect
-demo](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android)
-which will now run models trained using the Tensorflow Object
-Detection API on an Android device. By default, it currently runs a
-frozen SSD w/Mobilenet detector trained on COCO, but we encourage
-you to try out other detection models!
-
-Thanks to contributors: Jonathan Huang, Andrew Harp
-
-
-### June 15, 2017
-
-In addition to our base Tensorflow detection model definitions, this
-release includes:
-
-* A selection of trainable detection models, including:
- * Single Shot Multibox Detector (SSD) with MobileNet,
- * SSD with Inception V2,
- * Region-Based Fully Convolutional Networks (R-FCN) with Resnet 101,
- * Faster RCNN with Resnet 101,
- * Faster RCNN with Inception Resnet v2
-* Frozen weights (trained on the COCO dataset) for each of the above models to
- be used for out-of-the-box inference purposes.
-* A [Jupyter notebook](object_detection_tutorial.ipynb) for performing
- out-of-the-box inference with one of our released models
-* Convenient [local training](g3doc/running_locally.md) scripts as well as
- distributed training and evaluation pipelines via
- [Google Cloud](g3doc/running_on_cloud.md).
-
-
-Thanks to contributors: Jonathan Huang, Vivek Rathod, Derek Chow,
-Chen Sun, Menglong Zhu, Matthew Tang, Anoop Korattikara, Alireza Fathi, Ian Fischer, Zbigniew Wojna, Yang Song, Sergio Guadarrama, Jasper Uijlings,
-Viacheslav Kovalevskyi, Kevin Murphy
-
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/grid_anchor_generator.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/grid_anchor_generator.py
deleted file mode 100644
index ba43f0135481e433402b77e17a5db39a90ace8be..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/grid_anchor_generator.py
+++ /dev/null
@@ -1,205 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Generates grid anchors on the fly as used in Faster RCNN.
-
-Generates grid anchors on the fly as described in:
-"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"
-Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
-"""
-
-import tensorflow as tf
-
-from object_detection.core import anchor_generator
-from object_detection.core import box_list
-from object_detection.utils import ops
-
-
-class GridAnchorGenerator(anchor_generator.AnchorGenerator):
- """Generates a grid of anchors at given scales and aspect ratios."""
-
- def __init__(self,
- scales=(0.5, 1.0, 2.0),
- aspect_ratios=(0.5, 1.0, 2.0),
- base_anchor_size=None,
- anchor_stride=None,
- anchor_offset=None):
- """Constructs a GridAnchorGenerator.
-
- Args:
- scales: a list of (float) scales, default=(0.5, 1.0, 2.0)
- aspect_ratios: a list of (float) aspect ratios, default=(0.5, 1.0, 2.0)
- base_anchor_size: base anchor size as [height, width]
- (length-2 float32 list or tensor, default=[256, 256])
- anchor_stride: difference in centers between base anchors for adjacent
- grid positions (length-2 float32 list or tensor,
- default=[16, 16])
- anchor_offset: center of the anchor with scale and aspect ratio 1 for the
- upper left element of the grid, this should be zero for
- feature networks with only VALID padding and even receptive
- field size, but may need additional calculation if other
- padding is used (length-2 float32 list or tensor,
- default=[0, 0])
- """
- # Handle argument defaults
- if base_anchor_size is None:
- base_anchor_size = [256, 256]
- base_anchor_size = tf.to_float(tf.convert_to_tensor(base_anchor_size))
- if anchor_stride is None:
- anchor_stride = [16, 16]
- anchor_stride = tf.to_float(tf.convert_to_tensor(anchor_stride))
- if anchor_offset is None:
- anchor_offset = [0, 0]
- anchor_offset = tf.to_float(tf.convert_to_tensor(anchor_offset))
-
- self._scales = scales
- self._aspect_ratios = aspect_ratios
- self._base_anchor_size = base_anchor_size
- self._anchor_stride = anchor_stride
- self._anchor_offset = anchor_offset
-
- def name_scope(self):
- return 'GridAnchorGenerator'
-
- def num_anchors_per_location(self):
- """Returns the number of anchors per spatial location.
-
- Returns:
- a list of integers, one for each expected feature map to be passed to
- the `generate` function.
- """
- return [len(self._scales) * len(self._aspect_ratios)]
-
- def _generate(self, feature_map_shape_list):
- """Generates a collection of bounding boxes to be used as anchors.
-
- Args:
- feature_map_shape_list: list of pairs of convnet layer resolutions in the
- format [(height_0, width_0)]. For example, setting
- feature_map_shape_list=[(8, 8)] asks for anchors that correspond
- to an 8x8 layer. For this anchor generator, only lists of length 1 are
- allowed.
-
- Returns:
- boxes_list: a list of BoxLists each holding anchor boxes corresponding to
- the input feature map shapes.
-
- Raises:
- ValueError: if feature_map_shape_list is not a list of length 1.
- ValueError: if feature_map_shape_list does not consist of pairs of
- integers
- """
- if not (isinstance(feature_map_shape_list, list)
- and len(feature_map_shape_list) == 1):
- raise ValueError('feature_map_shape_list must be a list of length 1.')
- if not all([isinstance(list_item, tuple) and len(list_item) == 2
- for list_item in feature_map_shape_list]):
- raise ValueError('feature_map_shape_list must be a list of pairs.')
- grid_height, grid_width = feature_map_shape_list[0]
- scales_grid, aspect_ratios_grid = ops.meshgrid(self._scales,
- self._aspect_ratios)
- scales_grid = tf.reshape(scales_grid, [-1])
- aspect_ratios_grid = tf.reshape(aspect_ratios_grid, [-1])
- anchors = tile_anchors(grid_height,
- grid_width,
- scales_grid,
- aspect_ratios_grid,
- self._base_anchor_size,
- self._anchor_stride,
- self._anchor_offset)
-
- num_anchors = anchors.num_boxes_static()
- if num_anchors is None:
- num_anchors = anchors.num_boxes()
- anchor_indices = tf.zeros([num_anchors])
- anchors.add_field('feature_map_index', anchor_indices)
- return [anchors]
-
-
-def tile_anchors(grid_height,
- grid_width,
- scales,
- aspect_ratios,
- base_anchor_size,
- anchor_stride,
- anchor_offset):
- """Create a tiled set of anchors strided along a grid in image space.
-
- This op creates a set of anchor boxes by placing a "basis" collection of
- boxes with user-specified scales and aspect ratios centered at evenly
- distributed points along a grid. The basis collection is specified via the
- scale and aspect_ratios arguments. For example, setting scales=[.1, .2, .2]
- and aspect ratios = [2,2,1/2] means that we create three boxes: one with scale
- .1, aspect ratio 2, one with scale .2, aspect ratio 2, and one with scale .2
- and aspect ratio 1/2. Each box is multiplied by "base_anchor_size" before
- placing it over its respective center.
-
- Grid points are specified via grid_height, grid_width parameters as well as
- the anchor_stride and anchor_offset parameters.
-
- Args:
- grid_height: size of the grid in the y direction (int or int scalar tensor)
- grid_width: size of the grid in the x direction (int or int scalar tensor)
- scales: a 1-d (float) tensor representing the scale of each box in the
- basis set.
- aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each
- box in the basis set. The length of the scales and aspect_ratios tensors
- must be equal.
- base_anchor_size: base anchor size as [height, width]
- (float tensor of shape [2])
- anchor_stride: difference in centers between base anchors for adjacent grid
- positions (float tensor of shape [2])
- anchor_offset: center of the anchor with scale and aspect ratio 1 for the
- upper left element of the grid, this should be zero for
- feature networks with only VALID padding and even receptive
- field size, but may need some additional calculation if other
- padding is used (float tensor of shape [2])
- Returns:
- a BoxList holding a collection of N anchor boxes
- """
- ratio_sqrts = tf.sqrt(aspect_ratios)
- heights = scales / ratio_sqrts * base_anchor_size[0]
- widths = scales * ratio_sqrts * base_anchor_size[1]
-
- # Get a grid of box centers
- y_centers = tf.to_float(tf.range(grid_height))
- y_centers = y_centers * anchor_stride[0] + anchor_offset[0]
- x_centers = tf.to_float(tf.range(grid_width))
- x_centers = x_centers * anchor_stride[1] + anchor_offset[1]
- x_centers, y_centers = ops.meshgrid(x_centers, y_centers)
-
- widths_grid, x_centers_grid = ops.meshgrid(widths, x_centers)
- heights_grid, y_centers_grid = ops.meshgrid(heights, y_centers)
- bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=3)
- bbox_sizes = tf.stack([heights_grid, widths_grid], axis=3)
- bbox_centers = tf.reshape(bbox_centers, [-1, 2])
- bbox_sizes = tf.reshape(bbox_sizes, [-1, 2])
- bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes)
- return box_list.BoxList(bbox_corners)
-
-
-def _center_size_bbox_to_corners_bbox(centers, sizes):
- """Converts bbox center-size representation to corners representation.
-
- Args:
- centers: a tensor with shape [N, 2] representing bounding box centers
- sizes: a tensor with shape [N, 2] representing bounding box sizes as
- [height, width]
-
- Returns:
- corners: tensor with shape [N, 4] representing bounding boxes in corners
- representation
- """
- return tf.concat([centers - .5 * sizes, centers + .5 * sizes], 1)
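The height/width arithmetic at the top of `tile_anchors` is what turns the anchor scales in the C4 config above into the familiar 32-512 pixel boxes. A NumPy sketch of just that step:

```python
import numpy as np

# Scales from e2e_mask_rcnn_R-50-C4_atrous.config with the default base
# anchor size of 256; at aspect ratio 1.0 heights and widths coincide.
scales = np.array([0.125, 0.25, 0.5, 1.0, 2.0])
aspect_ratio = 1.0
ratio_sqrt = np.sqrt(aspect_ratio)
heights = scales / ratio_sqrt * 256.0  # [ 32.  64. 128. 256. 512.]
widths = scales * ratio_sqrt * 256.0   # [ 32.  64. 128. 256. 512.]
```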
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/grid_anchor_generator_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/grid_anchor_generator_test.py
deleted file mode 100644
index 8de74aa7ede1c5d26bb72cff3d04e1a1a544f4f3..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/grid_anchor_generator_test.py
+++ /dev/null
@@ -1,104 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.grid_anchor_generator."""
-import numpy as np
-import tensorflow as tf
-
-from object_detection.anchor_generators import grid_anchor_generator
-from object_detection.utils import test_case
-
-
-class GridAnchorGeneratorTest(test_case.TestCase):
-
- def test_construct_single_anchor(self):
- """Builds a 1x1 anchor grid to test the size of the output boxes."""
- def graph_fn():
- scales = [0.5, 1.0, 2.0]
- aspect_ratios = [0.25, 1.0, 4.0]
- anchor_offset = [7, -3]
- anchor_generator = grid_anchor_generator.GridAnchorGenerator(
- scales, aspect_ratios, anchor_offset=anchor_offset)
- anchors_list = anchor_generator.generate(feature_map_shape_list=[(1, 1)])
- anchor_corners = anchors_list[0].get()
- return (anchor_corners,)
- exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
- [-505, -131, 519, 125], [-57, -67, 71, 61],
- [-121, -131, 135, 125], [-249, -259, 263, 253],
- [-25, -131, 39, 125], [-57, -259, 71, 253],
- [-121, -515, 135, 509]]
- anchor_corners_out = self.execute(graph_fn, [])
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
- def test_construct_anchor_grid(self):
- def graph_fn():
- base_anchor_size = [10, 10]
- anchor_stride = [19, 19]
- anchor_offset = [0, 0]
- scales = [0.5, 1.0, 2.0]
- aspect_ratios = [1.0]
-
- anchor_generator = grid_anchor_generator.GridAnchorGenerator(
- scales,
- aspect_ratios,
- base_anchor_size=base_anchor_size,
- anchor_stride=anchor_stride,
- anchor_offset=anchor_offset)
-
- anchors_list = anchor_generator.generate(feature_map_shape_list=[(2, 2)])
- anchor_corners = anchors_list[0].get()
- return (anchor_corners,)
- exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
- [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
- [-5., 14., 5, 24], [-10., 9., 10, 29],
- [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
- [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
- [14., 14., 24, 24], [9., 9., 29, 29]]
- anchor_corners_out = self.execute(graph_fn, [])
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
- def test_construct_anchor_grid_with_dynamic_feature_map_shapes(self):
- def graph_fn(feature_map_height, feature_map_width):
- base_anchor_size = [10, 10]
- anchor_stride = [19, 19]
- anchor_offset = [0, 0]
- scales = [0.5, 1.0, 2.0]
- aspect_ratios = [1.0]
- anchor_generator = grid_anchor_generator.GridAnchorGenerator(
- scales,
- aspect_ratios,
- base_anchor_size=base_anchor_size,
- anchor_stride=anchor_stride,
- anchor_offset=anchor_offset)
-
- anchors_list = anchor_generator.generate(
- feature_map_shape_list=[(feature_map_height, feature_map_width)])
- anchor_corners = anchors_list[0].get()
- return (anchor_corners,)
-
- exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
- [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
- [-5., 14., 5, 24], [-10., 9., 10, 29],
- [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
- [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
- [14., 14., 24, 24], [9., 9., 29, 29]]
- anchor_corners_out = self.execute_cpu(graph_fn,
- [np.array(2, dtype=np.int32),
- np.array(2, dtype=np.int32)])
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
-
-if __name__ == '__main__':
- tf.test.main()
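The expected corners in `test_construct_single_anchor` can be reproduced by hand from the tiling math above; for instance, the first row `[-121, -35, 135, 29]` follows from scale 0.5, aspect ratio 0.25, the default base size 256, and the (7, -3) offset:

```python
import numpy as np

scale, aspect, base = 0.5, 0.25, 256.0
offset_y, offset_x = 7.0, -3.0

height = scale / np.sqrt(aspect) * base  # 256.0
width = scale * np.sqrt(aspect) * base   # 64.0
corners = [offset_y - height / 2, offset_x - width / 2,
           offset_y + height / 2, offset_x + width / 2]
# -> [-121.0, -35.0, 135.0, 29.0]
```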
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiple_grid_anchor_generator.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiple_grid_anchor_generator.py
deleted file mode 100644
index bd785c171f686f1c524b78efbc7d03dbae4f7940..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiple_grid_anchor_generator.py
+++ /dev/null
@@ -1,336 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Generates grid anchors on the fly corresponding to multiple CNN layers.
-
-Generates grid anchors on the fly corresponding to multiple CNN layers as
-described in:
-"SSD: Single Shot MultiBox Detector"
-Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
-Cheng-Yang Fu, Alexander C. Berg
-(see Section 2.2: Choosing scales and aspect ratios for default boxes)
-"""
-
-import numpy as np
-
-import tensorflow as tf
-
-from object_detection.anchor_generators import grid_anchor_generator
-from object_detection.core import anchor_generator
-from object_detection.core import box_list_ops
-
-
-class MultipleGridAnchorGenerator(anchor_generator.AnchorGenerator):
- """Generate a grid of anchors for multiple CNN layers."""
-
- def __init__(self,
- box_specs_list,
- base_anchor_size=None,
- anchor_strides=None,
- anchor_offsets=None,
- clip_window=None):
- """Constructs a MultipleGridAnchorGenerator.
-
- To construct anchors, at multiple grid resolutions, one must provide a
- list of feature_map_shape_list (e.g., [(8, 8), (4, 4)]), and for each grid
- size, a corresponding list of (scale, aspect ratio) box specifications.
-
- For example:
- box_specs_list = [[(.1, 1.0), (.1, 2.0)], # for 8x8 grid
- [(.2, 1.0), (.3, 1.0), (.2, 2.0)]] # for 4x4 grid
-
- To support the fully convolutional setting, we pass grid sizes in at
- generation time, while scale and aspect ratios are fixed at construction
- time.
-
- Args:
- box_specs_list: list of list of (scale, aspect ratio) pairs with the
- outside list having the same number of entries as feature_map_shape_list
- (which is passed in at generation time).
- base_anchor_size: base anchor size as [height, width]
- (length-2 float tensor, default=[1.0, 1.0]).
- The height and width values are normalized to the
- minimum dimension of the input height and width, so that
- when the base anchor height equals the base anchor
- width, the resulting anchor is square even if the input
- image is not square.
- anchor_strides: list of pairs of strides in pixels (in y and x directions
- respectively). For example, setting anchor_strides=[(25, 25), (50, 50)]
- means that we want the anchors corresponding to the first layer to be
- strided by 25 pixels and those in the second layer to be strided by 50
- pixels in both y and x directions. If anchor_strides=None, they are set
- to be the reciprocal of the corresponding feature map shapes.
- anchor_offsets: list of pairs of offsets in pixels (in y and x directions
- respectively). The offset specifies where we want the center of the
- (0, 0)-th anchor to lie for each layer. For example, setting
- anchor_offsets=[(10, 10), (20, 20)] means that we want the
- (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space
- and likewise that we want the (0, 0)-th anchor of the second layer to
- lie at (20, 20) in pixel space. If anchor_offsets=None, then they are
- set to be half of the corresponding anchor stride.
- clip_window: a tensor of shape [4] specifying a window to which all
- anchors should be clipped. If clip_window is None, then no clipping
- is performed.
-
- Raises:
- ValueError: if box_specs_list is not a list of list of pairs
- ValueError: if clip_window is not either None or a tensor of shape [4]
- """
- if isinstance(box_specs_list, list) and all(
- [isinstance(list_item, list) for list_item in box_specs_list]):
- self._box_specs = box_specs_list
- else:
- raise ValueError('box_specs_list is expected to be a '
- 'list of lists of pairs')
- if base_anchor_size is None:
- base_anchor_size = tf.constant([256, 256], dtype=tf.float32)
- self._base_anchor_size = base_anchor_size
- self._anchor_strides = anchor_strides
- self._anchor_offsets = anchor_offsets
- if clip_window is not None and clip_window.get_shape().as_list() != [4]:
- raise ValueError('clip_window must either be None or a shape [4] tensor')
- self._clip_window = clip_window
- self._scales = []
- self._aspect_ratios = []
- for box_spec in self._box_specs:
- if not all([isinstance(entry, tuple) and len(entry) == 2
- for entry in box_spec]):
- raise ValueError('box_specs_list is expected to be a '
- 'list of lists of pairs')
- scales, aspect_ratios = zip(*box_spec)
- self._scales.append(scales)
- self._aspect_ratios.append(aspect_ratios)
-
- for arg, arg_name in zip([self._anchor_strides, self._anchor_offsets],
- ['anchor_strides', 'anchor_offsets']):
- if arg and not (isinstance(arg, list) and
- len(arg) == len(self._box_specs)):
- raise ValueError('%s must be a list with the same length '
- 'as self._box_specs' % arg_name)
- if arg and not all([
- isinstance(list_item, tuple) and len(list_item) == 2
- for list_item in arg
- ]):
- raise ValueError('%s must be a list of pairs.' % arg_name)
-
- def name_scope(self):
- return 'MultipleGridAnchorGenerator'
-
- def num_anchors_per_location(self):
- """Returns the number of anchors per spatial location.
-
- Returns:
- a list of integers, one for each expected feature map to be passed to
- the Generate function.
- """
- return [len(box_specs) for box_specs in self._box_specs]
-
- def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
- """Generates a collection of bounding boxes to be used as anchors.
-
- The number of anchors generated for a single grid with shape MxM where we
- place k boxes over each grid center is k*M^2 and thus the total number of
- anchors is the sum over all grids. In our box_specs_list example
- (see the constructor docstring), we would place two boxes over each grid
- point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and
- thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the
- output anchors follows the order of how the grid sizes and box_specs are
- specified (with box_spec index varying the fastest, followed by width
- index, then height index, then grid index).
-
- Args:
- feature_map_shape_list: list of pairs of convnet layer resolutions in the
- format [(height_0, width_0), (height_1, width_1), ...]. For example,
- setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
- correspond to an 8x8 layer followed by a 7x7 layer.
- im_height: the height of the image to generate the grid for. If both
- im_height and im_width are 1, the generated anchors default to
- normalized coordinates, otherwise absolute coordinates are used for the
- grid.
- im_width: the width of the image to generate the grid for. If both
- im_height and im_width are 1, the generated anchors default to
- normalized coordinates, otherwise absolute coordinates are used for the
- grid.
-
- Returns:
- boxes_list: a list of BoxLists each holding anchor boxes corresponding to
- the input feature map shapes.
-
- Raises:
- ValueError: if feature_map_shape_list, box_specs_list do not have the same
- length.
- ValueError: if feature_map_shape_list does not consist of pairs of
- integers
- """
- if not (isinstance(feature_map_shape_list, list)
- and len(feature_map_shape_list) == len(self._box_specs)):
- raise ValueError('feature_map_shape_list must be a list with the same '
- 'length as self._box_specs')
- if not all([isinstance(list_item, tuple) and len(list_item) == 2
- for list_item in feature_map_shape_list]):
- raise ValueError('feature_map_shape_list must be a list of pairs.')
-
- im_height = tf.to_float(im_height)
- im_width = tf.to_float(im_width)
-
- if not self._anchor_strides:
- anchor_strides = [(1.0 / tf.to_float(pair[0]), 1.0 / tf.to_float(pair[1]))
- for pair in feature_map_shape_list]
- else:
- anchor_strides = [(tf.to_float(stride[0]) / im_height,
- tf.to_float(stride[1]) / im_width)
- for stride in self._anchor_strides]
- if not self._anchor_offsets:
- anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1])
- for stride in anchor_strides]
- else:
- anchor_offsets = [(tf.to_float(offset[0]) / im_height,
- tf.to_float(offset[1]) / im_width)
- for offset in self._anchor_offsets]
-
- for arg, arg_name in zip([anchor_strides, anchor_offsets],
- ['anchor_strides', 'anchor_offsets']):
- if not (isinstance(arg, list) and len(arg) == len(self._box_specs)):
- raise ValueError('%s must be a list with the same length '
- 'as self._box_specs' % arg_name)
- if not all([isinstance(list_item, tuple) and len(list_item) == 2
- for list_item in arg]):
- raise ValueError('%s must be a list of pairs.' % arg_name)
-
- anchor_grid_list = []
- min_im_shape = tf.minimum(im_height, im_width)
- scale_height = min_im_shape / im_height
- scale_width = min_im_shape / im_width
- base_anchor_size = [
- scale_height * self._base_anchor_size[0],
- scale_width * self._base_anchor_size[1]
- ]
- for feature_map_index, (grid_size, scales, aspect_ratios, stride,
- offset) in enumerate(
- zip(feature_map_shape_list, self._scales,
- self._aspect_ratios, anchor_strides,
- anchor_offsets)):
- tiled_anchors = grid_anchor_generator.tile_anchors(
- grid_height=grid_size[0],
- grid_width=grid_size[1],
- scales=scales,
- aspect_ratios=aspect_ratios,
- base_anchor_size=base_anchor_size,
- anchor_stride=stride,
- anchor_offset=offset)
- if self._clip_window is not None:
- tiled_anchors = box_list_ops.clip_to_window(
- tiled_anchors, self._clip_window, filter_nonoverlapping=False)
- num_anchors_in_layer = tiled_anchors.num_boxes_static()
- if num_anchors_in_layer is None:
- num_anchors_in_layer = tiled_anchors.num_boxes()
- anchor_indices = feature_map_index * tf.ones([num_anchors_in_layer])
- tiled_anchors.add_field('feature_map_index', anchor_indices)
- anchor_grid_list.append(tiled_anchors)
-
- return anchor_grid_list
-
-
-def create_ssd_anchors(num_layers=6,
- min_scale=0.2,
- max_scale=0.95,
- scales=None,
- aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3),
- interpolated_scale_aspect_ratio=1.0,
- base_anchor_size=None,
- anchor_strides=None,
- anchor_offsets=None,
- reduce_boxes_in_lowest_layer=True):
- """Creates MultipleGridAnchorGenerator for SSD anchors.
-
- This function instantiates a MultipleGridAnchorGenerator that reproduces
- ``default box`` construction proposed by Liu et al in the SSD paper.
- See Section 2.2 for details. Grid sizes are assumed to be passed in
- at generation time from finest resolution to coarsest resolution --- this is
- used to (linearly) interpolate scales of anchor boxes corresponding to the
- intermediate grid sizes.
-
- Anchors that are returned by calling the `generate` method on the returned
- MultipleGridAnchorGenerator object are always in normalized coordinates
- and clipped to the unit square: (i.e. all coordinates lie in [0, 1]x[0, 1]).
-
- Args:
- num_layers: integer number of grid layers to create anchors for (actual
- grid sizes passed in at generation time)
- min_scale: scale of anchors corresponding to finest resolution (float)
- max_scale: scale of anchors corresponding to coarsest resolution (float)
- scales: A list of anchor scales to use. When not None and not empty,
- min_scale and max_scale are not used.
- aspect_ratios: list or tuple of (float) aspect ratios to place on each
- grid point.
- interpolated_scale_aspect_ratio: An additional anchor is added with this
- aspect ratio and a scale interpolated between the scale for a layer
- and the scale for the next layer (1.0 for the last layer).
- This anchor is not included if this value is 0.
- base_anchor_size: base anchor size as [height, width].
- The height and width values are normalized to the minimum dimension of the
- input height and width, so that when the base anchor height equals the
- base anchor width, the resulting anchor is square even if the input image
- is not square.
- anchor_strides: list of pairs of strides in pixels (in y and x directions
- respectively). For example, setting anchor_strides=[(25, 25), (50, 50)]
- means that we want the anchors corresponding to the first layer to be
- strided by 25 pixels and those in the second layer to be strided by 50
- pixels in both y and x directions. If anchor_strides=None, they are set to
- be the reciprocal of the corresponding feature map shapes.
- anchor_offsets: list of pairs of offsets in pixels (in y and x directions
- respectively). The offset specifies where we want the center of the
- (0, 0)-th anchor to lie for each layer. For example, setting
- anchor_offsets=[(10, 10), (20, 20)] means that we want the
- (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space
- and likewise that we want the (0, 0)-th anchor of the second layer to lie
- at (20, 20) in pixel space. If anchor_offsets=None, then they are set to
- be half of the corresponding anchor stride.
- reduce_boxes_in_lowest_layer: a boolean to indicate whether the fixed 3
- boxes per location is used in the lowest layer.
-
- Returns:
- a MultipleGridAnchorGenerator
- """
- if base_anchor_size is None:
- base_anchor_size = [1.0, 1.0]
- base_anchor_size = tf.constant(base_anchor_size, dtype=tf.float32)
- box_specs_list = []
- if scales is None or not scales:
- scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1)
- for i in range(num_layers)] + [1.0]
- else:
- # Add 1.0 to the end, which will only be used in scale_next below and used
- # for computing an interpolated scale for the largest scale in the list.
- scales += [1.0]
-
- for layer, scale, scale_next in zip(
- range(num_layers), scales[:-1], scales[1:]):
- layer_box_specs = []
- if layer == 0 and reduce_boxes_in_lowest_layer:
- layer_box_specs = [(0.1, 1.0), (scale, 2.0), (scale, 0.5)]
- else:
- for aspect_ratio in aspect_ratios:
- layer_box_specs.append((scale, aspect_ratio))
- # Add one more anchor, with a scale between the current scale, and the
- # scale for the next layer, with a specified aspect ratio (1.0 by
- # default).
- if interpolated_scale_aspect_ratio > 0.0:
- layer_box_specs.append((np.sqrt(scale*scale_next),
- interpolated_scale_aspect_ratio))
- box_specs_list.append(layer_box_specs)
-
- return MultipleGridAnchorGenerator(box_specs_list, base_anchor_size,
- anchor_strides, anchor_offsets)
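The scale interpolation in `create_ssd_anchors` is easy to see with the defaults: `min_scale=0.2`, `max_scale=0.95`, and `num_layers=6` give evenly spaced per-layer scales before the trailing 1.0 is appended:

```python
num_layers, min_scale, max_scale = 6, 0.2, 0.95
scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1)
          for i in range(num_layers)]
# -> [0.2, 0.35, 0.5, 0.65, 0.8, 0.95]
```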
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiple_grid_anchor_generator_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiple_grid_anchor_generator_test.py
deleted file mode 100644
index 070d81d36e79368c9fd46c7f3e03df7a93baee76..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiple_grid_anchor_generator_test.py
+++ /dev/null
@@ -1,289 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for anchor_generators.multiple_grid_anchor_generator_test.py."""
-
-import numpy as np
-
-import tensorflow as tf
-
-from object_detection.anchor_generators import multiple_grid_anchor_generator as ag
-from object_detection.utils import test_case
-
-
-class MultipleGridAnchorGeneratorTest(test_case.TestCase):
-
- def test_construct_single_anchor_grid(self):
- """Builds a 1x1 anchor grid to test the size of the output boxes."""
- def graph_fn():
-
- box_specs_list = [[(.5, .25), (1.0, .25), (2.0, .25),
- (.5, 1.0), (1.0, 1.0), (2.0, 1.0),
- (.5, 4.0), (1.0, 4.0), (2.0, 4.0)]]
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([256, 256], dtype=tf.float32),
- anchor_strides=[(16, 16)],
- anchor_offsets=[(7, -3)])
- anchors_list = anchor_generator.generate(feature_map_shape_list=[(1, 1)])
- return anchors_list[0].get()
- exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
- [-505, -131, 519, 125], [-57, -67, 71, 61],
- [-121, -131, 135, 125], [-249, -259, 263, 253],
- [-25, -131, 39, 125], [-57, -259, 71, 253],
- [-121, -515, 135, 509]]
-
- anchor_corners_out = self.execute(graph_fn, [])
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
- def test_construct_anchor_grid(self):
- def graph_fn():
- box_specs_list = [[(0.5, 1.0), (1.0, 1.0), (2.0, 1.0)]]
-
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([10, 10], dtype=tf.float32),
- anchor_strides=[(19, 19)],
- anchor_offsets=[(0, 0)])
- anchors_list = anchor_generator.generate(feature_map_shape_list=[(2, 2)])
- return anchors_list[0].get()
- exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
- [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
- [-5., 14., 5, 24], [-10., 9., 10, 29],
- [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
- [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
- [14., 14., 24, 24], [9., 9., 29, 29]]
-
- anchor_corners_out = self.execute(graph_fn, [])
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
- def test_construct_anchor_grid_non_square(self):
-
- def graph_fn():
- box_specs_list = [[(1.0, 1.0)]]
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list, base_anchor_size=tf.constant([1, 1],
- dtype=tf.float32))
- anchors_list = anchor_generator.generate(feature_map_shape_list=[(
- tf.constant(1, dtype=tf.int32), tf.constant(2, dtype=tf.int32))])
- return anchors_list[0].get()
-
- exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]]
- anchor_corners_out = self.execute(graph_fn, [])
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
- def test_construct_dynamic_size_anchor_grid(self):
-
- def graph_fn(height, width):
- box_specs_list = [[(1.0, 1.0)]]
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list, base_anchor_size=tf.constant([1, 1],
- dtype=tf.float32))
- anchors_list = anchor_generator.generate(feature_map_shape_list=[(height,
- width)])
- return anchors_list[0].get()
-
- exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]]
-
- anchor_corners_out = self.execute_cpu(graph_fn,
- [np.array(1, dtype=np.int32),
- np.array(2, dtype=np.int32)])
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
- def test_construct_anchor_grid_normalized(self):
- def graph_fn():
- box_specs_list = [[(1.0, 1.0)]]
-
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list, base_anchor_size=tf.constant([1, 1],
- dtype=tf.float32))
- anchors_list = anchor_generator.generate(
- feature_map_shape_list=[(tf.constant(1, dtype=tf.int32), tf.constant(
- 2, dtype=tf.int32))],
- im_height=320,
- im_width=640)
- return anchors_list[0].get()
-
- exp_anchor_corners = [[0., 0., 1., 0.5], [0., 0.5, 1., 1.]]
- anchor_corners_out = self.execute(graph_fn, [])
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
- def test_construct_multiple_grids(self):
-
- def graph_fn():
- box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
- [(1.0, 1.0), (1.0, 0.5)]]
-
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
- anchor_strides=[(.25, .25), (.5, .5)],
- anchor_offsets=[(.125, .125), (.25, .25)])
- anchors_list = anchor_generator.generate(feature_map_shape_list=[(4, 4), (
- 2, 2)])
- return [anchors.get() for anchors in anchors_list]
- # height and width of box with .5 aspect ratio
- h = np.sqrt(2)
- w = 1.0/np.sqrt(2)
- exp_small_grid_corners = [[-.25, -.25, .75, .75],
- [.25-.5*h, .25-.5*w, .25+.5*h, .25+.5*w],
- [-.25, .25, .75, 1.25],
- [.25-.5*h, .75-.5*w, .25+.5*h, .75+.5*w],
- [.25, -.25, 1.25, .75],
- [.75-.5*h, .25-.5*w, .75+.5*h, .25+.5*w],
- [.25, .25, 1.25, 1.25],
- [.75-.5*h, .75-.5*w, .75+.5*h, .75+.5*w]]
- # only test first entry of larger set of anchors
- exp_big_grid_corners = [[.125-.5, .125-.5, .125+.5, .125+.5],
- [.125-1.0, .125-1.0, .125+1.0, .125+1.0],
- [.125-.5*h, .125-.5*w, .125+.5*h, .125+.5*w],]
-
- anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0)
-    self.assertEqual(anchor_corners_out.shape, (56, 4))
- big_grid_corners = anchor_corners_out[0:3, :]
- small_grid_corners = anchor_corners_out[48:, :]
- self.assertAllClose(small_grid_corners, exp_small_grid_corners)
- self.assertAllClose(big_grid_corners, exp_big_grid_corners)
-
- def test_construct_multiple_grids_with_clipping(self):
-
- def graph_fn():
- box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
- [(1.0, 1.0), (1.0, 0.5)]]
-
- clip_window = tf.constant([0, 0, 1, 1], dtype=tf.float32)
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
- clip_window=clip_window)
- anchors_list = anchor_generator.generate(feature_map_shape_list=[(4, 4), (
- 2, 2)])
- return [anchors.get() for anchors in anchors_list]
- # height and width of box with .5 aspect ratio
- h = np.sqrt(2)
- w = 1.0/np.sqrt(2)
- exp_small_grid_corners = [[0, 0, .75, .75],
- [0, 0, .25+.5*h, .25+.5*w],
- [0, .25, .75, 1],
- [0, .75-.5*w, .25+.5*h, 1],
- [.25, 0, 1, .75],
- [.75-.5*h, 0, 1, .25+.5*w],
- [.25, .25, 1, 1],
- [.75-.5*h, .75-.5*w, 1, 1]]
-
- anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0)
- small_grid_corners = anchor_corners_out[48:, :]
- self.assertAllClose(small_grid_corners, exp_small_grid_corners)
-
- def test_invalid_box_specs(self):
- # not all box specs are pairs
- box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
- [(1.0, 1.0), (1.0, 0.5, .3)]]
- with self.assertRaises(ValueError):
- ag.MultipleGridAnchorGenerator(box_specs_list)
-
- # box_specs_list is not a list of lists
- box_specs_list = [(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)]
- with self.assertRaises(ValueError):
- ag.MultipleGridAnchorGenerator(box_specs_list)
-
- def test_invalid_generate_arguments(self):
- box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
- [(1.0, 1.0), (1.0, 0.5)]]
-
- # incompatible lengths with box_specs_list
- with self.assertRaises(ValueError):
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
- anchor_strides=[(.25, .25)],
- anchor_offsets=[(.125, .125), (.25, .25)])
- anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
- with self.assertRaises(ValueError):
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
- anchor_strides=[(.25, .25), (.5, .5)],
- anchor_offsets=[(.125, .125), (.25, .25)])
- anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2), (1, 1)])
- with self.assertRaises(ValueError):
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
- anchor_strides=[(.5, .5)],
- anchor_offsets=[(.25, .25)])
- anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
-
- # not pairs
- with self.assertRaises(ValueError):
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
- anchor_strides=[(.25, .25), (.5, .5)],
- anchor_offsets=[(.125, .125), (.25, .25)])
- anchor_generator.generate(feature_map_shape_list=[(4, 4, 4), (2, 2)])
- with self.assertRaises(ValueError):
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
- anchor_strides=[(.25, .25, .1), (.5, .5)],
- anchor_offsets=[(.125, .125), (.25, .25)])
- anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
- with self.assertRaises(ValueError):
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
- anchor_strides=[(.25, .25), (.5, .5)],
- anchor_offsets=[(.125, .125), (.25, .25)])
- anchor_generator.generate(feature_map_shape_list=[(4), (2, 2)])
-
-
-class CreateSSDAnchorsTest(test_case.TestCase):
-
- def test_create_ssd_anchors_returns_correct_shape(self):
-
- def graph_fn1():
- anchor_generator = ag.create_ssd_anchors(
- num_layers=6,
- min_scale=0.2,
- max_scale=0.95,
- aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3),
- reduce_boxes_in_lowest_layer=True)
-
- feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
- (5, 5), (3, 3), (1, 1)]
- anchors_list = anchor_generator.generate(
- feature_map_shape_list=feature_map_shape_list)
- return [anchors.get() for anchors in anchors_list]
- anchor_corners_out = np.concatenate(self.execute(graph_fn1, []), axis=0)
-    self.assertEqual(anchor_corners_out.shape, (7308, 4))
-
- def graph_fn2():
- anchor_generator = ag.create_ssd_anchors(
- num_layers=6, min_scale=0.2, max_scale=0.95,
- aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
- reduce_boxes_in_lowest_layer=False)
-
- feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
- (5, 5), (3, 3), (1, 1)]
- anchors_list = anchor_generator.generate(
- feature_map_shape_list=feature_map_shape_list)
- return [anchors.get() for anchors in anchors_list]
- anchor_corners_out = np.concatenate(self.execute(graph_fn2, []), axis=0)
-    self.assertEqual(anchor_corners_out.shape, (11640, 4))
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiscale_grid_anchor_generator.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiscale_grid_anchor_generator.py
deleted file mode 100644
index a8d227c77155eb45eb737c86c416d2e3d1fdda83..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiscale_grid_anchor_generator.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Generates grid anchors on the fly corresponding to multiple CNN layers.
-
-Generates grid anchors on the fly corresponding to multiple CNN layers as
-described in:
-"Focal Loss for Dense Object Detection" (https://arxiv.org/abs/1708.02002)
-T.-Y. Lin, P. Goyal, R. Girshick, K. He, P. Dollar
-"""
-
-from object_detection.anchor_generators import grid_anchor_generator
-from object_detection.core import anchor_generator
-from object_detection.core import box_list_ops
-
-
-class MultiscaleGridAnchorGenerator(anchor_generator.AnchorGenerator):
- """Generate a grid of anchors for multiple CNN layers of different scale."""
-
- def __init__(self, min_level, max_level, anchor_scale, aspect_ratios,
- scales_per_octave, normalize_coordinates=True):
- """Constructs a MultiscaleGridAnchorGenerator.
-
-    To construct anchors at multiple scale resolutions, one must provide the
-    minimum and maximum levels of the scale pyramid. The anchor scale
-    determines the size of the base anchor relative to the stride of the
-    corresponding feature map. The generator allows one pixel location on a
-    feature map to map to multiple anchors with different aspect ratios and
-    intermediate scales.
-
- Args:
- min_level: minimum level in feature pyramid.
- max_level: maximum level in feature pyramid.
- anchor_scale: anchor scale and feature stride define the size of the base
- anchor on an image. For example, given a feature pyramid with strides
- [2^3, ..., 2^7] and anchor scale 4. The base anchor size is
- 4 * [2^3, ..., 2^7].
- aspect_ratios: list or tuple of (float) aspect ratios to place on each
- grid point.
- scales_per_octave: integer number of intermediate scales per scale octave.
- normalize_coordinates: whether to produce anchors in normalized
- coordinates. (defaults to True).
- """
- self._anchor_grid_info = []
- self._aspect_ratios = aspect_ratios
- self._scales_per_octave = scales_per_octave
- self._normalize_coordinates = normalize_coordinates
-
- for level in range(min_level, max_level + 1):
- anchor_stride = [2**level, 2**level]
- scales = []
- aspects = []
- for scale in range(scales_per_octave):
- scales.append(2**(float(scale) / scales_per_octave))
- for aspect_ratio in aspect_ratios:
- aspects.append(aspect_ratio)
- base_anchor_size = [2**level * anchor_scale, 2**level * anchor_scale]
- self._anchor_grid_info.append({
- 'level': level,
- 'info': [scales, aspects, base_anchor_size, anchor_stride]
- })
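-      # Illustrative example (reviewer note, not in the original code): with
-      # min_level=5, anchor_scale=4.0 and scales_per_octave=2, the entry
-      # appended for level 5 is
-      #   {'level': 5,
-      #    'info': [[1.0, 2 ** 0.5], list(aspect_ratios),
-      #             [128.0, 128.0],   # base_anchor_size = 2**5 * 4.0
-      #             [32, 32]]}        # anchor_stride = 2**5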
-
- def name_scope(self):
- return 'MultiscaleGridAnchorGenerator'
-
- def num_anchors_per_location(self):
- """Returns the number of anchors per spatial location.
-
- Returns:
- a list of integers, one for each expected feature map to be passed to
- the Generate function.
- """
- return len(self._anchor_grid_info) * [
- len(self._aspect_ratios) * self._scales_per_octave]
-
- def _generate(self, feature_map_shape_list, im_height, im_width):
- """Generates a collection of bounding boxes to be used as anchors.
-
- Currently we require the input image shape to be statically defined. That
- is, im_height and im_width should be integers rather than tensors.
-
- Args:
- feature_map_shape_list: list of pairs of convnet layer resolutions in the
- format [(height_0, width_0), (height_1, width_1), ...]. For example,
- setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
- correspond to an 8x8 layer followed by a 7x7 layer.
- im_height: the height of the image to generate the grid for.
- im_width: the width of the image to generate the grid for.
-
- Returns:
- boxes_list: a list of BoxLists each holding anchor boxes corresponding to
- the input feature map shapes.
- Raises:
- ValueError: if im_height and im_width are not integers.
- """
- if not isinstance(im_height, int) or not isinstance(im_width, int):
- raise ValueError('MultiscaleGridAnchorGenerator currently requires '
- 'input image shape to be statically defined.')
- anchor_grid_list = []
- for feat_shape, grid_info in zip(feature_map_shape_list,
- self._anchor_grid_info):
- # TODO(rathodv) check the feature_map_shape_list is consistent with
- # self._anchor_grid_info
- level = grid_info['level']
- stride = 2**level
- scales, aspect_ratios, base_anchor_size, anchor_stride = grid_info['info']
- feat_h = feat_shape[0]
- feat_w = feat_shape[1]
- anchor_offset = [0, 0]
- if im_height % 2.0**level == 0:
- anchor_offset[0] = stride / 2.0
- if im_width % 2.0**level == 0:
- anchor_offset[1] = stride / 2.0
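-      # Illustrative note (reviewer addition): with im_height = 64 and
-      # level = 5, the stride is 32 and 64 % 32 == 0, so anchor centers are
-      # shifted by 16 pixels and sit at the middle of each stride-sized cell;
-      # an odd size such as 65 keeps a zero offset (see the odd-dimension
-      # test in multiscale_grid_anchor_generator_test.py).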
- ag = grid_anchor_generator.GridAnchorGenerator(
- scales,
- aspect_ratios,
- base_anchor_size=base_anchor_size,
- anchor_stride=anchor_stride,
- anchor_offset=anchor_offset)
- (anchor_grid,) = ag.generate(feature_map_shape_list=[(feat_h, feat_w)])
-
- if self._normalize_coordinates:
- anchor_grid = box_list_ops.to_normalized_coordinates(
- anchor_grid, im_height, im_width, check_range=False)
- anchor_grid_list.append(anchor_grid)
-
- return anchor_grid_list
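-
-
-def _demo_multiscale_anchors():
-  """Illustrative usage sketch (reviewer addition, not in the original file).
-
-  Builds RetinaNet-style anchors over pyramid levels 5 and 6 for a statically
-  shaped 64x64 image; the feature map shapes must match the requested levels
-  (64 / 2**5 = 2 and 64 / 2**6 = 1).
-
-  Returns:
-    a list of two BoxLists, one per feature map.
-  """
-  generator = MultiscaleGridAnchorGenerator(
-      min_level=5, max_level=6, anchor_scale=4.0, aspect_ratios=[1.0, 2.0],
-      scales_per_octave=2, normalize_coordinates=False)
-  return generator.generate(
-      feature_map_shape_list=[(2, 2), (1, 1)], im_height=64, im_width=64)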
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiscale_grid_anchor_generator_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiscale_grid_anchor_generator_test.py
deleted file mode 100644
index c96bdae7b9bcb59d295350ac31a5f8f56b720280..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiscale_grid_anchor_generator_test.py
+++ /dev/null
@@ -1,258 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for anchor_generators.multiscale_grid_anchor_generator_test.py."""
-import numpy as np
-import tensorflow as tf
-
-from object_detection.anchor_generators import multiscale_grid_anchor_generator as mg
-from object_detection.utils import test_case
-
-
-class MultiscaleGridAnchorGeneratorTest(test_case.TestCase):
-
- def test_construct_single_anchor(self):
- min_level = 5
- max_level = 5
- anchor_scale = 4.0
- aspect_ratios = [1.0]
- scales_per_octave = 1
- im_height = 64
- im_width = 64
- feature_map_shape_list = [(2, 2)]
- exp_anchor_corners = [[-48, -48, 80, 80],
- [-48, -16, 80, 112],
- [-16, -48, 112, 80],
- [-16, -16, 112, 112]]
- anchor_generator = mg.MultiscaleGridAnchorGenerator(
- min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
- normalize_coordinates=False)
- anchors_list = anchor_generator.generate(
- feature_map_shape_list, im_height=im_height, im_width=im_width)
- anchor_corners = anchors_list[0].get()
-
- with self.test_session():
- anchor_corners_out = anchor_corners.eval()
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
- def test_construct_single_anchor_in_normalized_coordinates(self):
- min_level = 5
- max_level = 5
- anchor_scale = 4.0
- aspect_ratios = [1.0]
- scales_per_octave = 1
- im_height = 64
- im_width = 128
- feature_map_shape_list = [(2, 2)]
- exp_anchor_corners = [[-48./64, -48./128, 80./64, 80./128],
- [-48./64, -16./128, 80./64, 112./128],
- [-16./64, -48./128, 112./64, 80./128],
- [-16./64, -16./128, 112./64, 112./128]]
- anchor_generator = mg.MultiscaleGridAnchorGenerator(
- min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
- normalize_coordinates=True)
- anchors_list = anchor_generator.generate(
- feature_map_shape_list, im_height=im_height, im_width=im_width)
- anchor_corners = anchors_list[0].get()
-
- with self.test_session():
- anchor_corners_out = anchor_corners.eval()
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
- def test_num_anchors_per_location(self):
- min_level = 5
- max_level = 6
- anchor_scale = 4.0
- aspect_ratios = [1.0, 2.0]
- scales_per_octave = 3
- anchor_generator = mg.MultiscaleGridAnchorGenerator(
- min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
- normalize_coordinates=False)
- self.assertEqual(anchor_generator.num_anchors_per_location(), [6, 6])
-
- def test_construct_single_anchor_fails_with_tensor_image_size(self):
- min_level = 5
- max_level = 5
- anchor_scale = 4.0
- aspect_ratios = [1.0]
- scales_per_octave = 1
- im_height = tf.constant(64)
- im_width = tf.constant(64)
- feature_map_shape_list = [(2, 2)]
- anchor_generator = mg.MultiscaleGridAnchorGenerator(
- min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
- normalize_coordinates=False)
- with self.assertRaises(ValueError):
- anchor_generator.generate(
- feature_map_shape_list, im_height=im_height, im_width=im_width)
-
- def test_construct_single_anchor_with_odd_input_dimension(self):
-
- def graph_fn():
- min_level = 5
- max_level = 5
- anchor_scale = 4.0
- aspect_ratios = [1.0]
- scales_per_octave = 1
- im_height = 65
- im_width = 65
- feature_map_shape_list = [(3, 3)]
- anchor_generator = mg.MultiscaleGridAnchorGenerator(
- min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
- normalize_coordinates=False)
- anchors_list = anchor_generator.generate(
- feature_map_shape_list, im_height=im_height, im_width=im_width)
- anchor_corners = anchors_list[0].get()
- return (anchor_corners,)
- anchor_corners_out = self.execute(graph_fn, [])
- exp_anchor_corners = [[-64, -64, 64, 64],
- [-64, -32, 64, 96],
- [-64, 0, 64, 128],
- [-32, -64, 96, 64],
- [-32, -32, 96, 96],
- [-32, 0, 96, 128],
- [0, -64, 128, 64],
- [0, -32, 128, 96],
- [0, 0, 128, 128]]
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
- def test_construct_single_anchor_on_two_feature_maps(self):
-
- def graph_fn():
- min_level = 5
- max_level = 6
- anchor_scale = 4.0
- aspect_ratios = [1.0]
- scales_per_octave = 1
- im_height = 64
- im_width = 64
- feature_map_shape_list = [(2, 2), (1, 1)]
- anchor_generator = mg.MultiscaleGridAnchorGenerator(
- min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
- normalize_coordinates=False)
- anchors_list = anchor_generator.generate(feature_map_shape_list,
- im_height=im_height,
- im_width=im_width)
- anchor_corners = [anchors.get() for anchors in anchors_list]
- return anchor_corners
-
- anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0)
- exp_anchor_corners = [[-48, -48, 80, 80],
- [-48, -16, 80, 112],
- [-16, -48, 112, 80],
- [-16, -16, 112, 112],
- [-96, -96, 160, 160]]
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
- def test_construct_single_anchor_with_two_scales_per_octave(self):
-
- def graph_fn():
- min_level = 6
- max_level = 6
- anchor_scale = 4.0
- aspect_ratios = [1.0]
- scales_per_octave = 2
- im_height = 64
- im_width = 64
- feature_map_shape_list = [(1, 1)]
-
- anchor_generator = mg.MultiscaleGridAnchorGenerator(
- min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
- normalize_coordinates=False)
- anchors_list = anchor_generator.generate(feature_map_shape_list,
- im_height=im_height,
- im_width=im_width)
- anchor_corners = [anchors.get() for anchors in anchors_list]
- return anchor_corners
-    # There are 2 anchors in this configuration. The order is:
- # [[2**0.0 intermediate scale + 1.0 aspect],
- # [2**0.5 intermediate scale + 1.0 aspect]]
- exp_anchor_corners = [[-96., -96., 160., 160.],
- [-149.0193, -149.0193, 213.0193, 213.0193]]
-
- anchor_corners_out = self.execute(graph_fn, [])
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
- def test_construct_single_anchor_with_two_scales_per_octave_and_aspect(self):
- def graph_fn():
- min_level = 6
- max_level = 6
- anchor_scale = 4.0
- aspect_ratios = [1.0, 2.0]
- scales_per_octave = 2
- im_height = 64
- im_width = 64
- feature_map_shape_list = [(1, 1)]
- anchor_generator = mg.MultiscaleGridAnchorGenerator(
- min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
- normalize_coordinates=False)
- anchors_list = anchor_generator.generate(feature_map_shape_list,
- im_height=im_height,
- im_width=im_width)
- anchor_corners = [anchors.get() for anchors in anchors_list]
- return anchor_corners
-    # There are 4 anchors in this configuration. The order is:
- # [[2**0.0 intermediate scale + 1.0 aspect],
- # [2**0.5 intermediate scale + 1.0 aspect],
- # [2**0.0 intermediate scale + 2.0 aspect],
- # [2**0.5 intermediate scale + 2.0 aspect]]
-
- exp_anchor_corners = [[-96., -96., 160., 160.],
- [-149.0193, -149.0193, 213.0193, 213.0193],
- [-58.50967, -149.0193, 122.50967, 213.0193],
- [-96., -224., 160., 288.]]
- anchor_corners_out = self.execute(graph_fn, [])
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
- def test_construct_single_anchors_on_feature_maps_with_dynamic_shape(self):
-
- def graph_fn(feature_map1_height, feature_map1_width, feature_map2_height,
- feature_map2_width):
- min_level = 5
- max_level = 6
- anchor_scale = 4.0
- aspect_ratios = [1.0]
- scales_per_octave = 1
- im_height = 64
- im_width = 64
- feature_map_shape_list = [(feature_map1_height, feature_map1_width),
- (feature_map2_height, feature_map2_width)]
- anchor_generator = mg.MultiscaleGridAnchorGenerator(
- min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
- normalize_coordinates=False)
- anchors_list = anchor_generator.generate(feature_map_shape_list,
- im_height=im_height,
- im_width=im_width)
- anchor_corners = [anchors.get() for anchors in anchors_list]
- return anchor_corners
-
- anchor_corners_out = np.concatenate(
- self.execute_cpu(graph_fn, [
- np.array(2, dtype=np.int32),
- np.array(2, dtype=np.int32),
- np.array(1, dtype=np.int32),
- np.array(1, dtype=np.int32)
- ]),
- axis=0)
- exp_anchor_corners = [[-48, -48, 80, 80],
- [-48, -16, 80, 112],
- [-16, -48, 112, 80],
- [-16, -16, 112, 112],
- [-96, -96, 160, 160]]
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/faster_rcnn_box_coder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/faster_rcnn_box_coder.py
deleted file mode 100644
index af25e21a105ffa85931d3f30a1ca41c89c5dde53..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/faster_rcnn_box_coder.py
+++ /dev/null
@@ -1,118 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Faster RCNN box coder.
-
-Faster RCNN box coder follows the coding schema described below:
- ty = (y - ya) / ha
- tx = (x - xa) / wa
- th = log(h / ha)
- tw = log(w / wa)
- where x, y, w, h denote the box's center coordinates, width and height
- respectively. Similarly, xa, ya, wa, ha denote the anchor's center
- coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
- center, width and height respectively.
-
- See http://arxiv.org/abs/1506.01497 for details.
-"""
-
-import tensorflow as tf
-
-from object_detection.core import box_coder
-from object_detection.core import box_list
-
-EPSILON = 1e-8
-
-
-class FasterRcnnBoxCoder(box_coder.BoxCoder):
- """Faster RCNN box coder."""
-
- def __init__(self, scale_factors=None):
- """Constructor for FasterRcnnBoxCoder.
-
- Args:
- scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
- If set to None, does not perform scaling. For Faster RCNN,
- the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
- """
- if scale_factors:
- assert len(scale_factors) == 4
- for scalar in scale_factors:
- assert scalar > 0
- self._scale_factors = scale_factors
-
- @property
- def code_size(self):
- return 4
-
- def _encode(self, boxes, anchors):
- """Encode a box collection with respect to anchor collection.
-
- Args:
- boxes: BoxList holding N boxes to be encoded.
- anchors: BoxList of anchors.
-
- Returns:
- a tensor representing N anchor-encoded boxes of the format
- [ty, tx, th, tw].
- """
- # Convert anchors to the center coordinate representation.
- ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
- ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
- # Avoid NaN in division and log below.
- ha += EPSILON
- wa += EPSILON
- h += EPSILON
- w += EPSILON
-
- tx = (xcenter - xcenter_a) / wa
- ty = (ycenter - ycenter_a) / ha
- tw = tf.log(w / wa)
- th = tf.log(h / ha)
- # Scales location targets as used in paper for joint training.
- if self._scale_factors:
- ty *= self._scale_factors[0]
- tx *= self._scale_factors[1]
- th *= self._scale_factors[2]
- tw *= self._scale_factors[3]
- return tf.transpose(tf.stack([ty, tx, th, tw]))
-
- def _decode(self, rel_codes, anchors):
- """Decode relative codes to boxes.
-
- Args:
- rel_codes: a tensor representing N anchor-encoded boxes.
- anchors: BoxList of anchors.
-
- Returns:
- boxes: BoxList holding N bounding boxes.
- """
- ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
-
- ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))
- if self._scale_factors:
- ty /= self._scale_factors[0]
- tx /= self._scale_factors[1]
- th /= self._scale_factors[2]
- tw /= self._scale_factors[3]
- w = tf.exp(tw) * wa
- h = tf.exp(th) * ha
- ycenter = ty * ha + ycenter_a
- xcenter = tx * wa + xcenter_a
- ymin = ycenter - h / 2.
- xmin = xcenter - w / 2.
- ymax = ycenter + h / 2.
- xmax = xcenter + w / 2.
- return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
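-
-
-def _demo_roundtrip():
-  """Illustrative sketch (reviewer addition, not in the original file).
-
-  Re-implements the ty/tx/th/tw equations from the module docstring in plain
-  numpy for one box/anchor pair (the first pair used in the unit tests) and
-  checks that decoding inverts encoding.
-  """
-  import numpy as np
-  ya, xa, ha, wa = 22.5, 15.0, 15.0, 6.0  # anchor [15, 12, 30, 18], center/size
-  y, x, h, w = 15.0, 12.5, 10.0, 5.0      # box [10, 10, 20, 15], center/size
-  ty, tx = (y - ya) / ha, (x - xa) / wa
-  th, tw = np.log(h / ha), np.log(w / wa)
-  # Decode with the inverse equations and confirm the round trip.
-  h2, w2 = np.exp(th) * ha, np.exp(tw) * wa
-  y2, x2 = ty * ha + ya, tx * wa + xa
-  assert np.allclose([y2, x2, h2, w2], [y, x, h, w])
-  return [ty, tx, th, tw]  # ~[-0.5, -0.416666, -0.405465, -0.182321]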
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/faster_rcnn_box_coder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/faster_rcnn_box_coder_test.py
deleted file mode 100644
index b2135f06eea093110c9da17c1c46b7d247f8e806..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/faster_rcnn_box_coder_test.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.box_coder.faster_rcnn_box_coder."""
-
-import tensorflow as tf
-
-from object_detection.box_coders import faster_rcnn_box_coder
-from object_detection.core import box_list
-
-
-class FasterRcnnBoxCoderTest(tf.test.TestCase):
-
- def test_get_correct_relative_codes_after_encoding(self):
- boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
- anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
- expected_rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
- [-0.083333, -0.222222, -0.693147, -1.098612]]
- boxes = box_list.BoxList(tf.constant(boxes))
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
- rel_codes = coder.encode(boxes, anchors)
- with self.test_session() as sess:
- rel_codes_out, = sess.run([rel_codes])
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
- def test_get_correct_relative_codes_after_encoding_with_scaling(self):
- boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
- anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
- scale_factors = [2, 3, 4, 5]
- expected_rel_codes = [[-1., -1.25, -1.62186, -0.911608],
- [-0.166667, -0.666667, -2.772588, -5.493062]]
- boxes = box_list.BoxList(tf.constant(boxes))
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
- scale_factors=scale_factors)
- rel_codes = coder.encode(boxes, anchors)
- with self.test_session() as sess:
- rel_codes_out, = sess.run([rel_codes])
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
- def test_get_correct_boxes_after_decoding(self):
- anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
- rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
- [-0.083333, -0.222222, -0.693147, -1.098612]]
- expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
- boxes = coder.decode(rel_codes, anchors)
- with self.test_session() as sess:
- boxes_out, = sess.run([boxes.get()])
- self.assertAllClose(boxes_out, expected_boxes)
-
- def test_get_correct_boxes_after_decoding_with_scaling(self):
- anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
- rel_codes = [[-1., -1.25, -1.62186, -0.911608],
- [-0.166667, -0.666667, -2.772588, -5.493062]]
- scale_factors = [2, 3, 4, 5]
- expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
- scale_factors=scale_factors)
- boxes = coder.decode(rel_codes, anchors)
- with self.test_session() as sess:
- boxes_out, = sess.run([boxes.get()])
- self.assertAllClose(boxes_out, expected_boxes)
-
-  def test_very_small_width_nan_after_encoding(self):
- boxes = [[10.0, 10.0, 10.0000001, 20.0]]
- anchors = [[15.0, 12.0, 30.0, 18.0]]
- expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826]]
- boxes = box_list.BoxList(tf.constant(boxes))
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
- rel_codes = coder.encode(boxes, anchors)
- with self.test_session() as sess:
- rel_codes_out, = sess.run([rel_codes])
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/keypoint_box_coder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/keypoint_box_coder.py
deleted file mode 100644
index 67df3b82ebd83308578bc850ebba2e7c074a9679..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/keypoint_box_coder.py
+++ /dev/null
@@ -1,171 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Keypoint box coder.
-
-The keypoint box coder follows the coding schema described below (this is
-similar to the FasterRcnnBoxCoder, except that it encodes keypoints in addition
-to box coordinates):
- ty = (y - ya) / ha
- tx = (x - xa) / wa
- th = log(h / ha)
- tw = log(w / wa)
- tky0 = (ky0 - ya) / ha
- tkx0 = (kx0 - xa) / wa
- tky1 = (ky1 - ya) / ha
- tkx1 = (kx1 - xa) / wa
- ...
- where x, y, w, h denote the box's center coordinates, width and height
- respectively. Similarly, xa, ya, wa, ha denote the anchor's center
- coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
- center, width and height respectively. ky0, kx0, ky1, kx1, ... denote the
- keypoints' coordinates, and tky0, tkx0, tky1, tkx1, ... denote the
- anchor-encoded keypoint coordinates.
-"""
-
-import tensorflow as tf
-
-from object_detection.core import box_coder
-from object_detection.core import box_list
-from object_detection.core import standard_fields as fields
-
-EPSILON = 1e-8
-
-
-class KeypointBoxCoder(box_coder.BoxCoder):
- """Keypoint box coder."""
-
- def __init__(self, num_keypoints, scale_factors=None):
- """Constructor for KeypointBoxCoder.
-
- Args:
- num_keypoints: Number of keypoints to encode/decode.
- scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
- In addition to scaling ty and tx, the first 2 scalars are used to scale
- the y and x coordinates of the keypoints as well. If set to None, does
- not perform scaling.
- """
- self._num_keypoints = num_keypoints
-
- if scale_factors:
- assert len(scale_factors) == 4
- for scalar in scale_factors:
- assert scalar > 0
- self._scale_factors = scale_factors
- self._keypoint_scale_factors = None
- if scale_factors is not None:
- self._keypoint_scale_factors = tf.expand_dims(tf.tile(
- [tf.to_float(scale_factors[0]), tf.to_float(scale_factors[1])],
- [num_keypoints]), 1)
-
- @property
- def code_size(self):
- return 4 + self._num_keypoints * 2
-
- def _encode(self, boxes, anchors):
- """Encode a box and keypoint collection with respect to anchor collection.
-
- Args:
- boxes: BoxList holding N boxes and keypoints to be encoded. Boxes are
- tensors with the shape [N, 4], and keypoints are tensors with the shape
- [N, num_keypoints, 2].
- anchors: BoxList of anchors.
-
- Returns:
- a tensor representing N anchor-encoded boxes of the format
- [ty, tx, th, tw, tky0, tkx0, tky1, tkx1, ...] where tky0 and tkx0
- represent the y and x coordinates of the first keypoint, tky1 and tkx1
- represent the y and x coordinates of the second keypoint, and so on.
- """
- # Convert anchors to the center coordinate representation.
- ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
- ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
- keypoints = boxes.get_field(fields.BoxListFields.keypoints)
- keypoints = tf.transpose(tf.reshape(keypoints,
- [-1, self._num_keypoints * 2]))
- num_boxes = boxes.num_boxes()
-
- # Avoid NaN in division and log below.
- ha += EPSILON
- wa += EPSILON
- h += EPSILON
- w += EPSILON
-
- tx = (xcenter - xcenter_a) / wa
- ty = (ycenter - ycenter_a) / ha
- tw = tf.log(w / wa)
- th = tf.log(h / ha)
-
- tiled_anchor_centers = tf.tile(
- tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
- tiled_anchor_sizes = tf.tile(
- tf.stack([ha, wa]), [self._num_keypoints, 1])
- tkeypoints = (keypoints - tiled_anchor_centers) / tiled_anchor_sizes
-
- # Scales location targets as used in paper for joint training.
- if self._scale_factors:
- ty *= self._scale_factors[0]
- tx *= self._scale_factors[1]
- th *= self._scale_factors[2]
- tw *= self._scale_factors[3]
- tkeypoints *= tf.tile(self._keypoint_scale_factors, [1, num_boxes])
-
- tboxes = tf.stack([ty, tx, th, tw])
- return tf.transpose(tf.concat([tboxes, tkeypoints], 0))
-
- def _decode(self, rel_codes, anchors):
- """Decode relative codes to boxes and keypoints.
-
- Args:
- rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N
- anchor-encoded boxes and keypoints
- anchors: BoxList of anchors.
-
- Returns:
- boxes: BoxList holding N bounding boxes and keypoints.
- """
- ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
-
- num_codes = tf.shape(rel_codes)[0]
- result = tf.unstack(tf.transpose(rel_codes))
- ty, tx, th, tw = result[:4]
- tkeypoints = result[4:]
- if self._scale_factors:
- ty /= self._scale_factors[0]
- tx /= self._scale_factors[1]
- th /= self._scale_factors[2]
- tw /= self._scale_factors[3]
- tkeypoints /= tf.tile(self._keypoint_scale_factors, [1, num_codes])
-
- w = tf.exp(tw) * wa
- h = tf.exp(th) * ha
- ycenter = ty * ha + ycenter_a
- xcenter = tx * wa + xcenter_a
- ymin = ycenter - h / 2.
- xmin = xcenter - w / 2.
- ymax = ycenter + h / 2.
- xmax = xcenter + w / 2.
- decoded_boxes_keypoints = box_list.BoxList(
- tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
-
- tiled_anchor_centers = tf.tile(
- tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
- tiled_anchor_sizes = tf.tile(
- tf.stack([ha, wa]), [self._num_keypoints, 1])
- keypoints = tkeypoints * tiled_anchor_sizes + tiled_anchor_centers
- keypoints = tf.reshape(tf.transpose(keypoints),
- [-1, self._num_keypoints, 2])
- decoded_boxes_keypoints.add_field(fields.BoxListFields.keypoints, keypoints)
- return decoded_boxes_keypoints
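-
-
-def _demo_code_size(num_keypoints=2):
-  """Illustrative sketch (reviewer addition, not in the original file).
-
-  With two keypoints the coder emits 4 + 2 * 2 = 8 values per box, laid out
-  as [ty, tx, th, tw, tky0, tkx0, tky1, tkx1] per the module docstring.
-  """
-  return KeypointBoxCoder(num_keypoints).code_size  # -> 8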
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/keypoint_box_coder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/keypoint_box_coder_test.py
deleted file mode 100644
index 330641e586af98af5f4764fb08f5307458777458..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/keypoint_box_coder_test.py
+++ /dev/null
@@ -1,140 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.box_coder.keypoint_box_coder."""
-
-import tensorflow as tf
-
-from object_detection.box_coders import keypoint_box_coder
-from object_detection.core import box_list
-from object_detection.core import standard_fields as fields
-
-
-class KeypointBoxCoderTest(tf.test.TestCase):
-
- def test_get_correct_relative_codes_after_encoding(self):
- boxes = [[10., 10., 20., 15.],
- [0.2, 0.1, 0.5, 0.4]]
- keypoints = [[[15., 12.], [10., 15.]],
- [[0.5, 0.3], [0.2, 0.4]]]
- num_keypoints = len(keypoints[0])
- anchors = [[15., 12., 30., 18.],
- [0.1, 0.0, 0.7, 0.9]]
- expected_rel_codes = [
- [-0.5, -0.416666, -0.405465, -0.182321,
- -0.5, -0.5, -0.833333, 0.],
- [-0.083333, -0.222222, -0.693147, -1.098612,
- 0.166667, -0.166667, -0.333333, -0.055556]
- ]
- boxes = box_list.BoxList(tf.constant(boxes))
- boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
- rel_codes = coder.encode(boxes, anchors)
- with self.test_session() as sess:
- rel_codes_out, = sess.run([rel_codes])
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
- def test_get_correct_relative_codes_after_encoding_with_scaling(self):
- boxes = [[10., 10., 20., 15.],
- [0.2, 0.1, 0.5, 0.4]]
- keypoints = [[[15., 12.], [10., 15.]],
- [[0.5, 0.3], [0.2, 0.4]]]
- num_keypoints = len(keypoints[0])
- anchors = [[15., 12., 30., 18.],
- [0.1, 0.0, 0.7, 0.9]]
- scale_factors = [2, 3, 4, 5]
- expected_rel_codes = [
- [-1., -1.25, -1.62186, -0.911608,
- -1.0, -1.5, -1.666667, 0.],
- [-0.166667, -0.666667, -2.772588, -5.493062,
- 0.333333, -0.5, -0.666667, -0.166667]
- ]
- boxes = box_list.BoxList(tf.constant(boxes))
- boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = keypoint_box_coder.KeypointBoxCoder(
- num_keypoints, scale_factors=scale_factors)
- rel_codes = coder.encode(boxes, anchors)
- with self.test_session() as sess:
- rel_codes_out, = sess.run([rel_codes])
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
- def test_get_correct_boxes_after_decoding(self):
- anchors = [[15., 12., 30., 18.],
- [0.1, 0.0, 0.7, 0.9]]
- rel_codes = [
- [-0.5, -0.416666, -0.405465, -0.182321,
- -0.5, -0.5, -0.833333, 0.],
- [-0.083333, -0.222222, -0.693147, -1.098612,
- 0.166667, -0.166667, -0.333333, -0.055556]
- ]
- expected_boxes = [[10., 10., 20., 15.],
- [0.2, 0.1, 0.5, 0.4]]
- expected_keypoints = [[[15., 12.], [10., 15.]],
- [[0.5, 0.3], [0.2, 0.4]]]
- num_keypoints = len(expected_keypoints[0])
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
- boxes = coder.decode(rel_codes, anchors)
- with self.test_session() as sess:
- boxes_out, keypoints_out = sess.run(
- [boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
- self.assertAllClose(boxes_out, expected_boxes)
- self.assertAllClose(keypoints_out, expected_keypoints)
-
- def test_get_correct_boxes_after_decoding_with_scaling(self):
- anchors = [[15., 12., 30., 18.],
- [0.1, 0.0, 0.7, 0.9]]
- rel_codes = [
- [-1., -1.25, -1.62186, -0.911608,
- -1.0, -1.5, -1.666667, 0.],
- [-0.166667, -0.666667, -2.772588, -5.493062,
- 0.333333, -0.5, -0.666667, -0.166667]
- ]
- scale_factors = [2, 3, 4, 5]
- expected_boxes = [[10., 10., 20., 15.],
- [0.2, 0.1, 0.5, 0.4]]
- expected_keypoints = [[[15., 12.], [10., 15.]],
- [[0.5, 0.3], [0.2, 0.4]]]
- num_keypoints = len(expected_keypoints[0])
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = keypoint_box_coder.KeypointBoxCoder(
- num_keypoints, scale_factors=scale_factors)
- boxes = coder.decode(rel_codes, anchors)
- with self.test_session() as sess:
- boxes_out, keypoints_out = sess.run(
- [boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
- self.assertAllClose(boxes_out, expected_boxes)
- self.assertAllClose(keypoints_out, expected_keypoints)
-
- def test_very_small_width_nan_after_encoding(self):
- boxes = [[10., 10., 10.0000001, 20.]]
- keypoints = [[[10., 10.], [10.0000001, 20.]]]
- anchors = [[15., 12., 30., 18.]]
- expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826,
- -0.833333, -0.833333, -0.833333, 0.833333]]
- boxes = box_list.BoxList(tf.constant(boxes))
- boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = keypoint_box_coder.KeypointBoxCoder(2)
- rel_codes = coder.encode(boxes, anchors)
- with self.test_session() as sess:
- rel_codes_out, = sess.run([rel_codes])
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/mean_stddev_box_coder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/mean_stddev_box_coder.py
deleted file mode 100644
index 256f53fd036798cd7b3da8fcdd720c7e3c46e2e4..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/mean_stddev_box_coder.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Mean stddev box coder.
-
-This box coder uses the following coding schema to encode boxes:
-rel_code = (box_corner - anchor_corner_mean) / anchor_corner_stddev.
-"""
-from object_detection.core import box_coder
-from object_detection.core import box_list
-
-
-class MeanStddevBoxCoder(box_coder.BoxCoder):
- """Mean stddev box coder."""
-
- def __init__(self, stddev=0.01):
- """Constructor for MeanStddevBoxCoder.
-
- Args:
- stddev: The standard deviation used to encode and decode boxes.
- """
- self._stddev = stddev
-
- @property
- def code_size(self):
- return 4
-
- def _encode(self, boxes, anchors):
- """Encode a box collection with respect to anchor collection.
-
- Args:
- boxes: BoxList holding N boxes to be encoded.
- anchors: BoxList of N anchors.
-
- Returns:
- a tensor representing N anchor-encoded boxes
-
- Raises:
-      ValueError: if the anchors still have the deprecated stddev field.
- """
- box_corners = boxes.get()
- if anchors.has_field('stddev'):
- raise ValueError("'stddev' is a parameter of MeanStddevBoxCoder and "
- "should not be specified in the box list.")
- means = anchors.get()
- return (box_corners - means) / self._stddev
-
- def _decode(self, rel_codes, anchors):
- """Decode.
-
- Args:
- rel_codes: a tensor representing N anchor-encoded boxes.
- anchors: BoxList of anchors.
-
- Returns:
- boxes: BoxList holding N bounding boxes
-
- Raises:
-      ValueError: if the anchors still have the deprecated stddev field and
-        the caller expects the decode method to use the stddev value from
-        that field.
- """
- means = anchors.get()
- if anchors.has_field('stddev'):
- raise ValueError("'stddev' is a parameter of MeanStddevBoxCoder and "
- "should not be specified in the box list.")
- box_corners = rel_codes * self._stddev + means
- return box_list.BoxList(box_corners)
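-
-
-def _demo_roundtrip():
-  """Illustrative sketch (reviewer addition, not in the original file).
-
-  With stddev=0.1, a box whose corners sit 0.05 above the anchor corners
-  encodes to rel_codes of 0.5 per the schema above, and decoding maps
-  them back.
-  """
-  import tensorflow as tf
-  anchors = box_list.BoxList(tf.constant([[0.0, 0.0, 0.5, 0.5]]))
-  boxes = box_list.BoxList(tf.constant([[0.05, 0.05, 0.55, 0.55]]))
-  coder = MeanStddevBoxCoder(stddev=0.1)
-  rel_codes = coder.encode(boxes, anchors)  # -> [[0.5, 0.5, 0.5, 0.5]]
-  return coder.decode(rel_codes, anchors).get()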
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/mean_stddev_box_coder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/mean_stddev_box_coder_test.py
deleted file mode 100644
index 3e0eba936fe5a47e34501af73a926d8f83f9f163..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/mean_stddev_box_coder_test.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.box_coder.mean_stddev_boxcoder."""
-
-import tensorflow as tf
-
-from object_detection.box_coders import mean_stddev_box_coder
-from object_detection.core import box_list
-
-
-class MeanStddevBoxCoderTest(tf.test.TestCase):
-
- def testGetCorrectRelativeCodesAfterEncoding(self):
- box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]]
- boxes = box_list.BoxList(tf.constant(box_corners))
- expected_rel_codes = [[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]]
- prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]])
- priors = box_list.BoxList(prior_means)
-
- coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
- rel_codes = coder.encode(boxes, priors)
- with self.test_session() as sess:
- rel_codes_out = sess.run(rel_codes)
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
- def testGetCorrectBoxesAfterDecoding(self):
- rel_codes = tf.constant([[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]])
- expected_box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]]
- prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]])
- priors = box_list.BoxList(prior_means)
-
- coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
- decoded_boxes = coder.decode(rel_codes, priors)
- decoded_box_corners = decoded_boxes.get()
- with self.test_session() as sess:
- decoded_out = sess.run(decoded_box_corners)
- self.assertAllClose(decoded_out, expected_box_corners)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/square_box_coder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/square_box_coder.py
deleted file mode 100644
index ee46b689524838518182ff0f9208168e78c8b2cf..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/square_box_coder.py
+++ /dev/null
@@ -1,126 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Square box coder.
-
-Square box coder follows the coding schema described below:
-l = sqrt(h * w)
-la = sqrt(ha * wa)
-ty = (y - ya) / la
-tx = (x - xa) / la
-tl = log(l / la)
-where x, y, w, h denote the box's center coordinates, width, and height,
-respectively. Similarly, xa, ya, wa, ha denote the anchor's center
-coordinates, width and height. tx, ty, tl denote the anchor-encoded
-center and length, respectively. Because the encoded box is a square, only
-one length is encoded.
-
-This has been shown to provide performance improvements over the Faster RCNN
-coder when the objects being detected tend to be square (e.g. faces) and when
-the input images are not distorted via resizing.
-"""
-
-import tensorflow as tf
-
-from object_detection.core import box_coder
-from object_detection.core import box_list
-
-EPSILON = 1e-8
-
-
-class SquareBoxCoder(box_coder.BoxCoder):
- """Encodes a 3-scalar representation of a square box."""
-
- def __init__(self, scale_factors=None):
- """Constructor for SquareBoxCoder.
-
- Args:
- scale_factors: List of 3 positive scalars to scale ty, tx, and tl.
-        If set to None, does not perform scaling. For Faster RCNN,
- the open-source implementation recommends using [10.0, 10.0, 5.0].
-
- Raises:
- ValueError: If scale_factors is not length 3 or contains values less than
- or equal to 0.
- """
- if scale_factors:
- if len(scale_factors) != 3:
- raise ValueError('The argument scale_factors must be a list of length '
- '3.')
- if any(scalar <= 0 for scalar in scale_factors):
- raise ValueError('The values in scale_factors must all be greater '
- 'than 0.')
- self._scale_factors = scale_factors
-
- @property
- def code_size(self):
- return 3
-
- def _encode(self, boxes, anchors):
- """Encodes a box collection with respect to an anchor collection.
-
- Args:
- boxes: BoxList holding N boxes to be encoded.
- anchors: BoxList of anchors.
-
- Returns:
- a tensor representing N anchor-encoded boxes of the format
- [ty, tx, tl].
- """
- # Convert anchors to the center coordinate representation.
- ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
- la = tf.sqrt(ha * wa)
- ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
- l = tf.sqrt(h * w)
- # Avoid NaN in division and log below.
- la += EPSILON
- l += EPSILON
-
- tx = (xcenter - xcenter_a) / la
- ty = (ycenter - ycenter_a) / la
- tl = tf.log(l / la)
- # Scales location targets for joint training.
- if self._scale_factors:
- ty *= self._scale_factors[0]
- tx *= self._scale_factors[1]
- tl *= self._scale_factors[2]
- return tf.transpose(tf.stack([ty, tx, tl]))
-
- def _decode(self, rel_codes, anchors):
- """Decodes relative codes to boxes.
-
- Args:
- rel_codes: a tensor representing N anchor-encoded boxes.
- anchors: BoxList of anchors.
-
- Returns:
- boxes: BoxList holding N bounding boxes.
- """
- ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
- la = tf.sqrt(ha * wa)
-
- ty, tx, tl = tf.unstack(tf.transpose(rel_codes))
- if self._scale_factors:
- ty /= self._scale_factors[0]
- tx /= self._scale_factors[1]
- tl /= self._scale_factors[2]
- l = tf.exp(tl) * la
- ycenter = ty * la + ycenter_a
- xcenter = tx * la + xcenter_a
- ymin = ycenter - l / 2.
- xmin = xcenter - l / 2.
- ymax = ycenter + l / 2.
- xmax = xcenter + l / 2.
- return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
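
To make the coding schema in the docstring concrete, here is a minimal standalone NumPy sketch of the encode step (illustrative only, not part of the module; the printed values match the first case of the default-scale test below):

import numpy as np

def _center_size(b):
  # [ymin, xmin, ymax, xmax] -> (ycenter, xcenter, height, width)
  return (b[0] + b[2]) / 2., (b[1] + b[3]) / 2., b[2] - b[0], b[3] - b[1]

def encode_square(box, anchor):
  ycenter, xcenter, h, w = _center_size(np.asarray(box, dtype=np.float64))
  ycenter_a, xcenter_a, ha, wa = _center_size(
      np.asarray(anchor, dtype=np.float64))
  l, la = np.sqrt(h * w), np.sqrt(ha * wa)
  # [ty, tx, tl] per the docstring's schema.
  return [(ycenter - ycenter_a) / la, (xcenter - xcenter_a) / la,
          np.log(l / la)]

print(encode_square([10., 10., 20., 15.], [15., 12., 30., 18.]))
# -> approximately [-0.790569, -0.263523, -0.293893]
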
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/square_box_coder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/square_box_coder_test.py
deleted file mode 100644
index 7f739c6b4f38de3d280cb91e9c8e04a661a621e4..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/square_box_coder_test.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.box_coder.square_box_coder."""
-
-import tensorflow as tf
-
-from object_detection.box_coders import square_box_coder
-from object_detection.core import box_list
-
-
-class SquareBoxCoderTest(tf.test.TestCase):
-
- def test_correct_relative_codes_with_default_scale(self):
- boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
- anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
- scale_factors = None
- expected_rel_codes = [[-0.790569, -0.263523, -0.293893],
- [-0.068041, -0.272166, -0.89588]]
-
- boxes = box_list.BoxList(tf.constant(boxes))
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
- rel_codes = coder.encode(boxes, anchors)
- with self.test_session() as sess:
- (rel_codes_out,) = sess.run([rel_codes])
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
- def test_correct_relative_codes_with_non_default_scale(self):
- boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
- anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
- scale_factors = [2, 3, 4]
- expected_rel_codes = [[-1.581139, -0.790569, -1.175573],
- [-0.136083, -0.816497, -3.583519]]
- boxes = box_list.BoxList(tf.constant(boxes))
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
- rel_codes = coder.encode(boxes, anchors)
- with self.test_session() as sess:
- (rel_codes_out,) = sess.run([rel_codes])
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
- def test_correct_relative_codes_with_small_width(self):
- boxes = [[10.0, 10.0, 10.0000001, 20.0]]
- anchors = [[15.0, 12.0, 30.0, 18.0]]
- scale_factors = None
- expected_rel_codes = [[-1.317616, 0., -20.670586]]
- boxes = box_list.BoxList(tf.constant(boxes))
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
- rel_codes = coder.encode(boxes, anchors)
- with self.test_session() as sess:
- (rel_codes_out,) = sess.run([rel_codes])
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
- def test_correct_boxes_with_default_scale(self):
- anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
- rel_codes = [[-0.5, -0.416666, -0.405465],
- [-0.083333, -0.222222, -0.693147]]
- scale_factors = None
- expected_boxes = [[14.594306, 7.884875, 20.918861, 14.209432],
- [0.155051, 0.102989, 0.522474, 0.470412]]
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
- boxes = coder.decode(rel_codes, anchors)
- with self.test_session() as sess:
- (boxes_out,) = sess.run([boxes.get()])
- self.assertAllClose(boxes_out, expected_boxes)
-
- def test_correct_boxes_with_non_default_scale(self):
- anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
- rel_codes = [[-1., -1.25, -1.62186], [-0.166667, -0.666667, -2.772588]]
- scale_factors = [2, 3, 4]
- expected_boxes = [[14.594306, 7.884875, 20.918861, 14.209432],
- [0.155051, 0.102989, 0.522474, 0.470412]]
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
- boxes = coder.decode(rel_codes, anchors)
- with self.test_session() as sess:
- (boxes_out,) = sess.run([boxes.get()])
- self.assertAllClose(boxes_out, expected_boxes)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/anchor_generator_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/anchor_generator_builder.py
deleted file mode 100644
index 54cec3a1df57f06466cde5e2bd9c6b706133c174..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/anchor_generator_builder.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""A function to build an object detection anchor generator from config."""
-
-from object_detection.anchor_generators import grid_anchor_generator
-from object_detection.anchor_generators import multiple_grid_anchor_generator
-from object_detection.anchor_generators import multiscale_grid_anchor_generator
-from object_detection.protos import anchor_generator_pb2
-
-
-def build(anchor_generator_config):
- """Builds an anchor generator based on the config.
-
- Args:
- anchor_generator_config: An anchor_generator.proto object containing the
- config for the desired anchor generator.
-
- Returns:
- Anchor generator based on the config.
-
- Raises:
- ValueError: On empty anchor generator proto.
- """
- if not isinstance(anchor_generator_config,
- anchor_generator_pb2.AnchorGenerator):
- raise ValueError('anchor_generator_config not of type '
- 'anchor_generator_pb2.AnchorGenerator')
- if anchor_generator_config.WhichOneof(
- 'anchor_generator_oneof') == 'grid_anchor_generator':
- grid_anchor_generator_config = anchor_generator_config.grid_anchor_generator
- return grid_anchor_generator.GridAnchorGenerator(
- scales=[float(scale) for scale in grid_anchor_generator_config.scales],
- aspect_ratios=[float(aspect_ratio)
- for aspect_ratio
- in grid_anchor_generator_config.aspect_ratios],
- base_anchor_size=[grid_anchor_generator_config.height,
- grid_anchor_generator_config.width],
- anchor_stride=[grid_anchor_generator_config.height_stride,
- grid_anchor_generator_config.width_stride],
- anchor_offset=[grid_anchor_generator_config.height_offset,
- grid_anchor_generator_config.width_offset])
- elif anchor_generator_config.WhichOneof(
- 'anchor_generator_oneof') == 'ssd_anchor_generator':
- ssd_anchor_generator_config = anchor_generator_config.ssd_anchor_generator
- anchor_strides = None
- if ssd_anchor_generator_config.height_stride:
- anchor_strides = zip(ssd_anchor_generator_config.height_stride,
- ssd_anchor_generator_config.width_stride)
- anchor_offsets = None
- if ssd_anchor_generator_config.height_offset:
- anchor_offsets = zip(ssd_anchor_generator_config.height_offset,
- ssd_anchor_generator_config.width_offset)
- return multiple_grid_anchor_generator.create_ssd_anchors(
- num_layers=ssd_anchor_generator_config.num_layers,
- min_scale=ssd_anchor_generator_config.min_scale,
- max_scale=ssd_anchor_generator_config.max_scale,
- scales=[float(scale) for scale in ssd_anchor_generator_config.scales],
- aspect_ratios=ssd_anchor_generator_config.aspect_ratios,
- interpolated_scale_aspect_ratio=(
- ssd_anchor_generator_config.interpolated_scale_aspect_ratio),
- base_anchor_size=[
- ssd_anchor_generator_config.base_anchor_height,
- ssd_anchor_generator_config.base_anchor_width
- ],
- anchor_strides=anchor_strides,
- anchor_offsets=anchor_offsets,
- reduce_boxes_in_lowest_layer=(
- ssd_anchor_generator_config.reduce_boxes_in_lowest_layer))
- elif anchor_generator_config.WhichOneof(
- 'anchor_generator_oneof') == 'multiscale_anchor_generator':
- cfg = anchor_generator_config.multiscale_anchor_generator
- return multiscale_grid_anchor_generator.MultiscaleGridAnchorGenerator(
- cfg.min_level,
- cfg.max_level,
- cfg.anchor_scale,
- [float(aspect_ratio) for aspect_ratio in cfg.aspect_ratios],
- cfg.scales_per_octave,
- cfg.normalize_coordinates
- )
- else:
- raise ValueError('Empty anchor generator.')
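
A minimal usage sketch for this builder (assuming the object_detection package and its compiled protos are importable; the pattern mirrors the tests that follow):

from google.protobuf import text_format
from object_detection.builders import anchor_generator_builder
from object_detection.protos import anchor_generator_pb2

config = anchor_generator_pb2.AnchorGenerator()
text_format.Merge("""
  grid_anchor_generator {
    scales: [0.5, 1.0, 2.0]
    aspect_ratios: [0.5, 1.0, 2.0]
    height_stride: 16
    width_stride: 16
  }
""", config)
# build() dispatches on the anchor_generator_oneof field; this returns a
# GridAnchorGenerator configured from the proto above.
generator = anchor_generator_builder.build(config)
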
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/anchor_generator_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/anchor_generator_builder_test.py
deleted file mode 100644
index 2a23c2d96b411634263ef7bd20ed045c6305c790..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/anchor_generator_builder_test.py
+++ /dev/null
@@ -1,300 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for anchor_generator_builder."""
-
-import math
-
-import tensorflow as tf
-
-from google.protobuf import text_format
-from object_detection.anchor_generators import grid_anchor_generator
-from object_detection.anchor_generators import multiple_grid_anchor_generator
-from object_detection.anchor_generators import multiscale_grid_anchor_generator
-from object_detection.builders import anchor_generator_builder
-from object_detection.protos import anchor_generator_pb2
-
-
-class AnchorGeneratorBuilderTest(tf.test.TestCase):
-
- def assert_almost_list_equal(self, expected_list, actual_list, delta=None):
- self.assertEqual(len(expected_list), len(actual_list))
- for expected_item, actual_item in zip(expected_list, actual_list):
- self.assertAlmostEqual(expected_item, actual_item, delta=delta)
-
- def test_build_grid_anchor_generator_with_defaults(self):
- anchor_generator_text_proto = """
- grid_anchor_generator {
- }
- """
- anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
- text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
- anchor_generator_object = anchor_generator_builder.build(
- anchor_generator_proto)
- self.assertTrue(isinstance(anchor_generator_object,
- grid_anchor_generator.GridAnchorGenerator))
- self.assertListEqual(anchor_generator_object._scales, [])
- self.assertListEqual(anchor_generator_object._aspect_ratios, [])
- with self.test_session() as sess:
- base_anchor_size, anchor_offset, anchor_stride = sess.run(
- [anchor_generator_object._base_anchor_size,
- anchor_generator_object._anchor_offset,
- anchor_generator_object._anchor_stride])
- self.assertAllEqual(anchor_offset, [0, 0])
- self.assertAllEqual(anchor_stride, [16, 16])
- self.assertAllEqual(base_anchor_size, [256, 256])
-
- def test_build_grid_anchor_generator_with_non_default_parameters(self):
- anchor_generator_text_proto = """
- grid_anchor_generator {
- height: 128
- width: 512
- height_stride: 10
- width_stride: 20
- height_offset: 30
- width_offset: 40
- scales: [0.4, 2.2]
- aspect_ratios: [0.3, 4.5]
- }
- """
- anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
- text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
- anchor_generator_object = anchor_generator_builder.build(
- anchor_generator_proto)
- self.assertTrue(isinstance(anchor_generator_object,
- grid_anchor_generator.GridAnchorGenerator))
- self.assert_almost_list_equal(anchor_generator_object._scales,
- [0.4, 2.2])
- self.assert_almost_list_equal(anchor_generator_object._aspect_ratios,
- [0.3, 4.5])
- with self.test_session() as sess:
- base_anchor_size, anchor_offset, anchor_stride = sess.run(
- [anchor_generator_object._base_anchor_size,
- anchor_generator_object._anchor_offset,
- anchor_generator_object._anchor_stride])
- self.assertAllEqual(anchor_offset, [30, 40])
- self.assertAllEqual(anchor_stride, [10, 20])
- self.assertAllEqual(base_anchor_size, [128, 512])
-
- def test_build_ssd_anchor_generator_with_defaults(self):
- anchor_generator_text_proto = """
- ssd_anchor_generator {
- aspect_ratios: [1.0]
- }
- """
- anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
- text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
- anchor_generator_object = anchor_generator_builder.build(
- anchor_generator_proto)
- self.assertTrue(isinstance(anchor_generator_object,
- multiple_grid_anchor_generator.
- MultipleGridAnchorGenerator))
- for actual_scales, expected_scales in zip(
- list(anchor_generator_object._scales),
- [(0.1, 0.2, 0.2),
- (0.35, 0.418),
- (0.499, 0.570),
- (0.649, 0.721),
- (0.799, 0.871),
- (0.949, 0.974)]):
- self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2)
- for actual_aspect_ratio, expected_aspect_ratio in zip(
- list(anchor_generator_object._aspect_ratios),
- [(1.0, 2.0, 0.5)] + 5 * [(1.0, 1.0)]):
- self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio)
-
- with self.test_session() as sess:
- base_anchor_size = sess.run(anchor_generator_object._base_anchor_size)
- self.assertAllClose(base_anchor_size, [1.0, 1.0])
-
- def test_build_ssd_anchor_generator_with_custom_scales(self):
- anchor_generator_text_proto = """
- ssd_anchor_generator {
- aspect_ratios: [1.0]
- scales: [0.1, 0.15, 0.2, 0.4, 0.6, 0.8]
- reduce_boxes_in_lowest_layer: false
- }
- """
- anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
- text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
- anchor_generator_object = anchor_generator_builder.build(
- anchor_generator_proto)
- self.assertTrue(isinstance(anchor_generator_object,
- multiple_grid_anchor_generator.
- MultipleGridAnchorGenerator))
- for actual_scales, expected_scales in zip(
- list(anchor_generator_object._scales),
- [(0.1, math.sqrt(0.1 * 0.15)),
- (0.15, math.sqrt(0.15 * 0.2)),
- (0.2, math.sqrt(0.2 * 0.4)),
- (0.4, math.sqrt(0.4 * 0.6)),
- (0.6, math.sqrt(0.6 * 0.8)),
- (0.8, math.sqrt(0.8 * 1.0))]):
- self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2)
-
- def test_build_ssd_anchor_generator_with_custom_interpolated_scale(self):
- anchor_generator_text_proto = """
- ssd_anchor_generator {
- aspect_ratios: [0.5]
- interpolated_scale_aspect_ratio: 0.5
- reduce_boxes_in_lowest_layer: false
- }
- """
- anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
- text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
- anchor_generator_object = anchor_generator_builder.build(
- anchor_generator_proto)
- self.assertTrue(isinstance(anchor_generator_object,
- multiple_grid_anchor_generator.
- MultipleGridAnchorGenerator))
- for actual_aspect_ratio, expected_aspect_ratio in zip(
- list(anchor_generator_object._aspect_ratios),
- 6 * [(0.5, 0.5)]):
- self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio)
-
- def test_build_ssd_anchor_generator_without_reduced_boxes(self):
- anchor_generator_text_proto = """
- ssd_anchor_generator {
- aspect_ratios: [1.0]
- reduce_boxes_in_lowest_layer: false
- }
- """
- anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
- text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
- anchor_generator_object = anchor_generator_builder.build(
- anchor_generator_proto)
- self.assertTrue(isinstance(anchor_generator_object,
- multiple_grid_anchor_generator.
- MultipleGridAnchorGenerator))
-
- for actual_scales, expected_scales in zip(
- list(anchor_generator_object._scales),
- [(0.2, 0.264),
- (0.35, 0.418),
- (0.499, 0.570),
- (0.649, 0.721),
- (0.799, 0.871),
- (0.949, 0.974)]):
- self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2)
-
- for actual_aspect_ratio, expected_aspect_ratio in zip(
- list(anchor_generator_object._aspect_ratios),
- 6 * [(1.0, 1.0)]):
- self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio)
-
- with self.test_session() as sess:
- base_anchor_size = sess.run(anchor_generator_object._base_anchor_size)
- self.assertAllClose(base_anchor_size, [1.0, 1.0])
-
- def test_build_ssd_anchor_generator_with_non_default_parameters(self):
- anchor_generator_text_proto = """
- ssd_anchor_generator {
- num_layers: 2
- min_scale: 0.3
- max_scale: 0.8
- aspect_ratios: [2.0]
- height_stride: 16
- height_stride: 32
- width_stride: 20
- width_stride: 30
- height_offset: 8
- height_offset: 16
- width_offset: 0
- width_offset: 10
- }
- """
- anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
- text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
- anchor_generator_object = anchor_generator_builder.build(
- anchor_generator_proto)
- self.assertTrue(isinstance(anchor_generator_object,
- multiple_grid_anchor_generator.
- MultipleGridAnchorGenerator))
-
- for actual_scales, expected_scales in zip(
- list(anchor_generator_object._scales),
- [(0.1, 0.3, 0.3), (0.8, 0.894)]):
- self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2)
-
- for actual_aspect_ratio, expected_aspect_ratio in zip(
- list(anchor_generator_object._aspect_ratios),
- [(1.0, 2.0, 0.5), (2.0, 1.0)]):
- self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio)
-
- for actual_strides, expected_strides in zip(
- list(anchor_generator_object._anchor_strides), [(16, 20), (32, 30)]):
- self.assert_almost_list_equal(expected_strides, actual_strides)
-
- for actual_offsets, expected_offsets in zip(
- list(anchor_generator_object._anchor_offsets), [(8, 0), (16, 10)]):
- self.assert_almost_list_equal(expected_offsets, actual_offsets)
-
- with self.test_session() as sess:
- base_anchor_size = sess.run(anchor_generator_object._base_anchor_size)
- self.assertAllClose(base_anchor_size, [1.0, 1.0])
-
- def test_raise_value_error_on_empty_anchor_generator(self):
- anchor_generator_text_proto = """
- """
- anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
- text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
- with self.assertRaises(ValueError):
- anchor_generator_builder.build(anchor_generator_proto)
-
- def test_build_multiscale_anchor_generator_custom_aspect_ratios(self):
- anchor_generator_text_proto = """
- multiscale_anchor_generator {
- aspect_ratios: [1.0]
- }
- """
- anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
- text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
- anchor_generator_object = anchor_generator_builder.build(
- anchor_generator_proto)
- self.assertTrue(isinstance(anchor_generator_object,
- multiscale_grid_anchor_generator.
- MultiscaleGridAnchorGenerator))
- for level, anchor_grid_info in zip(
- range(3, 8), anchor_generator_object._anchor_grid_info):
- self.assertEqual(set(anchor_grid_info.keys()), set(['level', 'info']))
- self.assertEqual(level, anchor_grid_info['level'])
- self.assertEqual(len(anchor_grid_info['info']), 4)
- self.assertAllClose(anchor_grid_info['info'][0], [2**0, 2**0.5])
- self.assertAllClose(anchor_grid_info['info'][1], [1.0])
- self.assertAllClose(anchor_grid_info['info'][2],
- [4.0 * 2**level, 4.0 * 2**level])
- self.assertAllClose(anchor_grid_info['info'][3], [2**level, 2**level])
- self.assertTrue(anchor_generator_object._normalize_coordinates)
-
- def test_build_multiscale_anchor_generator_with_anchors_in_pixel_coordinates(
- self):
- anchor_generator_text_proto = """
- multiscale_anchor_generator {
- aspect_ratios: [1.0]
- normalize_coordinates: false
- }
- """
- anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
- text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
- anchor_generator_object = anchor_generator_builder.build(
- anchor_generator_proto)
- self.assertTrue(isinstance(anchor_generator_object,
- multiscale_grid_anchor_generator.
- MultiscaleGridAnchorGenerator))
- self.assertFalse(anchor_generator_object._normalize_coordinates)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_coder_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_coder_builder.py
deleted file mode 100644
index cc13d5a2f01c5a1f66e83abc5bb5ada542047d83..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_coder_builder.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""A function to build an object detection box coder from configuration."""
-from object_detection.box_coders import faster_rcnn_box_coder
-from object_detection.box_coders import keypoint_box_coder
-from object_detection.box_coders import mean_stddev_box_coder
-from object_detection.box_coders import square_box_coder
-from object_detection.protos import box_coder_pb2
-
-
-def build(box_coder_config):
- """Builds a box coder object based on the box coder config.
-
- Args:
- box_coder_config: A box_coder.proto object containing the config for the
- desired box coder.
-
- Returns:
- BoxCoder based on the config.
-
- Raises:
- ValueError: On empty box coder proto.
- """
- if not isinstance(box_coder_config, box_coder_pb2.BoxCoder):
- raise ValueError('box_coder_config not of type box_coder_pb2.BoxCoder.')
-
- if box_coder_config.WhichOneof('box_coder_oneof') == 'faster_rcnn_box_coder':
- return faster_rcnn_box_coder.FasterRcnnBoxCoder(scale_factors=[
- box_coder_config.faster_rcnn_box_coder.y_scale,
- box_coder_config.faster_rcnn_box_coder.x_scale,
- box_coder_config.faster_rcnn_box_coder.height_scale,
- box_coder_config.faster_rcnn_box_coder.width_scale
- ])
- if box_coder_config.WhichOneof('box_coder_oneof') == 'keypoint_box_coder':
- return keypoint_box_coder.KeypointBoxCoder(
- box_coder_config.keypoint_box_coder.num_keypoints,
- scale_factors=[
- box_coder_config.keypoint_box_coder.y_scale,
- box_coder_config.keypoint_box_coder.x_scale,
- box_coder_config.keypoint_box_coder.height_scale,
- box_coder_config.keypoint_box_coder.width_scale
- ])
- if (box_coder_config.WhichOneof('box_coder_oneof') ==
- 'mean_stddev_box_coder'):
- return mean_stddev_box_coder.MeanStddevBoxCoder(
- stddev=box_coder_config.mean_stddev_box_coder.stddev)
- if box_coder_config.WhichOneof('box_coder_oneof') == 'square_box_coder':
- return square_box_coder.SquareBoxCoder(scale_factors=[
- box_coder_config.square_box_coder.y_scale,
- box_coder_config.square_box_coder.x_scale,
- box_coder_config.square_box_coder.length_scale
- ])
- raise ValueError('Empty box coder.')
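
As with the other builders, usage is a one-shot dispatch on the proto's oneof field; a minimal sketch (assuming the compiled protos are available, mirroring the tests below):

from google.protobuf import text_format
from object_detection.builders import box_coder_builder
from object_detection.protos import box_coder_pb2

config = box_coder_pb2.BoxCoder()
text_format.Merge("""
  square_box_coder {
    y_scale: 6.0
    x_scale: 3.0
    length_scale: 7.0
  }
""", config)
# Returns a SquareBoxCoder with scale_factors [6.0, 3.0, 7.0].
coder = box_coder_builder.build(config)
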
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_coder_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_coder_builder_test.py
deleted file mode 100644
index 286012e9de7661a5663e0ba2873818337f106985..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_coder_builder_test.py
+++ /dev/null
@@ -1,136 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for box_coder_builder."""
-
-import tensorflow as tf
-
-from google.protobuf import text_format
-from object_detection.box_coders import faster_rcnn_box_coder
-from object_detection.box_coders import keypoint_box_coder
-from object_detection.box_coders import mean_stddev_box_coder
-from object_detection.box_coders import square_box_coder
-from object_detection.builders import box_coder_builder
-from object_detection.protos import box_coder_pb2
-
-
-class BoxCoderBuilderTest(tf.test.TestCase):
-
- def test_build_faster_rcnn_box_coder_with_defaults(self):
- box_coder_text_proto = """
- faster_rcnn_box_coder {
- }
- """
- box_coder_proto = box_coder_pb2.BoxCoder()
- text_format.Merge(box_coder_text_proto, box_coder_proto)
- box_coder_object = box_coder_builder.build(box_coder_proto)
- self.assertIsInstance(box_coder_object,
- faster_rcnn_box_coder.FasterRcnnBoxCoder)
- self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0, 5.0])
-
- def test_build_faster_rcnn_box_coder_with_non_default_parameters(self):
- box_coder_text_proto = """
- faster_rcnn_box_coder {
- y_scale: 6.0
- x_scale: 3.0
- height_scale: 7.0
- width_scale: 8.0
- }
- """
- box_coder_proto = box_coder_pb2.BoxCoder()
- text_format.Merge(box_coder_text_proto, box_coder_proto)
- box_coder_object = box_coder_builder.build(box_coder_proto)
- self.assertIsInstance(box_coder_object,
- faster_rcnn_box_coder.FasterRcnnBoxCoder)
- self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0, 8.0])
-
- def test_build_keypoint_box_coder_with_defaults(self):
- box_coder_text_proto = """
- keypoint_box_coder {
- }
- """
- box_coder_proto = box_coder_pb2.BoxCoder()
- text_format.Merge(box_coder_text_proto, box_coder_proto)
- box_coder_object = box_coder_builder.build(box_coder_proto)
- self.assertIsInstance(box_coder_object, keypoint_box_coder.KeypointBoxCoder)
- self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0, 5.0])
-
- def test_build_keypoint_box_coder_with_non_default_parameters(self):
- box_coder_text_proto = """
- keypoint_box_coder {
- num_keypoints: 6
- y_scale: 6.0
- x_scale: 3.0
- height_scale: 7.0
- width_scale: 8.0
- }
- """
- box_coder_proto = box_coder_pb2.BoxCoder()
- text_format.Merge(box_coder_text_proto, box_coder_proto)
- box_coder_object = box_coder_builder.build(box_coder_proto)
- self.assertIsInstance(box_coder_object, keypoint_box_coder.KeypointBoxCoder)
- self.assertEqual(box_coder_object._num_keypoints, 6)
- self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0, 8.0])
-
- def test_build_mean_stddev_box_coder(self):
- box_coder_text_proto = """
- mean_stddev_box_coder {
- }
- """
- box_coder_proto = box_coder_pb2.BoxCoder()
- text_format.Merge(box_coder_text_proto, box_coder_proto)
- box_coder_object = box_coder_builder.build(box_coder_proto)
- self.assertTrue(
- isinstance(box_coder_object,
- mean_stddev_box_coder.MeanStddevBoxCoder))
-
- def test_build_square_box_coder_with_defaults(self):
- box_coder_text_proto = """
- square_box_coder {
- }
- """
- box_coder_proto = box_coder_pb2.BoxCoder()
- text_format.Merge(box_coder_text_proto, box_coder_proto)
- box_coder_object = box_coder_builder.build(box_coder_proto)
- self.assertTrue(
- isinstance(box_coder_object, square_box_coder.SquareBoxCoder))
- self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0])
-
- def test_build_square_box_coder_with_non_default_parameters(self):
- box_coder_text_proto = """
- square_box_coder {
- y_scale: 6.0
- x_scale: 3.0
- length_scale: 7.0
- }
- """
- box_coder_proto = box_coder_pb2.BoxCoder()
- text_format.Merge(box_coder_text_proto, box_coder_proto)
- box_coder_object = box_coder_builder.build(box_coder_proto)
- self.assertTrue(
- isinstance(box_coder_object, square_box_coder.SquareBoxCoder))
- self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0])
-
- def test_raise_error_on_empty_box_coder(self):
- box_coder_text_proto = """
- """
- box_coder_proto = box_coder_pb2.BoxCoder()
- text_format.Merge(box_coder_text_proto, box_coder_proto)
- with self.assertRaises(ValueError):
- box_coder_builder.build(box_coder_proto)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_predictor_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_predictor_builder.py
deleted file mode 100644
index 2f311221ce4729599501c93d2192764d5ed8207b..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_predictor_builder.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Function to build box predictor from configuration."""
-
-from object_detection.core import box_predictor
-from object_detection.protos import box_predictor_pb2
-
-
-def build(argscope_fn, box_predictor_config, is_training, num_classes):
- """Builds box predictor based on the configuration.
-
- See box_predictor.proto for configurable options and box_predictor.py for
- implementation details.
-
- Args:
- argscope_fn: A function that takes the following inputs:
- * hyperparams_pb2.Hyperparams proto
- * a boolean indicating if the model is in training mode.
- and returns a tf-slim arg_scope for Conv and FC hyperparameters.
- box_predictor_config: box_predictor_pb2.BoxPredictor proto containing
- configuration.
- is_training: Whether the model is in training mode.
- num_classes: Number of classes to predict.
-
- Returns:
- box_predictor: box_predictor.BoxPredictor object.
-
- Raises:
- ValueError: On unknown box predictor.
- """
- if not isinstance(box_predictor_config, box_predictor_pb2.BoxPredictor):
- raise ValueError('box_predictor_config not of type '
- 'box_predictor_pb2.BoxPredictor.')
-
- box_predictor_oneof = box_predictor_config.WhichOneof('box_predictor_oneof')
-
- if box_predictor_oneof == 'convolutional_box_predictor':
- conv_box_predictor = box_predictor_config.convolutional_box_predictor
- conv_hyperparams_fn = argscope_fn(conv_box_predictor.conv_hyperparams,
- is_training)
- box_predictor_object = box_predictor.ConvolutionalBoxPredictor(
- is_training=is_training,
- num_classes=num_classes,
- conv_hyperparams_fn=conv_hyperparams_fn,
- min_depth=conv_box_predictor.min_depth,
- max_depth=conv_box_predictor.max_depth,
- num_layers_before_predictor=(conv_box_predictor.
- num_layers_before_predictor),
- use_dropout=conv_box_predictor.use_dropout,
- dropout_keep_prob=conv_box_predictor.dropout_keep_probability,
- kernel_size=conv_box_predictor.kernel_size,
- box_code_size=conv_box_predictor.box_code_size,
- apply_sigmoid_to_scores=conv_box_predictor.apply_sigmoid_to_scores,
- class_prediction_bias_init=(conv_box_predictor.
- class_prediction_bias_init),
- use_depthwise=conv_box_predictor.use_depthwise
- )
- return box_predictor_object
-
- if box_predictor_oneof == 'weight_shared_convolutional_box_predictor':
- conv_box_predictor = (box_predictor_config.
- weight_shared_convolutional_box_predictor)
- conv_hyperparams_fn = argscope_fn(conv_box_predictor.conv_hyperparams,
- is_training)
- box_predictor_object = box_predictor.WeightSharedConvolutionalBoxPredictor(
- is_training=is_training,
- num_classes=num_classes,
- conv_hyperparams_fn=conv_hyperparams_fn,
- depth=conv_box_predictor.depth,
- num_layers_before_predictor=(
- conv_box_predictor.num_layers_before_predictor),
- kernel_size=conv_box_predictor.kernel_size,
- box_code_size=conv_box_predictor.box_code_size,
- class_prediction_bias_init=conv_box_predictor.
- class_prediction_bias_init,
- use_dropout=conv_box_predictor.use_dropout,
- dropout_keep_prob=conv_box_predictor.dropout_keep_probability)
- return box_predictor_object
-
- if box_predictor_oneof == 'mask_rcnn_box_predictor':
- mask_rcnn_box_predictor = box_predictor_config.mask_rcnn_box_predictor
- fc_hyperparams_fn = argscope_fn(mask_rcnn_box_predictor.fc_hyperparams,
- is_training)
- conv_hyperparams_fn = None
- if mask_rcnn_box_predictor.HasField('conv_hyperparams'):
- conv_hyperparams_fn = argscope_fn(
- mask_rcnn_box_predictor.conv_hyperparams, is_training)
- box_predictor_object = box_predictor.MaskRCNNBoxPredictor(
- is_training=is_training,
- num_classes=num_classes,
- fc_hyperparams_fn=fc_hyperparams_fn,
- use_dropout=mask_rcnn_box_predictor.use_dropout,
- dropout_keep_prob=mask_rcnn_box_predictor.dropout_keep_probability,
- box_code_size=mask_rcnn_box_predictor.box_code_size,
- conv_hyperparams_fn=conv_hyperparams_fn,
- predict_instance_masks=mask_rcnn_box_predictor.predict_instance_masks,
- mask_height=mask_rcnn_box_predictor.mask_height,
- mask_width=mask_rcnn_box_predictor.mask_width,
- mask_prediction_num_conv_layers=(
- mask_rcnn_box_predictor.mask_prediction_num_conv_layers),
- mask_prediction_conv_depth=(
- mask_rcnn_box_predictor.mask_prediction_conv_depth),
- masks_are_class_agnostic=(
- mask_rcnn_box_predictor.masks_are_class_agnostic),
- predict_keypoints=mask_rcnn_box_predictor.predict_keypoints,
- share_box_across_classes=(
- mask_rcnn_box_predictor.share_box_across_classes))
- return box_predictor_object
-
- if box_predictor_oneof == 'rfcn_box_predictor':
- rfcn_box_predictor = box_predictor_config.rfcn_box_predictor
- conv_hyperparams_fn = argscope_fn(rfcn_box_predictor.conv_hyperparams,
- is_training)
- box_predictor_object = box_predictor.RfcnBoxPredictor(
- is_training=is_training,
- num_classes=num_classes,
- conv_hyperparams_fn=conv_hyperparams_fn,
- crop_size=[rfcn_box_predictor.crop_height,
- rfcn_box_predictor.crop_width],
- num_spatial_bins=[rfcn_box_predictor.num_spatial_bins_height,
- rfcn_box_predictor.num_spatial_bins_width],
- depth=rfcn_box_predictor.depth,
- box_code_size=rfcn_box_predictor.box_code_size)
- return box_predictor_object
- raise ValueError('Unknown box predictor: {}'.format(box_predictor_oneof))
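
A minimal sketch of building a Mask R-CNN box predictor through this function, using hyperparams_builder.build as the argscope_fn (assuming the object_detection protos are importable; the hyperparameter values below are illustrative, not prescribed):

from google.protobuf import text_format
from object_detection.builders import box_predictor_builder
from object_detection.builders import hyperparams_builder
from object_detection.protos import box_predictor_pb2

config = box_predictor_pb2.BoxPredictor()
text_format.Merge("""
  mask_rcnn_box_predictor {
    fc_hyperparams {
      op: FC
      regularizer { l2_regularizer { weight: 0.0 } }
      initializer { variance_scaling_initializer {} }
    }
    conv_hyperparams {
      op: CONV
      regularizer { l2_regularizer { weight: 0.0 } }
      initializer { truncated_normal_initializer { stddev: 0.01 } }
    }
    predict_instance_masks: true
    mask_height: 14
    mask_width: 14
  }
""", config)
# Builds a MaskRCNNBoxPredictor; conv_hyperparams is required here because
# predict_instance_masks is enabled.
predictor = box_predictor_builder.build(
    argscope_fn=hyperparams_builder.build,
    box_predictor_config=config,
    is_training=True,
    num_classes=90)
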
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_predictor_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_predictor_builder_test.py
deleted file mode 100644
index 35ad57be9975bedb93a953d3189062ef0d8d5568..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_predictor_builder_test.py
+++ /dev/null
@@ -1,514 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for box_predictor_builder."""
-import mock
-import tensorflow as tf
-
-from google.protobuf import text_format
-from object_detection.builders import box_predictor_builder
-from object_detection.builders import hyperparams_builder
-from object_detection.protos import box_predictor_pb2
-from object_detection.protos import hyperparams_pb2
-
-
-class ConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
-
- def test_box_predictor_calls_conv_argscope_fn(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- weight: 0.0003
- }
- }
- initializer {
- truncated_normal_initializer {
- mean: 0.0
- stddev: 0.3
- }
- }
- activation: RELU_6
- """
- hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
- def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
- return (conv_hyperparams_arg, is_training)
-
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- box_predictor_proto.convolutional_box_predictor.conv_hyperparams.CopyFrom(
- hyperparams_proto)
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock_conv_argscope_builder,
- box_predictor_config=box_predictor_proto,
- is_training=False,
- num_classes=10)
- (conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams_fn
- self.assertAlmostEqual((hyperparams_proto.regularizer.
- l1_regularizer.weight),
- (conv_hyperparams_actual.regularizer.l1_regularizer.
- weight))
- self.assertAlmostEqual((hyperparams_proto.initializer.
- truncated_normal_initializer.stddev),
- (conv_hyperparams_actual.initializer.
- truncated_normal_initializer.stddev))
- self.assertAlmostEqual((hyperparams_proto.initializer.
- truncated_normal_initializer.mean),
- (conv_hyperparams_actual.initializer.
- truncated_normal_initializer.mean))
- self.assertEqual(hyperparams_proto.activation,
- conv_hyperparams_actual.activation)
- self.assertFalse(is_training)
-
- def test_construct_non_default_conv_box_predictor(self):
- box_predictor_text_proto = """
- convolutional_box_predictor {
- min_depth: 2
- max_depth: 16
- num_layers_before_predictor: 2
- use_dropout: false
- dropout_keep_probability: 0.4
- kernel_size: 3
- box_code_size: 3
- apply_sigmoid_to_scores: true
- class_prediction_bias_init: 4.0
- use_depthwise: true
- }
- """
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
- def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
- return (conv_hyperparams_arg, is_training)
-
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- text_format.Merge(box_predictor_text_proto, box_predictor_proto)
- box_predictor_proto.convolutional_box_predictor.conv_hyperparams.CopyFrom(
- hyperparams_proto)
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock_conv_argscope_builder,
- box_predictor_config=box_predictor_proto,
- is_training=False,
- num_classes=10)
- self.assertEqual(box_predictor._min_depth, 2)
- self.assertEqual(box_predictor._max_depth, 16)
- self.assertEqual(box_predictor._num_layers_before_predictor, 2)
- self.assertFalse(box_predictor._use_dropout)
- self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.4)
- self.assertTrue(box_predictor._apply_sigmoid_to_scores)
- self.assertAlmostEqual(box_predictor._class_prediction_bias_init, 4.0)
- self.assertEqual(box_predictor.num_classes, 10)
- self.assertFalse(box_predictor._is_training)
- self.assertTrue(box_predictor._use_depthwise)
-
- def test_construct_default_conv_box_predictor(self):
- box_predictor_text_proto = """
- convolutional_box_predictor {
- conv_hyperparams {
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }"""
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- text_format.Merge(box_predictor_text_proto, box_predictor_proto)
- box_predictor = box_predictor_builder.build(
- argscope_fn=hyperparams_builder.build,
- box_predictor_config=box_predictor_proto,
- is_training=True,
- num_classes=90)
- self.assertEqual(box_predictor._min_depth, 0)
- self.assertEqual(box_predictor._max_depth, 0)
- self.assertEqual(box_predictor._num_layers_before_predictor, 0)
- self.assertTrue(box_predictor._use_dropout)
- self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.8)
- self.assertFalse(box_predictor._apply_sigmoid_to_scores)
- self.assertEqual(box_predictor.num_classes, 90)
- self.assertTrue(box_predictor._is_training)
- self.assertFalse(box_predictor._use_depthwise)
-
-
-class WeightSharedConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
-
- def test_box_predictor_calls_conv_argscope_fn(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- weight: 0.0003
- }
- }
- initializer {
- truncated_normal_initializer {
- mean: 0.0
- stddev: 0.3
- }
- }
- activation: RELU_6
- """
- hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
- def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
- return (conv_hyperparams_arg, is_training)
-
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- (box_predictor_proto.weight_shared_convolutional_box_predictor
- .conv_hyperparams.CopyFrom(hyperparams_proto))
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock_conv_argscope_builder,
- box_predictor_config=box_predictor_proto,
- is_training=False,
- num_classes=10)
- (conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams_fn
- self.assertAlmostEqual((hyperparams_proto.regularizer.
- l1_regularizer.weight),
- (conv_hyperparams_actual.regularizer.l1_regularizer.
- weight))
- self.assertAlmostEqual((hyperparams_proto.initializer.
- truncated_normal_initializer.stddev),
- (conv_hyperparams_actual.initializer.
- truncated_normal_initializer.stddev))
- self.assertAlmostEqual((hyperparams_proto.initializer.
- truncated_normal_initializer.mean),
- (conv_hyperparams_actual.initializer.
- truncated_normal_initializer.mean))
- self.assertEqual(hyperparams_proto.activation,
- conv_hyperparams_actual.activation)
- self.assertFalse(is_training)
-
- def test_construct_non_default_conv_box_predictor(self):
- box_predictor_text_proto = """
- weight_shared_convolutional_box_predictor {
- depth: 2
- num_layers_before_predictor: 2
- kernel_size: 7
- box_code_size: 3
- class_prediction_bias_init: 4.0
- }
- """
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
- def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
- return (conv_hyperparams_arg, is_training)
-
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- text_format.Merge(box_predictor_text_proto, box_predictor_proto)
- (box_predictor_proto.weight_shared_convolutional_box_predictor.
- conv_hyperparams.CopyFrom(hyperparams_proto))
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock_conv_argscope_builder,
- box_predictor_config=box_predictor_proto,
- is_training=False,
- num_classes=10)
- self.assertEqual(box_predictor._depth, 2)
- self.assertEqual(box_predictor._num_layers_before_predictor, 2)
- self.assertAlmostEqual(box_predictor._class_prediction_bias_init, 4.0)
- self.assertEqual(box_predictor.num_classes, 10)
- self.assertFalse(box_predictor._is_training)
-
- def test_construct_default_conv_box_predictor(self):
- box_predictor_text_proto = """
- weight_shared_convolutional_box_predictor {
- conv_hyperparams {
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }"""
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- text_format.Merge(box_predictor_text_proto, box_predictor_proto)
- box_predictor = box_predictor_builder.build(
- argscope_fn=hyperparams_builder.build,
- box_predictor_config=box_predictor_proto,
- is_training=True,
- num_classes=90)
- self.assertEqual(box_predictor._depth, 0)
- self.assertEqual(box_predictor._num_layers_before_predictor, 0)
- self.assertEqual(box_predictor.num_classes, 90)
- self.assertTrue(box_predictor._is_training)
-
-
-class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
-
- def test_box_predictor_builder_calls_fc_argscope_fn(self):
- fc_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- weight: 0.0003
- }
- }
- initializer {
- truncated_normal_initializer {
- mean: 0.0
- stddev: 0.3
- }
- }
- activation: RELU_6
- op: FC
- """
- hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(fc_hyperparams_text_proto, hyperparams_proto)
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.CopyFrom(
- hyperparams_proto)
- mock_argscope_fn = mock.Mock(return_value='arg_scope')
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock_argscope_fn,
- box_predictor_config=box_predictor_proto,
- is_training=False,
- num_classes=10)
- mock_argscope_fn.assert_called_with(hyperparams_proto, False)
- self.assertEqual(box_predictor._fc_hyperparams_fn, 'arg_scope')
-
- def test_non_default_mask_rcnn_box_predictor(self):
- fc_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- activation: RELU_6
- op: FC
- """
- box_predictor_text_proto = """
- mask_rcnn_box_predictor {
- use_dropout: true
- dropout_keep_probability: 0.8
- box_code_size: 3
- share_box_across_classes: true
- }
- """
- hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(fc_hyperparams_text_proto, hyperparams_proto)
- def mock_fc_argscope_builder(fc_hyperparams_arg, is_training):
- return (fc_hyperparams_arg, is_training)
-
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- text_format.Merge(box_predictor_text_proto, box_predictor_proto)
- box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.CopyFrom(
- hyperparams_proto)
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock_fc_argscope_builder,
- box_predictor_config=box_predictor_proto,
- is_training=True,
- num_classes=90)
- self.assertTrue(box_predictor._use_dropout)
- self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.8)
- self.assertEqual(box_predictor.num_classes, 90)
- self.assertTrue(box_predictor._is_training)
- self.assertEqual(box_predictor._box_code_size, 3)
- self.assertEqual(box_predictor._share_box_across_classes, True)
-
- def test_build_default_mask_rcnn_box_predictor(self):
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.op = (
- hyperparams_pb2.Hyperparams.FC)
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock.Mock(return_value='arg_scope'),
- box_predictor_config=box_predictor_proto,
- is_training=True,
- num_classes=90)
- self.assertFalse(box_predictor._use_dropout)
- self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.5)
- self.assertEqual(box_predictor.num_classes, 90)
- self.assertTrue(box_predictor._is_training)
- self.assertEqual(box_predictor._box_code_size, 4)
- self.assertFalse(box_predictor._predict_instance_masks)
- self.assertFalse(box_predictor._predict_keypoints)
-
- def test_build_box_predictor_with_mask_branch(self):
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.op = (
- hyperparams_pb2.Hyperparams.FC)
- box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams.op = (
- hyperparams_pb2.Hyperparams.CONV)
- box_predictor_proto.mask_rcnn_box_predictor.predict_instance_masks = True
- box_predictor_proto.mask_rcnn_box_predictor.mask_prediction_conv_depth = 512
- box_predictor_proto.mask_rcnn_box_predictor.mask_height = 16
- box_predictor_proto.mask_rcnn_box_predictor.mask_width = 16
- mock_argscope_fn = mock.Mock(return_value='arg_scope')
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock_argscope_fn,
- box_predictor_config=box_predictor_proto,
- is_training=True,
- num_classes=90)
- mock_argscope_fn.assert_has_calls(
- [mock.call(box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams,
- True),
- mock.call(box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams,
- True)], any_order=True)
- self.assertFalse(box_predictor._use_dropout)
- self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.5)
- self.assertEqual(box_predictor.num_classes, 90)
- self.assertTrue(box_predictor._is_training)
- self.assertEqual(box_predictor._box_code_size, 4)
- self.assertTrue(box_predictor._predict_instance_masks)
- self.assertEqual(box_predictor._mask_prediction_conv_depth, 512)
- self.assertFalse(box_predictor._predict_keypoints)
-
-
-class RfcnBoxPredictorBuilderTest(tf.test.TestCase):
-
- def test_box_predictor_calls_fc_argscope_fn(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- weight: 0.0003
- }
- }
- initializer {
- truncated_normal_initializer {
- mean: 0.0
- stddev: 0.3
- }
- }
- activation: RELU_6
- """
- hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
- def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
- return (conv_hyperparams_arg, is_training)
-
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom(
- hyperparams_proto)
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock_conv_argscope_builder,
- box_predictor_config=box_predictor_proto,
- is_training=False,
- num_classes=10)
- (conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams_fn
- self.assertAlmostEqual((hyperparams_proto.regularizer.
- l1_regularizer.weight),
- (conv_hyperparams_actual.regularizer.l1_regularizer.
- weight))
- self.assertAlmostEqual((hyperparams_proto.initializer.
- truncated_normal_initializer.stddev),
- (conv_hyperparams_actual.initializer.
- truncated_normal_initializer.stddev))
- self.assertAlmostEqual((hyperparams_proto.initializer.
- truncated_normal_initializer.mean),
- (conv_hyperparams_actual.initializer.
- truncated_normal_initializer.mean))
- self.assertEqual(hyperparams_proto.activation,
- conv_hyperparams_actual.activation)
- self.assertFalse(is_training)
-
- def test_non_default_rfcn_box_predictor(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- activation: RELU_6
- """
- box_predictor_text_proto = """
- rfcn_box_predictor {
- num_spatial_bins_height: 4
- num_spatial_bins_width: 4
- depth: 4
- box_code_size: 3
- crop_height: 16
- crop_width: 16
- }
- """
- hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
- def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
- return (conv_hyperparams_arg, is_training)
-
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- text_format.Merge(box_predictor_text_proto, box_predictor_proto)
- box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom(
- hyperparams_proto)
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock_conv_argscope_builder,
- box_predictor_config=box_predictor_proto,
- is_training=True,
- num_classes=90)
- self.assertEqual(box_predictor.num_classes, 90)
- self.assertTrue(box_predictor._is_training)
- self.assertEqual(box_predictor._box_code_size, 3)
- self.assertEqual(box_predictor._num_spatial_bins, [4, 4])
- self.assertEqual(box_predictor._crop_size, [16, 16])
-
- def test_default_rfcn_box_predictor(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- activation: RELU_6
- """
- hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
- def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
- return (conv_hyperparams_arg, is_training)
-
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom(
- hyperparams_proto)
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock_conv_argscope_builder,
- box_predictor_config=box_predictor_proto,
- is_training=True,
- num_classes=90)
- self.assertEqual(box_predictor.num_classes, 90)
- self.assertTrue(box_predictor._is_training)
- self.assertEqual(box_predictor._box_code_size, 4)
- self.assertEqual(box_predictor._num_spatial_bins, [3, 3])
- self.assertEqual(box_predictor._crop_size, [12, 12])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/dataset_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/dataset_builder.py
deleted file mode 100644
index 3628a85ea3ec33373e0642244a6a96984677358b..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/dataset_builder.py
+++ /dev/null
@@ -1,196 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""tf.data.Dataset builder.
-
-Creates data sources for DetectionModels from an InputReader config. See
-input_reader.proto for options.
-
-Note: If users wish to also use their own InputReaders with the Object
-Detection configuration framework, they should define their own builder function
-that wraps the build function.
-"""
-import functools
-import tensorflow as tf
-
-from object_detection.core import standard_fields as fields
-from object_detection.data_decoders import tf_example_decoder
-from object_detection.protos import input_reader_pb2
-from object_detection.utils import dataset_util
-
-
-def _get_padding_shapes(dataset, max_num_boxes=None, num_classes=None,
- spatial_image_shape=None):
- """Returns shapes to pad dataset tensors to before batching.
-
- Args:
- dataset: tf.data.Dataset object.
- max_num_boxes: Max number of groundtruth boxes needed to compute shapes for
- padding.
- num_classes: Number of classes in the dataset needed to compute shapes for
- padding.
- spatial_image_shape: A list of two integers of the form [height, width]
- containing expected spatial shape of the image.
-
- Returns:
- A dictionary keyed by fields.InputDataFields containing padding shapes for
- tensors in the dataset.
-
- Raises:
- ValueError: If the groundtruth classes tensor is neither rank 1 nor rank 2.
- """
-
- if not spatial_image_shape or spatial_image_shape == [-1, -1]:
- height, width = None, None
- else:
- height, width = spatial_image_shape # pylint: disable=unpacking-non-sequence
-
- num_additional_channels = 0
- if fields.InputDataFields.image_additional_channels in dataset.output_shapes:
- num_additional_channels = dataset.output_shapes[
- fields.InputDataFields.image_additional_channels].dims[2].value
- padding_shapes = {
- # Additional channels are merged before batching.
- fields.InputDataFields.image: [
- height, width, 3 + num_additional_channels
- ],
- fields.InputDataFields.image_additional_channels: [
- height, width, num_additional_channels
- ],
- fields.InputDataFields.source_id: [],
- fields.InputDataFields.filename: [],
- fields.InputDataFields.key: [],
- fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
- fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
- fields.InputDataFields.groundtruth_instance_masks: [
- max_num_boxes, height, width
- ],
- fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes],
- fields.InputDataFields.groundtruth_group_of: [max_num_boxes],
- fields.InputDataFields.groundtruth_area: [max_num_boxes],
- fields.InputDataFields.groundtruth_weights: [max_num_boxes],
- fields.InputDataFields.num_groundtruth_boxes: [],
- fields.InputDataFields.groundtruth_label_types: [max_num_boxes],
- fields.InputDataFields.groundtruth_label_scores: [max_num_boxes],
- fields.InputDataFields.true_image_shape: [3],
- fields.InputDataFields.multiclass_scores: [
- max_num_boxes, num_classes + 1 if num_classes is not None else None
- ],
- }
- # Determine whether groundtruth_classes are integers or one-hot encodings, and
- # apply batching appropriately.
- classes_shape = dataset.output_shapes[
- fields.InputDataFields.groundtruth_classes]
- if len(classes_shape) == 1: # Class integers.
- padding_shapes[fields.InputDataFields.groundtruth_classes] = [max_num_boxes]
- elif len(classes_shape) == 2: # One-hot or k-hot encoding.
- padding_shapes[fields.InputDataFields.groundtruth_classes] = [
- max_num_boxes, num_classes]
- else:
- raise ValueError('Groundtruth classes must be a rank 1 tensor (classes) or '
- 'rank 2 tensor (one-hot encodings)')
-
- if fields.InputDataFields.original_image in dataset.output_shapes:
- padding_shapes[fields.InputDataFields.original_image] = [
- None, None, 3 + num_additional_channels
- ]
- if fields.InputDataFields.groundtruth_keypoints in dataset.output_shapes:
- tensor_shape = dataset.output_shapes[fields.InputDataFields.
- groundtruth_keypoints]
- padding_shape = [max_num_boxes, tensor_shape[1].value,
- tensor_shape[2].value]
- padding_shapes[fields.InputDataFields.groundtruth_keypoints] = padding_shape
- if (fields.InputDataFields.groundtruth_keypoint_visibilities
- in dataset.output_shapes):
- tensor_shape = dataset.output_shapes[fields.InputDataFields.
- groundtruth_keypoint_visibilities]
- padding_shape = [max_num_boxes, tensor_shape[1].value]
- padding_shapes[fields.InputDataFields.
- groundtruth_keypoint_visibilities] = padding_shape
- return {tensor_key: padding_shapes[tensor_key]
- for tensor_key, _ in dataset.output_shapes.items()}
-
-
-def build(input_reader_config,
- transform_input_data_fn=None,
- batch_size=None,
- max_num_boxes=None,
- num_classes=None,
- spatial_image_shape=None,
- num_additional_channels=0):
- """Builds a tf.data.Dataset.
-
- Builds a tf.data.Dataset by applying the `transform_input_data_fn` on all
- records. Applies a padded batch to the resulting dataset.
-
- Args:
- input_reader_config: An input_reader_pb2.InputReader object.
- transform_input_data_fn: Function to apply to all records, or None if
- no extra decoding is required.
- batch_size: Batch size. If None, batching is not performed.
- max_num_boxes: Max number of groundtruth boxes needed to compute shapes for
- padding. If None, will use a dynamic shape.
- num_classes: Number of classes in the dataset needed to compute shapes for
- padding. If None, will use a dynamic shape.
- spatial_image_shape: A list of two integers of the form [height, width]
- containing expected spatial shape of the image after applying
- transform_input_data_fn. If None, will use dynamic shapes.
- num_additional_channels: Number of additional channels to use in the input.
-
- Returns:
- A tf.data.Dataset based on the input_reader_config.
-
- Raises:
- ValueError: On invalid input reader proto.
- ValueError: If no input paths are specified.
- """
- if not isinstance(input_reader_config, input_reader_pb2.InputReader):
- raise ValueError('input_reader_config not of type '
- 'input_reader_pb2.InputReader.')
-
- if input_reader_config.WhichOneof('input_reader') == 'tf_record_input_reader':
- config = input_reader_config.tf_record_input_reader
- if not config.input_path:
- raise ValueError('At least one input path must be specified in '
- '`input_reader_config`.')
-
- label_map_proto_file = None
- if input_reader_config.HasField('label_map_path'):
- label_map_proto_file = input_reader_config.label_map_path
- decoder = tf_example_decoder.TfExampleDecoder(
- load_instance_masks=input_reader_config.load_instance_masks,
- instance_mask_type=input_reader_config.mask_type,
- label_map_proto_file=label_map_proto_file,
- use_display_name=input_reader_config.use_display_name,
- num_additional_channels=num_additional_channels)
-
- def process_fn(value):
- processed = decoder.decode(value)
- if transform_input_data_fn is not None:
- return transform_input_data_fn(processed)
- return processed
-
- dataset = dataset_util.read_dataset(
- functools.partial(tf.data.TFRecordDataset, buffer_size=8 * 1000 * 1000),
- process_fn, config.input_path[:], input_reader_config)
-
- if batch_size:
- padding_shapes = _get_padding_shapes(dataset, max_num_boxes, num_classes,
- spatial_image_shape)
- dataset = dataset.apply(
- tf.contrib.data.padded_batch_and_drop_remainder(batch_size,
- padding_shapes))
- return dataset
-
- raise ValueError('Unsupported input_reader_config.')
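For reference, a minimal usage sketch of the `build` function above, mirroring the pattern exercised in dataset_builder_test.py below (the TFRecord path is hypothetical; the API assumes TF 1.x):

    from google.protobuf import text_format
    from object_detection.builders import dataset_builder
    from object_detection.protos import input_reader_pb2
    from object_detection.utils import dataset_util

    input_reader_proto = input_reader_pb2.InputReader()
    text_format.Merge("""
        shuffle: false
        num_readers: 1
        tf_record_input_reader {
          input_path: '/tmp/coco_train.record'  # hypothetical path
        }
    """, input_reader_proto)

    # Static padding shapes are only computed when batch_size is set.
    dataset = dataset_builder.build(
        input_reader_proto,
        batch_size=2,
        max_num_boxes=100,
        num_classes=90,
        spatial_image_shape=[800, 1365])
    tensor_dict = dataset_util.make_initializable_iterator(dataset).get_next()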
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/dataset_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/dataset_builder_test.py
deleted file mode 100644
index 0f1360f5e18892ebf4155407e65b46e12e69a96a..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/dataset_builder_test.py
+++ /dev/null
@@ -1,260 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for dataset_builder."""
-
-import os
-import numpy as np
-import tensorflow as tf
-
-from google.protobuf import text_format
-
-from tensorflow.core.example import example_pb2
-from tensorflow.core.example import feature_pb2
-from object_detection.builders import dataset_builder
-from object_detection.core import standard_fields as fields
-from object_detection.protos import input_reader_pb2
-from object_detection.utils import dataset_util
-
-
-class DatasetBuilderTest(tf.test.TestCase):
-
- def create_tf_record(self, has_additional_channels=False):
- path = os.path.join(self.get_temp_dir(), 'tfrecord')
- writer = tf.python_io.TFRecordWriter(path)
-
- image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
- additional_channels_tensor = np.random.randint(
- 255, size=(4, 5, 1)).astype(np.uint8)
- flat_mask = (4 * 5) * [1.0]
- with self.test_session():
- encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
- encoded_additional_channels_jpeg = tf.image.encode_jpeg(
- tf.constant(additional_channels_tensor)).eval()
- features = {
- 'image/encoded':
- feature_pb2.Feature(
- bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])),
- 'image/format':
- feature_pb2.Feature(
- bytes_list=feature_pb2.BytesList(value=['jpeg'.encode('utf-8')])
- ),
- 'image/height':
- feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[4])),
- 'image/width':
- feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[5])),
- 'image/object/bbox/xmin':
- feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[0.0])),
- 'image/object/bbox/xmax':
- feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[1.0])),
- 'image/object/bbox/ymin':
- feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[0.0])),
- 'image/object/bbox/ymax':
- feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[1.0])),
- 'image/object/class/label':
- feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[2])),
- 'image/object/mask':
- feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=flat_mask)),
- }
- if has_additional_channels:
- features['image/additional_channels/encoded'] = feature_pb2.Feature(
- bytes_list=feature_pb2.BytesList(
- value=[encoded_additional_channels_jpeg] * 2))
- example = example_pb2.Example(
- features=feature_pb2.Features(feature=features))
- writer.write(example.SerializeToString())
- writer.close()
-
- return path
-
- def test_build_tf_record_input_reader(self):
- tf_record_path = self.create_tf_record()
-
- input_reader_text_proto = """
- shuffle: false
- num_readers: 1
- tf_record_input_reader {{
- input_path: '{0}'
- }}
- """.format(tf_record_path)
- input_reader_proto = input_reader_pb2.InputReader()
- text_format.Merge(input_reader_text_proto, input_reader_proto)
- tensor_dict = dataset_util.make_initializable_iterator(
- dataset_builder.build(input_reader_proto, batch_size=1)).get_next()
-
- sv = tf.train.Supervisor(logdir=self.get_temp_dir())
- with sv.prepare_or_wait_for_session() as sess:
- sv.start_queue_runners(sess)
- output_dict = sess.run(tensor_dict)
-
- self.assertTrue(
- fields.InputDataFields.groundtruth_instance_masks not in output_dict)
- self.assertEqual((1, 4, 5, 3),
- output_dict[fields.InputDataFields.image].shape)
- self.assertAllEqual([[2]],
- output_dict[fields.InputDataFields.groundtruth_classes])
- self.assertEqual(
- (1, 1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape)
- self.assertAllEqual(
- [0.0, 0.0, 1.0, 1.0],
- output_dict[fields.InputDataFields.groundtruth_boxes][0][0])
-
- def test_build_tf_record_input_reader_and_load_instance_masks(self):
- tf_record_path = self.create_tf_record()
-
- input_reader_text_proto = """
- shuffle: false
- num_readers: 1
- load_instance_masks: true
- tf_record_input_reader {{
- input_path: '{0}'
- }}
- """.format(tf_record_path)
- input_reader_proto = input_reader_pb2.InputReader()
- text_format.Merge(input_reader_text_proto, input_reader_proto)
- tensor_dict = dataset_util.make_initializable_iterator(
- dataset_builder.build(input_reader_proto, batch_size=1)).get_next()
-
- sv = tf.train.Supervisor(logdir=self.get_temp_dir())
- with sv.prepare_or_wait_for_session() as sess:
- sv.start_queue_runners(sess)
- output_dict = sess.run(tensor_dict)
- self.assertAllEqual(
- (1, 1, 4, 5),
- output_dict[fields.InputDataFields.groundtruth_instance_masks].shape)
-
- def test_build_tf_record_input_reader_with_batch_size_two(self):
- tf_record_path = self.create_tf_record()
-
- input_reader_text_proto = """
- shuffle: false
- num_readers: 1
- tf_record_input_reader {{
- input_path: '{0}'
- }}
- """.format(tf_record_path)
- input_reader_proto = input_reader_pb2.InputReader()
- text_format.Merge(input_reader_text_proto, input_reader_proto)
-
- def one_hot_class_encoding_fn(tensor_dict):
- tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
- tensor_dict[fields.InputDataFields.groundtruth_classes] - 1, depth=3)
- return tensor_dict
-
- tensor_dict = dataset_util.make_initializable_iterator(
- dataset_builder.build(
- input_reader_proto,
- transform_input_data_fn=one_hot_class_encoding_fn,
- batch_size=2,
- max_num_boxes=2,
- num_classes=3,
- spatial_image_shape=[4, 5])).get_next()
-
- sv = tf.train.Supervisor(logdir=self.get_temp_dir())
- with sv.prepare_or_wait_for_session() as sess:
- sv.start_queue_runners(sess)
- output_dict = sess.run(tensor_dict)
-
- self.assertAllEqual([2, 4, 5, 3],
- output_dict[fields.InputDataFields.image].shape)
- self.assertAllEqual([2, 2, 3],
- output_dict[fields.InputDataFields.groundtruth_classes].
- shape)
- self.assertAllEqual([2, 2, 4],
- output_dict[fields.InputDataFields.groundtruth_boxes].
- shape)
- self.assertAllEqual(
- [[[0.0, 0.0, 1.0, 1.0],
- [0.0, 0.0, 0.0, 0.0]],
- [[0.0, 0.0, 1.0, 1.0],
- [0.0, 0.0, 0.0, 0.0]]],
- output_dict[fields.InputDataFields.groundtruth_boxes])
-
- def test_build_tf_record_input_reader_with_batch_size_two_and_masks(self):
- tf_record_path = self.create_tf_record()
-
- input_reader_text_proto = """
- shuffle: false
- num_readers: 1
- load_instance_masks: true
- tf_record_input_reader {{
- input_path: '{0}'
- }}
- """.format(tf_record_path)
- input_reader_proto = input_reader_pb2.InputReader()
- text_format.Merge(input_reader_text_proto, input_reader_proto)
-
- def one_hot_class_encoding_fn(tensor_dict):
- tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
- tensor_dict[fields.InputDataFields.groundtruth_classes] - 1, depth=3)
- return tensor_dict
-
- tensor_dict = dataset_util.make_initializable_iterator(
- dataset_builder.build(
- input_reader_proto,
- transform_input_data_fn=one_hot_class_encoding_fn,
- batch_size=2,
- max_num_boxes=2,
- num_classes=3,
- spatial_image_shape=[4, 5])).get_next()
-
- sv = tf.train.Supervisor(logdir=self.get_temp_dir())
- with sv.prepare_or_wait_for_session() as sess:
- sv.start_queue_runners(sess)
- output_dict = sess.run(tensor_dict)
-
- self.assertAllEqual(
- [2, 2, 4, 5],
- output_dict[fields.InputDataFields.groundtruth_instance_masks].shape)
-
- def test_build_tf_record_input_reader_with_additional_channels(self):
- tf_record_path = self.create_tf_record(has_additional_channels=True)
-
- input_reader_text_proto = """
- shuffle: false
- num_readers: 1
- tf_record_input_reader {{
- input_path: '{0}'
- }}
- """.format(tf_record_path)
- input_reader_proto = input_reader_pb2.InputReader()
- text_format.Merge(input_reader_text_proto, input_reader_proto)
- tensor_dict = dataset_util.make_initializable_iterator(
- dataset_builder.build(
- input_reader_proto, batch_size=2,
- num_additional_channels=2)).get_next()
-
- sv = tf.train.Supervisor(logdir=self.get_temp_dir())
- with sv.prepare_or_wait_for_session() as sess:
- sv.start_queue_runners(sess)
- output_dict = sess.run(tensor_dict)
-
- self.assertEqual((2, 4, 5, 5),
- output_dict[fields.InputDataFields.image].shape)
-
- def test_raises_error_with_no_input_paths(self):
- input_reader_text_proto = """
- shuffle: false
- num_readers: 1
- load_instance_masks: true
- """
- input_reader_proto = input_reader_pb2.InputReader()
- text_format.Merge(input_reader_text_proto, input_reader_proto)
- with self.assertRaises(ValueError):
- dataset_builder.build(input_reader_proto)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/graph_rewriter_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/graph_rewriter_builder.py
deleted file mode 100644
index 77e60479bd8f6e6267acabcec9a4995ed1622959..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/graph_rewriter_builder.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Functions for quantized training and evaluation."""
-
-import tensorflow as tf
-
-
-def build(graph_rewriter_config, is_training):
- """Returns a function that modifies default graph based on options.
-
- Args:
- graph_rewriter_config: graph_rewriter_pb2.GraphRewriter proto.
- is_training: Whether in training or eval mode.
- """
- def graph_rewrite_fn():
- """Function to quantize weights and activation of the default graph."""
- if (graph_rewriter_config.quantization.weight_bits != 8 or
- graph_rewriter_config.quantization.activation_bits != 8):
- raise ValueError('Only 8bit quantization is supported')
-
- # Quantize the graph by inserting quantize ops for weights and activations
- if is_training:
- tf.contrib.quantize.create_training_graph(
- input_graph=tf.get_default_graph(),
- quant_delay=graph_rewriter_config.quantization.delay)
- else:
- tf.contrib.quantize.create_eval_graph(input_graph=tf.get_default_graph())
-
- tf.contrib.layers.summarize_collection('quant_vars')
- return graph_rewrite_fn
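A minimal sketch of how this rewriter is wired in, following the arguments exercised by graph_rewriter_builder_test.py below (the delay value and the toy conv layer are illustrative):

    import tensorflow as tf
    from object_detection.builders import graph_rewriter_builder
    from object_detection.protos import graph_rewriter_pb2

    graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter()
    graph_rewriter_proto.quantization.delay = 2000  # illustrative
    graph_rewriter_proto.quantization.weight_bits = 8
    graph_rewriter_proto.quantization.activation_bits = 8

    # Build the model graph first; the returned function then rewrites the
    # default graph in place with fake-quantization ops.
    net = tf.layers.conv2d(tf.zeros([1, 8, 8, 3]), filters=4, kernel_size=3)
    graph_rewrite_fn = graph_rewriter_builder.build(
        graph_rewriter_proto, is_training=True)
    graph_rewrite_fn()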
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/graph_rewriter_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/graph_rewriter_builder_test.py
deleted file mode 100644
index 5f38d5a27df1e74674e74748687efdef191781f0..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/graph_rewriter_builder_test.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for graph_rewriter_builder."""
-import mock
-import tensorflow as tf
-from object_detection.builders import graph_rewriter_builder
-from object_detection.protos import graph_rewriter_pb2
-
-
-class QuantizationBuilderTest(tf.test.TestCase):
-
- def testQuantizationBuilderSetsUpCorrectTrainArguments(self):
- with mock.patch.object(
- tf.contrib.quantize, 'create_training_graph') as mock_quant_fn:
- with mock.patch.object(tf.contrib.layers,
- 'summarize_collection') as mock_summarize_col:
- graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter()
- graph_rewriter_proto.quantization.delay = 10
- graph_rewriter_proto.quantization.weight_bits = 8
- graph_rewriter_proto.quantization.activation_bits = 8
- graph_rewrite_fn = graph_rewriter_builder.build(
- graph_rewriter_proto, is_training=True)
- graph_rewrite_fn()
- _, kwargs = mock_quant_fn.call_args
- self.assertEqual(kwargs['input_graph'], tf.get_default_graph())
- self.assertEqual(kwargs['quant_delay'], 10)
- mock_summarize_col.assert_called_with('quant_vars')
-
- def testQuantizationBuilderSetsUpCorrectEvalArguments(self):
- with mock.patch.object(tf.contrib.quantize,
- 'create_eval_graph') as mock_quant_fn:
- with mock.patch.object(tf.contrib.layers,
- 'summarize_collection') as mock_summarize_col:
- graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter()
- graph_rewriter_proto.quantization.delay = 10
- graph_rewrite_fn = graph_rewriter_builder.build(
- graph_rewriter_proto, is_training=False)
- graph_rewrite_fn()
- _, kwargs = mock_quant_fn.call_args
- self.assertEqual(kwargs['input_graph'], tf.get_default_graph())
- mock_summarize_col.assert_called_with('quant_vars')
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/hyperparams_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/hyperparams_builder.py
deleted file mode 100644
index 05addddaafa5785aa0995fb58181841511a250bc..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/hyperparams_builder.py
+++ /dev/null
@@ -1,182 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Builder function to construct tf-slim arg_scope for convolution, fc ops."""
-import tensorflow as tf
-
-from object_detection.protos import hyperparams_pb2
-from object_detection.utils import context_manager
-
-slim = tf.contrib.slim
-
-
-def build(hyperparams_config, is_training):
- """Builds tf-slim arg_scope for convolution ops based on the config.
-
- Returns an arg_scope to use for convolution ops containing weights
- initializer, weights regularizer, activation function, batch norm function
- and batch norm parameters based on the configuration.
-
- Note that if the batch_norm parameters are not specified in the config
- (i.e. left to default) then batch norm is excluded from the arg_scope.
-
- The batch norm parameters are set for updates based on the `is_training`
- argument and the conv_hyperparams_config.batch_norm.train parameter. During
- training, they are updated only if the batch_norm.train parameter is true.
- However, during eval, no updates are made to the batch norm variables. In
- both cases, their current values are used during the forward pass.
-
- Args:
- hyperparams_config: hyperparams.proto object containing
- hyperparameters.
- is_training: Whether the network is in training mode.
-
- Returns:
- arg_scope_fn: A function to construct tf-slim arg_scope containing
- hyperparameters for ops.
-
- Raises:
- ValueError: if hyperparams_config is not of type hyperparams.Hyperparams.
- """
- if not isinstance(hyperparams_config,
- hyperparams_pb2.Hyperparams):
- raise ValueError('hyperparams_config not of type '
- 'hyperparams_pb2.Hyperparams.')
-
- batch_norm = None
- batch_norm_params = None
- if hyperparams_config.HasField('batch_norm'):
- batch_norm = slim.batch_norm
- batch_norm_params = _build_batch_norm_params(
- hyperparams_config.batch_norm, is_training)
-
- affected_ops = [slim.conv2d, slim.separable_conv2d, slim.conv2d_transpose]
- if hyperparams_config.HasField('op') and (
- hyperparams_config.op == hyperparams_pb2.Hyperparams.FC):
- affected_ops = [slim.fully_connected]
- def scope_fn():
- with (slim.arg_scope([slim.batch_norm], **batch_norm_params)
- if batch_norm_params is not None else
- context_manager.IdentityContextManager()):
- with slim.arg_scope(
- affected_ops,
- weights_regularizer=_build_regularizer(
- hyperparams_config.regularizer),
- weights_initializer=_build_initializer(
- hyperparams_config.initializer),
- activation_fn=_build_activation_fn(hyperparams_config.activation),
- normalizer_fn=batch_norm) as sc:
- return sc
-
- return scope_fn
-
-
-def _build_activation_fn(activation_fn):
- """Builds a callable activation from config.
-
- Args:
- activation_fn: hyperparams_pb2.Hyperparams.activation
-
- Returns:
- Callable activation function.
-
- Raises:
- ValueError: On unknown activation function.
- """
- if activation_fn == hyperparams_pb2.Hyperparams.NONE:
- return None
- if activation_fn == hyperparams_pb2.Hyperparams.RELU:
- return tf.nn.relu
- if activation_fn == hyperparams_pb2.Hyperparams.RELU_6:
- return tf.nn.relu6
- raise ValueError('Unknown activation function: {}'.format(activation_fn))
-
-
-def _build_regularizer(regularizer):
- """Builds a tf-slim regularizer from config.
-
- Args:
- regularizer: hyperparams_pb2.Hyperparams.regularizer proto.
-
- Returns:
- tf-slim regularizer.
-
- Raises:
- ValueError: On unknown regularizer.
- """
- regularizer_oneof = regularizer.WhichOneof('regularizer_oneof')
- if regularizer_oneof == 'l1_regularizer':
- return slim.l1_regularizer(scale=float(regularizer.l1_regularizer.weight))
- if regularizer_oneof == 'l2_regularizer':
- return slim.l2_regularizer(scale=float(regularizer.l2_regularizer.weight))
- raise ValueError('Unknown regularizer function: {}'.format(regularizer_oneof))
-
-
-def _build_initializer(initializer):
- """Build a tf initializer from config.
-
- Args:
- initializer: hyperparams_pb2.Hyperparams.initializer proto.
-
- Returns:
- tf initializer.
-
- Raises:
- ValueError: On unknown initializer.
- """
- initializer_oneof = initializer.WhichOneof('initializer_oneof')
- if initializer_oneof == 'truncated_normal_initializer':
- return tf.truncated_normal_initializer(
- mean=initializer.truncated_normal_initializer.mean,
- stddev=initializer.truncated_normal_initializer.stddev)
- if initializer_oneof == 'random_normal_initializer':
- return tf.random_normal_initializer(
- mean=initializer.random_normal_initializer.mean,
- stddev=initializer.random_normal_initializer.stddev)
- if initializer_oneof == 'variance_scaling_initializer':
- enum_descriptor = (hyperparams_pb2.VarianceScalingInitializer.
- DESCRIPTOR.enum_types_by_name['Mode'])
- mode = enum_descriptor.values_by_number[initializer.
- variance_scaling_initializer.
- mode].name
- return slim.variance_scaling_initializer(
- factor=initializer.variance_scaling_initializer.factor,
- mode=mode,
- uniform=initializer.variance_scaling_initializer.uniform)
- raise ValueError('Unknown initializer function: {}'.format(
- initializer_oneof))
-
-
-def _build_batch_norm_params(batch_norm, is_training):
- """Build a dictionary of batch_norm params from config.
-
- Args:
- batch_norm: hyperparams_pb2.ConvHyperparams.batch_norm proto.
- is_training: Whether the model is in training mode.
-
- Returns:
- A dictionary containing batch_norm parameters.
- """
- batch_norm_params = {
- 'decay': batch_norm.decay,
- 'center': batch_norm.center,
- 'scale': batch_norm.scale,
- 'epsilon': batch_norm.epsilon,
- # Remove is_training parameter from here and deprecate it in the proto
- # once we refactor Faster RCNN models to set is_training through an outer
- # arg_scope in the meta architecture.
- 'is_training': is_training and batch_norm.train,
- }
- return batch_norm_params
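A minimal sketch of consuming the returned scope_fn under tf.contrib.slim (the hyperparameter values are illustrative):

    import tensorflow as tf
    from google.protobuf import text_format
    from object_detection.builders import hyperparams_builder
    from object_detection.protos import hyperparams_pb2

    slim = tf.contrib.slim

    hyperparams_proto = hyperparams_pb2.Hyperparams()
    text_format.Merge("""
        regularizer { l2_regularizer { weight: 0.0004 } }
        initializer { truncated_normal_initializer { stddev: 0.01 } }
        activation: RELU_6
    """, hyperparams_proto)

    scope_fn = hyperparams_builder.build(hyperparams_proto, is_training=True)
    with slim.arg_scope(scope_fn()):
      # conv2d now inherits the regularizer, initializer and activation above.
      net = slim.conv2d(tf.zeros([1, 32, 32, 3]), 8, [3, 3])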
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/hyperparams_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/hyperparams_builder_test.py
deleted file mode 100644
index 943532fbebca2870e5035fa39becd994f6d0b1ca..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/hyperparams_builder_test.py
+++ /dev/null
@@ -1,509 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests object_detection.core.hyperparams_builder."""
-
-import numpy as np
-import tensorflow as tf
-
-from google.protobuf import text_format
-
-from object_detection.builders import hyperparams_builder
-from object_detection.protos import hyperparams_pb2
-
-slim = tf.contrib.slim
-
-
-def _get_scope_key(op):
- return getattr(op, '_key_op', str(op))
-
-
-class HyperparamsBuilderTest(tf.test.TestCase):
-
- def test_default_arg_scope_has_conv2d_op(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- self.assertTrue(_get_scope_key(slim.conv2d) in scope)
-
- def test_default_arg_scope_has_separable_conv2d_op(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- self.assertTrue(_get_scope_key(slim.separable_conv2d) in scope)
-
- def test_default_arg_scope_has_conv2d_transpose_op(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- self.assertTrue(_get_scope_key(slim.conv2d_transpose) in scope)
-
- def test_explicit_fc_op_arg_scope_has_fully_connected_op(self):
- conv_hyperparams_text_proto = """
- op: FC
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- self.assertTrue(_get_scope_key(slim.fully_connected) in scope)
-
- def test_separable_conv2d_and_conv2d_and_transpose_have_same_parameters(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- kwargs_1, kwargs_2, kwargs_3 = scope.values()
- self.assertDictEqual(kwargs_1, kwargs_2)
- self.assertDictEqual(kwargs_1, kwargs_3)
-
- def test_return_l1_regularized_weights(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- weight: 0.5
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope.values()[0]
- regularizer = conv_scope_arguments['weights_regularizer']
- weights = np.array([1., -1, 4., 2.])
- with self.test_session() as sess:
- result = sess.run(regularizer(tf.constant(weights)))
- self.assertAllClose(np.abs(weights).sum() * 0.5, result)
-
- def test_return_l2_regularizer_weights(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- weight: 0.42
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
-
- regularizer = conv_scope_arguments['weights_regularizer']
- weights = np.array([1., -1, 4., 2.])
- with self.test_session() as sess:
- result = sess.run(regularizer(tf.constant(weights)))
- self.assertAllClose(np.power(weights, 2).sum() / 2.0 * 0.42, result)
-
- def test_return_non_default_batch_norm_params_with_train_during_train(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- batch_norm {
- decay: 0.7
- center: false
- scale: true
- epsilon: 0.03
- train: true
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
- batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
- self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
- self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
- self.assertFalse(batch_norm_params['center'])
- self.assertTrue(batch_norm_params['scale'])
- self.assertTrue(batch_norm_params['is_training'])
-
- def test_return_batch_norm_params_with_notrain_during_eval(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- batch_norm {
- decay: 0.7
- center: false
- scale: true
- epsilon: 0.03
- train: true
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=False)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
- batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
- self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
- self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
- self.assertFalse(batch_norm_params['center'])
- self.assertTrue(batch_norm_params['scale'])
- self.assertFalse(batch_norm_params['is_training'])
-
- def test_return_batch_norm_params_with_notrain_when_train_is_false(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- batch_norm {
- decay: 0.7
- center: false
- scale: true
- epsilon: 0.03
- train: false
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
- batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
- self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
- self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
- self.assertFalse(batch_norm_params['center'])
- self.assertTrue(batch_norm_params['scale'])
- self.assertFalse(batch_norm_params['is_training'])
-
- def test_do_not_use_batch_norm_if_default(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- self.assertEqual(conv_scope_arguments['normalizer_fn'], None)
-
- def test_use_none_activation(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- activation: NONE
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- self.assertEqual(conv_scope_arguments['activation_fn'], None)
-
- def test_use_relu_activation(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- activation: RELU
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu)
-
- def test_use_relu_6_activation(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- activation: RELU_6
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu6)
-
- def _assert_variance_in_range(self, initializer, shape, variance,
- tol=1e-2):
- with tf.Graph().as_default() as g:
- with self.test_session(graph=g) as sess:
- var = tf.get_variable(
- name='test',
- shape=shape,
- dtype=tf.float32,
- initializer=initializer)
- sess.run(tf.global_variables_initializer())
- values = sess.run(var)
- self.assertAllClose(np.var(values), variance, tol, tol)
-
- def test_variance_in_range_with_variance_scaling_initializer_fan_in(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 2.0
- mode: FAN_IN
- uniform: false
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- initializer = conv_scope_arguments['weights_initializer']
- self._assert_variance_in_range(initializer, shape=[100, 40],
- variance=2. / 100.)
-
- def test_variance_in_range_with_variance_scaling_initializer_fan_out(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 2.0
- mode: FAN_OUT
- uniform: false
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- initializer = conv_scope_arguments['weights_initializer']
- self._assert_variance_in_range(initializer, shape=[100, 40],
- variance=2. / 40.)
-
- def test_variance_in_range_with_variance_scaling_initializer_fan_avg(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 2.0
- mode: FAN_AVG
- uniform: false
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- initializer = conv_scope_arguments['weights_initializer']
- self._assert_variance_in_range(initializer, shape=[100, 40],
- variance=4. / (100. + 40.))
-
- def test_variance_in_range_with_variance_scaling_initializer_uniform(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 2.0
- mode: FAN_IN
- uniform: true
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- initializer = conv_scope_arguments['weights_initializer']
- self._assert_variance_in_range(initializer, shape=[100, 40],
- variance=2. / 100.)
-
- def test_variance_in_range_with_truncated_normal_initializer(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- mean: 0.0
- stddev: 0.8
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- initializer = conv_scope_arguments['weights_initializer']
- self._assert_variance_in_range(initializer, shape=[100, 40],
- variance=0.49, tol=1e-1)
-
- def test_variance_in_range_with_random_normal_initializer(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- random_normal_initializer {
- mean: 0.0
- stddev: 0.8
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
- is_training=True)
- scope = scope_fn()
- conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
- initializer = conv_scope_arguments['weights_initializer']
- self._assert_variance_in_range(initializer, shape=[100, 40],
- variance=0.64, tol=1e-1)
-
-
-if __name__ == '__main__':
- tf.test.main()
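The variances asserted in the variance_scaling tests above follow directly from the initializer definition: for a [100, 40] variable, fan_in is 100 and fan_out is 40, so with factor 2.0 the expected variance is the factor divided by the fan chosen by the mode:

    factor, fan_in, fan_out = 2.0, 100.0, 40.0
    print(factor / fan_in)                    # FAN_IN  -> 0.02
    print(factor / fan_out)                   # FAN_OUT -> 0.05
    print(2.0 * factor / (fan_in + fan_out))  # FAN_AVG -> 4/140 ~= 0.0286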
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/image_resizer_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/image_resizer_builder.py
deleted file mode 100644
index 3b3014f727e13d2bf671ea12f3ff30972cc67684..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/image_resizer_builder.py
+++ /dev/null
@@ -1,115 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Builder function for image resizing operations."""
-import functools
-import tensorflow as tf
-
-from object_detection.core import preprocessor
-from object_detection.protos import image_resizer_pb2
-
-
-def _tf_resize_method(resize_method):
- """Maps image resize method from enumeration type to TensorFlow.
-
- Args:
- resize_method: The resize_method attribute of keep_aspect_ratio_resizer or
- fixed_shape_resizer.
-
- Returns:
- method: The corresponding TensorFlow ResizeMethod.
-
- Raises:
- ValueError: if `resize_method` is of unknown type.
- """
- dict_method = {
- image_resizer_pb2.BILINEAR:
- tf.image.ResizeMethod.BILINEAR,
- image_resizer_pb2.NEAREST_NEIGHBOR:
- tf.image.ResizeMethod.NEAREST_NEIGHBOR,
- image_resizer_pb2.BICUBIC:
- tf.image.ResizeMethod.BICUBIC,
- image_resizer_pb2.AREA:
- tf.image.ResizeMethod.AREA
- }
- if resize_method in dict_method:
- return dict_method[resize_method]
- else:
- raise ValueError('Unknown resize_method')
-
-
-def build(image_resizer_config):
- """Builds callable for image resizing operations.
-
- Args:
- image_resizer_config: image_resizer.proto object containing parameters for
- an image resizing operation.
-
- Returns:
- image_resizer_fn: Callable for image resizing. This callable always takes
- a rank-3 image tensor (corresponding to a single image) and returns a
- rank-3 image tensor, possibly with new spatial dimensions.
-
- Raises:
- ValueError: if `image_resizer_config` is of incorrect type.
- ValueError: if `image_resizer_config.image_resizer_oneof` is not of the
- expected type.
- ValueError: if min_dimension > max_dimension when keep_aspect_ratio_resizer
- is used.
- """
- if not isinstance(image_resizer_config, image_resizer_pb2.ImageResizer):
- raise ValueError('image_resizer_config not of type '
- 'image_resizer_pb2.ImageResizer.')
-
- image_resizer_oneof = image_resizer_config.WhichOneof('image_resizer_oneof')
- if image_resizer_oneof == 'keep_aspect_ratio_resizer':
- keep_aspect_ratio_config = image_resizer_config.keep_aspect_ratio_resizer
- if not (keep_aspect_ratio_config.min_dimension <=
- keep_aspect_ratio_config.max_dimension):
- raise ValueError('min_dimension > max_dimension')
- method = _tf_resize_method(keep_aspect_ratio_config.resize_method)
- per_channel_pad_value = (0, 0, 0)
- if keep_aspect_ratio_config.per_channel_pad_value:
- per_channel_pad_value = tuple(keep_aspect_ratio_config.
- per_channel_pad_value)
- image_resizer_fn = functools.partial(
- preprocessor.resize_to_range,
- min_dimension=keep_aspect_ratio_config.min_dimension,
- max_dimension=keep_aspect_ratio_config.max_dimension,
- method=method,
- pad_to_max_dimension=keep_aspect_ratio_config.pad_to_max_dimension,
- per_channel_pad_value=per_channel_pad_value)
- if not keep_aspect_ratio_config.convert_to_grayscale:
- return image_resizer_fn
- elif image_resizer_oneof == 'fixed_shape_resizer':
- fixed_shape_resizer_config = image_resizer_config.fixed_shape_resizer
- method = _tf_resize_method(fixed_shape_resizer_config.resize_method)
- image_resizer_fn = functools.partial(
- preprocessor.resize_image,
- new_height=fixed_shape_resizer_config.height,
- new_width=fixed_shape_resizer_config.width,
- method=method)
- if not fixed_shape_resizer_config.convert_to_grayscale:
- return image_resizer_fn
- else:
- raise ValueError(
- 'Invalid image resizer option: \'%s\'.' % image_resizer_oneof)
-
- def grayscale_image_resizer(image):
- [resized_image, resized_image_shape] = image_resizer_fn(image)
- grayscale_image = preprocessor.rgb_to_gray(resized_image)
- grayscale_image_shape = tf.concat([resized_image_shape[:-1], [1]], 0)
- return [grayscale_image, grayscale_image_shape]
-
- return functools.partial(grayscale_image_resizer)
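A minimal usage sketch of the builder above (the dimensions and input image are illustrative):

    import tensorflow as tf
    from google.protobuf import text_format
    from object_detection.builders import image_resizer_builder
    from object_detection.protos import image_resizer_pb2

    resizer_config = image_resizer_pb2.ImageResizer()
    text_format.Merge("""
        keep_aspect_ratio_resizer {
          min_dimension: 800
          max_dimension: 1365
        }
    """, resizer_config)

    image_resizer_fn = image_resizer_builder.build(resizer_config)
    # Takes a rank-3 image; returns the resized image and its true shape.
    resized_image, true_image_shape = image_resizer_fn(
        tf.zeros([600, 1024, 3], dtype=tf.float32))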
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/image_resizer_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/image_resizer_builder_test.py
deleted file mode 100644
index 38f620e04050888c5f3b1c73cdab8942a99b9d57..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/image_resizer_builder_test.py
+++ /dev/null
@@ -1,113 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for object_detection.builders.image_resizer_builder."""
-import numpy as np
-import tensorflow as tf
-from google.protobuf import text_format
-from object_detection.builders import image_resizer_builder
-from object_detection.protos import image_resizer_pb2
-
-
-class ImageResizerBuilderTest(tf.test.TestCase):
-
- def _shape_of_resized_random_image_given_text_proto(self, input_shape,
- text_proto):
- image_resizer_config = image_resizer_pb2.ImageResizer()
- text_format.Merge(text_proto, image_resizer_config)
- image_resizer_fn = image_resizer_builder.build(image_resizer_config)
- images = tf.to_float(
- tf.random_uniform(input_shape, minval=0, maxval=255, dtype=tf.int32))
- resized_images, _ = image_resizer_fn(images)
- with self.test_session() as sess:
- return sess.run(resized_images).shape
-
- def test_build_keep_aspect_ratio_resizer_returns_expected_shape(self):
- image_resizer_text_proto = """
- keep_aspect_ratio_resizer {
- min_dimension: 10
- max_dimension: 20
- }
- """
- input_shape = (50, 25, 3)
- expected_output_shape = (20, 10, 3)
- output_shape = self._shape_of_resized_random_image_given_text_proto(
- input_shape, image_resizer_text_proto)
- self.assertEqual(output_shape, expected_output_shape)
-
- def test_build_keep_aspect_ratio_resizer_with_padding(self):
- image_resizer_text_proto = """
- keep_aspect_ratio_resizer {
- min_dimension: 10
- max_dimension: 20
- pad_to_max_dimension: true
- per_channel_pad_value: 3
- per_channel_pad_value: 4
- per_channel_pad_value: 5
- }
- """
- input_shape = (50, 25, 3)
- expected_output_shape = (20, 20, 3)
- output_shape = self._shape_of_resized_random_image_given_text_proto(
- input_shape, image_resizer_text_proto)
- self.assertEqual(output_shape, expected_output_shape)
-
- def test_built_fixed_shape_resizer_returns_expected_shape(self):
- image_resizer_text_proto = """
- fixed_shape_resizer {
- height: 10
- width: 20
- }
- """
- input_shape = (50, 25, 3)
- expected_output_shape = (10, 20, 3)
- output_shape = self._shape_of_resized_random_image_given_text_proto(
- input_shape, image_resizer_text_proto)
- self.assertEqual(output_shape, expected_output_shape)
-
- def test_raises_error_on_invalid_input(self):
- invalid_input = 'invalid_input'
- with self.assertRaises(ValueError):
- image_resizer_builder.build(invalid_input)
-
- def _resized_image_given_text_proto(self, image, text_proto):
- image_resizer_config = image_resizer_pb2.ImageResizer()
- text_format.Merge(text_proto, image_resizer_config)
- image_resizer_fn = image_resizer_builder.build(image_resizer_config)
- image_placeholder = tf.placeholder(tf.uint8, [1, None, None, 3])
- resized_image, _ = image_resizer_fn(image_placeholder)
- with self.test_session() as sess:
- return sess.run(resized_image, feed_dict={image_placeholder: image})
-
- def test_fixed_shape_resizer_nearest_neighbor_method(self):
- image_resizer_text_proto = """
- fixed_shape_resizer {
- height: 1
- width: 1
- resize_method: NEAREST_NEIGHBOR
- }
- """
- image = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
- image = np.expand_dims(image, axis=2)
- image = np.tile(image, (1, 1, 3))
- image = np.expand_dims(image, axis=0)
- resized_image = self._resized_image_given_text_proto(
- image, image_resizer_text_proto)
- vals = np.unique(resized_image).tolist()
- self.assertEqual(len(vals), 1)
- self.assertEqual(vals[0], 1)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/input_reader_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/input_reader_builder.py
deleted file mode 100644
index 8cb5e2f05448f1817a7644f1a553eac1ee98ba17..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/input_reader_builder.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Input reader builder.
-
-Creates data sources for DetectionModels from an InputReader config. See
-input_reader.proto for options.
-
-Note: If users wish to also use their own InputReaders with the Object
-Detection configuration framework, they should define their own builder function
-that wraps the build function.
-"""
-
-import tensorflow as tf
-
-from object_detection.data_decoders import tf_example_decoder
-from object_detection.protos import input_reader_pb2
-
-parallel_reader = tf.contrib.slim.parallel_reader
-
-
-def build(input_reader_config):
- """Builds a tensor dictionary based on the InputReader config.
-
- Args:
- input_reader_config: An input_reader_pb2.InputReader object.
-
- Returns:
- A tensor dict based on the input_reader_config.
-
- Raises:
- ValueError: On invalid input reader proto.
- ValueError: If no input paths are specified.
- """
- if not isinstance(input_reader_config, input_reader_pb2.InputReader):
- raise ValueError('input_reader_config not of type '
- 'input_reader_pb2.InputReader.')
-
- if input_reader_config.WhichOneof('input_reader') == 'tf_record_input_reader':
- config = input_reader_config.tf_record_input_reader
- if not config.input_path:
- raise ValueError('At least one input path must be specified in '
- '`input_reader_config`.')
- _, string_tensor = parallel_reader.parallel_read(
- config.input_path[:], # Convert `RepeatedScalarContainer` to list.
- reader_class=tf.TFRecordReader,
- num_epochs=(input_reader_config.num_epochs
- if input_reader_config.num_epochs else None),
- num_readers=input_reader_config.num_readers,
- shuffle=input_reader_config.shuffle,
- dtypes=[tf.string, tf.string],
- capacity=input_reader_config.queue_capacity,
- min_after_dequeue=input_reader_config.min_after_dequeue)
-
- label_map_proto_file = None
- if input_reader_config.HasField('label_map_path'):
- label_map_proto_file = input_reader_config.label_map_path
- decoder = tf_example_decoder.TfExampleDecoder(
- load_instance_masks=input_reader_config.load_instance_masks,
- instance_mask_type=input_reader_config.mask_type,
- label_map_proto_file=label_map_proto_file)
- return decoder.decode(string_tensor)
-
- raise ValueError('Unsupported input_reader_config.')
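A minimal usage sketch of this queue-based reader, mirroring input_reader_builder_test.py below (the record path is hypothetical):

    from google.protobuf import text_format
    from object_detection.builders import input_reader_builder
    from object_detection.protos import input_reader_pb2

    input_reader_proto = input_reader_pb2.InputReader()
    text_format.Merge("""
        shuffle: false
        num_readers: 1
        tf_record_input_reader {
          input_path: '/tmp/coco_train.record'  # hypothetical path
        }
    """, input_reader_proto)

    # Returns a tensor dict for one decoded example; queue runners must be
    # started (e.g. via tf.train.Supervisor) before evaluating it.
    tensor_dict = input_reader_builder.build(input_reader_proto)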
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/input_reader_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/input_reader_builder_test.py
deleted file mode 100644
index f09f60e5777b133e5fa50840d63728f2de55c147..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/input_reader_builder_test.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for input_reader_builder."""
-
-import os
-import numpy as np
-import tensorflow as tf
-
-from google.protobuf import text_format
-
-from tensorflow.core.example import example_pb2
-from tensorflow.core.example import feature_pb2
-from object_detection.builders import input_reader_builder
-from object_detection.core import standard_fields as fields
-from object_detection.protos import input_reader_pb2
-
-
-class InputReaderBuilderTest(tf.test.TestCase):
-
- def create_tf_record(self):
- path = os.path.join(self.get_temp_dir(), 'tfrecord')
- writer = tf.python_io.TFRecordWriter(path)
-
- image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
- flat_mask = (4 * 5) * [1.0]
- with self.test_session():
- encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
- example = example_pb2.Example(features=feature_pb2.Features(feature={
- 'image/encoded': feature_pb2.Feature(
- bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])),
- 'image/format': feature_pb2.Feature(
- bytes_list=feature_pb2.BytesList(value=['jpeg'.encode('utf-8')])),
- 'image/height': feature_pb2.Feature(
- int64_list=feature_pb2.Int64List(value=[4])),
- 'image/width': feature_pb2.Feature(
- int64_list=feature_pb2.Int64List(value=[5])),
- 'image/object/bbox/xmin': feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=[0.0])),
- 'image/object/bbox/xmax': feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=[1.0])),
- 'image/object/bbox/ymin': feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=[0.0])),
- 'image/object/bbox/ymax': feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=[1.0])),
- 'image/object/class/label': feature_pb2.Feature(
- int64_list=feature_pb2.Int64List(value=[2])),
- 'image/object/mask': feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=flat_mask)),
- }))
- writer.write(example.SerializeToString())
- writer.close()
-
- return path
-
- def test_build_tf_record_input_reader(self):
- tf_record_path = self.create_tf_record()
-
- input_reader_text_proto = """
- shuffle: false
- num_readers: 1
- tf_record_input_reader {{
- input_path: '{0}'
- }}
- """.format(tf_record_path)
- input_reader_proto = input_reader_pb2.InputReader()
- text_format.Merge(input_reader_text_proto, input_reader_proto)
- tensor_dict = input_reader_builder.build(input_reader_proto)
-
- sv = tf.train.Supervisor(logdir=self.get_temp_dir())
- with sv.prepare_or_wait_for_session() as sess:
- sv.start_queue_runners(sess)
- output_dict = sess.run(tensor_dict)
-
- self.assertNotIn(fields.InputDataFields.groundtruth_instance_masks,
- output_dict)
- self.assertEqual(
- (4, 5, 3), output_dict[fields.InputDataFields.image].shape)
- self.assertEqual(
- [2], output_dict[fields.InputDataFields.groundtruth_classes])
- self.assertEqual(
- (1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape)
- self.assertAllEqual(
- [0.0, 0.0, 1.0, 1.0],
- output_dict[fields.InputDataFields.groundtruth_boxes][0])
-
- def test_build_tf_record_input_reader_and_load_instance_masks(self):
- tf_record_path = self.create_tf_record()
-
- input_reader_text_proto = """
- shuffle: false
- num_readers: 1
- load_instance_masks: true
- tf_record_input_reader {{
- input_path: '{0}'
- }}
- """.format(tf_record_path)
- input_reader_proto = input_reader_pb2.InputReader()
- text_format.Merge(input_reader_text_proto, input_reader_proto)
- tensor_dict = input_reader_builder.build(input_reader_proto)
-
- sv = tf.train.Supervisor(logdir=self.get_temp_dir())
- with sv.prepare_or_wait_for_session() as sess:
- sv.start_queue_runners(sess)
- output_dict = sess.run(tensor_dict)
-
- self.assertEqual(
- (4, 5, 3), output_dict[fields.InputDataFields.image].shape)
- self.assertEqual(
- [2], output_dict[fields.InputDataFields.groundtruth_classes])
- self.assertEqual(
- (1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape)
- self.assertAllEqual(
- [0.0, 0.0, 1.0, 1.0],
- output_dict[fields.InputDataFields.groundtruth_boxes][0])
- self.assertAllEqual(
- (1, 4, 5),
- output_dict[fields.InputDataFields.groundtruth_instance_masks].shape)
-
- def test_raises_error_with_no_input_paths(self):
- input_reader_text_proto = """
- shuffle: false
- num_readers: 1
- load_instance_masks: true
- """
- input_reader_proto = input_reader_pb2.InputReader()
- text_format.Merge(input_reader_text_proto, input_reader_proto)
- with self.assertRaises(ValueError):
- input_reader_builder.build(input_reader_proto)
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/losses_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/losses_builder.py
deleted file mode 100644
index e4f7a12400fc3ce8c90407943c4530da1cef9594..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/losses_builder.py
+++ /dev/null
@@ -1,222 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""A function to build localization and classification losses from config."""
-
-from object_detection.core import balanced_positive_negative_sampler as sampler
-from object_detection.core import losses
-from object_detection.protos import losses_pb2
-
-
-def build(loss_config):
- """Build losses based on the config.
-
- Builds classification, localization losses and optionally a hard example miner
- based on the config.
-
- Args:
- loss_config: A losses_pb2.Loss object.
-
- Returns:
- classification_loss: Classification loss object.
- localization_loss: Localization loss object.
- classification_weight: Classification loss weight.
- localization_weight: Localization loss weight.
- hard_example_miner: Hard example miner object.
- random_example_sampler: BalancedPositiveNegativeSampler object.
-
- Raises:
- ValueError: If hard_example_miner is used with sigmoid_focal_loss.
- ValueError: If random_example_sampler is given a non-positive value as
- the desired positive example fraction.
- """
- classification_loss = _build_classification_loss(
- loss_config.classification_loss)
- localization_loss = _build_localization_loss(
- loss_config.localization_loss)
- classification_weight = loss_config.classification_weight
- localization_weight = loss_config.localization_weight
- hard_example_miner = None
- if loss_config.HasField('hard_example_miner'):
- if (loss_config.classification_loss.WhichOneof('classification_loss') ==
- 'weighted_sigmoid_focal'):
- raise ValueError('HardExampleMiner should not be used with sigmoid focal '
- 'loss')
- hard_example_miner = build_hard_example_miner(
- loss_config.hard_example_miner,
- classification_weight,
- localization_weight)
- random_example_sampler = None
- if loss_config.HasField('random_example_sampler'):
- if loss_config.random_example_sampler.positive_sample_fraction <= 0:
- raise ValueError('RandomExampleSampler should not use non-positive '
- 'value as positive sample fraction.')
- random_example_sampler = sampler.BalancedPositiveNegativeSampler(
- positive_fraction=loss_config.random_example_sampler.
- positive_sample_fraction)
- return (classification_loss, localization_loss, classification_weight,
- localization_weight, hard_example_miner, random_example_sampler)
-
-
-def build_hard_example_miner(config,
- classification_weight,
- localization_weight):
- """Builds hard example miner based on the config.
-
- Args:
- config: A losses_pb2.HardExampleMiner object.
- classification_weight: Classification loss weight.
- localization_weight: Localization loss weight.
-
- Returns:
- Hard example miner.
-
- """
- loss_type = None
- if config.loss_type == losses_pb2.HardExampleMiner.BOTH:
- loss_type = 'both'
- if config.loss_type == losses_pb2.HardExampleMiner.CLASSIFICATION:
- loss_type = 'cls'
- if config.loss_type == losses_pb2.HardExampleMiner.LOCALIZATION:
- loss_type = 'loc'
-
- max_negatives_per_positive = None
- num_hard_examples = None
- if config.max_negatives_per_positive > 0:
- max_negatives_per_positive = config.max_negatives_per_positive
- if config.num_hard_examples > 0:
- num_hard_examples = config.num_hard_examples
- hard_example_miner = losses.HardExampleMiner(
- num_hard_examples=num_hard_examples,
- iou_threshold=config.iou_threshold,
- loss_type=loss_type,
- cls_loss_weight=classification_weight,
- loc_loss_weight=localization_weight,
- max_negatives_per_positive=max_negatives_per_positive,
- min_negatives_per_image=config.min_negatives_per_image)
- return hard_example_miner
-
-
-def build_faster_rcnn_classification_loss(loss_config):
- """Builds a classification loss for Faster RCNN based on the loss config.
-
- Args:
- loss_config: A losses_pb2.ClassificationLoss object.
-
- Returns:
- Loss based on the config.
-
- Raises:
- ValueError: On invalid loss_config.
- """
- if not isinstance(loss_config, losses_pb2.ClassificationLoss):
- raise ValueError('loss_config not of type losses_pb2.ClassificationLoss.')
-
- loss_type = loss_config.WhichOneof('classification_loss')
-
- if loss_type == 'weighted_sigmoid':
- return losses.WeightedSigmoidClassificationLoss()
- if loss_type == 'weighted_softmax':
- config = loss_config.weighted_softmax
- return losses.WeightedSoftmaxClassificationLoss(
- logit_scale=config.logit_scale)
- if loss_type == 'weighted_logits_softmax':
- config = loss_config.weighted_logits_softmax
- return losses.WeightedSoftmaxClassificationAgainstLogitsLoss(
- logit_scale=config.logit_scale)
-
- # By default, Faster RCNN second stage classifier uses Softmax loss
- # with anchor-wise outputs.
- config = loss_config.weighted_softmax
- return losses.WeightedSoftmaxClassificationLoss(
- logit_scale=config.logit_scale)
-
-
-def _build_localization_loss(loss_config):
- """Builds a localization loss based on the loss config.
-
- Args:
- loss_config: A losses_pb2.LocalizationLoss object.
-
- Returns:
- Loss based on the config.
-
- Raises:
- ValueError: On invalid loss_config.
- """
- if not isinstance(loss_config, losses_pb2.LocalizationLoss):
- raise ValueError('loss_config not of type losses_pb2.LocalizationLoss.')
-
- loss_type = loss_config.WhichOneof('localization_loss')
-
- if loss_type == 'weighted_l2':
- return losses.WeightedL2LocalizationLoss()
-
- if loss_type == 'weighted_smooth_l1':
- return losses.WeightedSmoothL1LocalizationLoss(
- loss_config.weighted_smooth_l1.delta)
-
- if loss_type == 'weighted_iou':
- return losses.WeightedIOULocalizationLoss()
-
- raise ValueError('Empty loss config.')
-
-
-def _build_classification_loss(loss_config):
- """Builds a classification loss based on the loss config.
-
- Args:
- loss_config: A losses_pb2.ClassificationLoss object.
-
- Returns:
- Loss based on the config.
-
- Raises:
- ValueError: On invalid loss_config.
- """
- if not isinstance(loss_config, losses_pb2.ClassificationLoss):
- raise ValueError('loss_config not of type losses_pb2.ClassificationLoss.')
-
- loss_type = loss_config.WhichOneof('classification_loss')
-
- if loss_type == 'weighted_sigmoid':
- return losses.WeightedSigmoidClassificationLoss()
-
- if loss_type == 'weighted_sigmoid_focal':
- config = loss_config.weighted_sigmoid_focal
- alpha = None
- if config.HasField('alpha'):
- alpha = config.alpha
- return losses.SigmoidFocalClassificationLoss(
- gamma=config.gamma,
- alpha=alpha)
-
- if loss_type == 'weighted_softmax':
- config = loss_config.weighted_softmax
- return losses.WeightedSoftmaxClassificationLoss(
- logit_scale=config.logit_scale)
-
- if loss_type == 'weighted_logits_softmax':
- config = loss_config.weighted_logits_softmax
- return losses.WeightedSoftmaxClassificationAgainstLogitsLoss(
- logit_scale=config.logit_scale)
-
- if loss_type == 'bootstrapped_sigmoid':
- config = loss_config.bootstrapped_sigmoid
- return losses.BootstrappedSigmoidClassificationLoss(
- alpha=config.alpha,
- bootstrap_type=('hard' if config.hard_bootstrap else 'soft'))
-
- raise ValueError('Empty loss config.')
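For reference, a minimal sketch of calling build() and unpacking its six return values; the field values in the text proto are illustrative:

from google.protobuf import text_format
from object_detection.builders import losses_builder
from object_detection.protos import losses_pb2

losses_proto = losses_pb2.Loss()
text_format.Merge("""
  localization_loss { weighted_smooth_l1 { } }
  classification_loss { weighted_softmax { } }
  classification_weight: 1.0
  localization_weight: 2.0
""", losses_proto)

# hard_example_miner and random_example_sampler come back as None unless the
# corresponding optional fields are set in the proto.
(classification_loss, localization_loss, classification_weight,
 localization_weight, hard_example_miner,
 random_example_sampler) = losses_builder.build(losses_proto)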
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/losses_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/losses_builder_test.py
deleted file mode 100644
index 4dc4a754eca9a2180963da8dbb75afd9a520225f..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/losses_builder_test.py
+++ /dev/null
@@ -1,488 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for losses_builder."""
-
-import tensorflow as tf
-
-from google.protobuf import text_format
-from object_detection.builders import losses_builder
-from object_detection.core import losses
-from object_detection.protos import losses_pb2
-
-
-class LocalizationLossBuilderTest(tf.test.TestCase):
-
- def test_build_weighted_l2_localization_loss(self):
- losses_text_proto = """
- localization_loss {
- weighted_l2 {
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- _, localization_loss, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(localization_loss,
- losses.WeightedL2LocalizationLoss))
-
- def test_build_weighted_smooth_l1_localization_loss_default_delta(self):
- losses_text_proto = """
- localization_loss {
- weighted_smooth_l1 {
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- _, localization_loss, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(localization_loss,
- losses.WeightedSmoothL1LocalizationLoss))
- self.assertAlmostEqual(localization_loss._delta, 1.0)
-
- def test_build_weighted_smooth_l1_localization_loss_non_default_delta(self):
- losses_text_proto = """
- localization_loss {
- weighted_smooth_l1 {
- delta: 0.1
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- _, localization_loss, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(localization_loss,
- losses.WeightedSmoothL1LocalizationLoss))
- self.assertAlmostEqual(localization_loss._delta, 0.1)
-
- def test_build_weighted_iou_localization_loss(self):
- losses_text_proto = """
- localization_loss {
- weighted_iou {
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- _, localization_loss, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(localization_loss,
- losses.WeightedIOULocalizationLoss))
-
- def test_anchorwise_output(self):
- losses_text_proto = """
- localization_loss {
- weighted_smooth_l1 {
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- _, localization_loss, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(localization_loss,
- losses.WeightedSmoothL1LocalizationLoss))
- predictions = tf.constant([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]])
- targets = tf.constant([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]])
- weights = tf.constant([[1.0, 1.0]])
- loss = localization_loss(predictions, targets, weights=weights)
- self.assertEqual(loss.shape, [1, 2])
-
- def test_raise_error_on_empty_localization_config(self):
- losses_text_proto = """
- classification_loss {
- weighted_softmax {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- with self.assertRaises(ValueError):
- losses_builder._build_localization_loss(losses_proto)
-
-
-class ClassificationLossBuilderTest(tf.test.TestCase):
-
- def test_build_weighted_sigmoid_classification_loss(self):
- losses_text_proto = """
- classification_loss {
- weighted_sigmoid {
- }
- }
- localization_loss {
- weighted_l2 {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss, _, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.WeightedSigmoidClassificationLoss))
-
- def test_build_weighted_sigmoid_focal_classification_loss(self):
- losses_text_proto = """
- classification_loss {
- weighted_sigmoid_focal {
- }
- }
- localization_loss {
- weighted_l2 {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss, _, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.SigmoidFocalClassificationLoss))
- self.assertIsNone(classification_loss._alpha)
- self.assertAlmostEqual(classification_loss._gamma, 2.0)
-
- def test_build_weighted_sigmoid_focal_loss_non_default(self):
- losses_text_proto = """
- classification_loss {
- weighted_sigmoid_focal {
- alpha: 0.25
- gamma: 3.0
- }
- }
- localization_loss {
- weighted_l2 {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss, _, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.SigmoidFocalClassificationLoss))
- self.assertAlmostEqual(classification_loss._alpha, 0.25)
- self.assertAlmostEqual(classification_loss._gamma, 3.0)
-
- def test_build_weighted_softmax_classification_loss(self):
- losses_text_proto = """
- classification_loss {
- weighted_softmax {
- }
- }
- localization_loss {
- weighted_l2 {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss, _, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.WeightedSoftmaxClassificationLoss))
-
- def test_build_weighted_logits_softmax_classification_loss(self):
- losses_text_proto = """
- classification_loss {
- weighted_logits_softmax {
- }
- }
- localization_loss {
- weighted_l2 {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss, _, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(
- isinstance(classification_loss,
- losses.WeightedSoftmaxClassificationAgainstLogitsLoss))
-
- def test_build_weighted_softmax_classification_loss_with_logit_scale(self):
- losses_text_proto = """
- classification_loss {
- weighted_softmax {
- logit_scale: 2.0
- }
- }
- localization_loss {
- weighted_l2 {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss, _, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.WeightedSoftmaxClassificationLoss))
-
- def test_build_bootstrapped_sigmoid_classification_loss(self):
- losses_text_proto = """
- classification_loss {
- bootstrapped_sigmoid {
- alpha: 0.5
- }
- }
- localization_loss {
- weighted_l2 {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss, _, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.BootstrappedSigmoidClassificationLoss))
-
- def test_anchorwise_output(self):
- losses_text_proto = """
- classification_loss {
- weighted_sigmoid {
- anchorwise_output: true
- }
- }
- localization_loss {
- weighted_l2 {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss, _, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.WeightedSigmoidClassificationLoss))
- predictions = tf.constant([[[0.0, 1.0, 0.0], [0.0, 0.5, 0.5]]])
- targets = tf.constant([[[0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]])
- weights = tf.constant([[1.0, 1.0]])
- loss = classification_loss(predictions, targets, weights=weights)
- self.assertEqual(loss.shape, [1, 2, 3])
-
- def test_raise_error_on_empty_config(self):
- losses_text_proto = """
- localization_loss {
- weighted_l2 {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- with self.assertRaises(ValueError):
- losses_builder.build(losses_proto)
-
-
-class HardExampleMinerBuilderTest(tf.test.TestCase):
-
- def test_do_not_build_hard_example_miner_by_default(self):
- losses_text_proto = """
- localization_loss {
- weighted_l2 {
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- _, _, _, _, hard_example_miner, _ = losses_builder.build(losses_proto)
- self.assertEqual(hard_example_miner, None)
-
- def test_build_hard_example_miner_for_classification_loss(self):
- losses_text_proto = """
- localization_loss {
- weighted_l2 {
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- hard_example_miner {
- loss_type: CLASSIFICATION
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- _, _, _, _, hard_example_miner, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
- self.assertEqual(hard_example_miner._loss_type, 'cls')
-
- def test_build_hard_example_miner_for_localization_loss(self):
- losses_text_proto = """
- localization_loss {
- weighted_l2 {
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- hard_example_miner {
- loss_type: LOCALIZATION
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- _, _, _, _, hard_example_miner, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
- self.assertEqual(hard_example_miner._loss_type, 'loc')
-
- def test_build_hard_example_miner_with_non_default_values(self):
- losses_text_proto = """
- localization_loss {
- weighted_l2 {
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- hard_example_miner {
- num_hard_examples: 32
- iou_threshold: 0.5
- loss_type: LOCALIZATION
- max_negatives_per_positive: 10
- min_negatives_per_image: 3
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- _, _, _, _, hard_example_miner, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
- self.assertEqual(hard_example_miner._num_hard_examples, 32)
- self.assertAlmostEqual(hard_example_miner._iou_threshold, 0.5)
- self.assertEqual(hard_example_miner._max_negatives_per_positive, 10)
- self.assertEqual(hard_example_miner._min_negatives_per_image, 3)
-
-
-class LossBuilderTest(tf.test.TestCase):
-
- def test_build_all_loss_parameters(self):
- losses_text_proto = """
- localization_loss {
- weighted_l2 {
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- hard_example_miner {
- }
- classification_weight: 0.8
- localization_weight: 0.2
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- (classification_loss, localization_loss,
- classification_weight, localization_weight,
- hard_example_miner, _) = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
- self.assertTrue(isinstance(classification_loss,
- losses.WeightedSoftmaxClassificationLoss))
- self.assertTrue(isinstance(localization_loss,
- losses.WeightedL2LocalizationLoss))
- self.assertAlmostEqual(classification_weight, 0.8)
- self.assertAlmostEqual(localization_weight, 0.2)
-
- def test_raise_error_when_both_focal_loss_and_hard_example_miner(self):
- losses_text_proto = """
- localization_loss {
- weighted_l2 {
- }
- }
- classification_loss {
- weighted_sigmoid_focal {
- }
- }
- hard_example_miner {
- }
- classification_weight: 0.8
- localization_weight: 0.2
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- with self.assertRaises(ValueError):
- losses_builder.build(losses_proto)
-
-
-class FasterRcnnClassificationLossBuilderTest(tf.test.TestCase):
-
- def test_build_sigmoid_loss(self):
- losses_text_proto = """
- weighted_sigmoid {
- }
- """
- losses_proto = losses_pb2.ClassificationLoss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss = losses_builder.build_faster_rcnn_classification_loss(
- losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.WeightedSigmoidClassificationLoss))
-
- def test_build_softmax_loss(self):
- losses_text_proto = """
- weighted_softmax {
- }
- """
- losses_proto = losses_pb2.ClassificationLoss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss = losses_builder.build_faster_rcnn_classification_loss(
- losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.WeightedSoftmaxClassificationLoss))
-
- def test_build_logits_softmax_loss(self):
- losses_text_proto = """
- weighted_logits_softmax {
- }
- """
- losses_proto = losses_pb2.ClassificationLoss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss = losses_builder.build_faster_rcnn_classification_loss(
- losses_proto)
- self.assertTrue(
- isinstance(classification_loss,
- losses.WeightedSoftmaxClassificationAgainstLogitsLoss))
-
- def test_build_softmax_loss_by_default(self):
- losses_text_proto = """
- """
- losses_proto = losses_pb2.ClassificationLoss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss = losses_builder.build_faster_rcnn_classification_loss(
- losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.WeightedSoftmaxClassificationLoss))
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/matcher_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/matcher_builder.py
deleted file mode 100644
index d334f435372984eb78265d72b2bcdf63c45bde5b..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/matcher_builder.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""A function to build an object detection matcher from configuration."""
-
-from object_detection.matchers import argmax_matcher
-from object_detection.matchers import bipartite_matcher
-from object_detection.protos import matcher_pb2
-
-
-def build(matcher_config):
- """Builds a matcher object based on the matcher config.
-
- Args:
- matcher_config: A matcher.proto object containing the config for the desired
- Matcher.
-
- Returns:
- Matcher based on the config.
-
- Raises:
- ValueError: On empty matcher proto.
- """
- if not isinstance(matcher_config, matcher_pb2.Matcher):
- raise ValueError('matcher_config not of type matcher_pb2.Matcher.')
- if matcher_config.WhichOneof('matcher_oneof') == 'argmax_matcher':
- matcher = matcher_config.argmax_matcher
- matched_threshold = unmatched_threshold = None
- if not matcher.ignore_thresholds:
- matched_threshold = matcher.matched_threshold
- unmatched_threshold = matcher.unmatched_threshold
- return argmax_matcher.ArgMaxMatcher(
- matched_threshold=matched_threshold,
- unmatched_threshold=unmatched_threshold,
- negatives_lower_than_unmatched=matcher.negatives_lower_than_unmatched,
- force_match_for_each_row=matcher.force_match_for_each_row,
- use_matmul_gather=matcher.use_matmul_gather)
- if matcher_config.WhichOneof('matcher_oneof') == 'bipartite_matcher':
- matcher = matcher_config.bipartite_matcher
- return bipartite_matcher.GreedyBipartiteMatcher(matcher.use_matmul_gather)
- raise ValueError('Empty matcher.')
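For reference, a minimal sketch of building an ArgMaxMatcher with explicit thresholds; the threshold values are illustrative:

from google.protobuf import text_format
from object_detection.builders import matcher_builder
from object_detection.protos import matcher_pb2

matcher_proto = matcher_pb2.Matcher()
text_format.Merge("""
  argmax_matcher {
    matched_threshold: 0.7
    unmatched_threshold: 0.3
  }
""", matcher_proto)

# Returns an argmax_matcher.ArgMaxMatcher; an empty Matcher proto would
# raise ValueError('Empty matcher.').
matcher = matcher_builder.build(matcher_proto)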
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/matcher_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/matcher_builder_test.py
deleted file mode 100644
index 66854491192c1739855b9f2a428a2f29005ad866..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/matcher_builder_test.py
+++ /dev/null
@@ -1,99 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for matcher_builder."""
-
-import tensorflow as tf
-
-from google.protobuf import text_format
-from object_detection.builders import matcher_builder
-from object_detection.matchers import argmax_matcher
-from object_detection.matchers import bipartite_matcher
-from object_detection.protos import matcher_pb2
-
-
-class MatcherBuilderTest(tf.test.TestCase):
-
- def test_build_arg_max_matcher_with_defaults(self):
- matcher_text_proto = """
- argmax_matcher {
- }
- """
- matcher_proto = matcher_pb2.Matcher()
- text_format.Merge(matcher_text_proto, matcher_proto)
- matcher_object = matcher_builder.build(matcher_proto)
- self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher))
- self.assertAlmostEqual(matcher_object._matched_threshold, 0.5)
- self.assertAlmostEqual(matcher_object._unmatched_threshold, 0.5)
- self.assertTrue(matcher_object._negatives_lower_than_unmatched)
- self.assertFalse(matcher_object._force_match_for_each_row)
-
- def test_build_arg_max_matcher_without_thresholds(self):
- matcher_text_proto = """
- argmax_matcher {
- ignore_thresholds: true
- }
- """
- matcher_proto = matcher_pb2.Matcher()
- text_format.Merge(matcher_text_proto, matcher_proto)
- matcher_object = matcher_builder.build(matcher_proto)
- self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher))
- self.assertEqual(matcher_object._matched_threshold, None)
- self.assertEqual(matcher_object._unmatched_threshold, None)
- self.assertTrue(matcher_object._negatives_lower_than_unmatched)
- self.assertFalse(matcher_object._force_match_for_each_row)
-
- def test_build_arg_max_matcher_with_non_default_parameters(self):
- matcher_text_proto = """
- argmax_matcher {
- matched_threshold: 0.7
- unmatched_threshold: 0.3
- negatives_lower_than_unmatched: false
- force_match_for_each_row: true
- use_matmul_gather: true
- }
- """
- matcher_proto = matcher_pb2.Matcher()
- text_format.Merge(matcher_text_proto, matcher_proto)
- matcher_object = matcher_builder.build(matcher_proto)
- self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher))
- self.assertAlmostEqual(matcher_object._matched_threshold, 0.7)
- self.assertAlmostEqual(matcher_object._unmatched_threshold, 0.3)
- self.assertFalse(matcher_object._negatives_lower_than_unmatched)
- self.assertTrue(matcher_object._force_match_for_each_row)
- self.assertTrue(matcher_object._use_matmul_gather)
-
- def test_build_bipartite_matcher(self):
- matcher_text_proto = """
- bipartite_matcher {
- }
- """
- matcher_proto = matcher_pb2.Matcher()
- text_format.Merge(matcher_text_proto, matcher_proto)
- matcher_object = matcher_builder.build(matcher_proto)
- self.assertTrue(
- isinstance(matcher_object, bipartite_matcher.GreedyBipartiteMatcher))
-
- def test_raise_error_on_empty_matcher(self):
- matcher_text_proto = """
- """
- matcher_proto = matcher_pb2.Matcher()
- text_format.Merge(matcher_text_proto, matcher_proto)
- with self.assertRaises(ValueError):
- matcher_builder.build(matcher_proto)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/model_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/model_builder.py
deleted file mode 100644
index 1ebdcb79f391726f2af9c85888d36a5f881f590e..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/model_builder.py
+++ /dev/null
@@ -1,377 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""A function to build a DetectionModel from configuration."""
-from object_detection.builders import anchor_generator_builder
-from object_detection.builders import box_coder_builder
-from object_detection.builders import box_predictor_builder
-from object_detection.builders import hyperparams_builder
-from object_detection.builders import image_resizer_builder
-from object_detection.builders import losses_builder
-from object_detection.builders import matcher_builder
-from object_detection.builders import post_processing_builder
-from object_detection.builders import region_similarity_calculator_builder as sim_calc
-from object_detection.core import box_predictor
-from object_detection.meta_architectures import faster_rcnn_meta_arch
-from object_detection.meta_architectures import rfcn_meta_arch
-from object_detection.meta_architectures import ssd_meta_arch
-from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
-from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2
-from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas
-from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas
-from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
-from object_detection.models import ssd_resnet_v1_fpn_feature_extractor as ssd_resnet_v1_fpn
-from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor
-from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor
-from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor
-from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor
-from object_detection.models.ssd_mobilenet_v2_feature_extractor import SSDMobileNetV2FeatureExtractor
-from object_detection.protos import model_pb2
-
-# A map of names to SSD feature extractors.
-SSD_FEATURE_EXTRACTOR_CLASS_MAP = {
- 'ssd_inception_v2': SSDInceptionV2FeatureExtractor,
- 'ssd_inception_v3': SSDInceptionV3FeatureExtractor,
- 'ssd_mobilenet_v1': SSDMobileNetV1FeatureExtractor,
- 'ssd_mobilenet_v2': SSDMobileNetV2FeatureExtractor,
- 'ssd_resnet50_v1_fpn': ssd_resnet_v1_fpn.SSDResnet50V1FpnFeatureExtractor,
- 'ssd_resnet101_v1_fpn': ssd_resnet_v1_fpn.SSDResnet101V1FpnFeatureExtractor,
- 'ssd_resnet152_v1_fpn': ssd_resnet_v1_fpn.SSDResnet152V1FpnFeatureExtractor,
- 'embedded_ssd_mobilenet_v1': EmbeddedSSDMobileNetV1FeatureExtractor,
-}
-
-# A map of names to Faster R-CNN feature extractors.
-FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {
- 'faster_rcnn_nas':
- frcnn_nas.FasterRCNNNASFeatureExtractor,
- 'faster_rcnn_pnas':
- frcnn_pnas.FasterRCNNPNASFeatureExtractor,
- 'faster_rcnn_inception_resnet_v2':
- frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor,
- 'faster_rcnn_inception_v2':
- frcnn_inc_v2.FasterRCNNInceptionV2FeatureExtractor,
- 'faster_rcnn_resnet50':
- frcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor,
- 'faster_rcnn_resnet101':
- frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor,
- 'faster_rcnn_resnet152':
- frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor,
-}
-
-
-def build(model_config, is_training, add_summaries=True,
- add_background_class=True):
- """Builds a DetectionModel based on the model config.
-
- Args:
- model_config: A model.proto object containing the config for the desired
- DetectionModel.
- is_training: True if this model is being built for training purposes.
- add_summaries: Whether to add tensorflow summaries in the model graph.
- add_background_class: Whether to add an implicit background class to one-hot
- encodings of groundtruth labels. Set to false if using groundtruth labels
- with an explicit background class or using multiclass scores instead of
- truth in the case of distillation. Ignored in the case of faster_rcnn.
- Returns:
- DetectionModel based on the config.
-
- Raises:
- ValueError: On invalid meta architecture or model.
- """
- if not isinstance(model_config, model_pb2.DetectionModel):
- raise ValueError('model_config not of type model_pb2.DetectionModel.')
- meta_architecture = model_config.WhichOneof('model')
- if meta_architecture == 'ssd':
- return _build_ssd_model(model_config.ssd, is_training, add_summaries,
- add_background_class)
- if meta_architecture == 'faster_rcnn':
- return _build_faster_rcnn_model(model_config.faster_rcnn, is_training,
- add_summaries)
- raise ValueError('Unknown meta architecture: {}'.format(meta_architecture))
-
-
-def _build_ssd_feature_extractor(feature_extractor_config, is_training,
- reuse_weights=None):
- """Builds a ssd_meta_arch.SSDFeatureExtractor based on config.
-
- Args:
- feature_extractor_config: An SSDFeatureExtractor proto config from ssd.proto.
- is_training: True if this feature extractor is being built for training.
- reuse_weights: Whether the feature extractor should reuse weights.
-
- Returns:
- ssd_meta_arch.SSDFeatureExtractor based on config.
-
- Raises:
- ValueError: On invalid feature extractor type.
- """
- feature_type = feature_extractor_config.type
- depth_multiplier = feature_extractor_config.depth_multiplier
- min_depth = feature_extractor_config.min_depth
- pad_to_multiple = feature_extractor_config.pad_to_multiple
- use_explicit_padding = feature_extractor_config.use_explicit_padding
- use_depthwise = feature_extractor_config.use_depthwise
- conv_hyperparams = hyperparams_builder.build(
- feature_extractor_config.conv_hyperparams, is_training)
- override_base_feature_extractor_hyperparams = (
- feature_extractor_config.override_base_feature_extractor_hyperparams)
-
- if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
- raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))
-
- feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
- return feature_extractor_class(
- is_training, depth_multiplier, min_depth, pad_to_multiple,
- conv_hyperparams, reuse_weights, use_explicit_padding, use_depthwise,
- override_base_feature_extractor_hyperparams)
-
-
-def _build_ssd_model(ssd_config, is_training, add_summaries,
- add_background_class=True):
- """Builds an SSD detection model based on the model config.
-
- Args:
- ssd_config: An ssd.proto object containing the config for the desired
- SSDMetaArch.
- is_training: True if this model is being built for training purposes.
- add_summaries: Whether to add tf summaries in the model.
- add_background_class: Whether to add an implicit background class to one-hot
- encodings of groundtruth labels. Set to false if using groundtruth labels
- with an explicit background class or using multiclass scores instead of
- truth in the case of distillation.
- Returns:
- SSDMetaArch based on the config.
-
- Raises:
- ValueError: If ssd_config.type is not recognized (i.e. not registered in
- model_class_map).
- """
- num_classes = ssd_config.num_classes
-
- # Feature extractor
- feature_extractor = _build_ssd_feature_extractor(
- feature_extractor_config=ssd_config.feature_extractor,
- is_training=is_training)
-
- box_coder = box_coder_builder.build(ssd_config.box_coder)
- matcher = matcher_builder.build(ssd_config.matcher)
- region_similarity_calculator = sim_calc.build(
- ssd_config.similarity_calculator)
- encode_background_as_zeros = ssd_config.encode_background_as_zeros
- negative_class_weight = ssd_config.negative_class_weight
- ssd_box_predictor = box_predictor_builder.build(hyperparams_builder.build,
- ssd_config.box_predictor,
- is_training, num_classes)
- anchor_generator = anchor_generator_builder.build(
- ssd_config.anchor_generator)
- image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
- non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
- ssd_config.post_processing)
- (classification_loss, localization_loss, classification_weight,
- localization_weight, hard_example_miner,
- random_example_sampler) = losses_builder.build(ssd_config.loss)
- normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches
- normalize_loc_loss_by_codesize = ssd_config.normalize_loc_loss_by_codesize
-
- return ssd_meta_arch.SSDMetaArch(
- is_training,
- anchor_generator,
- ssd_box_predictor,
- box_coder,
- feature_extractor,
- matcher,
- region_similarity_calculator,
- encode_background_as_zeros,
- negative_class_weight,
- image_resizer_fn,
- non_max_suppression_fn,
- score_conversion_fn,
- classification_loss,
- localization_loss,
- classification_weight,
- localization_weight,
- normalize_loss_by_num_matches,
- hard_example_miner,
- add_summaries=add_summaries,
- normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
- freeze_batchnorm=ssd_config.freeze_batchnorm,
- inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
- add_background_class=add_background_class,
- random_example_sampler=random_example_sampler)
-
-
-def _build_faster_rcnn_feature_extractor(
- feature_extractor_config, is_training, reuse_weights=None,
- inplace_batchnorm_update=False):
- """Builds a faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config.
-
- Args:
- feature_extractor_config: A FasterRcnnFeatureExtractor proto config from
- faster_rcnn.proto.
- is_training: True if this feature extractor is being built for training.
- reuse_weights: Whether the feature extractor should reuse weights.
- inplace_batchnorm_update: Whether to update batch_norm inplace during
- training. This is required for batch norm to work correctly on TPUs. When
- this is false, the user must add a control dependency on
- tf.GraphKeys.UPDATE_OPS for the train/loss op in order to update the batch
- norm moving average parameters.
-
- Returns:
- faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config.
-
- Raises:
- ValueError: On invalid feature extractor type.
- """
- if inplace_batchnorm_update:
- raise ValueError('inplace batchnorm updates not supported.')
- feature_type = feature_extractor_config.type
- first_stage_features_stride = (
- feature_extractor_config.first_stage_features_stride)
- batch_norm_trainable = feature_extractor_config.batch_norm_trainable
-
- if feature_type not in FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP:
- raise ValueError('Unknown Faster R-CNN feature_extractor: {}'.format(
- feature_type))
- feature_extractor_class = FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP[
- feature_type]
- return feature_extractor_class(
- is_training, first_stage_features_stride,
- batch_norm_trainable, reuse_weights)
-
-
-def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
- """Builds a Faster R-CNN or R-FCN detection model based on the model config.
-
- Builds an R-FCN model if the second_stage_box_predictor in the config is of
- type `rfcn_box_predictor`; otherwise builds a Faster R-CNN model.
-
- Args:
- frcnn_config: A faster_rcnn.proto object containing the config for the
- desired FasterRCNNMetaArch or RFCNMetaArch.
- is_training: True if this model is being built for training purposes.
- add_summaries: Whether to add tf summaries in the model.
-
- Returns:
- FasterRCNNMetaArch based on the config.
-
- Raises:
- ValueError: If frcnn_config.type is not recognized (i.e. not registered in
- model_class_map).
- """
- num_classes = frcnn_config.num_classes
- image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)
-
- feature_extractor = _build_faster_rcnn_feature_extractor(
- frcnn_config.feature_extractor, is_training,
- inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update)
-
- number_of_stages = frcnn_config.number_of_stages
- first_stage_anchor_generator = anchor_generator_builder.build(
- frcnn_config.first_stage_anchor_generator)
-
- first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
- first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
- frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
- first_stage_box_predictor_kernel_size = (
- frcnn_config.first_stage_box_predictor_kernel_size)
- first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
- first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
- first_stage_positive_balance_fraction = (
- frcnn_config.first_stage_positive_balance_fraction)
- first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold
- first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold
- first_stage_max_proposals = frcnn_config.first_stage_max_proposals
- first_stage_loc_loss_weight = (
- frcnn_config.first_stage_localization_loss_weight)
- first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight
-
- initial_crop_size = frcnn_config.initial_crop_size
- maxpool_kernel_size = frcnn_config.maxpool_kernel_size
- maxpool_stride = frcnn_config.maxpool_stride
-
- second_stage_box_predictor = box_predictor_builder.build(
- hyperparams_builder.build,
- frcnn_config.second_stage_box_predictor,
- is_training=is_training,
- num_classes=num_classes)
- second_stage_batch_size = frcnn_config.second_stage_batch_size
- second_stage_balance_fraction = frcnn_config.second_stage_balance_fraction
- (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn
- ) = post_processing_builder.build(frcnn_config.second_stage_post_processing)
- second_stage_localization_loss_weight = (
- frcnn_config.second_stage_localization_loss_weight)
- second_stage_classification_loss = (
- losses_builder.build_faster_rcnn_classification_loss(
- frcnn_config.second_stage_classification_loss))
- second_stage_classification_loss_weight = (
- frcnn_config.second_stage_classification_loss_weight)
- second_stage_mask_prediction_loss_weight = (
- frcnn_config.second_stage_mask_prediction_loss_weight)
-
- hard_example_miner = None
- if frcnn_config.HasField('hard_example_miner'):
- hard_example_miner = losses_builder.build_hard_example_miner(
- frcnn_config.hard_example_miner,
- second_stage_classification_loss_weight,
- second_stage_localization_loss_weight)
-
- common_kwargs = {
- 'is_training': is_training,
- 'num_classes': num_classes,
- 'image_resizer_fn': image_resizer_fn,
- 'feature_extractor': feature_extractor,
- 'number_of_stages': number_of_stages,
- 'first_stage_anchor_generator': first_stage_anchor_generator,
- 'first_stage_atrous_rate': first_stage_atrous_rate,
- 'first_stage_box_predictor_arg_scope_fn':
- first_stage_box_predictor_arg_scope_fn,
- 'first_stage_box_predictor_kernel_size':
- first_stage_box_predictor_kernel_size,
- 'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
- 'first_stage_minibatch_size': first_stage_minibatch_size,
- 'first_stage_positive_balance_fraction':
- first_stage_positive_balance_fraction,
- 'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
- 'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
- 'first_stage_max_proposals': first_stage_max_proposals,
- 'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
- 'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
- 'second_stage_batch_size': second_stage_batch_size,
- 'second_stage_balance_fraction': second_stage_balance_fraction,
- 'second_stage_non_max_suppression_fn':
- second_stage_non_max_suppression_fn,
- 'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
- 'second_stage_localization_loss_weight':
- second_stage_localization_loss_weight,
- 'second_stage_classification_loss':
- second_stage_classification_loss,
- 'second_stage_classification_loss_weight':
- second_stage_classification_loss_weight,
- 'hard_example_miner': hard_example_miner,
- 'add_summaries': add_summaries}
-
- if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor):
- return rfcn_meta_arch.RFCNMetaArch(
- second_stage_rfcn_box_predictor=second_stage_box_predictor,
- **common_kwargs)
- else:
- return faster_rcnn_meta_arch.FasterRCNNMetaArch(
- initial_crop_size=initial_crop_size,
- maxpool_kernel_size=maxpool_kernel_size,
- maxpool_stride=maxpool_stride,
- second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
- second_stage_mask_prediction_loss_weight=(
- second_stage_mask_prediction_loss_weight),
- **common_kwargs)
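For reference, a minimal sketch of building a detection model from a complete pipeline config on disk, assuming pipeline_pb2 (generated from object_detection/protos/pipeline.proto) is available in this tree; '/tmp/pipeline.config' is a placeholder path:

import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import model_builder
from object_detection.protos import pipeline_pb2

pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile('/tmp/pipeline.config', 'r') as f:
  text_format.Merge(f.read(), pipeline_config)

# pipeline_config.model is the model_pb2.DetectionModel oneof that build()
# dispatches on: 'ssd' -> SSDMetaArch, 'faster_rcnn' -> FasterRCNNMetaArch
# (or RFCNMetaArch when the second-stage box predictor is rfcn_box_predictor).
detection_model = model_builder.build(pipeline_config.model, is_training=True)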
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/model_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/model_builder_test.py
deleted file mode 100644
index 225e1d50b1c229b7d7b2017661df55973098eb99..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/model_builder_test.py
+++ /dev/null
@@ -1,1056 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.models.model_builder."""
-
-import tensorflow as tf
-
-from google.protobuf import text_format
-from object_detection.builders import model_builder
-from object_detection.meta_architectures import faster_rcnn_meta_arch
-from object_detection.meta_architectures import rfcn_meta_arch
-from object_detection.meta_architectures import ssd_meta_arch
-from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
-from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2
-from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas
-from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas
-from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
-from object_detection.models import ssd_resnet_v1_fpn_feature_extractor as ssd_resnet_v1_fpn
-from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor
-from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor
-from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor
-from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor
-from object_detection.models.ssd_mobilenet_v2_feature_extractor import SSDMobileNetV2FeatureExtractor
-from object_detection.protos import model_pb2
-
-FRCNN_RESNET_FEAT_MAPS = {
- 'faster_rcnn_resnet50':
- frcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor,
- 'faster_rcnn_resnet101':
- frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor,
- 'faster_rcnn_resnet152':
- frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor
-}
-
-SSD_RESNET_V1_FPN_FEAT_MAPS = {
- 'ssd_resnet50_v1_fpn':
- ssd_resnet_v1_fpn.SSDResnet50V1FpnFeatureExtractor,
- 'ssd_resnet101_v1_fpn':
- ssd_resnet_v1_fpn.SSDResnet101V1FpnFeatureExtractor,
- 'ssd_resnet152_v1_fpn':
- ssd_resnet_v1_fpn.SSDResnet152V1FpnFeatureExtractor
-}
-
-
-class ModelBuilderTest(tf.test.TestCase):
-
- def create_model(self, model_config):
- """Builds a DetectionModel based on the model config.
-
- Args:
- model_config: A model.proto object containing the config for the desired
- DetectionModel.
-
- Returns:
- DetectionModel based on the config.
- """
- return model_builder.build(model_config, is_training=True)
-
- def test_create_ssd_inception_v2_model_from_config(self):
- model_text_proto = """
- ssd {
- feature_extractor {
- type: 'ssd_inception_v2'
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- override_base_feature_extractor_hyperparams: true
- }
- box_coder {
- faster_rcnn_box_coder {
- }
- }
- matcher {
- argmax_matcher {
- }
- }
- similarity_calculator {
- iou_similarity {
- }
- }
- anchor_generator {
- ssd_anchor_generator {
- aspect_ratios: 1.0
- }
- }
- image_resizer {
- fixed_shape_resizer {
- height: 320
- width: 320
- }
- }
- box_predictor {
- convolutional_box_predictor {
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- loss {
- classification_loss {
- weighted_softmax {
- }
- }
- localization_loss {
- weighted_smooth_l1 {
- }
- }
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = self.create_model(model_proto)
- self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
- self.assertIsInstance(model._feature_extractor,
- SSDInceptionV2FeatureExtractor)
-
- def test_create_ssd_inception_v3_model_from_config(self):
- model_text_proto = """
- ssd {
- feature_extractor {
- type: 'ssd_inception_v3'
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- override_base_feature_extractor_hyperparams: true
- }
- box_coder {
- faster_rcnn_box_coder {
- }
- }
- matcher {
- argmax_matcher {
- }
- }
- similarity_calculator {
- iou_similarity {
- }
- }
- anchor_generator {
- ssd_anchor_generator {
- aspect_ratios: 1.0
- }
- }
- image_resizer {
- fixed_shape_resizer {
- height: 320
- width: 320
- }
- }
- box_predictor {
- convolutional_box_predictor {
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- loss {
- classification_loss {
- weighted_softmax {
- }
- }
- localization_loss {
- weighted_smooth_l1 {
- }
- }
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = self.create_model(model_proto)
- self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
- self.assertIsInstance(model._feature_extractor,
- SSDInceptionV3FeatureExtractor)
-
- def test_create_ssd_resnet_v1_fpn_model_from_config(self):
- model_text_proto = """
- ssd {
- feature_extractor {
- type: 'ssd_resnet50_v1_fpn'
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- box_coder {
- faster_rcnn_box_coder {
- }
- }
- matcher {
- argmax_matcher {
- }
- }
- similarity_calculator {
- iou_similarity {
- }
- }
- encode_background_as_zeros: true
- anchor_generator {
- multiscale_anchor_generator {
- aspect_ratios: [1.0, 2.0, 0.5]
- scales_per_octave: 2
- }
- }
- image_resizer {
- fixed_shape_resizer {
- height: 320
- width: 320
- }
- }
- box_predictor {
- weight_shared_convolutional_box_predictor {
- depth: 32
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- random_normal_initializer {
- }
- }
- }
- num_layers_before_predictor: 1
- }
- }
- normalize_loss_by_num_matches: true
- normalize_loc_loss_by_codesize: true
- loss {
- classification_loss {
- weighted_sigmoid_focal {
- alpha: 0.25
- gamma: 2.0
- }
- }
- localization_loss {
- weighted_smooth_l1 {
- delta: 0.1
- }
- }
- classification_weight: 1.0
- localization_weight: 1.0
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
-
- for extractor_type, extractor_class in SSD_RESNET_V1_FPN_FEAT_MAPS.items():
- model_proto.ssd.feature_extractor.type = extractor_type
- model = model_builder.build(model_proto, is_training=True)
- self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
- self.assertIsInstance(model._feature_extractor, extractor_class)
-
- def test_create_ssd_mobilenet_v1_model_from_config(self):
- model_text_proto = """
- ssd {
- freeze_batchnorm: true
- inplace_batchnorm_update: true
- feature_extractor {
- type: 'ssd_mobilenet_v1'
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- box_coder {
- faster_rcnn_box_coder {
- }
- }
- matcher {
- argmax_matcher {
- }
- }
- similarity_calculator {
- iou_similarity {
- }
- }
- anchor_generator {
- ssd_anchor_generator {
- aspect_ratios: 1.0
- }
- }
- image_resizer {
- fixed_shape_resizer {
- height: 320
- width: 320
- }
- }
- box_predictor {
- convolutional_box_predictor {
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- normalize_loc_loss_by_codesize: true
- loss {
- classification_loss {
- weighted_softmax {
- }
- }
- localization_loss {
- weighted_smooth_l1 {
- }
- }
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = self.create_model(model_proto)
- self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
- self.assertIsInstance(model._feature_extractor,
- SSDMobileNetV1FeatureExtractor)
- self.assertTrue(model._normalize_loc_loss_by_codesize)
- self.assertTrue(model._freeze_batchnorm)
- self.assertTrue(model._inplace_batchnorm_update)
-
- def test_create_ssd_mobilenet_v2_model_from_config(self):
- model_text_proto = """
- ssd {
- feature_extractor {
- type: 'ssd_mobilenet_v2'
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- box_coder {
- faster_rcnn_box_coder {
- }
- }
- matcher {
- argmax_matcher {
- }
- }
- similarity_calculator {
- iou_similarity {
- }
- }
- anchor_generator {
- ssd_anchor_generator {
- aspect_ratios: 1.0
- }
- }
- image_resizer {
- fixed_shape_resizer {
- height: 320
- width: 320
- }
- }
- box_predictor {
- convolutional_box_predictor {
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- normalize_loc_loss_by_codesize: true
- loss {
- classification_loss {
- weighted_softmax {
- }
- }
- localization_loss {
- weighted_smooth_l1 {
- }
- }
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = self.create_model(model_proto)
- self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
- self.assertIsInstance(model._feature_extractor,
- SSDMobileNetV2FeatureExtractor)
- self.assertTrue(model._normalize_loc_loss_by_codesize)
-
- def test_create_embedded_ssd_mobilenet_v1_model_from_config(self):
- model_text_proto = """
- ssd {
- feature_extractor {
- type: 'embedded_ssd_mobilenet_v1'
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- box_coder {
- faster_rcnn_box_coder {
- }
- }
- matcher {
- argmax_matcher {
- }
- }
- similarity_calculator {
- iou_similarity {
- }
- }
- anchor_generator {
- ssd_anchor_generator {
- aspect_ratios: 1.0
- }
- }
- image_resizer {
- fixed_shape_resizer {
- height: 256
- width: 256
- }
- }
- box_predictor {
- convolutional_box_predictor {
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- loss {
- classification_loss {
- weighted_softmax {
- }
- }
- localization_loss {
- weighted_smooth_l1 {
- }
- }
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = self.create_model(model_proto)
- self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
- self.assertIsInstance(model._feature_extractor,
- EmbeddedSSDMobileNetV1FeatureExtractor)
-
- def test_create_faster_rcnn_resnet_v1_models_from_config(self):
- model_text_proto = """
- faster_rcnn {
- inplace_batchnorm_update: true
- num_classes: 3
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_resnet101'
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.01
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- for extractor_type, extractor_class in FRCNN_RESNET_FEAT_MAPS.items():
- model_proto.faster_rcnn.feature_extractor.type = extractor_type
- model = model_builder.build(model_proto, is_training=True)
- self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
- self.assertIsInstance(model._feature_extractor, extractor_class)
-
- def test_create_faster_rcnn_resnet101_with_mask_prediction_enabled(self):
- model_text_proto = """
- faster_rcnn {
- num_classes: 3
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_resnet101'
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- predict_instance_masks: true
- }
- }
- second_stage_mask_prediction_loss_weight: 3.0
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.01
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = model_builder.build(model_proto, is_training=True)
- self.assertAlmostEqual(model._second_stage_mask_loss_weight, 3.0)
-
- def test_create_faster_rcnn_nas_model_from_config(self):
- model_text_proto = """
- faster_rcnn {
- num_classes: 3
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_nas'
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- initial_crop_size: 17
- maxpool_kernel_size: 1
- maxpool_stride: 1
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.01
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = model_builder.build(model_proto, is_training=True)
- self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
- self.assertIsInstance(
- model._feature_extractor,
- frcnn_nas.FasterRCNNNASFeatureExtractor)
-
- def test_create_faster_rcnn_pnas_model_from_config(self):
- model_text_proto = """
- faster_rcnn {
- num_classes: 3
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_pnas'
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- initial_crop_size: 17
- maxpool_kernel_size: 1
- maxpool_stride: 1
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.01
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = model_builder.build(model_proto, is_training=True)
- self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
- self.assertIsInstance(
- model._feature_extractor,
- frcnn_pnas.FasterRCNNPNASFeatureExtractor)
-
- def test_create_faster_rcnn_inception_resnet_v2_model_from_config(self):
- model_text_proto = """
- faster_rcnn {
- num_classes: 3
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_inception_resnet_v2'
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- initial_crop_size: 17
- maxpool_kernel_size: 1
- maxpool_stride: 1
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.01
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = model_builder.build(model_proto, is_training=True)
- self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
- self.assertIsInstance(
- model._feature_extractor,
- frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor)
-
- def test_create_faster_rcnn_inception_v2_model_from_config(self):
- model_text_proto = """
- faster_rcnn {
- num_classes: 3
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_inception_v2'
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.01
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = model_builder.build(model_proto, is_training=True)
- self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
- self.assertIsInstance(model._feature_extractor,
- frcnn_inc_v2.FasterRCNNInceptionV2FeatureExtractor)
-
- def test_create_faster_rcnn_model_from_config_with_example_miner(self):
- model_text_proto = """
- faster_rcnn {
- num_classes: 3
- feature_extractor {
- type: 'faster_rcnn_inception_resnet_v2'
- }
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- hard_example_miner {
- num_hard_examples: 10
- iou_threshold: 0.99
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = model_builder.build(model_proto, is_training=True)
- self.assertIsNotNone(model._hard_example_miner)
-
- def test_create_rfcn_resnet_v1_model_from_config(self):
- model_text_proto = """
- faster_rcnn {
- num_classes: 3
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_resnet101'
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- rfcn_box_predictor {
- conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.01
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- for extractor_type, extractor_class in FRCNN_RESNET_FEAT_MAPS.items():
- model_proto.faster_rcnn.feature_extractor.type = extractor_type
- model = model_builder.build(model_proto, is_training=True)
- self.assertIsInstance(model, rfcn_meta_arch.RFCNMetaArch)
- self.assertIsInstance(model._feature_extractor, extractor_class)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/optimizer_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/optimizer_builder.py
deleted file mode 100644
index e3a437f0d9a9442dfb1fff3013b250e4e854a2c2..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/optimizer_builder.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Functions to build DetectionModel training optimizers."""
-
-import tensorflow as tf
-from object_detection.utils import learning_schedules
-
-
-def build(optimizer_config):
- """Create optimizer based on config.
-
- Args:
-    optimizer_config: An Optimizer proto message.
-
- Returns:
- An optimizer and a list of variables for summary.
-
- Raises:
-    ValueError: when the optimizer type is unsupported.
- """
- optimizer_type = optimizer_config.WhichOneof('optimizer')
- optimizer = None
-
- summary_vars = []
- if optimizer_type == 'rms_prop_optimizer':
- config = optimizer_config.rms_prop_optimizer
- learning_rate = _create_learning_rate(config.learning_rate)
- summary_vars.append(learning_rate)
- optimizer = tf.train.RMSPropOptimizer(
- learning_rate,
- decay=config.decay,
- momentum=config.momentum_optimizer_value,
- epsilon=config.epsilon)
-
- if optimizer_type == 'momentum_optimizer':
- config = optimizer_config.momentum_optimizer
- learning_rate = _create_learning_rate(config.learning_rate)
- summary_vars.append(learning_rate)
- optimizer = tf.train.MomentumOptimizer(
- learning_rate,
- momentum=config.momentum_optimizer_value)
-
- if optimizer_type == 'adam_optimizer':
- config = optimizer_config.adam_optimizer
- learning_rate = _create_learning_rate(config.learning_rate)
- summary_vars.append(learning_rate)
- optimizer = tf.train.AdamOptimizer(learning_rate)
-
- if optimizer is None:
- raise ValueError('Optimizer %s not supported.' % optimizer_type)
-
- if optimizer_config.use_moving_average:
- optimizer = tf.contrib.opt.MovingAverageOptimizer(
- optimizer, average_decay=optimizer_config.moving_average_decay)
-
- return optimizer, summary_vars
-
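-# A minimal usage sketch (the proto text is illustrative, mirroring the
-# accompanying builder tests, which import text_format and optimizer_pb2):
-#
-#   optimizer_proto = optimizer_pb2.Optimizer()
-#   text_format.Merge("""
-#     momentum_optimizer: {
-#       learning_rate: { constant_learning_rate { learning_rate: 0.02 } }
-#       momentum_optimizer_value: 0.9
-#     }
-#     use_moving_average: false""", optimizer_proto)
-#   optimizer, summary_vars = build(optimizer_proto)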
-
-def _create_learning_rate(learning_rate_config):
- """Create optimizer learning rate based on config.
-
- Args:
- learning_rate_config: A LearningRate proto message.
-
- Returns:
- A learning rate.
-
- Raises:
-    ValueError: when an unsupported learning rate type is specified, or when
-      the manual step schedule is empty.
- """
- learning_rate = None
- learning_rate_type = learning_rate_config.WhichOneof('learning_rate')
- if learning_rate_type == 'constant_learning_rate':
- config = learning_rate_config.constant_learning_rate
- learning_rate = tf.constant(config.learning_rate, dtype=tf.float32,
- name='learning_rate')
-
- if learning_rate_type == 'exponential_decay_learning_rate':
- config = learning_rate_config.exponential_decay_learning_rate
- learning_rate = tf.train.exponential_decay(
- config.initial_learning_rate,
- tf.train.get_or_create_global_step(),
- config.decay_steps,
- config.decay_factor,
- staircase=config.staircase, name='learning_rate')
-
- if learning_rate_type == 'manual_step_learning_rate':
- config = learning_rate_config.manual_step_learning_rate
- if not config.schedule:
- raise ValueError('Empty learning rate schedule.')
- learning_rate_step_boundaries = [x.step for x in config.schedule]
- learning_rate_sequence = [config.initial_learning_rate]
- learning_rate_sequence += [x.learning_rate for x in config.schedule]
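-    # For example (values illustrative): initial_learning_rate 0.02 with
-    # schedule entries (step: 1000, lr: 0.002) and (step: 2000, lr: 0.0002)
-    # yields boundaries [1000, 2000] and rates [0.02, 0.002, 0.0002].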
- learning_rate = learning_schedules.manual_stepping(
- tf.train.get_or_create_global_step(), learning_rate_step_boundaries,
- learning_rate_sequence, config.warmup)
-
- if learning_rate_type == 'cosine_decay_learning_rate':
- config = learning_rate_config.cosine_decay_learning_rate
- learning_rate = learning_schedules.cosine_decay_with_warmup(
- tf.train.get_or_create_global_step(),
- config.learning_rate_base,
- config.total_steps,
- config.warmup_learning_rate,
- config.warmup_steps,
- config.hold_base_rate_steps)
-
- if learning_rate is None:
-    raise ValueError('Learning rate %s not supported.' % learning_rate_type)
-
- return learning_rate
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/optimizer_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/optimizer_builder_test.py
deleted file mode 100644
index 343a858fb90b223d7f82b1d11466a6478d73f3e5..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/optimizer_builder_test.py
+++ /dev/null
@@ -1,208 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for optimizer_builder."""
-
-import tensorflow as tf
-
-from google.protobuf import text_format
-
-from object_detection.builders import optimizer_builder
-from object_detection.protos import optimizer_pb2
-
-
-class LearningRateBuilderTest(tf.test.TestCase):
-
- def testBuildConstantLearningRate(self):
- learning_rate_text_proto = """
- constant_learning_rate {
- learning_rate: 0.004
- }
- """
- learning_rate_proto = optimizer_pb2.LearningRate()
- text_format.Merge(learning_rate_text_proto, learning_rate_proto)
- learning_rate = optimizer_builder._create_learning_rate(
- learning_rate_proto)
- self.assertTrue(learning_rate.op.name.endswith('learning_rate'))
- with self.test_session():
- learning_rate_out = learning_rate.eval()
- self.assertAlmostEqual(learning_rate_out, 0.004)
-
- def testBuildExponentialDecayLearningRate(self):
- learning_rate_text_proto = """
- exponential_decay_learning_rate {
- initial_learning_rate: 0.004
- decay_steps: 99999
- decay_factor: 0.85
- staircase: false
- }
- """
- learning_rate_proto = optimizer_pb2.LearningRate()
- text_format.Merge(learning_rate_text_proto, learning_rate_proto)
- learning_rate = optimizer_builder._create_learning_rate(
- learning_rate_proto)
- self.assertTrue(learning_rate.op.name.endswith('learning_rate'))
- self.assertTrue(isinstance(learning_rate, tf.Tensor))
-
- def testBuildManualStepLearningRate(self):
- learning_rate_text_proto = """
- manual_step_learning_rate {
- initial_learning_rate: 0.002
- schedule {
- step: 100
- learning_rate: 0.006
- }
- schedule {
- step: 90000
- learning_rate: 0.00006
- }
- warmup: true
- }
- """
- learning_rate_proto = optimizer_pb2.LearningRate()
- text_format.Merge(learning_rate_text_proto, learning_rate_proto)
- learning_rate = optimizer_builder._create_learning_rate(
- learning_rate_proto)
- self.assertTrue(isinstance(learning_rate, tf.Tensor))
-
- def testBuildCosineDecayLearningRate(self):
- learning_rate_text_proto = """
- cosine_decay_learning_rate {
- learning_rate_base: 0.002
- total_steps: 20000
- warmup_learning_rate: 0.0001
- warmup_steps: 1000
- hold_base_rate_steps: 20000
- }
- """
- learning_rate_proto = optimizer_pb2.LearningRate()
- text_format.Merge(learning_rate_text_proto, learning_rate_proto)
- learning_rate = optimizer_builder._create_learning_rate(
- learning_rate_proto)
- self.assertTrue(isinstance(learning_rate, tf.Tensor))
-
- def testRaiseErrorOnEmptyLearningRate(self):
- learning_rate_text_proto = """
- """
- learning_rate_proto = optimizer_pb2.LearningRate()
- text_format.Merge(learning_rate_text_proto, learning_rate_proto)
- with self.assertRaises(ValueError):
- optimizer_builder._create_learning_rate(learning_rate_proto)
-
-
-class OptimizerBuilderTest(tf.test.TestCase):
-
- def testBuildRMSPropOptimizer(self):
- optimizer_text_proto = """
- rms_prop_optimizer: {
- learning_rate: {
- exponential_decay_learning_rate {
- initial_learning_rate: 0.004
- decay_steps: 800720
- decay_factor: 0.95
- }
- }
- momentum_optimizer_value: 0.9
- decay: 0.9
- epsilon: 1.0
- }
- use_moving_average: false
- """
- optimizer_proto = optimizer_pb2.Optimizer()
- text_format.Merge(optimizer_text_proto, optimizer_proto)
- optimizer, _ = optimizer_builder.build(optimizer_proto)
- self.assertTrue(isinstance(optimizer, tf.train.RMSPropOptimizer))
-
- def testBuildMomentumOptimizer(self):
- optimizer_text_proto = """
- momentum_optimizer: {
- learning_rate: {
- constant_learning_rate {
- learning_rate: 0.001
- }
- }
- momentum_optimizer_value: 0.99
- }
- use_moving_average: false
- """
- optimizer_proto = optimizer_pb2.Optimizer()
- text_format.Merge(optimizer_text_proto, optimizer_proto)
- optimizer, _ = optimizer_builder.build(optimizer_proto)
- self.assertTrue(isinstance(optimizer, tf.train.MomentumOptimizer))
-
- def testBuildAdamOptimizer(self):
- optimizer_text_proto = """
- adam_optimizer: {
- learning_rate: {
- constant_learning_rate {
- learning_rate: 0.002
- }
- }
- }
- use_moving_average: false
- """
- optimizer_proto = optimizer_pb2.Optimizer()
- text_format.Merge(optimizer_text_proto, optimizer_proto)
- optimizer, _ = optimizer_builder.build(optimizer_proto)
- self.assertTrue(isinstance(optimizer, tf.train.AdamOptimizer))
-
- def testBuildMovingAverageOptimizer(self):
- optimizer_text_proto = """
- adam_optimizer: {
- learning_rate: {
- constant_learning_rate {
- learning_rate: 0.002
- }
- }
- }
- use_moving_average: True
- """
- optimizer_proto = optimizer_pb2.Optimizer()
- text_format.Merge(optimizer_text_proto, optimizer_proto)
- optimizer, _ = optimizer_builder.build(optimizer_proto)
- self.assertTrue(
- isinstance(optimizer, tf.contrib.opt.MovingAverageOptimizer))
-
- def testBuildMovingAverageOptimizerWithNonDefaultDecay(self):
- optimizer_text_proto = """
- adam_optimizer: {
- learning_rate: {
- constant_learning_rate {
- learning_rate: 0.002
- }
- }
- }
- use_moving_average: True
- moving_average_decay: 0.2
- """
- optimizer_proto = optimizer_pb2.Optimizer()
- text_format.Merge(optimizer_text_proto, optimizer_proto)
- optimizer, _ = optimizer_builder.build(optimizer_proto)
- self.assertTrue(
- isinstance(optimizer, tf.contrib.opt.MovingAverageOptimizer))
- # TODO(rathodv): Find a way to not depend on the private members.
- self.assertAlmostEqual(optimizer._ema._decay, 0.2)
-
- def testBuildEmptyOptimizer(self):
- optimizer_text_proto = """
- """
- optimizer_proto = optimizer_pb2.Optimizer()
- text_format.Merge(optimizer_text_proto, optimizer_proto)
- with self.assertRaises(ValueError):
- optimizer_builder.build(optimizer_proto)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/post_processing_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/post_processing_builder.py
deleted file mode 100644
index fa3a772896dd1a1b8146677dc862549970a6fecd..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/post_processing_builder.py
+++ /dev/null
@@ -1,123 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Builder function for post processing operations."""
-import functools
-
-import tensorflow as tf
-from object_detection.core import post_processing
-from object_detection.protos import post_processing_pb2
-
-
-def build(post_processing_config):
- """Builds callables for post-processing operations.
-
- Builds callables for non-max suppression and score conversion based on the
- configuration.
-
-  The non-max suppression callable takes `boxes`, `scores`, and optionally
-  `clip_window`, `parallel_iterations`, `masks`, and `scope` as inputs. It
-  returns `nms_boxes`, `nms_scores`, `nms_classes`, `nms_masks`, and
-  `num_detections`. See
- post_processing.batch_multiclass_non_max_suppression for the type and shape
- of these tensors.
-
-  The score converter callable should be called with an `input` tensor. It
-  returns the output of one of three TensorFlow operations, depending on the
-  configuration: tf.identity, tf.sigmoid, or tf.nn.softmax. See the TensorFlow
-  documentation for argument and return value descriptions.
-
- Args:
- post_processing_config: post_processing.proto object containing the
- parameters for the post-processing operations.
-
- Returns:
- non_max_suppressor_fn: Callable for non-max suppression.
- score_converter_fn: Callable for score conversion.
-
- Raises:
- ValueError: if the post_processing_config is of incorrect type.
- """
- if not isinstance(post_processing_config, post_processing_pb2.PostProcessing):
-    raise ValueError('post_processing_config not of type '
-                     'post_processing_pb2.PostProcessing.')
- non_max_suppressor_fn = _build_non_max_suppressor(
- post_processing_config.batch_non_max_suppression)
- score_converter_fn = _build_score_converter(
- post_processing_config.score_converter,
- post_processing_config.logit_scale)
- return non_max_suppressor_fn, score_converter_fn
-
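-# A minimal usage sketch (assuming a PostProcessing proto parsed via
-# text_format, as in the accompanying tests): both return values are callables.
-#
-#   nms_fn, score_fn = build(post_processing_config)
-#   scores = score_fn(class_logits)  # identity, sigmoid, or softmax
-#   # nms_fn is a functools.partial over batch_multiclass_non_max_suppression,
-#   # so configured values are visible via nms_fn.keywords['iou_thresh'], etc.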
-
-def _build_non_max_suppressor(nms_config):
- """Builds non-max suppresson based on the nms config.
-
- Args:
- nms_config: post_processing_pb2.PostProcessing.BatchNonMaxSuppression proto.
-
- Returns:
- non_max_suppressor_fn: Callable non-max suppressor.
-
- Raises:
- ValueError: On incorrect iou_threshold or on incompatible values of
- max_total_detections and max_detections_per_class.
- """
- if nms_config.iou_threshold < 0 or nms_config.iou_threshold > 1.0:
- raise ValueError('iou_threshold not in [0, 1.0].')
- if nms_config.max_detections_per_class > nms_config.max_total_detections:
- raise ValueError('max_detections_per_class should be no greater than '
- 'max_total_detections.')
-
- non_max_suppressor_fn = functools.partial(
- post_processing.batch_multiclass_non_max_suppression,
- score_thresh=nms_config.score_threshold,
- iou_thresh=nms_config.iou_threshold,
- max_size_per_class=nms_config.max_detections_per_class,
- max_total_size=nms_config.max_total_detections)
- return non_max_suppressor_fn
-
-
-def _score_converter_fn_with_logit_scale(tf_score_converter_fn, logit_scale):
- """Create a function to scale logits then apply a Tensorflow function."""
- def score_converter_fn(logits):
- scaled_logits = tf.divide(logits, logit_scale, name='scale_logits')
- return tf_score_converter_fn(scaled_logits, name='convert_scores')
- score_converter_fn.__name__ = '%s_with_logit_scale' % (
- tf_score_converter_fn.__name__)
- return score_converter_fn
-
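-# Because logits are divided by logit_scale before conversion, logit_scale
-# behaves like a temperature. For instance, with IDENTITY and logit_scale 2.0
-# an input of [1.0, 1.0] converts to [0.5, 0.5], which is exactly what the
-# builder test for that configuration asserts.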
-
-def _build_score_converter(score_converter_config, logit_scale):
- """Builds score converter based on the config.
-
-  Builds one of [tf.identity, tf.sigmoid, tf.nn.softmax] score converters based
-  on the config.
-
- Args:
- score_converter_config: post_processing_pb2.PostProcessing.score_converter.
-    logit_scale: temperature used to scale logits before applying the
-      score converter.
-
- Returns:
- Callable score converter op.
-
- Raises:
- ValueError: On unknown score converter.
- """
- if score_converter_config == post_processing_pb2.PostProcessing.IDENTITY:
- return _score_converter_fn_with_logit_scale(tf.identity, logit_scale)
- if score_converter_config == post_processing_pb2.PostProcessing.SIGMOID:
- return _score_converter_fn_with_logit_scale(tf.sigmoid, logit_scale)
- if score_converter_config == post_processing_pb2.PostProcessing.SOFTMAX:
- return _score_converter_fn_with_logit_scale(tf.nn.softmax, logit_scale)
- raise ValueError('Unknown score converter.')
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/post_processing_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/post_processing_builder_test.py
deleted file mode 100644
index c39fbfb417db148d756c3e8a2b51948ed13d07b3..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/post_processing_builder_test.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for post_processing_builder."""
-
-import tensorflow as tf
-from google.protobuf import text_format
-from object_detection.builders import post_processing_builder
-from object_detection.protos import post_processing_pb2
-
-
-class PostProcessingBuilderTest(tf.test.TestCase):
-
- def test_build_non_max_suppressor_with_correct_parameters(self):
- post_processing_text_proto = """
- batch_non_max_suppression {
- score_threshold: 0.7
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- """
- post_processing_config = post_processing_pb2.PostProcessing()
- text_format.Merge(post_processing_text_proto, post_processing_config)
- non_max_suppressor, _ = post_processing_builder.build(
- post_processing_config)
- self.assertEqual(non_max_suppressor.keywords['max_size_per_class'], 100)
- self.assertEqual(non_max_suppressor.keywords['max_total_size'], 300)
- self.assertAlmostEqual(non_max_suppressor.keywords['score_thresh'], 0.7)
- self.assertAlmostEqual(non_max_suppressor.keywords['iou_thresh'], 0.6)
-
- def test_build_identity_score_converter(self):
- post_processing_text_proto = """
- score_converter: IDENTITY
- """
- post_processing_config = post_processing_pb2.PostProcessing()
- text_format.Merge(post_processing_text_proto, post_processing_config)
- _, score_converter = post_processing_builder.build(post_processing_config)
- self.assertEqual(score_converter.__name__, 'identity_with_logit_scale')
-
- inputs = tf.constant([1, 1], tf.float32)
- outputs = score_converter(inputs)
- with self.test_session() as sess:
- converted_scores = sess.run(outputs)
- expected_converted_scores = sess.run(inputs)
- self.assertAllClose(converted_scores, expected_converted_scores)
-
- def test_build_identity_score_converter_with_logit_scale(self):
- post_processing_text_proto = """
- score_converter: IDENTITY
- logit_scale: 2.0
- """
- post_processing_config = post_processing_pb2.PostProcessing()
- text_format.Merge(post_processing_text_proto, post_processing_config)
- _, score_converter = post_processing_builder.build(post_processing_config)
- self.assertEqual(score_converter.__name__, 'identity_with_logit_scale')
-
- inputs = tf.constant([1, 1], tf.float32)
- outputs = score_converter(inputs)
- with self.test_session() as sess:
- converted_scores = sess.run(outputs)
- expected_converted_scores = sess.run(tf.constant([.5, .5], tf.float32))
- self.assertAllClose(converted_scores, expected_converted_scores)
-
- def test_build_sigmoid_score_converter(self):
- post_processing_text_proto = """
- score_converter: SIGMOID
- """
- post_processing_config = post_processing_pb2.PostProcessing()
- text_format.Merge(post_processing_text_proto, post_processing_config)
- _, score_converter = post_processing_builder.build(post_processing_config)
- self.assertEqual(score_converter.__name__, 'sigmoid_with_logit_scale')
-
- def test_build_softmax_score_converter(self):
- post_processing_text_proto = """
- score_converter: SOFTMAX
- """
- post_processing_config = post_processing_pb2.PostProcessing()
- text_format.Merge(post_processing_text_proto, post_processing_config)
- _, score_converter = post_processing_builder.build(post_processing_config)
- self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale')
-
- def test_build_softmax_score_converter_with_temperature(self):
- post_processing_text_proto = """
- score_converter: SOFTMAX
- logit_scale: 2.0
- """
- post_processing_config = post_processing_pb2.PostProcessing()
- text_format.Merge(post_processing_text_proto, post_processing_config)
- _, score_converter = post_processing_builder.build(post_processing_config)
- self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale')
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/preprocessor_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/preprocessor_builder.py
deleted file mode 100644
index 10b92532fc3ef5a533b7f317082436b0052eb166..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/preprocessor_builder.py
+++ /dev/null
@@ -1,322 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Builder for preprocessing steps."""
-
-import tensorflow as tf
-
-from object_detection.core import preprocessor
-from object_detection.protos import preprocessor_pb2
-
-
-def _get_step_config_from_proto(preprocessor_step_config, step_name):
- """Returns the value of a field named step_name from proto.
-
- Args:
- preprocessor_step_config: A preprocessor_pb2.PreprocessingStep object.
- step_name: Name of the field to get value from.
-
- Returns:
-    result_dict: the sub proto message from preprocessor_step_config
-      corresponding to step_name, which can later be converted to a dictionary.
-
- Raises:
- ValueError: If field does not exist in proto.
- """
- for field, value in preprocessor_step_config.ListFields():
- if field.name == step_name:
- return value
-
-  raise ValueError('Could not get field %s from proto!' % step_name)
-
-
-def _get_dict_from_proto(config):
- """Helper function to put all proto fields into a dictionary.
-
-  For many preprocessing steps, there is a trivial one-to-one mapping from
-  proto fields to function arguments. This function automatically populates a
-  dictionary with the arguments from the proto.
-
- Protos that CANNOT be trivially populated include:
- * nested messages.
-  * steps that check whether an optional field is set (i.e. where None != 0).
-  * protos that don't map one-to-one to arguments (i.e. a list must be
-    reshaped).
-  * fields requiring additional validation (i.e. a repeated field must have
-    n elements).
-
- Args:
- config: A protobuf object that does not violate the conditions above.
-
- Returns:
- result_dict: |config| converted into a python dictionary.
- """
- result_dict = {}
- for field, value in config.ListFields():
- result_dict[field.name] = value
- return result_dict
-
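-# For example, a step of `random_adjust_brightness { max_delta: 0.2 }` maps to
-# (preprocessor.random_adjust_brightness, {'max_delta': 0.2}) once its
-# sub-proto has been run through _get_dict_from_proto.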
-
-# A map from a PreprocessingStep proto config field name to the preprocessing
-# function that should be used. The PreprocessingStep proto should be parsable
-# with _get_dict_from_proto.
-PREPROCESSING_FUNCTION_MAP = {
- 'normalize_image': preprocessor.normalize_image,
- 'random_pixel_value_scale': preprocessor.random_pixel_value_scale,
- 'random_image_scale': preprocessor.random_image_scale,
- 'random_rgb_to_gray': preprocessor.random_rgb_to_gray,
- 'random_adjust_brightness': preprocessor.random_adjust_brightness,
- 'random_adjust_contrast': preprocessor.random_adjust_contrast,
- 'random_adjust_hue': preprocessor.random_adjust_hue,
- 'random_adjust_saturation': preprocessor.random_adjust_saturation,
- 'random_distort_color': preprocessor.random_distort_color,
- 'random_jitter_boxes': preprocessor.random_jitter_boxes,
- 'random_crop_to_aspect_ratio': preprocessor.random_crop_to_aspect_ratio,
- 'random_black_patches': preprocessor.random_black_patches,
- 'rgb_to_gray': preprocessor.rgb_to_gray,
- 'scale_boxes_to_pixel_coordinates': (
- preprocessor.scale_boxes_to_pixel_coordinates),
- 'subtract_channel_mean': preprocessor.subtract_channel_mean,
-}
-
-
-# A map to convert from preprocessor_pb2.ResizeImage.Method enum to
-# tf.image.ResizeMethod.
-RESIZE_METHOD_MAP = {
- preprocessor_pb2.ResizeImage.AREA: tf.image.ResizeMethod.AREA,
- preprocessor_pb2.ResizeImage.BICUBIC: tf.image.ResizeMethod.BICUBIC,
- preprocessor_pb2.ResizeImage.BILINEAR: tf.image.ResizeMethod.BILINEAR,
- preprocessor_pb2.ResizeImage.NEAREST_NEIGHBOR: (
- tf.image.ResizeMethod.NEAREST_NEIGHBOR),
-}
-
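-# A minimal usage sketch (proto text illustrative, mirroring the builder
-# tests, which import text_format): build() returns a (function, kwargs) pair
-# rather than a bound closure, so callers invoke it as function(..., **kwargs).
-#
-#   step = preprocessor_pb2.PreprocessingStep()
-#   text_format.Merge('random_adjust_hue { max_delta: 0.01 }', step)
-#   function, args = build(step)
-#   # function is preprocessor.random_adjust_hue; args == {'max_delta': 0.01}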
-
-def build(preprocessor_step_config):
- """Builds preprocessing step based on the configuration.
-
- Args:
- preprocessor_step_config: PreprocessingStep configuration proto.
-
- Returns:
- function, argmap: A callable function and an argument map to call function
- with.
-
- Raises:
- ValueError: On invalid configuration.
- """
- step_type = preprocessor_step_config.WhichOneof('preprocessing_step')
-
- if step_type in PREPROCESSING_FUNCTION_MAP:
- preprocessing_function = PREPROCESSING_FUNCTION_MAP[step_type]
- step_config = _get_step_config_from_proto(preprocessor_step_config,
- step_type)
- function_args = _get_dict_from_proto(step_config)
- return (preprocessing_function, function_args)
-
- if step_type == 'random_horizontal_flip':
- config = preprocessor_step_config.random_horizontal_flip
- return (preprocessor.random_horizontal_flip,
- {
- 'keypoint_flip_permutation': tuple(
- config.keypoint_flip_permutation),
- })
-
- if step_type == 'random_vertical_flip':
- config = preprocessor_step_config.random_vertical_flip
- return (preprocessor.random_vertical_flip,
- {
- 'keypoint_flip_permutation': tuple(
- config.keypoint_flip_permutation),
- })
-
- if step_type == 'random_rotation90':
- return (preprocessor.random_rotation90, {})
-
- if step_type == 'random_crop_image':
- config = preprocessor_step_config.random_crop_image
- return (preprocessor.random_crop_image,
- {
- 'min_object_covered': config.min_object_covered,
- 'aspect_ratio_range': (config.min_aspect_ratio,
- config.max_aspect_ratio),
- 'area_range': (config.min_area, config.max_area),
- 'overlap_thresh': config.overlap_thresh,
- 'random_coef': config.random_coef,
- })
-
- if step_type == 'random_pad_image':
- config = preprocessor_step_config.random_pad_image
- min_image_size = None
- if (config.HasField('min_image_height') !=
- config.HasField('min_image_width')):
- raise ValueError('min_image_height and min_image_width should be either '
- 'both set or both unset.')
- if config.HasField('min_image_height'):
- min_image_size = (config.min_image_height, config.min_image_width)
-
- max_image_size = None
- if (config.HasField('max_image_height') !=
- config.HasField('max_image_width')):
- raise ValueError('max_image_height and max_image_width should be either '
- 'both set or both unset.')
- if config.HasField('max_image_height'):
- max_image_size = (config.max_image_height, config.max_image_width)
-
- pad_color = config.pad_color
- if pad_color and len(pad_color) != 3:
- raise ValueError('pad_color should have 3 elements (RGB) if set!')
- if not pad_color:
- pad_color = None
- return (preprocessor.random_pad_image,
- {
- 'min_image_size': min_image_size,
- 'max_image_size': max_image_size,
- 'pad_color': pad_color,
- })
-
- if step_type == 'random_crop_pad_image':
- config = preprocessor_step_config.random_crop_pad_image
- min_padded_size_ratio = config.min_padded_size_ratio
- if min_padded_size_ratio and len(min_padded_size_ratio) != 2:
- raise ValueError('min_padded_size_ratio should have 2 elements if set!')
- max_padded_size_ratio = config.max_padded_size_ratio
- if max_padded_size_ratio and len(max_padded_size_ratio) != 2:
- raise ValueError('max_padded_size_ratio should have 2 elements if set!')
- pad_color = config.pad_color
- if pad_color and len(pad_color) != 3:
- raise ValueError('pad_color should have 3 elements if set!')
- kwargs = {
- 'min_object_covered': config.min_object_covered,
- 'aspect_ratio_range': (config.min_aspect_ratio,
- config.max_aspect_ratio),
- 'area_range': (config.min_area, config.max_area),
- 'overlap_thresh': config.overlap_thresh,
- 'random_coef': config.random_coef,
- }
- if min_padded_size_ratio:
- kwargs['min_padded_size_ratio'] = tuple(min_padded_size_ratio)
- if max_padded_size_ratio:
- kwargs['max_padded_size_ratio'] = tuple(max_padded_size_ratio)
- if pad_color:
- kwargs['pad_color'] = tuple(pad_color)
- return (preprocessor.random_crop_pad_image, kwargs)
-
- if step_type == 'random_resize_method':
- config = preprocessor_step_config.random_resize_method
- return (preprocessor.random_resize_method,
- {
- 'target_size': [config.target_height, config.target_width],
- })
-
- if step_type == 'resize_image':
- config = preprocessor_step_config.resize_image
- method = RESIZE_METHOD_MAP[config.method]
- return (preprocessor.resize_image,
- {
- 'new_height': config.new_height,
- 'new_width': config.new_width,
- 'method': method
- })
-
- if step_type == 'ssd_random_crop':
- config = preprocessor_step_config.ssd_random_crop
- if config.operations:
- min_object_covered = [op.min_object_covered for op in config.operations]
- aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio)
- for op in config.operations]
- area_range = [(op.min_area, op.max_area) for op in config.operations]
- overlap_thresh = [op.overlap_thresh for op in config.operations]
- random_coef = [op.random_coef for op in config.operations]
- return (preprocessor.ssd_random_crop,
- {
- 'min_object_covered': min_object_covered,
- 'aspect_ratio_range': aspect_ratio_range,
- 'area_range': area_range,
- 'overlap_thresh': overlap_thresh,
- 'random_coef': random_coef,
- })
- return (preprocessor.ssd_random_crop, {})
-
- if step_type == 'ssd_random_crop_pad':
- config = preprocessor_step_config.ssd_random_crop_pad
- if config.operations:
- min_object_covered = [op.min_object_covered for op in config.operations]
- aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio)
- for op in config.operations]
- area_range = [(op.min_area, op.max_area) for op in config.operations]
- overlap_thresh = [op.overlap_thresh for op in config.operations]
- random_coef = [op.random_coef for op in config.operations]
- min_padded_size_ratio = [tuple(op.min_padded_size_ratio)
- for op in config.operations]
- max_padded_size_ratio = [tuple(op.max_padded_size_ratio)
- for op in config.operations]
- pad_color = [(op.pad_color_r, op.pad_color_g, op.pad_color_b)
- for op in config.operations]
- return (preprocessor.ssd_random_crop_pad,
- {
- 'min_object_covered': min_object_covered,
- 'aspect_ratio_range': aspect_ratio_range,
- 'area_range': area_range,
- 'overlap_thresh': overlap_thresh,
- 'random_coef': random_coef,
- 'min_padded_size_ratio': min_padded_size_ratio,
- 'max_padded_size_ratio': max_padded_size_ratio,
- 'pad_color': pad_color,
- })
- return (preprocessor.ssd_random_crop_pad, {})
-
- if step_type == 'ssd_random_crop_fixed_aspect_ratio':
- config = preprocessor_step_config.ssd_random_crop_fixed_aspect_ratio
- if config.operations:
- min_object_covered = [op.min_object_covered for op in config.operations]
- area_range = [(op.min_area, op.max_area) for op in config.operations]
- overlap_thresh = [op.overlap_thresh for op in config.operations]
- random_coef = [op.random_coef for op in config.operations]
- return (preprocessor.ssd_random_crop_fixed_aspect_ratio,
- {
- 'min_object_covered': min_object_covered,
- 'aspect_ratio': config.aspect_ratio,
- 'area_range': area_range,
- 'overlap_thresh': overlap_thresh,
- 'random_coef': random_coef,
- })
- return (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})
-
- if step_type == 'ssd_random_crop_pad_fixed_aspect_ratio':
- config = preprocessor_step_config.ssd_random_crop_pad_fixed_aspect_ratio
- kwargs = {}
- aspect_ratio = config.aspect_ratio
- if aspect_ratio:
- kwargs['aspect_ratio'] = aspect_ratio
- min_padded_size_ratio = config.min_padded_size_ratio
- if min_padded_size_ratio:
- if len(min_padded_size_ratio) != 2:
- raise ValueError('min_padded_size_ratio should have 2 elements if set!')
- kwargs['min_padded_size_ratio'] = tuple(min_padded_size_ratio)
- max_padded_size_ratio = config.max_padded_size_ratio
- if max_padded_size_ratio:
- if len(max_padded_size_ratio) != 2:
- raise ValueError('max_padded_size_ratio should have 2 elements if set!')
- kwargs['max_padded_size_ratio'] = tuple(max_padded_size_ratio)
- if config.operations:
- kwargs['min_object_covered'] = [op.min_object_covered
- for op in config.operations]
- kwargs['aspect_ratio_range'] = [(op.min_aspect_ratio, op.max_aspect_ratio)
- for op in config.operations]
- kwargs['area_range'] = [(op.min_area, op.max_area)
- for op in config.operations]
- kwargs['overlap_thresh'] = [op.overlap_thresh for op in config.operations]
- kwargs['random_coef'] = [op.random_coef for op in config.operations]
- return (preprocessor.ssd_random_crop_pad_fixed_aspect_ratio, kwargs)
-
- raise ValueError('Unknown preprocessing step.')
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/preprocessor_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/preprocessor_builder_test.py
deleted file mode 100644
index 9e5d8de8e9ab84836c918b40cd17345543e18d19..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/preprocessor_builder_test.py
+++ /dev/null
@@ -1,566 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for preprocessor_builder."""
-
-import tensorflow as tf
-
-from google.protobuf import text_format
-
-from object_detection.builders import preprocessor_builder
-from object_detection.core import preprocessor
-from object_detection.protos import preprocessor_pb2
-
-
-class PreprocessorBuilderTest(tf.test.TestCase):
-
- def assert_dictionary_close(self, dict1, dict2):
- """Helper to check if two dicts with floatst or integers are close."""
- self.assertEqual(sorted(dict1.keys()), sorted(dict2.keys()))
- for key in dict1:
- value = dict1[key]
- if isinstance(value, float):
- self.assertAlmostEqual(value, dict2[key])
- else:
- self.assertEqual(value, dict2[key])
-
- def test_build_normalize_image(self):
- preprocessor_text_proto = """
- normalize_image {
- original_minval: 0.0
- original_maxval: 255.0
- target_minval: -1.0
- target_maxval: 1.0
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.normalize_image)
- self.assertEqual(args, {
- 'original_minval': 0.0,
- 'original_maxval': 255.0,
- 'target_minval': -1.0,
- 'target_maxval': 1.0,
- })
-
- def test_build_random_horizontal_flip(self):
- preprocessor_text_proto = """
- random_horizontal_flip {
- keypoint_flip_permutation: 1
- keypoint_flip_permutation: 0
- keypoint_flip_permutation: 2
- keypoint_flip_permutation: 3
- keypoint_flip_permutation: 5
- keypoint_flip_permutation: 4
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_horizontal_flip)
- self.assertEqual(args, {'keypoint_flip_permutation': (1, 0, 2, 3, 5, 4)})
-
- def test_build_random_vertical_flip(self):
- preprocessor_text_proto = """
- random_vertical_flip {
- keypoint_flip_permutation: 1
- keypoint_flip_permutation: 0
- keypoint_flip_permutation: 2
- keypoint_flip_permutation: 3
- keypoint_flip_permutation: 5
- keypoint_flip_permutation: 4
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_vertical_flip)
- self.assertEqual(args, {'keypoint_flip_permutation': (1, 0, 2, 3, 5, 4)})
-
- def test_build_random_rotation90(self):
- preprocessor_text_proto = """
- random_rotation90 {}
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_rotation90)
- self.assertEqual(args, {})
-
- def test_build_random_pixel_value_scale(self):
- preprocessor_text_proto = """
- random_pixel_value_scale {
- minval: 0.8
- maxval: 1.2
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_pixel_value_scale)
- self.assert_dictionary_close(args, {'minval': 0.8, 'maxval': 1.2})
-
- def test_build_random_image_scale(self):
- preprocessor_text_proto = """
- random_image_scale {
- min_scale_ratio: 0.8
- max_scale_ratio: 2.2
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_image_scale)
- self.assert_dictionary_close(args, {'min_scale_ratio': 0.8,
- 'max_scale_ratio': 2.2})
-
- def test_build_random_rgb_to_gray(self):
- preprocessor_text_proto = """
- random_rgb_to_gray {
- probability: 0.8
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_rgb_to_gray)
- self.assert_dictionary_close(args, {'probability': 0.8})
-
- def test_build_random_adjust_brightness(self):
- preprocessor_text_proto = """
- random_adjust_brightness {
- max_delta: 0.2
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_adjust_brightness)
- self.assert_dictionary_close(args, {'max_delta': 0.2})
-
- def test_build_random_adjust_contrast(self):
- preprocessor_text_proto = """
- random_adjust_contrast {
- min_delta: 0.7
- max_delta: 1.1
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_adjust_contrast)
- self.assert_dictionary_close(args, {'min_delta': 0.7, 'max_delta': 1.1})
-
- def test_build_random_adjust_hue(self):
- preprocessor_text_proto = """
- random_adjust_hue {
- max_delta: 0.01
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_adjust_hue)
- self.assert_dictionary_close(args, {'max_delta': 0.01})
-
- def test_build_random_adjust_saturation(self):
- preprocessor_text_proto = """
- random_adjust_saturation {
- min_delta: 0.75
- max_delta: 1.15
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_adjust_saturation)
- self.assert_dictionary_close(args, {'min_delta': 0.75, 'max_delta': 1.15})
-
- def test_build_random_distort_color(self):
- preprocessor_text_proto = """
- random_distort_color {
- color_ordering: 1
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_distort_color)
- self.assertEqual(args, {'color_ordering': 1})
-
- def test_build_random_jitter_boxes(self):
- preprocessor_text_proto = """
- random_jitter_boxes {
- ratio: 0.1
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_jitter_boxes)
- self.assert_dictionary_close(args, {'ratio': 0.1})
-
- def test_build_random_crop_image(self):
- preprocessor_text_proto = """
- random_crop_image {
- min_object_covered: 0.75
- min_aspect_ratio: 0.75
- max_aspect_ratio: 1.5
- min_area: 0.25
- max_area: 0.875
- overlap_thresh: 0.5
- random_coef: 0.125
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_crop_image)
- self.assertEqual(args, {
- 'min_object_covered': 0.75,
- 'aspect_ratio_range': (0.75, 1.5),
- 'area_range': (0.25, 0.875),
- 'overlap_thresh': 0.5,
- 'random_coef': 0.125,
- })
-
- def test_build_random_pad_image(self):
- preprocessor_text_proto = """
- random_pad_image {
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_pad_image)
- self.assertEqual(args, {
- 'min_image_size': None,
- 'max_image_size': None,
- 'pad_color': None,
- })
-
- def test_build_random_crop_pad_image(self):
- preprocessor_text_proto = """
- random_crop_pad_image {
- min_object_covered: 0.75
- min_aspect_ratio: 0.75
- max_aspect_ratio: 1.5
- min_area: 0.25
- max_area: 0.875
- overlap_thresh: 0.5
- random_coef: 0.125
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_crop_pad_image)
- self.assertEqual(args, {
- 'min_object_covered': 0.75,
- 'aspect_ratio_range': (0.75, 1.5),
- 'area_range': (0.25, 0.875),
- 'overlap_thresh': 0.5,
- 'random_coef': 0.125,
- })
-
- def test_build_random_crop_pad_image_with_optional_parameters(self):
- preprocessor_text_proto = """
- random_crop_pad_image {
- min_object_covered: 0.75
- min_aspect_ratio: 0.75
- max_aspect_ratio: 1.5
- min_area: 0.25
- max_area: 0.875
- overlap_thresh: 0.5
- random_coef: 0.125
- min_padded_size_ratio: 0.5
- min_padded_size_ratio: 0.75
- max_padded_size_ratio: 0.5
- max_padded_size_ratio: 0.75
- pad_color: 0.5
- pad_color: 0.5
- pad_color: 1.0
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_crop_pad_image)
- self.assertEqual(args, {
- 'min_object_covered': 0.75,
- 'aspect_ratio_range': (0.75, 1.5),
- 'area_range': (0.25, 0.875),
- 'overlap_thresh': 0.5,
- 'random_coef': 0.125,
- 'min_padded_size_ratio': (0.5, 0.75),
- 'max_padded_size_ratio': (0.5, 0.75),
- 'pad_color': (0.5, 0.5, 1.0)
- })
-
- def test_build_random_crop_to_aspect_ratio(self):
- preprocessor_text_proto = """
- random_crop_to_aspect_ratio {
- aspect_ratio: 0.85
- overlap_thresh: 0.35
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_crop_to_aspect_ratio)
- self.assert_dictionary_close(args, {'aspect_ratio': 0.85,
- 'overlap_thresh': 0.35})
-
- def test_build_random_black_patches(self):
- preprocessor_text_proto = """
- random_black_patches {
- max_black_patches: 20
- probability: 0.95
- size_to_image_ratio: 0.12
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_black_patches)
- self.assert_dictionary_close(args, {'max_black_patches': 20,
- 'probability': 0.95,
- 'size_to_image_ratio': 0.12})
-
- def test_build_random_resize_method(self):
- preprocessor_text_proto = """
- random_resize_method {
- target_height: 75
- target_width: 100
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_resize_method)
- self.assert_dictionary_close(args, {'target_size': [75, 100]})
-
- def test_build_scale_boxes_to_pixel_coordinates(self):
- preprocessor_text_proto = """
- scale_boxes_to_pixel_coordinates {}
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.scale_boxes_to_pixel_coordinates)
- self.assertEqual(args, {})
-
- def test_build_resize_image(self):
- preprocessor_text_proto = """
- resize_image {
- new_height: 75
- new_width: 100
- method: BICUBIC
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.resize_image)
- self.assertEqual(args, {'new_height': 75,
- 'new_width': 100,
- 'method': tf.image.ResizeMethod.BICUBIC})
-
- def test_build_rgb_to_gray(self):
- preprocessor_text_proto = """
- rgb_to_gray {}
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.rgb_to_gray)
- self.assertEqual(args, {})
-
- def test_build_subtract_channel_mean(self):
- preprocessor_text_proto = """
- subtract_channel_mean {
- means: [1.0, 2.0, 3.0]
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.subtract_channel_mean)
- self.assertEqual(args, {'means': [1.0, 2.0, 3.0]})
-
- def test_build_ssd_random_crop(self):
- preprocessor_text_proto = """
- ssd_random_crop {
- operations {
- min_object_covered: 0.0
- min_aspect_ratio: 0.875
- max_aspect_ratio: 1.125
- min_area: 0.5
- max_area: 1.0
- overlap_thresh: 0.0
- random_coef: 0.375
- }
- operations {
- min_object_covered: 0.25
- min_aspect_ratio: 0.75
- max_aspect_ratio: 1.5
- min_area: 0.5
- max_area: 1.0
- overlap_thresh: 0.25
- random_coef: 0.375
- }
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.ssd_random_crop)
- self.assertEqual(args, {'min_object_covered': [0.0, 0.25],
- 'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)],
- 'area_range': [(0.5, 1.0), (0.5, 1.0)],
- 'overlap_thresh': [0.0, 0.25],
- 'random_coef': [0.375, 0.375]})
-
- def test_build_ssd_random_crop_empty_operations(self):
- preprocessor_text_proto = """
- ssd_random_crop {
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.ssd_random_crop)
- self.assertEqual(args, {})
-
- def test_build_ssd_random_crop_pad(self):
- preprocessor_text_proto = """
- ssd_random_crop_pad {
- operations {
- min_object_covered: 0.0
- min_aspect_ratio: 0.875
- max_aspect_ratio: 1.125
- min_area: 0.5
- max_area: 1.0
- overlap_thresh: 0.0
- random_coef: 0.375
- min_padded_size_ratio: [1.0, 1.0]
- max_padded_size_ratio: [2.0, 2.0]
- pad_color_r: 0.5
- pad_color_g: 0.5
- pad_color_b: 0.5
- }
- operations {
- min_object_covered: 0.25
- min_aspect_ratio: 0.75
- max_aspect_ratio: 1.5
- min_area: 0.5
- max_area: 1.0
- overlap_thresh: 0.25
- random_coef: 0.375
- min_padded_size_ratio: [1.0, 1.0]
- max_padded_size_ratio: [2.0, 2.0]
- pad_color_r: 0.5
- pad_color_g: 0.5
- pad_color_b: 0.5
- }
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.ssd_random_crop_pad)
- self.assertEqual(args, {'min_object_covered': [0.0, 0.25],
- 'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)],
- 'area_range': [(0.5, 1.0), (0.5, 1.0)],
- 'overlap_thresh': [0.0, 0.25],
- 'random_coef': [0.375, 0.375],
- 'min_padded_size_ratio': [(1.0, 1.0), (1.0, 1.0)],
- 'max_padded_size_ratio': [(2.0, 2.0), (2.0, 2.0)],
- 'pad_color': [(0.5, 0.5, 0.5), (0.5, 0.5, 0.5)]})
-
- def test_build_ssd_random_crop_fixed_aspect_ratio(self):
- preprocessor_text_proto = """
- ssd_random_crop_fixed_aspect_ratio {
- operations {
- min_object_covered: 0.0
- min_area: 0.5
- max_area: 1.0
- overlap_thresh: 0.0
- random_coef: 0.375
- }
- operations {
- min_object_covered: 0.25
- min_area: 0.5
- max_area: 1.0
- overlap_thresh: 0.25
- random_coef: 0.375
- }
- aspect_ratio: 0.875
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.ssd_random_crop_fixed_aspect_ratio)
- self.assertEqual(args, {'min_object_covered': [0.0, 0.25],
- 'aspect_ratio': 0.875,
- 'area_range': [(0.5, 1.0), (0.5, 1.0)],
- 'overlap_thresh': [0.0, 0.25],
- 'random_coef': [0.375, 0.375]})
-
- def test_build_ssd_random_crop_pad_fixed_aspect_ratio(self):
- preprocessor_text_proto = """
- ssd_random_crop_pad_fixed_aspect_ratio {
- operations {
- min_object_covered: 0.0
- min_aspect_ratio: 0.875
- max_aspect_ratio: 1.125
- min_area: 0.5
- max_area: 1.0
- overlap_thresh: 0.0
- random_coef: 0.375
- }
- operations {
- min_object_covered: 0.25
- min_aspect_ratio: 0.75
- max_aspect_ratio: 1.5
- min_area: 0.5
- max_area: 1.0
- overlap_thresh: 0.25
- random_coef: 0.375
- }
- aspect_ratio: 0.875
- min_padded_size_ratio: [1.0, 1.0]
- max_padded_size_ratio: [2.0, 2.0]
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function,
- preprocessor.ssd_random_crop_pad_fixed_aspect_ratio)
- self.assertEqual(args, {'min_object_covered': [0.0, 0.25],
- 'aspect_ratio': 0.875,
- 'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)],
- 'area_range': [(0.5, 1.0), (0.5, 1.0)],
- 'overlap_thresh': [0.0, 0.25],
- 'random_coef': [0.375, 0.375],
- 'min_padded_size_ratio': (1.0, 1.0),
- 'max_padded_size_ratio': (2.0, 2.0)})
-
-
-if __name__ == '__main__':
- tf.test.main()
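
The tests above pin down the builder contract: `preprocessor_builder.build` maps a `PreprocessingStep` text proto to a `(function, kwargs)` pair, and `preprocessor.preprocess` applies a list of such pairs to a tensor dictionary. A minimal sketch of that flow, assuming the TF1.x `object_detection` package from this tree is importable:

```python
# Illustrative sketch only: wiring a built preprocessing step into
# preprocessor.preprocess, mirroring the normalize_image test above.
import tensorflow as tf
from google.protobuf import text_format

from object_detection.builders import preprocessor_builder
from object_detection.core import preprocessor
from object_detection.core import standard_fields as fields
from object_detection.protos import preprocessor_pb2

step_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge("""
    normalize_image {
      original_minval: 0.0
      original_maxval: 255.0
      target_minval: -1.0
      target_maxval: 1.0
    }
""", step_proto)

# build() returns the preprocessing function plus its keyword arguments.
function, args = preprocessor_builder.build(step_proto)

image = tf.placeholder(tf.float32, shape=[None, None, 3])
tensor_dict = {fields.InputDataFields.image: image}
# preprocess() applies each (function, kwargs) pair to the tensor dict.
tensor_dict = preprocessor.preprocess(tensor_dict, [(function, args)])
```
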
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/region_similarity_calculator_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/region_similarity_calculator_builder.py
deleted file mode 100644
index fa1d671754df07043957ccf9e04f651c114c1cf9..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/region_similarity_calculator_builder.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Builder for region similarity calculators."""
-
-from object_detection.core import region_similarity_calculator
-from object_detection.protos import region_similarity_calculator_pb2
-
-
-def build(region_similarity_calculator_config):
- """Builds region similarity calculator based on the configuration.
-
- Builds one of [IouSimilarity, IoaSimilarity, NegSqDistSimilarity] objects. See
-  protos/region_similarity_calculator.proto for details.
-
- Args:
- region_similarity_calculator_config: RegionSimilarityCalculator
- configuration proto.
-
- Returns:
- region_similarity_calculator: RegionSimilarityCalculator object.
-
- Raises:
- ValueError: On unknown region similarity calculator.
- """
-
- if not isinstance(
- region_similarity_calculator_config,
- region_similarity_calculator_pb2.RegionSimilarityCalculator):
- raise ValueError(
- 'region_similarity_calculator_config not of type '
-        'region_similarity_calculator_pb2.RegionSimilarityCalculator')
-
- similarity_calculator = region_similarity_calculator_config.WhichOneof(
- 'region_similarity')
- if similarity_calculator == 'iou_similarity':
- return region_similarity_calculator.IouSimilarity()
- if similarity_calculator == 'ioa_similarity':
- return region_similarity_calculator.IoaSimilarity()
- if similarity_calculator == 'neg_sq_dist_similarity':
- return region_similarity_calculator.NegSqDistSimilarity()
-
- raise ValueError('Unknown region similarity calculator.')
-
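
Downstream, the returned calculator's `compare` method yields a pairwise [N, M] similarity matrix between two `BoxList`s. A small usage sketch, assuming the `box_list` module from this same tree:

```python
# Illustrative sketch: build an IoU calculator from a text proto and compare
# two single-box BoxLists (expected IoU for this pair is 1/3).
import tensorflow as tf
from google.protobuf import text_format

from object_detection.builders import region_similarity_calculator_builder
from object_detection.core import box_list
from object_detection.protos import region_similarity_calculator_pb2

calc_proto = region_similarity_calculator_pb2.RegionSimilarityCalculator()
text_format.Merge('iou_similarity { }', calc_proto)
similarity_calc = region_similarity_calculator_builder.build(calc_proto)

boxes1 = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0]]))
boxes2 = box_list.BoxList(tf.constant([[0.0, 0.5, 1.0, 1.5]]))
iou_matrix = similarity_calc.compare(boxes1, boxes2)  # shape [1, 1]
```
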
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/region_similarity_calculator_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/region_similarity_calculator_builder_test.py
deleted file mode 100644
index ca3a5512e374fc03f39de1f3f77cf22bc6f6556e..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/region_similarity_calculator_builder_test.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for region_similarity_calculator_builder."""
-
-import tensorflow as tf
-
-from google.protobuf import text_format
-from object_detection.builders import region_similarity_calculator_builder
-from object_detection.core import region_similarity_calculator
-from object_detection.protos import region_similarity_calculator_pb2 as sim_calc_pb2
-
-
-class RegionSimilarityCalculatorBuilderTest(tf.test.TestCase):
-
- def testBuildIoaSimilarityCalculator(self):
- similarity_calc_text_proto = """
- ioa_similarity {
- }
- """
- similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
- text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
- similarity_calc = region_similarity_calculator_builder.build(
- similarity_calc_proto)
- self.assertTrue(isinstance(similarity_calc,
- region_similarity_calculator.IoaSimilarity))
-
- def testBuildIouSimilarityCalculator(self):
- similarity_calc_text_proto = """
- iou_similarity {
- }
- """
- similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
- text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
- similarity_calc = region_similarity_calculator_builder.build(
- similarity_calc_proto)
- self.assertTrue(isinstance(similarity_calc,
- region_similarity_calculator.IouSimilarity))
-
- def testBuildNegSqDistSimilarityCalculator(self):
- similarity_calc_text_proto = """
- neg_sq_dist_similarity {
- }
- """
- similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
- text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
- similarity_calc = region_similarity_calculator_builder.build(
- similarity_calc_proto)
- self.assertTrue(isinstance(similarity_calc,
- region_similarity_calculator.
- NegSqDistSimilarity))
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/__init__.py
deleted file mode 100644
index 8b137891791fe96927ad78e64b0aad7bded08bdc..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/anchor_generator.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/anchor_generator.py
deleted file mode 100644
index f2797ef77d3e83597e18db10e5ba87f24364d8aa..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/anchor_generator.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Base anchor generator.
-
-The job of the anchor generator is to create (or load) a collection
-of bounding boxes to be used as anchors.
-
-Generated anchors are assumed to match some convolutional grid or list of grid
-shapes. For example, we might want to generate anchors matching an 8x8
-feature map and a 4x4 feature map. If we place 3 anchors per grid location
-on the first feature map and 6 anchors per grid location on the second feature
-map, then 3*8*8 + 6*4*4 = 288 anchors are generated in total.
-
-To support fully convolutional settings, feature map shapes are passed
-dynamically at generation time. The number of anchors to place at each location
-is static --- implementations of AnchorGenerator must always be able to return
-the number of anchors they use per location for each feature map.
-"""
-from abc import ABCMeta
-from abc import abstractmethod
-
-import tensorflow as tf
-
-
-class AnchorGenerator(object):
- """Abstract base class for anchor generators."""
- __metaclass__ = ABCMeta
-
- @abstractmethod
- def name_scope(self):
- """Name scope.
-
- Must be defined by implementations.
-
- Returns:
- a string representing the name scope of the anchor generation operation.
- """
- pass
-
- @property
- def check_num_anchors(self):
- """Whether to dynamically check the number of anchors generated.
-
- Can be overridden by implementations that would like to disable this
- behavior.
-
- Returns:
- a boolean controlling whether the Generate function should dynamically
- check the number of anchors generated against the mathematically
- expected number of anchors.
- """
- return True
-
- @abstractmethod
- def num_anchors_per_location(self):
- """Returns the number of anchors per spatial location.
-
- Returns:
- a list of integers, one for each expected feature map to be passed to
- the `generate` function.
- """
- pass
-
- def generate(self, feature_map_shape_list, **params):
- """Generates a collection of bounding boxes to be used as anchors.
-
- TODO(rathodv): remove **params from argument list and make stride and
- offsets (for multiple_grid_anchor_generator) constructor arguments.
-
- Args:
- feature_map_shape_list: list of (height, width) pairs in the format
- [(height_0, width_0), (height_1, width_1), ...] that the generated
- anchors must align with. Pairs can be provided as 1-dimensional
- integer tensors of length 2 or simply as tuples of integers.
- **params: parameters for anchor generation op
-
- Returns:
- boxes_list: a list of BoxLists each holding anchor boxes corresponding to
- the input feature map shapes.
-
- Raises:
- ValueError: if the number of feature map shapes does not match the length
-        of num_anchors_per_location.
- """
- if self.check_num_anchors and (
- len(feature_map_shape_list) != len(self.num_anchors_per_location())):
- raise ValueError('Number of feature maps is expected to equal the length '
- 'of `num_anchors_per_location`.')
- with tf.name_scope(self.name_scope()):
- anchors_list = self._generate(feature_map_shape_list, **params)
- if self.check_num_anchors:
- with tf.control_dependencies([
- self._assert_correct_number_of_anchors(
- anchors_list, feature_map_shape_list)]):
- for item in anchors_list:
- item.set(tf.identity(item.get()))
- return anchors_list
-
- @abstractmethod
- def _generate(self, feature_map_shape_list, **params):
- """To be overridden by implementations.
-
- Args:
- feature_map_shape_list: list of (height, width) pairs in the format
- [(height_0, width_0), (height_1, width_1), ...] that the generated
- anchors must align with.
- **params: parameters for anchor generation op
-
- Returns:
- boxes_list: a list of BoxList, each holding a collection of N anchor
- boxes.
- """
- pass
-
- def _assert_correct_number_of_anchors(self, anchors_list,
- feature_map_shape_list):
- """Assert that correct number of anchors was generated.
-
- Args:
- anchors_list: A list of box_list.BoxList object holding anchors generated.
- feature_map_shape_list: list of (height, width) pairs in the format
- [(height_0, width_0), (height_1, width_1), ...] that the generated
- anchors must align with.
- Returns:
- Op that raises InvalidArgumentError if the number of anchors does not
- match the number of expected anchors.
- """
- expected_num_anchors = 0
- actual_num_anchors = 0
- for num_anchors_per_location, feature_map_shape, anchors in zip(
- self.num_anchors_per_location(), feature_map_shape_list, anchors_list):
- expected_num_anchors += (num_anchors_per_location
- * feature_map_shape[0]
- * feature_map_shape[1])
- actual_num_anchors += anchors.num_boxes()
- return tf.assert_equal(expected_num_anchors, actual_num_anchors)
-
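
The anchor-count arithmetic in the module docstring (3*8*8 + 6*4*4 = 288) is exactly what `_assert_correct_number_of_anchors` checks at graph run time. The same bookkeeping in plain Python:

```python
# Illustrative sketch of the expected-anchor count verified by
# _assert_correct_number_of_anchors: anchors_per_location * height * width,
# summed over feature maps.
def expected_num_anchors(anchors_per_location, feature_map_shapes):
  return sum(n * h * w
             for n, (h, w) in zip(anchors_per_location, feature_map_shapes))

# The docstring's example: 3 anchors per location on an 8x8 map plus
# 6 per location on a 4x4 map.
assert expected_num_anchors([3, 6], [(8, 8), (4, 4)]) == 288
```
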
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/balanced_positive_negative_sampler.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/balanced_positive_negative_sampler.py
deleted file mode 100644
index 7042c40fffbef3126fc90a81114693ac4c0c8bf6..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/balanced_positive_negative_sampler.py
+++ /dev/null
@@ -1,105 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Class to subsample minibatches by balancing positives and negatives.
-
-Subsamples minibatches based on a pre-specified positive fraction in range
-[0,1]. The class presumes there are many more negatives than positive examples:
-if the desired batch_size cannot be achieved with the pre-specified positive
-fraction, it fills the rest with negative examples. If this is not sufficient
-for obtaining the desired batch_size, it returns fewer examples.
-
-The main function to call is subsample(self, indicator, batch_size, labels).
-For convenience one can also call SubsampleWeights(self, weights, labels)
-which is defined in the minibatch_sampler base class.
-"""
-
-import tensorflow as tf
-
-from object_detection.core import minibatch_sampler
-
-
-class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
- """Subsamples minibatches to a desired balance of positives and negatives."""
-
- def __init__(self, positive_fraction=0.5):
- """Constructs a minibatch sampler.
-
- Args:
- positive_fraction: desired fraction of positive examples (scalar in [0,1])
- in the batch.
-
- Raises:
- ValueError: if positive_fraction < 0, or positive_fraction > 1
- """
- if positive_fraction < 0 or positive_fraction > 1:
- raise ValueError('positive_fraction should be in range [0,1]. '
- 'Received: %s.' % positive_fraction)
- self._positive_fraction = positive_fraction
-
- def subsample(self, indicator, batch_size, labels):
- """Returns subsampled minibatch.
-
- Args:
- indicator: boolean tensor of shape [N] whose True entries can be sampled.
- batch_size: desired batch size. If None, keeps all positive samples and
- randomly selects negative samples so that the positive sample fraction
- matches self._positive_fraction.
- labels: boolean tensor of shape [N] denoting positive(=True) and negative
- (=False) examples.
-
- Returns:
- is_sampled: boolean tensor of shape [N], True for entries which are
- sampled.
-
- Raises:
- ValueError: if labels and indicator are not 1D boolean tensors.
- """
- if len(indicator.get_shape().as_list()) != 1:
- raise ValueError('indicator must be 1 dimensional, got a tensor of '
- 'shape %s' % indicator.get_shape())
- if len(labels.get_shape().as_list()) != 1:
- raise ValueError('labels must be 1 dimensional, got a tensor of '
- 'shape %s' % labels.get_shape())
- if labels.dtype != tf.bool:
- raise ValueError('labels should be of type bool. Received: %s' %
- labels.dtype)
- if indicator.dtype != tf.bool:
- raise ValueError('indicator should be of type bool. Received: %s' %
- indicator.dtype)
-
- # Only sample from indicated samples
- negative_idx = tf.logical_not(labels)
- positive_idx = tf.logical_and(labels, indicator)
- negative_idx = tf.logical_and(negative_idx, indicator)
-
- # Sample positive and negative samples separately
- if batch_size is None:
- max_num_pos = tf.reduce_sum(tf.to_int32(positive_idx))
- else:
- max_num_pos = int(self._positive_fraction * batch_size)
- sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos)
- num_sampled_pos = tf.reduce_sum(tf.cast(sampled_pos_idx, tf.int32))
- if batch_size is None:
- negative_positive_ratio = (
- 1 - self._positive_fraction) / self._positive_fraction
- max_num_neg = tf.to_int32(
- negative_positive_ratio * tf.to_float(num_sampled_pos))
- else:
- max_num_neg = batch_size - num_sampled_pos
- sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg)
-
- sampled_idx = tf.logical_or(sampled_pos_idx, sampled_neg_idx)
- return sampled_idx
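
For intuition, the fixed-batch branch of `subsample` reduces to: cap positives at `positive_fraction * batch_size`, then fill the remainder with negatives. A NumPy sketch of that rule (illustrative only, not the TF implementation above):

```python
import numpy as np

def balanced_sample(indicator, labels, batch_size, positive_fraction=0.5,
                    rng=np.random):
  # Only entries marked True in `indicator` may be sampled.
  positive_idx = np.flatnonzero(labels & indicator)
  negative_idx = np.flatnonzero(~labels & indicator)
  # Cap positives at the desired fraction; negatives fill what remains.
  max_num_pos = int(positive_fraction * batch_size)
  pos = rng.permutation(positive_idx)[:max_num_pos]
  neg = rng.permutation(negative_idx)[:batch_size - len(pos)]
  is_sampled = np.zeros_like(labels, dtype=bool)
  is_sampled[np.concatenate([pos, neg])] = True
  return is_sampled

labels = np.arange(100) >= 60          # 40 positives, 60 negatives
indicator = np.ones(100, dtype=bool)   # everything may be sampled
sampled = balanced_sample(indicator, labels, batch_size=64)
assert sampled.sum() == 64 and (sampled & labels).sum() == 32
```
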
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/balanced_positive_negative_sampler_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/balanced_positive_negative_sampler_test.py
deleted file mode 100644
index e39de5342c4f01afa38725a56ee543c6eec27d13..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/balanced_positive_negative_sampler_test.py
+++ /dev/null
@@ -1,106 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.balanced_positive_negative_sampler."""
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.core import balanced_positive_negative_sampler
-from object_detection.utils import test_case
-
-
-class BalancedPositiveNegativeSamplerTest(test_case.TestCase):
-
- def test_subsample_all_examples(self):
- numpy_labels = np.random.permutation(300)
- indicator = tf.constant(np.ones(300) == 1)
- numpy_labels = (numpy_labels - 200) > 0
-
- labels = tf.constant(numpy_labels)
-
- sampler = (balanced_positive_negative_sampler.
- BalancedPositiveNegativeSampler())
- is_sampled = sampler.subsample(indicator, 64, labels)
- with self.test_session() as sess:
- is_sampled = sess.run(is_sampled)
- self.assertTrue(sum(is_sampled) == 64)
- self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 32)
- self.assertTrue(sum(np.logical_and(
- np.logical_not(numpy_labels), is_sampled)) == 32)
-
- def test_subsample_selection(self):
- # Test random sampling when only some examples can be sampled:
- # 100 samples, 20 positives, 10 positives cannot be sampled
- numpy_labels = np.arange(100)
- numpy_indicator = numpy_labels < 90
- indicator = tf.constant(numpy_indicator)
- numpy_labels = (numpy_labels - 80) >= 0
-
- labels = tf.constant(numpy_labels)
-
- sampler = (balanced_positive_negative_sampler.
- BalancedPositiveNegativeSampler())
- is_sampled = sampler.subsample(indicator, 64, labels)
- with self.test_session() as sess:
- is_sampled = sess.run(is_sampled)
- self.assertTrue(sum(is_sampled) == 64)
- self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 10)
- self.assertTrue(sum(np.logical_and(
- np.logical_not(numpy_labels), is_sampled)) == 54)
- self.assertAllEqual(is_sampled, np.logical_and(is_sampled,
- numpy_indicator))
-
- def test_subsample_selection_no_batch_size(self):
- # Test random sampling when only some examples can be sampled:
- # 1000 samples, 6 positives (5 can be sampled).
- numpy_labels = np.arange(1000)
- numpy_indicator = numpy_labels < 999
- indicator = tf.constant(numpy_indicator)
- numpy_labels = (numpy_labels - 994) >= 0
-
- labels = tf.constant(numpy_labels)
-
- sampler = (balanced_positive_negative_sampler.
- BalancedPositiveNegativeSampler(0.01))
- is_sampled = sampler.subsample(indicator, None, labels)
- with self.test_session() as sess:
- is_sampled = sess.run(is_sampled)
- self.assertTrue(sum(is_sampled) == 500)
- self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 5)
- self.assertTrue(sum(np.logical_and(
- np.logical_not(numpy_labels), is_sampled)) == 495)
- self.assertAllEqual(is_sampled, np.logical_and(is_sampled,
- numpy_indicator))
-
- def test_raises_error_with_incorrect_label_shape(self):
- labels = tf.constant([[True, False, False]])
- indicator = tf.constant([True, False, True])
- sampler = (balanced_positive_negative_sampler.
- BalancedPositiveNegativeSampler())
- with self.assertRaises(ValueError):
- sampler.subsample(indicator, 64, labels)
-
- def test_raises_error_with_incorrect_indicator_shape(self):
- labels = tf.constant([True, False, False])
- indicator = tf.constant([[True, False, True]])
- sampler = (balanced_positive_negative_sampler.
- BalancedPositiveNegativeSampler())
- with self.assertRaises(ValueError):
- sampler.subsample(indicator, 64, labels)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/batcher.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/batcher.py
deleted file mode 100644
index c5dfb712108d0f9ec797ef04c9a4a3620b189fea..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/batcher.py
+++ /dev/null
@@ -1,136 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Provides functions to batch a dictionary of input tensors."""
-import collections
-
-import tensorflow as tf
-
-from object_detection.core import prefetcher
-
-rt_shape_str = '_runtime_shapes'
-
-
-class BatchQueue(object):
- """BatchQueue class.
-
-  This class creates a batch queue to asynchronously enqueue tensor_dict.
- It also adds a FIFO prefetcher so that the batches are readily available
- for the consumers. Dequeue ops for a BatchQueue object can be created via
- the Dequeue method which evaluates to a batch of tensor_dict.
-
- Example input pipeline with batching:
- ------------------------------------
- key, string_tensor = slim.parallel_reader.parallel_read(...)
- tensor_dict = decoder.decode(string_tensor)
- tensor_dict = preprocessor.preprocess(tensor_dict, ...)
- batch_queue = batcher.BatchQueue(tensor_dict,
- batch_size=32,
- batch_queue_capacity=2000,
- num_batch_queue_threads=8,
- prefetch_queue_capacity=20)
- tensor_dict = batch_queue.dequeue()
- outputs = Model(tensor_dict)
- ...
- -----------------------------------
-
- Notes:
- -----
- This class batches tensors of unequal sizes by zero padding and unpadding
- them after generating a batch. This can be computationally expensive when
- batching tensors (such as images) that are of vastly different sizes. So it is
- recommended that the shapes of such tensors be fully defined in tensor_dict
- while other lightweight tensors such as bounding box corners and class labels
- can be of varying sizes. Use either crop or resize operations to fully define
- the shape of an image in tensor_dict.
-
- It is also recommended to perform any preprocessing operations on tensors
- before passing to BatchQueue and subsequently calling the Dequeue method.
-
- Another caveat is that this class does not read the last batch if it is not
- full. The current implementation makes it hard to support that use case. So,
- for evaluation, when it is critical to run all the examples through your
-  network, use the input pipeline example mentioned in core/prefetcher.py.
- """
-
- def __init__(self, tensor_dict, batch_size, batch_queue_capacity,
- num_batch_queue_threads, prefetch_queue_capacity):
- """Constructs a batch queue holding tensor_dict.
-
- Args:
- tensor_dict: dictionary of tensors to batch.
- batch_size: batch size.
- batch_queue_capacity: max capacity of the queue from which the tensors are
- batched.
- num_batch_queue_threads: number of threads to use for batching.
- prefetch_queue_capacity: max capacity of the queue used to prefetch
- assembled batches.
- """
- # Remember static shapes to set shapes of batched tensors.
- static_shapes = collections.OrderedDict(
- {key: tensor.get_shape() for key, tensor in tensor_dict.items()})
- # Remember runtime shapes to unpad tensors after batching.
- runtime_shapes = collections.OrderedDict(
- {(key + rt_shape_str): tf.shape(tensor)
- for key, tensor in tensor_dict.items()})
-
- all_tensors = tensor_dict
- all_tensors.update(runtime_shapes)
- batched_tensors = tf.train.batch(
- all_tensors,
- capacity=batch_queue_capacity,
- batch_size=batch_size,
- dynamic_pad=True,
- num_threads=num_batch_queue_threads)
-
- self._queue = prefetcher.prefetch(batched_tensors,
- prefetch_queue_capacity)
- self._static_shapes = static_shapes
- self._batch_size = batch_size
-
- def dequeue(self):
- """Dequeues a batch of tensor_dict from the BatchQueue.
-
- TODO: use allow_smaller_final_batch to allow running over the whole eval set
-
- Returns:
- A list of tensor_dicts of the requested batch_size.
- """
- batched_tensors = self._queue.dequeue()
- # Separate input tensors from tensors containing their runtime shapes.
- tensors = {}
- shapes = {}
- for key, batched_tensor in batched_tensors.items():
- unbatched_tensor_list = tf.unstack(batched_tensor)
- for i, unbatched_tensor in enumerate(unbatched_tensor_list):
- if rt_shape_str in key:
- shapes[(key[:-len(rt_shape_str)], i)] = unbatched_tensor
- else:
- tensors[(key, i)] = unbatched_tensor
-
-    # Undo the padding using the recorded runtime shapes and create a list of
-    # size `batch_size` that contains tensor dictionaries.
- tensor_dict_list = []
- batch_size = self._batch_size
- for batch_id in range(batch_size):
- tensor_dict = {}
- for key in self._static_shapes:
- tensor_dict[key] = tf.slice(tensors[(key, batch_id)],
- tf.zeros_like(shapes[(key, batch_id)]),
- shapes[(key, batch_id)])
- tensor_dict[key].set_shape(self._static_shapes[key])
- tensor_dict_list.append(tensor_dict)
-
- return tensor_dict_list
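
The pad/unpad mechanism the class relies on is easy to see in isolation: record each tensor's runtime shape before batching with `dynamic_pad=True`, then `tf.slice` the padded tensor back down to that shape after dequeueing, exactly as `dequeue` does. A minimal TF1.x sketch, with `tf.pad` standing in for the padding the batch op would add:

```python
import tensorflow as tf

boxes = tf.placeholder(tf.float32, shape=[None, 4])
runtime_shape = tf.shape(boxes)           # remembered alongside the tensor

# Stand-in for dynamic_pad batching, which zero-pads to the largest example.
padded = tf.pad(boxes, [[0, 3], [0, 0]])

# Unpad as BatchQueue.dequeue does: slice from the origin using the recorded
# runtime shape.
unpadded = tf.slice(padded, tf.zeros_like(runtime_shape), runtime_shape)
```
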
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/batcher_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/batcher_test.py
deleted file mode 100644
index 61b4390b4cdcff146b721872ee98f9a48c6f67f0..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/batcher_test.py
+++ /dev/null
@@ -1,158 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.batcher."""
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.core import batcher
-
-slim = tf.contrib.slim
-
-
-class BatcherTest(tf.test.TestCase):
-
- def test_batch_and_unpad_2d_tensors_of_different_sizes_in_1st_dimension(self):
- with self.test_session() as sess:
- batch_size = 3
- num_batches = 2
- examples = tf.Variable(tf.constant(2, dtype=tf.int32))
- counter = examples.count_up_to(num_batches * batch_size + 2)
- boxes = tf.tile(
- tf.reshape(tf.range(4), [1, 4]), tf.stack([counter, tf.constant(1)]))
- batch_queue = batcher.BatchQueue(
- tensor_dict={'boxes': boxes},
- batch_size=batch_size,
- batch_queue_capacity=100,
- num_batch_queue_threads=1,
- prefetch_queue_capacity=100)
- batch = batch_queue.dequeue()
-
- for tensor_dict in batch:
- for tensor in tensor_dict.values():
- self.assertAllEqual([None, 4], tensor.get_shape().as_list())
-
- tf.initialize_all_variables().run()
- with slim.queues.QueueRunners(sess):
- i = 2
- for _ in range(num_batches):
- batch_np = sess.run(batch)
- for tensor_dict in batch_np:
- for tensor in tensor_dict.values():
- self.assertAllEqual(tensor, np.tile(np.arange(4), (i, 1)))
- i += 1
- with self.assertRaises(tf.errors.OutOfRangeError):
- sess.run(batch)
-
- def test_batch_and_unpad_2d_tensors_of_different_sizes_in_all_dimensions(
- self):
- with self.test_session() as sess:
- batch_size = 3
- num_batches = 2
- examples = tf.Variable(tf.constant(2, dtype=tf.int32))
- counter = examples.count_up_to(num_batches * batch_size + 2)
- image = tf.reshape(
- tf.range(counter * counter), tf.stack([counter, counter]))
- batch_queue = batcher.BatchQueue(
- tensor_dict={'image': image},
- batch_size=batch_size,
- batch_queue_capacity=100,
- num_batch_queue_threads=1,
- prefetch_queue_capacity=100)
- batch = batch_queue.dequeue()
-
- for tensor_dict in batch:
- for tensor in tensor_dict.values():
- self.assertAllEqual([None, None], tensor.get_shape().as_list())
-
- tf.initialize_all_variables().run()
- with slim.queues.QueueRunners(sess):
- i = 2
- for _ in range(num_batches):
- batch_np = sess.run(batch)
- for tensor_dict in batch_np:
- for tensor in tensor_dict.values():
- self.assertAllEqual(tensor, np.arange(i * i).reshape((i, i)))
- i += 1
- with self.assertRaises(tf.errors.OutOfRangeError):
- sess.run(batch)
-
- def test_batch_and_unpad_2d_tensors_of_same_size_in_all_dimensions(self):
- with self.test_session() as sess:
- batch_size = 3
- num_batches = 2
- examples = tf.Variable(tf.constant(1, dtype=tf.int32))
- counter = examples.count_up_to(num_batches * batch_size + 1)
- image = tf.reshape(tf.range(1, 13), [4, 3]) * counter
- batch_queue = batcher.BatchQueue(
- tensor_dict={'image': image},
- batch_size=batch_size,
- batch_queue_capacity=100,
- num_batch_queue_threads=1,
- prefetch_queue_capacity=100)
- batch = batch_queue.dequeue()
-
- for tensor_dict in batch:
- for tensor in tensor_dict.values():
- self.assertAllEqual([4, 3], tensor.get_shape().as_list())
-
- tf.initialize_all_variables().run()
- with slim.queues.QueueRunners(sess):
- i = 1
- for _ in range(num_batches):
- batch_np = sess.run(batch)
- for tensor_dict in batch_np:
- for tensor in tensor_dict.values():
- self.assertAllEqual(tensor, np.arange(1, 13).reshape((4, 3)) * i)
- i += 1
- with self.assertRaises(tf.errors.OutOfRangeError):
- sess.run(batch)
-
- def test_batcher_when_batch_size_is_one(self):
- with self.test_session() as sess:
- batch_size = 1
- num_batches = 2
- examples = tf.Variable(tf.constant(2, dtype=tf.int32))
- counter = examples.count_up_to(num_batches * batch_size + 2)
- image = tf.reshape(
- tf.range(counter * counter), tf.stack([counter, counter]))
- batch_queue = batcher.BatchQueue(
- tensor_dict={'image': image},
- batch_size=batch_size,
- batch_queue_capacity=100,
- num_batch_queue_threads=1,
- prefetch_queue_capacity=100)
- batch = batch_queue.dequeue()
-
- for tensor_dict in batch:
- for tensor in tensor_dict.values():
- self.assertAllEqual([None, None], tensor.get_shape().as_list())
-
- tf.initialize_all_variables().run()
- with slim.queues.QueueRunners(sess):
- i = 2
- for _ in range(num_batches):
- batch_np = sess.run(batch)
- for tensor_dict in batch_np:
- for tensor in tensor_dict.values():
- self.assertAllEqual(tensor, np.arange(i * i).reshape((i, i)))
- i += 1
- with self.assertRaises(tf.errors.OutOfRangeError):
- sess.run(batch)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_coder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_coder.py
deleted file mode 100644
index f20ac956dfbce1fa69d1b9e6f5b023b704e1ec8a..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_coder.py
+++ /dev/null
@@ -1,151 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Base box coder.
-
-Box coders convert between coordinate frames, namely image-centric
-(with (0,0) on the top left of image) and anchor-centric (with (0,0) being
-defined by a specific anchor).
-
-Users of a BoxCoder can call two methods:
- encode: which encodes a box with respect to a given anchor
- (or rather, a tensor of boxes wrt a corresponding tensor of anchors) and
- decode: which inverts this encoding with a decode operation.
-In both cases, the arguments are assumed to be in 1-1 correspondence already;
-it is not the job of a BoxCoder to perform matching.
-"""
-from abc import ABCMeta
-from abc import abstractmethod
-from abc import abstractproperty
-
-import tensorflow as tf
-
-
-# Box coder types.
-FASTER_RCNN = 'faster_rcnn'
-KEYPOINT = 'keypoint'
-MEAN_STDDEV = 'mean_stddev'
-SQUARE = 'square'
-
-
-class BoxCoder(object):
- """Abstract base class for box coder."""
- __metaclass__ = ABCMeta
-
- @abstractproperty
- def code_size(self):
- """Return the size of each code.
-
- This number is a constant and should agree with the output of the `encode`
- op (e.g. if rel_codes is the output of self.encode(...), then it should have
-    shape [N, code_size]). This abstractproperty should be overridden by
- implementations.
-
- Returns:
- an integer constant
- """
- pass
-
- def encode(self, boxes, anchors):
- """Encode a box list relative to an anchor collection.
-
- Args:
- boxes: BoxList holding N boxes to be encoded
- anchors: BoxList of N anchors
-
- Returns:
- a tensor representing N relative-encoded boxes
- """
- with tf.name_scope('Encode'):
- return self._encode(boxes, anchors)
-
- def decode(self, rel_codes, anchors):
- """Decode boxes that are encoded relative to an anchor collection.
-
- Args:
- rel_codes: a tensor representing N relative-encoded boxes
- anchors: BoxList of anchors
-
- Returns:
- boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
- with corners y_min, x_min, y_max, x_max)
- """
- with tf.name_scope('Decode'):
- return self._decode(rel_codes, anchors)
-
- @abstractmethod
- def _encode(self, boxes, anchors):
- """Method to be overriden by implementations.
-
- Args:
- boxes: BoxList holding N boxes to be encoded
- anchors: BoxList of N anchors
-
- Returns:
- a tensor representing N relative-encoded boxes
- """
- pass
-
- @abstractmethod
- def _decode(self, rel_codes, anchors):
- """Method to be overriden by implementations.
-
- Args:
- rel_codes: a tensor representing N relative-encoded boxes
- anchors: BoxList of anchors
-
- Returns:
- boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
- with corners y_min, x_min, y_max, x_max)
- """
- pass
-
-
-def batch_decode(encoded_boxes, box_coder, anchors):
- """Decode a batch of encoded boxes.
-
- This op takes a batch of encoded bounding boxes and transforms
- them to a batch of bounding boxes specified by their corners in
- the order of [y_min, x_min, y_max, x_max].
-
- Args:
- encoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
- code_size] representing the location of the objects.
- box_coder: a BoxCoder object.
- anchors: a BoxList of anchors used to encode `encoded_boxes`.
-
- Returns:
- decoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
-      4] representing the corners of the objects in the order
- of [y_min, x_min, y_max, x_max].
-
- Raises:
- ValueError: if batch sizes of the inputs are inconsistent, or if
- the number of anchors inferred from encoded_boxes and anchors are
- inconsistent.
- """
- encoded_boxes.get_shape().assert_has_rank(3)
- if encoded_boxes.get_shape()[1].value != anchors.num_boxes_static():
- raise ValueError('The number of anchors inferred from encoded_boxes'
- ' and anchors are inconsistent: shape[1] of encoded_boxes'
- ' %s should be equal to the number of anchors: %s.' %
- (encoded_boxes.get_shape()[1].value,
- anchors.num_boxes_static()))
-
- decoded_boxes = tf.stack([
- box_coder.decode(boxes, anchors).get()
- for boxes in tf.unstack(encoded_boxes)
- ])
- return decoded_boxes
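
Of the coder types listed at the top of the file, `'faster_rcnn'` (implemented elsewhere in this tree, in `box_coders/faster_rcnn_box_coder.py`) uses the familiar center/size parameterization. A NumPy sketch of its encode step, ignoring the optional scale factors:

```python
import numpy as np

def encode(box, anchor):
  """(y_min, x_min, y_max, x_max) boxes -> (ty, tx, th, tw) codes."""
  ya, xa = (anchor[0] + anchor[2]) / 2, (anchor[1] + anchor[3]) / 2
  ha, wa = anchor[2] - anchor[0], anchor[3] - anchor[1]
  y, x = (box[0] + box[2]) / 2, (box[1] + box[3]) / 2
  h, w = box[2] - box[0], box[3] - box[1]
  # Offsets are normalized by the anchor size; scales are log-encoded.
  return ((y - ya) / ha, (x - xa) / wa, np.log(h / ha), np.log(w / wa))

# Encoding a box against itself yields the zero code, so decoding zeros must
# reproduce the anchor -- the round-trip property the BoxCoder API implies.
assert np.allclose(encode((0.1, 0.1, 0.5, 0.5), (0.1, 0.1, 0.5, 0.5)), 0.0)
```
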
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_coder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_coder_test.py
deleted file mode 100644
index c087a325275f84604a114d064e050147001d32d0..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_coder_test.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.box_coder."""
-
-import tensorflow as tf
-
-from object_detection.core import box_coder
-from object_detection.core import box_list
-
-
-class MockBoxCoder(box_coder.BoxCoder):
- """Test BoxCoder that encodes/decodes using the multiply-by-two function."""
-
- def code_size(self):
- return 4
-
- def _encode(self, boxes, anchors):
- return 2.0 * boxes.get()
-
- def _decode(self, rel_codes, anchors):
- return box_list.BoxList(rel_codes / 2.0)
-
-
-class BoxCoderTest(tf.test.TestCase):
-
- def test_batch_decode(self):
- mock_anchor_corners = tf.constant(
- [[0, 0.1, 0.2, 0.3], [0.2, 0.4, 0.4, 0.6]], tf.float32)
- mock_anchors = box_list.BoxList(mock_anchor_corners)
- mock_box_coder = MockBoxCoder()
-
- expected_boxes = [[[0.0, 0.1, 0.5, 0.6], [0.5, 0.6, 0.7, 0.8]],
- [[0.1, 0.2, 0.3, 0.4], [0.7, 0.8, 0.9, 1.0]]]
-
- encoded_boxes_list = [mock_box_coder.encode(
- box_list.BoxList(tf.constant(boxes)), mock_anchors)
- for boxes in expected_boxes]
- encoded_boxes = tf.stack(encoded_boxes_list)
- decoded_boxes = box_coder.batch_decode(
- encoded_boxes, mock_box_coder, mock_anchors)
-
- with self.test_session() as sess:
- decoded_boxes_result = sess.run(decoded_boxes)
- self.assertAllClose(expected_boxes, decoded_boxes_result)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list.py
deleted file mode 100644
index c0196f053030b103a6021ac159f6203f77ba1eed..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list.py
+++ /dev/null
@@ -1,207 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Bounding Box List definition.
-
-BoxList represents a list of bounding boxes as tensorflow
-tensors, where each bounding box is represented as a row of 4 numbers,
-[y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes
-within a given list correspond to a single image. See also
-box_list_ops.py for common box related operations (such as area, iou, etc).
-
-Optionally, users can add additional related fields (such as weights).
-We assume the following things to be true about fields:
-* they correspond to boxes in the box_list along the 0th dimension
-* they have inferrable rank at graph construction time
-* all dimensions except for possibly the 0th can be inferred
- (i.e., not None) at graph construction time.
-
-Some other notes:
- * Following tensorflow conventions, we use height, width ordering,
- and correspondingly, y,x (or ymin, xmin, ymax, xmax) ordering
- * Tensors are always provided as (flat) [N, 4] tensors.
-"""
-
-import tensorflow as tf
-
-
-class BoxList(object):
- """Box collection."""
-
- def __init__(self, boxes):
- """Constructs box collection.
-
- Args:
- boxes: a tensor of shape [N, 4] representing box corners
-
- Raises:
- ValueError: if invalid dimensions for bbox data or if bbox data is not in
- float32 format.
- """
- if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
- raise ValueError('Invalid dimensions for box data.')
- if boxes.dtype != tf.float32:
- raise ValueError('Invalid tensor type: should be tf.float32')
- self.data = {'boxes': boxes}
-
- def num_boxes(self):
- """Returns number of boxes held in collection.
-
- Returns:
- a tensor representing the number of boxes held in the collection.
- """
- return tf.shape(self.data['boxes'])[0]
-
- def num_boxes_static(self):
- """Returns number of boxes held in collection.
-
- This number is inferred at graph construction time rather than run-time.
-
- Returns:
- Number of boxes held in collection (integer) or None if this is not
- inferrable at graph construction time.
- """
- return self.data['boxes'].get_shape()[0].value
-
- def get_all_fields(self):
- """Returns all fields."""
- return self.data.keys()
-
- def get_extra_fields(self):
- """Returns all non-box fields (i.e., everything not named 'boxes')."""
- return [k for k in self.data.keys() if k != 'boxes']
-
- def add_field(self, field, field_data):
- """Add field to box list.
-
- This method can be used to add related box data such as
- weights/labels, etc.
-
- Args:
- field: a string key to access the data via `get`
- field_data: a tensor containing the data to store in the BoxList
- """
- self.data[field] = field_data
-
-  def has_field(self, field):
-    """Returns whether the box collection contains the given field."""
-    return field in self.data
-
- def get(self):
- """Convenience function for accessing box coordinates.
-
- Returns:
- a tensor with shape [N, 4] representing box coordinates.
- """
- return self.get_field('boxes')
-
- def set(self, boxes):
- """Convenience function for setting box coordinates.
-
- Args:
- boxes: a tensor of shape [N, 4] representing box corners
-
- Raises:
- ValueError: if invalid dimensions for bbox data
- """
- if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
- raise ValueError('Invalid dimensions for box data.')
- self.data['boxes'] = boxes
-
- def get_field(self, field):
- """Accesses a box collection and associated fields.
-
- This function returns specified field with object; if no field is specified,
- it returns the box coordinates.
-
- Args:
- field: this optional string parameter can be used to specify
- a related field to be accessed.
-
- Returns:
- a tensor representing the box collection or an associated field.
-
- Raises:
- ValueError: if invalid field
- """
- if not self.has_field(field):
- raise ValueError('field ' + str(field) + ' does not exist')
- return self.data[field]
-
- def set_field(self, field, value):
- """Sets the value of a field.
-
- Updates the field of a box_list with a given value.
-
- Args:
- field: (string) name of the field to set value.
- value: the value to assign to the field.
-
- Raises:
- ValueError: if the box_list does not have specified field.
- """
- if not self.has_field(field):
- raise ValueError('field %s does not exist' % field)
- self.data[field] = value
-
- def get_center_coordinates_and_sizes(self, scope=None):
- """Computes the center coordinates, height and width of the boxes.
-
- Args:
- scope: name scope of the function.
-
- Returns:
- a list of 4 1-D tensors [ycenter, xcenter, height, width].
- """
- with tf.name_scope(scope, 'get_center_coordinates_and_sizes'):
- box_corners = self.get()
- ymin, xmin, ymax, xmax = tf.unstack(tf.transpose(box_corners))
- width = xmax - xmin
- height = ymax - ymin
- ycenter = ymin + height / 2.
- xcenter = xmin + width / 2.
- return [ycenter, xcenter, height, width]
-
- def transpose_coordinates(self, scope=None):
- """Transpose the coordinate representation in a boxlist.
-
- Args:
- scope: name scope of the function.
- """
- with tf.name_scope(scope, 'transpose_coordinates'):
- y_min, x_min, y_max, x_max = tf.split(
- value=self.get(), num_or_size_splits=4, axis=1)
- self.set(tf.concat([x_min, y_min, x_max, y_max], 1))
-
- def as_tensor_dict(self, fields=None):
- """Retrieves specified fields as a dictionary of tensors.
-
- Args:
- fields: (optional) list of fields to return in the dictionary.
- If None (default), all fields are returned.
-
- Returns:
- tensor_dict: A dictionary of tensors specified by fields.
-
- Raises:
- ValueError: if specified field is not contained in boxlist.
- """
- tensor_dict = {}
- if fields is None:
- fields = self.get_all_fields()
- for field in fields:
- if not self.has_field(field):
- raise ValueError('boxlist must contain all specified fields')
- tensor_dict[field] = self.get_field(field)
- return tensor_dict
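-
-
-# Illustrative sketch (added for exposition; not part of the original module):
-# building a BoxList of two boxes and attaching a per-box 'scores' field. The
-# coordinates and scores here are arbitrary demo values.
-def _example_box_list():
-  boxes = BoxList(tf.constant([[0.0, 0.0, 0.5, 0.5],
-                               [0.1, 0.2, 0.9, 0.8]], tf.float32))
-  boxes.add_field('scores', tf.constant([0.9, 0.75]))
-  return boxes.get(), boxes.get_field('scores'), boxes.num_boxes()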
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list_ops.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list_ops.py
deleted file mode 100644
index a755ef68ec81d6802f0ea6e8d1e3f613aff8fdef..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list_ops.py
+++ /dev/null
@@ -1,1061 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Bounding Box List operations.
-
-Example box operations that are supported:
- * areas: compute bounding box areas
- * iou: pairwise intersection-over-union scores
- * sq_dist: pairwise distances between bounding boxes
-
-Whenever box_list_ops functions output a BoxList, the fields of the incoming
-BoxList are retained unless documented otherwise.
-"""
-import tensorflow as tf
-
-from object_detection.core import box_list
-from object_detection.utils import shape_utils
-
-
-class SortOrder(object):
- """Enum class for sort order.
-
- Attributes:
- ascend: ascend order.
- descend: descend order.
- """
- ascend = 1
- descend = 2
-
-
-def area(boxlist, scope=None):
- """Computes area of boxes.
-
- Args:
- boxlist: BoxList holding N boxes
- scope: name scope.
-
- Returns:
- a tensor with shape [N] representing box areas.
- """
- with tf.name_scope(scope, 'Area'):
- y_min, x_min, y_max, x_max = tf.split(
- value=boxlist.get(), num_or_size_splits=4, axis=1)
- return tf.squeeze((y_max - y_min) * (x_max - x_min), [1])
-
-
-def height_width(boxlist, scope=None):
- """Computes height and width of boxes in boxlist.
-
- Args:
- boxlist: BoxList holding N boxes
- scope: name scope.
-
- Returns:
- Height: A tensor with shape [N] representing box heights.
- Width: A tensor with shape [N] representing box widths.
- """
- with tf.name_scope(scope, 'HeightWidth'):
- y_min, x_min, y_max, x_max = tf.split(
- value=boxlist.get(), num_or_size_splits=4, axis=1)
- return tf.squeeze(y_max - y_min, [1]), tf.squeeze(x_max - x_min, [1])
-
-
-def scale(boxlist, y_scale, x_scale, scope=None):
- """scale box coordinates in x and y dimensions.
-
- Args:
- boxlist: BoxList holding N boxes
- y_scale: (float) scalar tensor
- x_scale: (float) scalar tensor
- scope: name scope.
-
- Returns:
- boxlist: BoxList holding N boxes
- """
- with tf.name_scope(scope, 'Scale'):
- y_scale = tf.cast(y_scale, tf.float32)
- x_scale = tf.cast(x_scale, tf.float32)
- y_min, x_min, y_max, x_max = tf.split(
- value=boxlist.get(), num_or_size_splits=4, axis=1)
- y_min = y_scale * y_min
- y_max = y_scale * y_max
- x_min = x_scale * x_min
- x_max = x_scale * x_max
- scaled_boxlist = box_list.BoxList(
- tf.concat([y_min, x_min, y_max, x_max], 1))
- return _copy_extra_fields(scaled_boxlist, boxlist)
-
-
-def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
- """Clip bounding boxes to a window.
-
- This op clips any input bounding boxes (represented by bounding box
- corners) to a window, optionally filtering out boxes that do not
- overlap at all with the window.
-
- Args:
- boxlist: BoxList holding M_in boxes
- window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
- window to which the op should clip boxes.
- filter_nonoverlapping: whether to filter out boxes that do not overlap at
- all with the window.
- scope: name scope.
-
- Returns:
- a BoxList holding M_out boxes where M_out <= M_in
- """
- with tf.name_scope(scope, 'ClipToWindow'):
- y_min, x_min, y_max, x_max = tf.split(
- value=boxlist.get(), num_or_size_splits=4, axis=1)
- win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
- y_min_clipped = tf.maximum(tf.minimum(y_min, win_y_max), win_y_min)
- y_max_clipped = tf.maximum(tf.minimum(y_max, win_y_max), win_y_min)
- x_min_clipped = tf.maximum(tf.minimum(x_min, win_x_max), win_x_min)
- x_max_clipped = tf.maximum(tf.minimum(x_max, win_x_max), win_x_min)
- clipped = box_list.BoxList(
- tf.concat([y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped],
- 1))
- clipped = _copy_extra_fields(clipped, boxlist)
- if filter_nonoverlapping:
- areas = area(clipped)
- nonzero_area_indices = tf.cast(
- tf.reshape(tf.where(tf.greater(areas, 0.0)), [-1]), tf.int32)
- clipped = gather(clipped, nonzero_area_indices)
- return clipped
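-
-
-# Illustrative sketch (added for exposition; not part of the original module):
-# with window [0, 0, 9, 14], the box [-1, -2, 4, 5] is clipped to
-# [0, 0, 4, 5], while [-10, -10, -9, -9] clips to zero area and is filtered
-# out when filter_nonoverlapping=True.
-def _example_clip_to_window():
-  window = tf.constant([0.0, 0.0, 9.0, 14.0])
-  boxes = box_list.BoxList(tf.constant([[-1.0, -2.0, 4.0, 5.0],
-                                        [-10.0, -10.0, -9.0, -9.0]]))
-  return clip_to_window(boxes, window)  # BoxList holding [[0, 0, 4, 5]]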
-
-
-def prune_outside_window(boxlist, window, scope=None):
- """Prunes bounding boxes that fall outside a given window.
-
- This function prunes bounding boxes that even partially fall outside the given
- window. See also clip_to_window which only prunes bounding boxes that fall
- completely outside the window, and clips any bounding boxes that partially
- overflow.
-
- Args:
- boxlist: a BoxList holding M_in boxes.
- window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
- of the window
- scope: name scope.
-
- Returns:
- pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in
- valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
- in the input tensor.
- """
- with tf.name_scope(scope, 'PruneOutsideWindow'):
- y_min, x_min, y_max, x_max = tf.split(
- value=boxlist.get(), num_or_size_splits=4, axis=1)
- win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
- coordinate_violations = tf.concat([
- tf.less(y_min, win_y_min), tf.less(x_min, win_x_min),
- tf.greater(y_max, win_y_max), tf.greater(x_max, win_x_max)
- ], 1)
- valid_indices = tf.reshape(
- tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
- return gather(boxlist, valid_indices), valid_indices
-
-
-def prune_completely_outside_window(boxlist, window, scope=None):
- """Prunes bounding boxes that fall completely outside of the given window.
-
- The function clip_to_window prunes bounding boxes that fall
- completely outside the window, but also clips any bounding boxes that
- partially overflow. This function does not clip partially overflowing boxes.
-
- Args:
- boxlist: a BoxList holding M_in boxes.
- window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
- of the window
- scope: name scope.
-
- Returns:
- pruned_boxlist: a new BoxList with all bounding boxes partially or fully in
- the window.
- valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
- in the input tensor.
- """
-  with tf.name_scope(scope, 'PruneCompletelyOutsideWindow'):
- y_min, x_min, y_max, x_max = tf.split(
- value=boxlist.get(), num_or_size_splits=4, axis=1)
- win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
- coordinate_violations = tf.concat([
- tf.greater_equal(y_min, win_y_max), tf.greater_equal(x_min, win_x_max),
- tf.less_equal(y_max, win_y_min), tf.less_equal(x_max, win_x_min)
- ], 1)
- valid_indices = tf.reshape(
- tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
- return gather(boxlist, valid_indices), valid_indices
-
-
-def intersection(boxlist1, boxlist2, scope=None):
- """Compute pairwise intersection areas between boxes.
-
- Args:
- boxlist1: BoxList holding N boxes
- boxlist2: BoxList holding M boxes
- scope: name scope.
-
- Returns:
- a tensor with shape [N, M] representing pairwise intersections
- """
- with tf.name_scope(scope, 'Intersection'):
- y_min1, x_min1, y_max1, x_max1 = tf.split(
- value=boxlist1.get(), num_or_size_splits=4, axis=1)
- y_min2, x_min2, y_max2, x_max2 = tf.split(
- value=boxlist2.get(), num_or_size_splits=4, axis=1)
- all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2))
- all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2))
- intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin)
- all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2))
- all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2))
- intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin)
- return intersect_heights * intersect_widths
-
-
-def matched_intersection(boxlist1, boxlist2, scope=None):
- """Compute intersection areas between corresponding boxes in two boxlists.
-
- Args:
- boxlist1: BoxList holding N boxes
- boxlist2: BoxList holding N boxes
- scope: name scope.
-
- Returns:
- a tensor with shape [N] representing pairwise intersections
- """
- with tf.name_scope(scope, 'MatchedIntersection'):
- y_min1, x_min1, y_max1, x_max1 = tf.split(
- value=boxlist1.get(), num_or_size_splits=4, axis=1)
- y_min2, x_min2, y_max2, x_max2 = tf.split(
- value=boxlist2.get(), num_or_size_splits=4, axis=1)
- min_ymax = tf.minimum(y_max1, y_max2)
- max_ymin = tf.maximum(y_min1, y_min2)
- intersect_heights = tf.maximum(0.0, min_ymax - max_ymin)
- min_xmax = tf.minimum(x_max1, x_max2)
- max_xmin = tf.maximum(x_min1, x_min2)
- intersect_widths = tf.maximum(0.0, min_xmax - max_xmin)
- return tf.reshape(intersect_heights * intersect_widths, [-1])
-
-
-def iou(boxlist1, boxlist2, scope=None):
- """Computes pairwise intersection-over-union between box collections.
-
- Args:
- boxlist1: BoxList holding N boxes
- boxlist2: BoxList holding M boxes
- scope: name scope.
-
- Returns:
- a tensor with shape [N, M] representing pairwise iou scores.
- """
- with tf.name_scope(scope, 'IOU'):
- intersections = intersection(boxlist1, boxlist2)
- areas1 = area(boxlist1)
- areas2 = area(boxlist2)
- unions = (
- tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections)
- return tf.where(
- tf.equal(intersections, 0.0),
- tf.zeros_like(intersections), tf.truediv(intersections, unions))
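-
-
-# Illustrative sketch (added for exposition; not part of the original module):
-# pairwise IoU for two single-box collections. The boxes below intersect in a
-# 2x1 region; with areas 6 and 12, IoU = 2 / (6 + 12 - 2) = 0.125.
-def _example_iou():
-  boxes1 = box_list.BoxList(tf.constant([[4.0, 3.0, 7.0, 5.0]]))  # area 6
-  boxes2 = box_list.BoxList(tf.constant([[3.0, 4.0, 6.0, 8.0]]))  # area 12
-  return iou(boxes1, boxes2)  # a [1, 1] tensor holding 0.125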
-
-
-def matched_iou(boxlist1, boxlist2, scope=None):
- """Compute intersection-over-union between corresponding boxes in boxlists.
-
- Args:
- boxlist1: BoxList holding N boxes
- boxlist2: BoxList holding N boxes
- scope: name scope.
-
- Returns:
- a tensor with shape [N] representing pairwise iou scores.
- """
- with tf.name_scope(scope, 'MatchedIOU'):
- intersections = matched_intersection(boxlist1, boxlist2)
- areas1 = area(boxlist1)
- areas2 = area(boxlist2)
- unions = areas1 + areas2 - intersections
- return tf.where(
- tf.equal(intersections, 0.0),
- tf.zeros_like(intersections), tf.truediv(intersections, unions))
-
-
-def ioa(boxlist1, boxlist2, scope=None):
- """Computes pairwise intersection-over-area between box collections.
-
- intersection-over-area (IOA) between two boxes box1 and box2 is defined as
- their intersection area over box2's area. Note that ioa is not symmetric,
- that is, ioa(box1, box2) != ioa(box2, box1).
-
- Args:
- boxlist1: BoxList holding N boxes
- boxlist2: BoxList holding M boxes
- scope: name scope.
-
- Returns:
- a tensor with shape [N, M] representing pairwise ioa scores.
- """
- with tf.name_scope(scope, 'IOA'):
- intersections = intersection(boxlist1, boxlist2)
- areas = tf.expand_dims(area(boxlist2), 0)
- return tf.truediv(intersections, areas)
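-
-
-# Illustrative sketch (added for exposition; not part of the original module):
-# the same pair of boxes as in the IoU example above. IOA divides the 2x1
-# intersection by the *second* box's area, so swapping the arguments changes
-# the result: 2/12 versus 2/6.
-def _example_ioa_asymmetry():
-  b1 = box_list.BoxList(tf.constant([[4.0, 3.0, 7.0, 5.0]]))  # area 6
-  b2 = box_list.BoxList(tf.constant([[3.0, 4.0, 6.0, 8.0]]))  # area 12
-  return ioa(b1, b2), ioa(b2, b1)  # [[0.1667]] and [[0.3333]]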
-
-
-def prune_non_overlapping_boxes(
- boxlist1, boxlist2, min_overlap=0.0, scope=None):
- """Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2.
-
- For each box in boxlist1, we want its IOA to be more than minoverlap with
- at least one of the boxes in boxlist2. If it does not, we remove it.
-
- Args:
- boxlist1: BoxList holding N boxes.
- boxlist2: BoxList holding M boxes.
- min_overlap: Minimum required overlap between boxes, to count them as
- overlapping.
- scope: name scope.
-
- Returns:
- new_boxlist1: A pruned boxlist with size [N', 4].
- keep_inds: A tensor with shape [N'] indexing kept bounding boxes in the
- first input BoxList `boxlist1`.
- """
- with tf.name_scope(scope, 'PruneNonOverlappingBoxes'):
- ioa_ = ioa(boxlist2, boxlist1) # [M, N] tensor
- ioa_ = tf.reduce_max(ioa_, reduction_indices=[0]) # [N] tensor
- keep_bool = tf.greater_equal(ioa_, tf.constant(min_overlap))
- keep_inds = tf.squeeze(tf.where(keep_bool), squeeze_dims=[1])
- new_boxlist1 = gather(boxlist1, keep_inds)
- return new_boxlist1, keep_inds
-
-
-def prune_small_boxes(boxlist, min_side, scope=None):
- """Prunes small boxes in the boxlist which have a side smaller than min_side.
-
- Args:
- boxlist: BoxList holding N boxes.
- min_side: Minimum width AND height of box to survive pruning.
- scope: name scope.
-
- Returns:
- A pruned boxlist.
- """
- with tf.name_scope(scope, 'PruneSmallBoxes'):
- height, width = height_width(boxlist)
- is_valid = tf.logical_and(tf.greater_equal(width, min_side),
- tf.greater_equal(height, min_side))
- return gather(boxlist, tf.reshape(tf.where(is_valid), [-1]))
-
-
-def change_coordinate_frame(boxlist, window, scope=None):
- """Change coordinate frame of the boxlist to be relative to window's frame.
-
- Given a window of the form [ymin, xmin, ymax, xmax],
- changes bounding box coordinates from boxlist to be relative to this window
- (e.g., the min corner maps to (0,0) and the max corner maps to (1,1)).
-
-  An example use case is data augmentation, where we are given groundtruth
- boxes (boxlist) and would like to randomly crop the image to some
- window (window). In this case we need to change the coordinate frame of
- each groundtruth box to be relative to this new window.
-
- Args:
- boxlist: A BoxList object holding N boxes.
- window: A rank 1 tensor [4].
- scope: name scope.
-
- Returns:
- Returns a BoxList object with N boxes.
- """
- with tf.name_scope(scope, 'ChangeCoordinateFrame'):
- win_height = window[2] - window[0]
- win_width = window[3] - window[1]
- boxlist_new = scale(box_list.BoxList(
- boxlist.get() - [window[0], window[1], window[0], window[1]]),
- 1.0 / win_height, 1.0 / win_width)
- boxlist_new = _copy_extra_fields(boxlist_new, boxlist)
- return boxlist_new
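-
-
-# Illustrative sketch (added for exposition; not part of the original module):
-# a worked example of the window re-framing above. The window has height and
-# width 0.5, so the box [0.25, 0.5, 0.75, 0.75] shifts by the window's min
-# corner and scales by 1 / 0.5, yielding [0.0, 0.5, 1.0, 1.0].
-def _example_change_coordinate_frame():
-  window = tf.constant([0.25, 0.25, 0.75, 0.75])
-  boxes = box_list.BoxList(tf.constant([[0.25, 0.5, 0.75, 0.75]]))
-  return change_coordinate_frame(boxes, window)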
-
-
-def sq_dist(boxlist1, boxlist2, scope=None):
- """Computes the pairwise squared distances between box corners.
-
- This op treats each box as if it were a point in a 4d Euclidean space and
- computes pairwise squared distances.
-
- Mathematically, we are given two matrices of box coordinates X and Y,
- where X(i,:) is the i'th row of X, containing the 4 numbers defining the
- corners of the i'th box in boxlist1. Similarly Y(j,:) corresponds to
- boxlist2. We compute
- Z(i,j) = ||X(i,:) - Y(j,:)||^2
- = ||X(i,:)||^2 + ||Y(j,:)||^2 - 2 X(i,:)' * Y(j,:),
-
- Args:
- boxlist1: BoxList holding N boxes
- boxlist2: BoxList holding M boxes
- scope: name scope.
-
- Returns:
- a tensor with shape [N, M] representing pairwise distances
- """
- with tf.name_scope(scope, 'SqDist'):
- sqnorm1 = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True)
- sqnorm2 = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True)
- innerprod = tf.matmul(boxlist1.get(), boxlist2.get(),
- transpose_a=False, transpose_b=True)
- return sqnorm1 + tf.transpose(sqnorm2) - 2.0 * innerprod
-
-
-def boolean_mask(boxlist, indicator, fields=None, scope=None):
- """Select boxes from BoxList according to indicator and return new BoxList.
-
- `boolean_mask` returns the subset of boxes that are marked as "True" by the
-  indicator tensor. By default, it also gathers all additional fields stored
-  in the boxlist (indexing into the first dimension). However one can
-  optionally gather from only a subset of fields.
-
- Args:
- boxlist: BoxList holding N boxes
- indicator: a rank-1 boolean tensor
- fields: (optional) list of fields to also gather from. If None (default),
- all fields are gathered from. Pass an empty fields list to only gather
- the box coordinates.
- scope: name scope.
-
- Returns:
- subboxlist: a BoxList corresponding to the subset of the input BoxList
- specified by indicator
- Raises:
- ValueError: if `indicator` is not a rank-1 boolean tensor.
- """
- with tf.name_scope(scope, 'BooleanMask'):
- if indicator.shape.ndims != 1:
- raise ValueError('indicator should have rank 1')
- if indicator.dtype != tf.bool:
- raise ValueError('indicator should be a boolean tensor')
- subboxlist = box_list.BoxList(tf.boolean_mask(boxlist.get(), indicator))
- if fields is None:
- fields = boxlist.get_extra_fields()
- for field in fields:
- if not boxlist.has_field(field):
- raise ValueError('boxlist must contain all specified fields')
- subfieldlist = tf.boolean_mask(boxlist.get_field(field), indicator)
- subboxlist.add_field(field, subfieldlist)
- return subboxlist
-
-
-def gather(boxlist, indices, fields=None, scope=None):
- """Gather boxes from BoxList according to indices and return new BoxList.
-
- By default, `gather` returns boxes corresponding to the input index list, as
- well as all additional fields stored in the boxlist (indexing into the
- first dimension). However one can optionally only gather from a
- subset of fields.
-
- Args:
- boxlist: BoxList holding N boxes
- indices: a rank-1 tensor of type int32 / int64
- fields: (optional) list of fields to also gather from. If None (default),
- all fields are gathered from. Pass an empty fields list to only gather
- the box coordinates.
- scope: name scope.
-
- Returns:
- subboxlist: a BoxList corresponding to the subset of the input BoxList
- specified by indices
- Raises:
- ValueError: if specified field is not contained in boxlist or if the
- indices are not of type int32
- """
- with tf.name_scope(scope, 'Gather'):
- if len(indices.shape.as_list()) != 1:
- raise ValueError('indices should have rank 1')
- if indices.dtype != tf.int32 and indices.dtype != tf.int64:
- raise ValueError('indices should be an int32 / int64 tensor')
- subboxlist = box_list.BoxList(tf.gather(boxlist.get(), indices))
- if fields is None:
- fields = boxlist.get_extra_fields()
- for field in fields:
- if not boxlist.has_field(field):
- raise ValueError('boxlist must contain all specified fields')
- subfieldlist = tf.gather(boxlist.get_field(field), indices)
- subboxlist.add_field(field, subfieldlist)
- return subboxlist
-
-
-def concatenate(boxlists, fields=None, scope=None):
- """Concatenate list of BoxLists.
-
- This op concatenates a list of input BoxLists into a larger BoxList. It also
- handles concatenation of BoxList fields as long as the field tensor shapes
- are equal except for the first dimension.
-
- Args:
- boxlists: list of BoxList objects
- fields: optional list of fields to also concatenate. By default, all
- fields from the first BoxList in the list are included in the
- concatenation.
- scope: name scope.
-
- Returns:
-    a BoxList with number of boxes equal to
-    sum([boxlist.num_boxes() for boxlist in boxlists])
- Raises:
- ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
- contains non BoxList objects), or if requested fields are not contained in
- all boxlists
- """
- with tf.name_scope(scope, 'Concatenate'):
- if not isinstance(boxlists, list):
- raise ValueError('boxlists should be a list')
- if not boxlists:
- raise ValueError('boxlists should have nonzero length')
- for boxlist in boxlists:
- if not isinstance(boxlist, box_list.BoxList):
- raise ValueError('all elements of boxlists should be BoxList objects')
- concatenated = box_list.BoxList(
- tf.concat([boxlist.get() for boxlist in boxlists], 0))
- if fields is None:
- fields = boxlists[0].get_extra_fields()
- for field in fields:
- first_field_shape = boxlists[0].get_field(field).get_shape().as_list()
- first_field_shape[0] = -1
- if None in first_field_shape:
- raise ValueError('field %s must have fully defined shape except for the'
- ' 0th dimension.' % field)
- for boxlist in boxlists:
- if not boxlist.has_field(field):
- raise ValueError('boxlist must contain all requested fields')
- field_shape = boxlist.get_field(field).get_shape().as_list()
- field_shape[0] = -1
- if field_shape != first_field_shape:
- raise ValueError('field %s must have same shape for all boxlists '
- 'except for the 0th dimension.' % field)
- concatenated_field = tf.concat(
- [boxlist.get_field(field) for boxlist in boxlists], 0)
- concatenated.add_field(field, concatenated_field)
- return concatenated
-
-
-def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None):
- """Sort boxes and associated fields according to a scalar field.
-
- A common use case is reordering the boxes according to descending scores.
-
- Args:
- boxlist: BoxList holding N boxes.
- field: A BoxList field for sorting and reordering the BoxList.
- order: (Optional) descend or ascend. Default is descend.
- scope: name scope.
-
- Returns:
- sorted_boxlist: A sorted BoxList with the field in the specified order.
-
- Raises:
- ValueError: if specified field does not exist
- ValueError: if the order is not either descend or ascend
- """
- with tf.name_scope(scope, 'SortByField'):
- if order != SortOrder.descend and order != SortOrder.ascend:
- raise ValueError('Invalid sort order')
-
- field_to_sort = boxlist.get_field(field)
- if len(field_to_sort.shape.as_list()) != 1:
- raise ValueError('Field should have rank 1')
-
- num_boxes = boxlist.num_boxes()
- num_entries = tf.size(field_to_sort)
- length_assert = tf.Assert(
- tf.equal(num_boxes, num_entries),
- ['Incorrect field size: actual vs expected.', num_entries, num_boxes])
-
- with tf.control_dependencies([length_assert]):
- # TODO(derekjchow): Remove with tf.device when top_k operation runs
- # correctly on GPU.
- with tf.device('/cpu:0'):
- _, sorted_indices = tf.nn.top_k(field_to_sort, num_boxes, sorted=True)
-
- if order == SortOrder.ascend:
- sorted_indices = tf.reverse_v2(sorted_indices, [0])
-
- return gather(boxlist, sorted_indices)
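-
-
-# Illustrative sketch (added for exposition; not part of the original module):
-# sorting a BoxList by a 'scores' field. With scores [0.2, 0.9, 0.5], the
-# default descending order reorders the rows to indices 1, 2, 0.
-def _example_sort_by_field():
-  boxes = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0],
-                                        [0.0, 0.0, 0.5, 0.5],
-                                        [0.0, 0.0, 0.2, 0.2]]))
-  boxes.add_field('scores', tf.constant([0.2, 0.9, 0.5]))
-  return sort_by_field(boxes, 'scores')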
-
-
-def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
- """Overlay bounding box list on image.
-
-  Currently this visualization plots a 1 pixel thick red bounding box on top
-  of the image. Note that tf.image.draw_bounding_boxes is essentially
-  1-indexed.
-
- Args:
- image: an image tensor with shape [height, width, 3]
- boxlist: a BoxList
- normalized: (boolean) specify whether corners are to be interpreted
- as absolute coordinates in image space or normalized with respect to the
- image size.
- scope: name scope.
-
- Returns:
- image_and_boxes: an image tensor with shape [height, width, 3]
- """
- with tf.name_scope(scope, 'VisualizeBoxesInImage'):
- if not normalized:
- height, width, _ = tf.unstack(tf.shape(image))
- boxlist = scale(boxlist,
- 1.0 / tf.cast(height, tf.float32),
- 1.0 / tf.cast(width, tf.float32))
- corners = tf.expand_dims(boxlist.get(), 0)
- image = tf.expand_dims(image, 0)
- return tf.squeeze(tf.image.draw_bounding_boxes(image, corners), [0])
-
-
-def filter_field_value_equals(boxlist, field, value, scope=None):
- """Filter to keep only boxes with field entries equal to the given value.
-
- Args:
- boxlist: BoxList holding N boxes.
- field: field name for filtering.
- value: scalar value.
- scope: name scope.
-
- Returns:
- a BoxList holding M boxes where M <= N
-
- Raises:
- ValueError: if boxlist not a BoxList object or if it does not have
- the specified field.
- """
- with tf.name_scope(scope, 'FilterFieldValueEquals'):
- if not isinstance(boxlist, box_list.BoxList):
- raise ValueError('boxlist must be a BoxList')
- if not boxlist.has_field(field):
- raise ValueError('boxlist must contain the specified field')
- filter_field = boxlist.get_field(field)
- gather_index = tf.reshape(tf.where(tf.equal(filter_field, value)), [-1])
- return gather(boxlist, gather_index)
-
-
-def filter_greater_than(boxlist, thresh, scope=None):
- """Filter to keep only boxes with score exceeding a given threshold.
-
- This op keeps the collection of boxes whose corresponding scores are
- greater than the input threshold.
-
- TODO(jonathanhuang): Change function name to filter_scores_greater_than
-
- Args:
- boxlist: BoxList holding N boxes. Must contain a 'scores' field
- representing detection scores.
- thresh: scalar threshold
- scope: name scope.
-
- Returns:
- a BoxList holding M boxes where M <= N
-
- Raises:
- ValueError: if boxlist not a BoxList object or if it does not
- have a scores field
- """
- with tf.name_scope(scope, 'FilterGreaterThan'):
- if not isinstance(boxlist, box_list.BoxList):
- raise ValueError('boxlist must be a BoxList')
- if not boxlist.has_field('scores'):
- raise ValueError('input boxlist must have \'scores\' field')
- scores = boxlist.get_field('scores')
- if len(scores.shape.as_list()) > 2:
- raise ValueError('Scores should have rank 1 or 2')
- if len(scores.shape.as_list()) == 2 and scores.shape.as_list()[1] != 1:
- raise ValueError('Scores should have rank 1 or have shape '
- 'consistent with [None, 1]')
- high_score_indices = tf.cast(tf.reshape(
- tf.where(tf.greater(scores, thresh)),
- [-1]), tf.int32)
- return gather(boxlist, high_score_indices)
-
-
-def non_max_suppression(boxlist, thresh, max_output_size, scope=None):
- """Non maximum suppression.
-
- This op greedily selects a subset of detection bounding boxes, pruning
- away boxes that have high IOU (intersection over union) overlap (> thresh)
- with already selected boxes. Note that this only works for a single class ---
- to apply NMS to multi-class predictions, use MultiClassNonMaxSuppression.
-
- Args:
- boxlist: BoxList holding N boxes. Must contain a 'scores' field
- representing detection scores.
- thresh: scalar threshold
- max_output_size: maximum number of retained boxes
- scope: name scope.
-
- Returns:
- a BoxList holding M boxes where M <= max_output_size
- Raises:
- ValueError: if thresh is not in [0, 1]
- """
- with tf.name_scope(scope, 'NonMaxSuppression'):
- if not 0 <= thresh <= 1.0:
- raise ValueError('thresh must be between 0 and 1')
- if not isinstance(boxlist, box_list.BoxList):
- raise ValueError('boxlist must be a BoxList')
- if not boxlist.has_field('scores'):
- raise ValueError('input boxlist must have \'scores\' field')
- selected_indices = tf.image.non_max_suppression(
- boxlist.get(), boxlist.get_field('scores'),
- max_output_size, iou_threshold=thresh)
- return gather(boxlist, selected_indices)
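-
-
-# Illustrative sketch (added for exposition; not part of the original module):
-# single-class NMS on three scored boxes. The second box overlaps the first
-# with IoU 0.81 and is suppressed at thresh=0.5; the third overlaps the first
-# with IoU 1/3 and survives.
-def _example_non_max_suppression():
-  boxes = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0],
-                                        [0.0, 0.0, 0.9, 0.9],
-                                        [0.0, 0.5, 1.0, 1.5]]))
-  boxes.add_field('scores', tf.constant([0.9, 0.8, 0.6]))
-  return non_max_suppression(boxes, thresh=0.5, max_output_size=3)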
-
-
-def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
- """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.
-
- Args:
- boxlist_to_copy_to: BoxList to which extra fields are copied.
- boxlist_to_copy_from: BoxList from which fields are copied.
-
- Returns:
- boxlist_to_copy_to with extra fields.
- """
- for field in boxlist_to_copy_from.get_extra_fields():
- boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field))
- return boxlist_to_copy_to
-
-
-def to_normalized_coordinates(boxlist, height, width,
- check_range=True, scope=None):
- """Converts absolute box coordinates to normalized coordinates in [0, 1].
-
- Usually one uses the dynamic shape of the image or conv-layer tensor:
-    boxlist = box_list_ops.to_normalized_coordinates(boxlist,
-                                                     tf.shape(images)[1],
-                                                     tf.shape(images)[2])
-
- This function raises an assertion failed error at graph execution time when
- the maximum coordinate is smaller than 1.01 (which means that coordinates are
- already normalized). The value 1.01 is to deal with small rounding errors.
-
- Args:
- boxlist: BoxList with coordinates in terms of pixel-locations.
- height: Maximum value for height of absolute box coordinates.
- width: Maximum value for width of absolute box coordinates.
- check_range: If True, checks if the coordinates are normalized or not.
- scope: name scope.
-
- Returns:
- boxlist with normalized coordinates in [0, 1].
- """
- with tf.name_scope(scope, 'ToNormalizedCoordinates'):
- height = tf.cast(height, tf.float32)
- width = tf.cast(width, tf.float32)
-
- if check_range:
- max_val = tf.reduce_max(boxlist.get())
- max_assert = tf.Assert(tf.greater(max_val, 1.01),
- ['max value is lower than 1.01: ', max_val])
- with tf.control_dependencies([max_assert]):
- width = tf.identity(width)
-
- return scale(boxlist, 1 / height, 1 / width)
-
-
-def to_absolute_coordinates(boxlist,
- height,
- width,
- check_range=True,
- maximum_normalized_coordinate=1.1,
- scope=None):
- """Converts normalized box coordinates to absolute pixel coordinates.
-
- This function raises an assertion failed error when the maximum box coordinate
- value is larger than maximum_normalized_coordinate (in which case coordinates
- are already absolute).
-
- Args:
- boxlist: BoxList with coordinates in range [0, 1].
- height: Maximum value for height of absolute box coordinates.
- width: Maximum value for width of absolute box coordinates.
- check_range: If True, checks if the coordinates are normalized or not.
- maximum_normalized_coordinate: Maximum coordinate value to be considered
- as normalized, default to 1.1.
- scope: name scope.
-
- Returns:
- boxlist with absolute coordinates in terms of the image size.
-
- """
- with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
- height = tf.cast(height, tf.float32)
- width = tf.cast(width, tf.float32)
-
- # Ensure range of input boxes is correct.
- if check_range:
- box_maximum = tf.reduce_max(boxlist.get())
- max_assert = tf.Assert(
- tf.greater_equal(maximum_normalized_coordinate, box_maximum),
- ['maximum box coordinate value is larger '
- 'than %f: ' % maximum_normalized_coordinate, box_maximum])
- with tf.control_dependencies([max_assert]):
- width = tf.identity(width)
-
- return scale(boxlist, height, width)
-
-
-def refine_boxes_multi_class(pool_boxes,
- num_classes,
- nms_iou_thresh,
- nms_max_detections,
- voting_iou_thresh=0.5):
- """Refines a pool of boxes using non max suppression and box voting.
-
- Box refinement is done independently for each class.
-
- Args:
- pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must
- have a rank 1 'scores' field and a rank 1 'classes' field.
- num_classes: (int scalar) Number of classes.
- nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS).
- nms_max_detections: (int scalar) maximum output size for NMS.
- voting_iou_thresh: (float scalar) iou threshold for box voting.
-
- Returns:
- BoxList of refined boxes.
-
- Raises:
- ValueError: if
- a) nms_iou_thresh or voting_iou_thresh is not in [0, 1].
- b) pool_boxes is not a BoxList.
- c) pool_boxes does not have a scores and classes field.
- """
- if not 0.0 <= nms_iou_thresh <= 1.0:
- raise ValueError('nms_iou_thresh must be between 0 and 1')
- if not 0.0 <= voting_iou_thresh <= 1.0:
- raise ValueError('voting_iou_thresh must be between 0 and 1')
- if not isinstance(pool_boxes, box_list.BoxList):
- raise ValueError('pool_boxes must be a BoxList')
- if not pool_boxes.has_field('scores'):
- raise ValueError('pool_boxes must have a \'scores\' field')
- if not pool_boxes.has_field('classes'):
- raise ValueError('pool_boxes must have a \'classes\' field')
-
- refined_boxes = []
- for i in range(num_classes):
- boxes_class = filter_field_value_equals(pool_boxes, 'classes', i)
- refined_boxes_class = refine_boxes(boxes_class, nms_iou_thresh,
- nms_max_detections, voting_iou_thresh)
- refined_boxes.append(refined_boxes_class)
- return sort_by_field(concatenate(refined_boxes), 'scores')
-
-
-def refine_boxes(pool_boxes,
- nms_iou_thresh,
- nms_max_detections,
- voting_iou_thresh=0.5):
- """Refines a pool of boxes using non max suppression and box voting.
-
- Args:
- pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must
- have a rank 1 'scores' field.
- nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS).
- nms_max_detections: (int scalar) maximum output size for NMS.
- voting_iou_thresh: (float scalar) iou threshold for box voting.
-
- Returns:
- BoxList of refined boxes.
-
- Raises:
- ValueError: if
- a) nms_iou_thresh or voting_iou_thresh is not in [0, 1].
- b) pool_boxes is not a BoxList.
- c) pool_boxes does not have a scores field.
- """
- if not 0.0 <= nms_iou_thresh <= 1.0:
- raise ValueError('nms_iou_thresh must be between 0 and 1')
- if not 0.0 <= voting_iou_thresh <= 1.0:
- raise ValueError('voting_iou_thresh must be between 0 and 1')
- if not isinstance(pool_boxes, box_list.BoxList):
- raise ValueError('pool_boxes must be a BoxList')
- if not pool_boxes.has_field('scores'):
- raise ValueError('pool_boxes must have a \'scores\' field')
-
- nms_boxes = non_max_suppression(
- pool_boxes, nms_iou_thresh, nms_max_detections)
- return box_voting(nms_boxes, pool_boxes, voting_iou_thresh)
-
-
-def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
- """Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015.
-
- Performs box voting as described in 'Object detection via a multi-region &
- semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For
- each box 'B' in selected_boxes, we find the set 'S' of boxes in pool_boxes
-  with iou overlap >= iou_thresh. The location of B is set to the weighted
-  average location of boxes in S (scores are used for weighting), and the
-  score of B is set to the average score of boxes in S.
-
- Args:
- selected_boxes: BoxList containing a subset of boxes in pool_boxes. These
- boxes are usually selected from pool_boxes using non max suppression.
- pool_boxes: BoxList containing a set of (possibly redundant) boxes.
- iou_thresh: (float scalar) iou threshold for matching boxes in
- selected_boxes and pool_boxes.
-
- Returns:
- BoxList containing averaged locations and scores for each box in
- selected_boxes.
-
- Raises:
- ValueError: if
- a) selected_boxes or pool_boxes is not a BoxList.
- b) if iou_thresh is not in [0, 1].
- c) pool_boxes does not have a scores field.
- """
- if not 0.0 <= iou_thresh <= 1.0:
- raise ValueError('iou_thresh must be between 0 and 1')
- if not isinstance(selected_boxes, box_list.BoxList):
- raise ValueError('selected_boxes must be a BoxList')
- if not isinstance(pool_boxes, box_list.BoxList):
- raise ValueError('pool_boxes must be a BoxList')
- if not pool_boxes.has_field('scores'):
- raise ValueError('pool_boxes must have a \'scores\' field')
-
- iou_ = iou(selected_boxes, pool_boxes)
- match_indicator = tf.to_float(tf.greater(iou_, iou_thresh))
- num_matches = tf.reduce_sum(match_indicator, 1)
- # TODO(kbanoop): Handle the case where some boxes in selected_boxes do not
- # match to any boxes in pool_boxes. For such boxes without any matches, we
- # should return the original boxes without voting.
- match_assert = tf.Assert(
- tf.reduce_all(tf.greater(num_matches, 0)),
- ['Each box in selected_boxes must match with at least one box '
- 'in pool_boxes.'])
-
- scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
- scores_assert = tf.Assert(
- tf.reduce_all(tf.greater_equal(scores, 0)),
- ['Scores must be non negative.'])
-
- with tf.control_dependencies([scores_assert, match_assert]):
- sum_scores = tf.matmul(match_indicator, scores)
- averaged_scores = tf.reshape(sum_scores, [-1]) / num_matches
-
- box_locations = tf.matmul(match_indicator,
- pool_boxes.get() * scores) / sum_scores
- averaged_boxes = box_list.BoxList(box_locations)
- _copy_extra_fields(averaged_boxes, selected_boxes)
- averaged_boxes.add_field('scores', averaged_scores)
- return averaged_boxes
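-
-
-# Illustrative sketch (added for exposition; not part of the original module):
-# box voting with one selected box and a two-box pool. Both pool boxes match
-# the selected box (IoU > 0.5), so the output score is the mean
-# (0.8 + 0.4) / 2 = 0.6 and the output location is the score-weighted average
-# of the two pool boxes.
-def _example_box_voting():
-  selected = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0]]))
-  pool = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0],
-                                       [0.1, 0.1, 1.0, 1.0]]))
-  pool.add_field('scores', tf.constant([0.8, 0.4]))
-  return box_voting(selected, pool, iou_thresh=0.5)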
-
-
-def pad_or_clip_box_list(boxlist, num_boxes, scope=None):
- """Pads or clips all fields of a BoxList.
-
- Args:
-    boxlist: A BoxList with an arbitrary number of boxes.
-    num_boxes: First num_boxes in boxlist are kept. The fields are
-      zero-padded if num_boxes is bigger than the actual number of boxes.
- scope: name scope.
-
- Returns:
- BoxList with all fields padded or clipped.
- """
- with tf.name_scope(scope, 'PadOrClipBoxList'):
- subboxlist = box_list.BoxList(shape_utils.pad_or_clip_tensor(
- boxlist.get(), num_boxes))
- for field in boxlist.get_extra_fields():
- subfield = shape_utils.pad_or_clip_tensor(
- boxlist.get_field(field), num_boxes)
- subboxlist.add_field(field, subfield)
- return subboxlist
-
-
-def select_random_box(boxlist,
- default_box=None,
- seed=None,
- scope=None):
- """Selects a random bounding box from a `BoxList`.
-
- Args:
- boxlist: A BoxList.
- default_box: A [1, 4] float32 tensor. If no boxes are present in `boxlist`,
- this default box will be returned. If None, will use a default box of
- [[-1., -1., -1., -1.]].
- seed: Random seed.
- scope: Name scope.
-
- Returns:
- bbox: A [1, 4] tensor with a random bounding box.
- valid: A bool tensor indicating whether a valid bounding box is returned
- (True) or whether the default box is returned (False).
- """
- with tf.name_scope(scope, 'SelectRandomBox'):
- bboxes = boxlist.get()
- combined_shape = shape_utils.combined_static_and_dynamic_shape(bboxes)
- number_of_boxes = combined_shape[0]
- default_box = default_box or tf.constant([[-1., -1., -1., -1.]])
-
- def select_box():
- random_index = tf.random_uniform([],
- maxval=number_of_boxes,
- dtype=tf.int32,
- seed=seed)
- return tf.expand_dims(bboxes[random_index], axis=0), tf.constant(True)
-
- return tf.cond(
- tf.greater_equal(number_of_boxes, 1),
- true_fn=select_box,
- false_fn=lambda: (default_box, tf.constant(False)))
-
-
-def get_minimal_coverage_box(boxlist,
- default_box=None,
- scope=None):
- """Creates a single bounding box which covers all boxes in the boxlist.
-
- Args:
- boxlist: A Boxlist.
- default_box: A [1, 4] float32 tensor. If no boxes are present in `boxlist`,
- this default box will be returned. If None, will use a default box of
- [[0., 0., 1., 1.]].
- scope: Name scope.
-
- Returns:
- A [1, 4] float32 tensor with a bounding box that tightly covers all the
- boxes in the box list. If the boxlist does not contain any boxes, the
- default box is returned.
- """
- with tf.name_scope(scope, 'CreateCoverageBox'):
- num_boxes = boxlist.num_boxes()
-
- def coverage_box(bboxes):
- y_min, x_min, y_max, x_max = tf.split(
- value=bboxes, num_or_size_splits=4, axis=1)
- y_min_coverage = tf.reduce_min(y_min, axis=0)
- x_min_coverage = tf.reduce_min(x_min, axis=0)
- y_max_coverage = tf.reduce_max(y_max, axis=0)
- x_max_coverage = tf.reduce_max(x_max, axis=0)
- return tf.stack(
- [y_min_coverage, x_min_coverage, y_max_coverage, x_max_coverage],
- axis=1)
-
- default_box = default_box or tf.constant([[0., 0., 1., 1.]])
- return tf.cond(
- tf.greater_equal(num_boxes, 1),
- true_fn=lambda: coverage_box(boxlist.get()),
- false_fn=lambda: default_box)
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list_ops_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list_ops_test.py
deleted file mode 100644
index bb76cfd35af1a077debdf6945c13b04aaac37eca..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list_ops_test.py
+++ /dev/null
@@ -1,1036 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.box_list_ops."""
-import numpy as np
-import tensorflow as tf
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-
-from object_detection.core import box_list
-from object_detection.core import box_list_ops
-
-
-class BoxListOpsTest(tf.test.TestCase):
- """Tests for common bounding box operations."""
-
- def test_area(self):
- corners = tf.constant([[0.0, 0.0, 10.0, 20.0], [1.0, 2.0, 3.0, 4.0]])
- exp_output = [200.0, 4.0]
- boxes = box_list.BoxList(corners)
- areas = box_list_ops.area(boxes)
- with self.test_session() as sess:
- areas_output = sess.run(areas)
- self.assertAllClose(areas_output, exp_output)
-
- def test_height_width(self):
- corners = tf.constant([[0.0, 0.0, 10.0, 20.0], [1.0, 2.0, 3.0, 4.0]])
- exp_output_heights = [10., 2.]
- exp_output_widths = [20., 2.]
- boxes = box_list.BoxList(corners)
- heights, widths = box_list_ops.height_width(boxes)
- with self.test_session() as sess:
- output_heights, output_widths = sess.run([heights, widths])
- self.assertAllClose(output_heights, exp_output_heights)
- self.assertAllClose(output_widths, exp_output_widths)
-
- def test_scale(self):
- corners = tf.constant([[0, 0, 100, 200], [50, 120, 100, 140]],
- dtype=tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('extra_data', tf.constant([[1], [2]]))
-
- y_scale = tf.constant(1.0/100)
- x_scale = tf.constant(1.0/200)
- scaled_boxes = box_list_ops.scale(boxes, y_scale, x_scale)
- exp_output = [[0, 0, 1, 1], [0.5, 0.6, 1.0, 0.7]]
- with self.test_session() as sess:
- scaled_corners_out = sess.run(scaled_boxes.get())
- self.assertAllClose(scaled_corners_out, exp_output)
- extra_data_out = sess.run(scaled_boxes.get_field('extra_data'))
- self.assertAllEqual(extra_data_out, [[1], [2]])
-
- def test_clip_to_window_filter_boxes_which_fall_outside_the_window(
- self):
- window = tf.constant([0, 0, 9, 14], tf.float32)
- corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
- [-1.0, -2.0, 4.0, 5.0],
- [2.0, 3.0, 5.0, 9.0],
- [0.0, 0.0, 9.0, 14.0],
- [-100.0, -100.0, 300.0, 600.0],
- [-10.0, -10.0, -9.0, -9.0]])
- boxes = box_list.BoxList(corners)
- boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
- exp_output = [[5.0, 5.0, 6.0, 6.0], [0.0, 0.0, 4.0, 5.0],
- [2.0, 3.0, 5.0, 9.0], [0.0, 0.0, 9.0, 14.0],
- [0.0, 0.0, 9.0, 14.0]]
- pruned = box_list_ops.clip_to_window(
- boxes, window, filter_nonoverlapping=True)
- with self.test_session() as sess:
- pruned_output = sess.run(pruned.get())
- self.assertAllClose(pruned_output, exp_output)
- extra_data_out = sess.run(pruned.get_field('extra_data'))
- self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [5]])
-
- def test_clip_to_window_without_filtering_boxes_which_fall_outside_the_window(
- self):
- window = tf.constant([0, 0, 9, 14], tf.float32)
- corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
- [-1.0, -2.0, 4.0, 5.0],
- [2.0, 3.0, 5.0, 9.0],
- [0.0, 0.0, 9.0, 14.0],
- [-100.0, -100.0, 300.0, 600.0],
- [-10.0, -10.0, -9.0, -9.0]])
- boxes = box_list.BoxList(corners)
- boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
- exp_output = [[5.0, 5.0, 6.0, 6.0], [0.0, 0.0, 4.0, 5.0],
- [2.0, 3.0, 5.0, 9.0], [0.0, 0.0, 9.0, 14.0],
- [0.0, 0.0, 9.0, 14.0], [0.0, 0.0, 0.0, 0.0]]
- pruned = box_list_ops.clip_to_window(
- boxes, window, filter_nonoverlapping=False)
- with self.test_session() as sess:
- pruned_output = sess.run(pruned.get())
- self.assertAllClose(pruned_output, exp_output)
- extra_data_out = sess.run(pruned.get_field('extra_data'))
- self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [5], [6]])
-
- def test_prune_outside_window_filters_boxes_which_fall_outside_the_window(
- self):
- window = tf.constant([0, 0, 9, 14], tf.float32)
- corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
- [-1.0, -2.0, 4.0, 5.0],
- [2.0, 3.0, 5.0, 9.0],
- [0.0, 0.0, 9.0, 14.0],
- [-10.0, -10.0, -9.0, -9.0],
- [-100.0, -100.0, 300.0, 600.0]])
- boxes = box_list.BoxList(corners)
- boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
- exp_output = [[5.0, 5.0, 6.0, 6.0],
- [2.0, 3.0, 5.0, 9.0],
- [0.0, 0.0, 9.0, 14.0]]
- pruned, keep_indices = box_list_ops.prune_outside_window(boxes, window)
- with self.test_session() as sess:
- pruned_output = sess.run(pruned.get())
- self.assertAllClose(pruned_output, exp_output)
- keep_indices_out = sess.run(keep_indices)
- self.assertAllEqual(keep_indices_out, [0, 2, 3])
- extra_data_out = sess.run(pruned.get_field('extra_data'))
- self.assertAllEqual(extra_data_out, [[1], [3], [4]])
-
- def test_prune_completely_outside_window(self):
- window = tf.constant([0, 0, 9, 14], tf.float32)
- corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
- [-1.0, -2.0, 4.0, 5.0],
- [2.0, 3.0, 5.0, 9.0],
- [0.0, 0.0, 9.0, 14.0],
- [-10.0, -10.0, -9.0, -9.0],
- [-100.0, -100.0, 300.0, 600.0]])
- boxes = box_list.BoxList(corners)
- boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
- exp_output = [[5.0, 5.0, 6.0, 6.0],
- [-1.0, -2.0, 4.0, 5.0],
- [2.0, 3.0, 5.0, 9.0],
- [0.0, 0.0, 9.0, 14.0],
- [-100.0, -100.0, 300.0, 600.0]]
- pruned, keep_indices = box_list_ops.prune_completely_outside_window(boxes,
- window)
- with self.test_session() as sess:
- pruned_output = sess.run(pruned.get())
- self.assertAllClose(pruned_output, exp_output)
- keep_indices_out = sess.run(keep_indices)
- self.assertAllEqual(keep_indices_out, [0, 1, 2, 3, 5])
- extra_data_out = sess.run(pruned.get_field('extra_data'))
- self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [6]])
-
- def test_prune_completely_outside_window_with_empty_boxlist(self):
- window = tf.constant([0, 0, 9, 14], tf.float32)
- corners = tf.zeros(shape=[0, 4], dtype=tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('extra_data', tf.zeros(shape=[0], dtype=tf.int32))
- pruned, keep_indices = box_list_ops.prune_completely_outside_window(boxes,
- window)
- pruned_boxes = pruned.get()
- extra = pruned.get_field('extra_data')
-
- exp_pruned_boxes = np.zeros(shape=[0, 4], dtype=np.float32)
- exp_extra = np.zeros(shape=[0], dtype=np.int32)
- with self.test_session() as sess:
- pruned_boxes_out, keep_indices_out, extra_out = sess.run(
- [pruned_boxes, keep_indices, extra])
- self.assertAllClose(exp_pruned_boxes, pruned_boxes_out)
- self.assertAllEqual([], keep_indices_out)
- self.assertAllEqual(exp_extra, extra_out)
-
- def test_intersection(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]])
- exp_output = [[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]]
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- intersect = box_list_ops.intersection(boxes1, boxes2)
- with self.test_session() as sess:
- intersect_output = sess.run(intersect)
- self.assertAllClose(intersect_output, exp_output)
-
- def test_matched_intersection(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]])
- exp_output = [2.0, 0.0]
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- intersect = box_list_ops.matched_intersection(boxes1, boxes2)
- with self.test_session() as sess:
- intersect_output = sess.run(intersect)
- self.assertAllClose(intersect_output, exp_output)
-
- def test_iou(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]])
- exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]]
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- iou = box_list_ops.iou(boxes1, boxes2)
- with self.test_session() as sess:
- iou_output = sess.run(iou)
- self.assertAllClose(iou_output, exp_output)
-
- def test_matched_iou(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]])
- exp_output = [2.0 / 16.0, 0]
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- iou = box_list_ops.matched_iou(boxes1, boxes2)
- with self.test_session() as sess:
- iou_output = sess.run(iou)
- self.assertAllClose(iou_output, exp_output)
-
- def test_iouworks_on_empty_inputs(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]])
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- boxes_empty = box_list.BoxList(tf.zeros((0, 4)))
- iou_empty_1 = box_list_ops.iou(boxes1, boxes_empty)
- iou_empty_2 = box_list_ops.iou(boxes_empty, boxes2)
- iou_empty_3 = box_list_ops.iou(boxes_empty, boxes_empty)
- with self.test_session() as sess:
- iou_output_1, iou_output_2, iou_output_3 = sess.run(
- [iou_empty_1, iou_empty_2, iou_empty_3])
- self.assertAllEqual(iou_output_1.shape, (2, 0))
- self.assertAllEqual(iou_output_2.shape, (0, 3))
- self.assertAllEqual(iou_output_3.shape, (0, 0))
-
- def test_ioa(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]])
- exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0],
- [1.0 / 12.0, 0.0, 5.0 / 400.0]]
- exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0],
- [0, 0],
- [6.0 / 6.0, 5.0 / 5.0]]
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- ioa_1 = box_list_ops.ioa(boxes1, boxes2)
- ioa_2 = box_list_ops.ioa(boxes2, boxes1)
- with self.test_session() as sess:
- ioa_output_1, ioa_output_2 = sess.run([ioa_1, ioa_2])
- self.assertAllClose(ioa_output_1, exp_output_1)
- self.assertAllClose(ioa_output_2, exp_output_2)
-
- def test_prune_non_overlapping_boxes(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]])
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- minoverlap = 0.5
-
- exp_output_1 = boxes1
- exp_output_2 = box_list.BoxList(tf.constant(0.0, shape=[0, 4]))
- output_1, keep_indices_1 = box_list_ops.prune_non_overlapping_boxes(
- boxes1, boxes2, min_overlap=minoverlap)
- output_2, keep_indices_2 = box_list_ops.prune_non_overlapping_boxes(
- boxes2, boxes1, min_overlap=minoverlap)
- with self.test_session() as sess:
- (output_1_, keep_indices_1_, output_2_, keep_indices_2_, exp_output_1_,
- exp_output_2_) = sess.run(
- [output_1.get(), keep_indices_1,
- output_2.get(), keep_indices_2,
- exp_output_1.get(), exp_output_2.get()])
- self.assertAllClose(output_1_, exp_output_1_)
- self.assertAllClose(output_2_, exp_output_2_)
- self.assertAllEqual(keep_indices_1_, [0, 1])
- self.assertAllEqual(keep_indices_2_, [])
-
- def test_prune_small_boxes(self):
- boxes = tf.constant([[4.0, 3.0, 7.0, 5.0],
- [5.0, 6.0, 10.0, 7.0],
- [3.0, 4.0, 6.0, 8.0],
- [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]])
- exp_boxes = [[3.0, 4.0, 6.0, 8.0],
- [0.0, 0.0, 20.0, 20.0]]
- boxes = box_list.BoxList(boxes)
- pruned_boxes = box_list_ops.prune_small_boxes(boxes, 3)
- with self.test_session() as sess:
- pruned_boxes = sess.run(pruned_boxes.get())
- self.assertAllEqual(pruned_boxes, exp_boxes)
-
- def test_prune_small_boxes_prunes_boxes_with_negative_side(self):
- boxes = tf.constant([[4.0, 3.0, 7.0, 5.0],
- [5.0, 6.0, 10.0, 7.0],
- [3.0, 4.0, 6.0, 8.0],
- [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0],
- [2.0, 3.0, 1.5, 7.0], # negative height
- [2.0, 3.0, 5.0, 1.7]]) # negative width
- exp_boxes = [[3.0, 4.0, 6.0, 8.0],
- [0.0, 0.0, 20.0, 20.0]]
- boxes = box_list.BoxList(boxes)
- pruned_boxes = box_list_ops.prune_small_boxes(boxes, 3)
- with self.test_session() as sess:
- pruned_boxes = sess.run(pruned_boxes.get())
- self.assertAllEqual(pruned_boxes, exp_boxes)
-
- def test_change_coordinate_frame(self):
- corners = tf.constant([[0.25, 0.5, 0.75, 0.75], [0.5, 0.0, 1.0, 1.0]])
- window = tf.constant([0.25, 0.25, 0.75, 0.75])
- boxes = box_list.BoxList(corners)
-
- expected_corners = tf.constant([[0, 0.5, 1.0, 1.0], [0.5, -0.5, 1.5, 1.5]])
- expected_boxes = box_list.BoxList(expected_corners)
- output = box_list_ops.change_coordinate_frame(boxes, window)
-
- with self.test_session() as sess:
- output_, expected_boxes_ = sess.run([output.get(), expected_boxes.get()])
- self.assertAllClose(output_, expected_boxes_)
-
- def test_ioa_works_on_empty_inputs(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]])
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- boxes_empty = box_list.BoxList(tf.zeros((0, 4)))
- ioa_empty_1 = box_list_ops.ioa(boxes1, boxes_empty)
- ioa_empty_2 = box_list_ops.ioa(boxes_empty, boxes2)
- ioa_empty_3 = box_list_ops.ioa(boxes_empty, boxes_empty)
- with self.test_session() as sess:
- ioa_output_1, ioa_output_2, ioa_output_3 = sess.run(
- [ioa_empty_1, ioa_empty_2, ioa_empty_3])
- self.assertAllEqual(ioa_output_1.shape, (2, 0))
- self.assertAllEqual(ioa_output_2.shape, (0, 3))
- self.assertAllEqual(ioa_output_3.shape, (0, 0))
-
- def test_pairwise_distances(self):
- corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0],
- [1.0, 1.0, 0.0, 2.0]])
- corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0],
- [-4.0, 0.0, 0.0, 3.0],
- [0.0, 0.0, 0.0, 0.0]])
- exp_output = [[26, 25, 0], [18, 27, 6]]
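- # Worked example: sq_dist treats each box as a point in R^4, so the
- # squared distance between [0, 0, 0, 0] and [3, 4, 1, 0] is
- # 3**2 + 4**2 + 1**2 + 0**2 = 26, matching exp_output[0][0].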
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- dist_matrix = box_list_ops.sq_dist(boxes1, boxes2)
- with self.test_session() as sess:
- dist_output = sess.run(dist_matrix)
- self.assertAllClose(dist_output, exp_output)
-
- def test_boolean_mask(self):
- corners = tf.constant(
- [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
- indicator = tf.constant([True, False, True, False, True], tf.bool)
- expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
- boxes = box_list.BoxList(corners)
- subset = box_list_ops.boolean_mask(boxes, indicator)
- with self.test_session() as sess:
- subset_output = sess.run(subset.get())
- self.assertAllClose(subset_output, expected_subset)
-
- def test_boolean_mask_with_field(self):
- corners = tf.constant(
- [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
- indicator = tf.constant([True, False, True, False, True], tf.bool)
- weights = tf.constant([[.1], [.3], [.5], [.7], [.9]], tf.float32)
- expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
- expected_weights = [[.1], [.5], [.9]]
-
- boxes = box_list.BoxList(corners)
- boxes.add_field('weights', weights)
- subset = box_list_ops.boolean_mask(boxes, indicator, ['weights'])
- with self.test_session() as sess:
- subset_output, weights_output = sess.run(
- [subset.get(), subset.get_field('weights')])
- self.assertAllClose(subset_output, expected_subset)
- self.assertAllClose(weights_output, expected_weights)
-
- def test_gather(self):
- corners = tf.constant(
- [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
- indices = tf.constant([0, 2, 4], tf.int32)
- expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
- boxes = box_list.BoxList(corners)
- subset = box_list_ops.gather(boxes, indices)
- with self.test_session() as sess:
- subset_output = sess.run(subset.get())
- self.assertAllClose(subset_output, expected_subset)
-
- def test_gather_with_field(self):
- corners = tf.constant([4*[0.0], 4*[1.0], 4*[2.0], 4*[3.0], 4*[4.0]])
- indices = tf.constant([0, 2, 4], tf.int32)
- weights = tf.constant([[.1], [.3], [.5], [.7], [.9]], tf.float32)
- expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
- expected_weights = [[.1], [.5], [.9]]
-
- boxes = box_list.BoxList(corners)
- boxes.add_field('weights', weights)
- subset = box_list_ops.gather(boxes, indices, ['weights'])
- with self.test_session() as sess:
- subset_output, weights_output = sess.run(
- [subset.get(), subset.get_field('weights')])
- self.assertAllClose(subset_output, expected_subset)
- self.assertAllClose(weights_output, expected_weights)
-
- def test_gather_with_invalid_field(self):
- corners = tf.constant([4 * [0.0], 4 * [1.0]])
- indices = tf.constant([0, 1], tf.int32)
- weights = tf.constant([[.1], [.3]], tf.float32)
-
- boxes = box_list.BoxList(corners)
- boxes.add_field('weights', weights)
- with self.assertRaises(ValueError):
- box_list_ops.gather(boxes, indices, ['foo', 'bar'])
-
- def test_gather_with_invalid_inputs(self):
- corners = tf.constant(
- [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
- indices_float32 = tf.constant([0, 2, 4], tf.float32)
- boxes = box_list.BoxList(corners)
- with self.assertRaises(ValueError):
- _ = box_list_ops.gather(boxes, indices_float32)
- indices_2d = tf.constant([[0, 2, 4]], tf.int32)
- boxes = box_list.BoxList(corners)
- with self.assertRaises(ValueError):
- _ = box_list_ops.gather(boxes, indices_2d)
-
- def test_gather_with_dynamic_indexing(self):
- corners = tf.constant(
- [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
- weights = tf.constant([.5, .3, .7, .1, .9], tf.float32)
- indices = tf.reshape(tf.where(tf.greater(weights, 0.4)), [-1])
- expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
- expected_weights = [.5, .7, .9]
-
- boxes = box_list.BoxList(corners)
- boxes.add_field('weights', weights)
- subset = box_list_ops.gather(boxes, indices, ['weights'])
- with self.test_session() as sess:
- subset_output, weights_output = sess.run(
- [subset.get(), subset.get_field('weights')])
- self.assertAllClose(subset_output, expected_subset)
- self.assertAllClose(weights_output, expected_weights)
-
- def test_sort_by_field_ascending_order(self):
- exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
- [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
- exp_scores = [.95, .9, .75, .6, .5, .3]
- exp_weights = [.2, .45, .6, .75, .8, .92]
- shuffle = [2, 4, 0, 5, 1, 3]
- corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('scores', tf.constant(
- [exp_scores[i] for i in shuffle], tf.float32))
- boxes.add_field('weights', tf.constant(
- [exp_weights[i] for i in shuffle], tf.float32))
- sort_by_weight = box_list_ops.sort_by_field(
- boxes,
- 'weights',
- order=box_list_ops.SortOrder.ascend)
- with self.test_session() as sess:
- corners_out, scores_out, weights_out = sess.run([
- sort_by_weight.get(),
- sort_by_weight.get_field('scores'),
- sort_by_weight.get_field('weights')])
- self.assertAllClose(corners_out, exp_corners)
- self.assertAllClose(scores_out, exp_scores)
- self.assertAllClose(weights_out, exp_weights)
-
- def test_sort_by_field_descending_order(self):
- exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
- [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
- exp_scores = [.95, .9, .75, .6, .5, .3]
- exp_weights = [.2, .45, .6, .75, .8, .92]
- shuffle = [2, 4, 0, 5, 1, 3]
-
- corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('scores', tf.constant(
- [exp_scores[i] for i in shuffle], tf.float32))
- boxes.add_field('weights', tf.constant(
- [exp_weights[i] for i in shuffle], tf.float32))
-
- sort_by_score = box_list_ops.sort_by_field(boxes, 'scores')
- with self.test_session() as sess:
- corners_out, scores_out, weights_out = sess.run([
- sort_by_score.get(),
- sort_by_score.get_field('scores'),
- sort_by_score.get_field('weights')])
- self.assertAllClose(corners_out, exp_corners)
- self.assertAllClose(scores_out, exp_scores)
- self.assertAllClose(weights_out, exp_weights)
-
- def test_sort_by_field_invalid_inputs(self):
- corners = tf.constant(
- [4 * [0.0], 4 * [0.5], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
- misc = tf.constant([[.95, .9], [.5, .3]], tf.float32)
- weights = tf.constant([.1, .2], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('misc', misc)
- boxes.add_field('weights', weights)
-
- with self.test_session() as sess:
- with self.assertRaises(ValueError):
- box_list_ops.sort_by_field(boxes, 'area')
-
- with self.assertRaises(ValueError):
- box_list_ops.sort_by_field(boxes, 'misc')
-
- if ops._USE_C_API:
- with self.assertRaises(ValueError):
- box_list_ops.sort_by_field(boxes, 'weights')
- else:
- with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
- 'Incorrect field size'):
- sess.run(box_list_ops.sort_by_field(boxes, 'weights').get())
-
- def test_visualize_boxes_in_image(self):
- image = tf.zeros((6, 4, 3))
- corners = tf.constant([[0, 0, 5, 3],
- [0, 0, 3, 2]], tf.float32)
- boxes = box_list.BoxList(corners)
- image_and_boxes = box_list_ops.visualize_boxes_in_image(image, boxes)
- image_and_boxes_bw = tf.to_float(
- tf.greater(tf.reduce_sum(image_and_boxes, 2), 0.0))
- exp_result = [[1, 1, 1, 0],
- [1, 1, 1, 0],
- [1, 1, 1, 0],
- [1, 0, 1, 0],
- [1, 1, 1, 0],
- [0, 0, 0, 0]]
- with self.test_session() as sess:
- output = sess.run(image_and_boxes_bw)
- self.assertAllEqual(output.astype(int), exp_result)
-
- def test_filter_field_value_equals(self):
- corners = tf.constant([[0, 0, 1, 1],
- [0, 0.1, 1, 1.1],
- [0, -0.1, 1, 0.9],
- [0, 10, 1, 11],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101]], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('classes', tf.constant([1, 2, 1, 2, 2, 1]))
- exp_output1 = [[0, 0, 1, 1], [0, -0.1, 1, 0.9], [0, 100, 1, 101]]
- exp_output2 = [[0, 0.1, 1, 1.1], [0, 10, 1, 11], [0, 10.1, 1, 11.1]]
-
- filtered_boxes1 = box_list_ops.filter_field_value_equals(
- boxes, 'classes', 1)
- filtered_boxes2 = box_list_ops.filter_field_value_equals(
- boxes, 'classes', 2)
- with self.test_session() as sess:
- filtered_output1, filtered_output2 = sess.run([filtered_boxes1.get(),
- filtered_boxes2.get()])
- self.assertAllClose(filtered_output1, exp_output1)
- self.assertAllClose(filtered_output2, exp_output2)
-
- def test_filter_greater_than(self):
- corners = tf.constant([[0, 0, 1, 1],
- [0, 0.1, 1, 1.1],
- [0, -0.1, 1, 0.9],
- [0, 10, 1, 11],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101]], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('scores', tf.constant([.1, .75, .9, .5, .5, .8]))
- thresh = .6
- exp_output = [[0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], [0, 100, 1, 101]]
-
- filtered_boxes = box_list_ops.filter_greater_than(boxes, thresh)
- with self.test_session() as sess:
- filtered_output = sess.run(filtered_boxes.get())
- self.assertAllClose(filtered_output, exp_output)
-
- def test_clip_box_list(self):
- boxlist = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
- [0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]], tf.float32))
- boxlist.add_field('classes', tf.constant([0, 0, 1, 1]))
- boxlist.add_field('scores', tf.constant([0.75, 0.65, 0.3, 0.2]))
- num_boxes = 2
- clipped_boxlist = box_list_ops.pad_or_clip_box_list(boxlist, num_boxes)
-
- expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
- expected_classes = [0, 0]
- expected_scores = [0.75, 0.65]
- with self.test_session() as sess:
- boxes_out, classes_out, scores_out = sess.run(
- [clipped_boxlist.get(), clipped_boxlist.get_field('classes'),
- clipped_boxlist.get_field('scores')])
-
- self.assertAllClose(expected_boxes, boxes_out)
- self.assertAllEqual(expected_classes, classes_out)
- self.assertAllClose(expected_scores, scores_out)
-
- def test_pad_box_list(self):
- boxlist = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
- boxlist.add_field('classes', tf.constant([0, 1]))
- boxlist.add_field('scores', tf.constant([0.75, 0.2]))
- num_boxes = 4
- padded_boxlist = box_list_ops.pad_or_clip_box_list(boxlist, num_boxes)
-
- expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
- [0, 0, 0, 0], [0, 0, 0, 0]]
- expected_classes = [0, 1, 0, 0]
- expected_scores = [0.75, 0.2, 0, 0]
- with self.test_session() as sess:
- boxes_out, classes_out, scores_out = sess.run(
- [padded_boxlist.get(), padded_boxlist.get_field('classes'),
- padded_boxlist.get_field('scores')])
-
- self.assertAllClose(expected_boxes, boxes_out)
- self.assertAllEqual(expected_classes, classes_out)
- self.assertAllClose(expected_scores, scores_out)
-
- def test_select_random_box(self):
- boxes = [[0., 0., 1., 1.],
- [0., 1., 2., 3.],
- [0., 2., 3., 4.]]
-
- corners = tf.constant(boxes, dtype=tf.float32)
- boxlist = box_list.BoxList(corners)
- random_bbox, valid = box_list_ops.select_random_box(boxlist)
- with self.test_session() as sess:
- random_bbox_out, valid_out = sess.run([random_bbox, valid])
-
- norm_small = any(
- [np.linalg.norm(random_bbox_out - box) < 1e-6 for box in boxes])
-
- self.assertTrue(norm_small)
- self.assertTrue(valid_out)
-
- def test_select_random_box_with_empty_boxlist(self):
- corners = tf.constant([], shape=[0, 4], dtype=tf.float32)
- boxlist = box_list.BoxList(corners)
- random_bbox, valid = box_list_ops.select_random_box(boxlist)
- with self.test_session() as sess:
- random_bbox_out, valid_out = sess.run([random_bbox, valid])
-
- expected_bbox_out = np.array([[-1., -1., -1., -1.]], dtype=np.float32)
- self.assertAllEqual(expected_bbox_out, random_bbox_out)
- self.assertFalse(valid_out)
-
- def test_get_minimal_coverage_box(self):
- boxes = [[0., 0., 1., 1.],
- [-1., 1., 2., 3.],
- [0., 2., 3., 4.]]
-
- expected_coverage_box = [[-1., 0., 3., 4.]]
-
- corners = tf.constant(boxes, dtype=tf.float32)
- boxlist = box_list.BoxList(corners)
- coverage_box = box_list_ops.get_minimal_coverage_box(boxlist)
- with self.test_session() as sess:
- coverage_box_out = sess.run(coverage_box)
-
- self.assertAllClose(expected_coverage_box, coverage_box_out)
-
- def test_get_minimal_coverage_box_with_empty_boxlist(self):
- corners = tf.constant([], shape=[0, 4], dtype=tf.float32)
- boxlist = box_list.BoxList(corners)
- coverage_box = box_list_ops.get_minimal_coverage_box(boxlist)
- with self.test_session() as sess:
- coverage_box_out = sess.run(coverage_box)
-
- self.assertAllClose([[0.0, 0.0, 1.0, 1.0]], coverage_box_out)
-
-
-class ConcatenateTest(tf.test.TestCase):
-
- def test_invalid_input_box_list_list(self):
- with self.assertRaises(ValueError):
- box_list_ops.concatenate(None)
- with self.assertRaises(ValueError):
- box_list_ops.concatenate([])
- with self.assertRaises(ValueError):
- corners = tf.constant([[0, 0, 0, 0]], tf.float32)
- boxlist = box_list.BoxList(corners)
- box_list_ops.concatenate([boxlist, 2])
-
- def test_concatenate_with_missing_fields(self):
- corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
- scores1 = tf.constant([1.0, 2.1])
- corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32)
- boxlist1 = box_list.BoxList(corners1)
- boxlist1.add_field('scores', scores1)
- boxlist2 = box_list.BoxList(corners2)
- with self.assertRaises(ValueError):
- box_list_ops.concatenate([boxlist1, boxlist2])
-
- def test_concatenate_with_incompatible_field_shapes(self):
- corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
- scores1 = tf.constant([1.0, 2.1])
- corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32)
- scores2 = tf.constant([[1.0, 1.0], [2.1, 3.2]])
- boxlist1 = box_list.BoxList(corners1)
- boxlist1.add_field('scores', scores1)
- boxlist2 = box_list.BoxList(corners2)
- boxlist2.add_field('scores', scores2)
- with self.assertRaises(ValueError):
- box_list_ops.concatenate([boxlist1, boxlist2])
-
- def test_concatenate_is_correct(self):
- corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
- scores1 = tf.constant([1.0, 2.1])
- corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8], [1, 0, 5, 10]],
- tf.float32)
- scores2 = tf.constant([1.0, 2.1, 5.6])
-
- exp_corners = [[0, 0, 0, 0],
- [1, 2, 3, 4],
- [0, 3, 1, 6],
- [2, 4, 3, 8],
- [1, 0, 5, 10]]
- exp_scores = [1.0, 2.1, 1.0, 2.1, 5.6]
-
- boxlist1 = box_list.BoxList(corners1)
- boxlist1.add_field('scores', scores1)
- boxlist2 = box_list.BoxList(corners2)
- boxlist2.add_field('scores', scores2)
- result = box_list_ops.concatenate([boxlist1, boxlist2])
- with self.test_session() as sess:
- corners_output, scores_output = sess.run(
- [result.get(), result.get_field('scores')])
- self.assertAllClose(corners_output, exp_corners)
- self.assertAllClose(scores_output, exp_scores)
-
-
-class NonMaxSuppressionTest(tf.test.TestCase):
-
- def test_select_from_three_clusters(self):
- corners = tf.constant([[0, 0, 1, 1],
- [0, 0.1, 1, 1.1],
- [0, -0.1, 1, 0.9],
- [0, 10, 1, 11],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101]], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
- iou_thresh = .5
- max_output_size = 3
-
- exp_nms = [[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 100, 1, 101]]
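- # NMS visits boxes in descending score order: the .95 box is kept first,
- # boxes whose IOU with an already kept box exceeds 0.5 (e.g.
- # [0, 0.1, 1, 1.1]) are suppressed, and the isolated .3 box survives.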
- nms = box_list_ops.non_max_suppression(
- boxes, iou_thresh, max_output_size)
- with self.test_session() as sess:
- nms_output = sess.run(nms.get())
- self.assertAllClose(nms_output, exp_nms)
-
- def test_select_at_most_two_boxes_from_three_clusters(self):
- corners = tf.constant([[0, 0, 1, 1],
- [0, 0.1, 1, 1.1],
- [0, -0.1, 1, 0.9],
- [0, 10, 1, 11],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101]], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
- iou_thresh = .5
- max_output_size = 2
-
- exp_nms = [[0, 10, 1, 11],
- [0, 0, 1, 1]]
- nms = box_list_ops.non_max_suppression(
- boxes, iou_thresh, max_output_size)
- with self.test_session() as sess:
- nms_output = sess.run(nms.get())
- self.assertAllClose(nms_output, exp_nms)
-
- def test_select_at_most_thirty_boxes_from_three_clusters(self):
- corners = tf.constant([[0, 0, 1, 1],
- [0, 0.1, 1, 1.1],
- [0, -0.1, 1, 0.9],
- [0, 10, 1, 11],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101]], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
- iou_thresh = .5
- max_output_size = 30
-
- exp_nms = [[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 100, 1, 101]]
- nms = box_list_ops.non_max_suppression(
- boxes, iou_thresh, max_output_size)
- with self.test_session() as sess:
- nms_output = sess.run(nms.get())
- self.assertAllClose(nms_output, exp_nms)
-
- def test_select_single_box(self):
- corners = tf.constant([[0, 0, 1, 1]], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('scores', tf.constant([.9]))
- iou_thresh = .5
- max_output_size = 3
-
- exp_nms = [[0, 0, 1, 1]]
- nms = box_list_ops.non_max_suppression(
- boxes, iou_thresh, max_output_size)
- with self.test_session() as sess:
- nms_output = sess.run(nms.get())
- self.assertAllClose(nms_output, exp_nms)
-
- def test_select_from_ten_identical_boxes(self):
- corners = tf.constant(10 * [[0, 0, 1, 1]], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('scores', tf.constant(10 * [.9]))
- iou_thresh = .5
- max_output_size = 3
-
- exp_nms = [[0, 0, 1, 1]]
- nms = box_list_ops.non_max_suppression(
- boxes, iou_thresh, max_output_size)
- with self.test_session() as sess:
- nms_output = sess.run(nms.get())
- self.assertAllClose(nms_output, exp_nms)
-
- def test_copy_extra_fields(self):
- corners = tf.constant([[0, 0, 1, 1],
- [0, 0.1, 1, 1.1]], tf.float32)
- boxes = box_list.BoxList(corners)
- tensor1 = np.array([[1], [4]])
- tensor2 = np.array([[1, 1], [2, 2]])
- boxes.add_field('tensor1', tf.constant(tensor1))
- boxes.add_field('tensor2', tf.constant(tensor2))
- new_boxes = box_list.BoxList(tf.constant([[0, 0, 10, 10],
- [1, 3, 5, 5]], tf.float32))
- new_boxes = box_list_ops._copy_extra_fields(new_boxes, boxes)
- with self.test_session() as sess:
- self.assertAllClose(tensor1, sess.run(new_boxes.get_field('tensor1')))
- self.assertAllClose(tensor2, sess.run(new_boxes.get_field('tensor2')))
-
-
-class CoordinatesConversionTest(tf.test.TestCase):
-
- def test_to_normalized_coordinates(self):
- coordinates = tf.constant([[0, 0, 100, 100],
- [25, 25, 75, 75]], tf.float32)
- img = tf.ones((128, 100, 100, 3))
- boxlist = box_list.BoxList(coordinates)
- normalized_boxlist = box_list_ops.to_normalized_coordinates(
- boxlist, tf.shape(img)[1], tf.shape(img)[2])
- expected_boxes = [[0, 0, 1, 1],
- [0.25, 0.25, 0.75, 0.75]]
-
- with self.test_session() as sess:
- normalized_boxes = sess.run(normalized_boxlist.get())
- self.assertAllClose(normalized_boxes, expected_boxes)
-
- def test_to_normalized_coordinates_already_normalized(self):
- coordinates = tf.constant([[0, 0, 1, 1],
- [0.25, 0.25, 0.75, 0.75]], tf.float32)
- img = tf.ones((128, 100, 100, 3))
- boxlist = box_list.BoxList(coordinates)
- normalized_boxlist = box_list_ops.to_normalized_coordinates(
- boxlist, tf.shape(img)[1], tf.shape(img)[2])
-
- with self.test_session() as sess:
- with self.assertRaisesOpError('assertion failed'):
- sess.run(normalized_boxlist.get())
-
- def test_to_absolute_coordinates(self):
- coordinates = tf.constant([[0, 0, 1, 1],
- [0.25, 0.25, 0.75, 0.75]], tf.float32)
- img = tf.ones((128, 100, 100, 3))
- boxlist = box_list.BoxList(coordinates)
- absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
- tf.shape(img)[1],
- tf.shape(img)[2])
- expected_boxes = [[0, 0, 100, 100],
- [25, 25, 75, 75]]
-
- with self.test_session() as sess:
- absolute_boxes = sess.run(absolute_boxlist.get())
- self.assertAllClose(absolute_boxes, expected_boxes)
-
- def test_to_absolute_coordinates_already_absolute(self):
- coordinates = tf.constant([[0, 0, 100, 100],
- [25, 25, 75, 75]], tf.float32)
- img = tf.ones((128, 100, 100, 3))
- boxlist = box_list.BoxList(coordinates)
- absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
- tf.shape(img)[1],
- tf.shape(img)[2])
-
- with self.test_session() as sess:
- with self.assertRaisesOpError('assertion failed'):
- sess.run(absolute_boxlist.get())
-
- def test_convert_to_normalized_and_back(self):
- coordinates = np.random.uniform(size=(100, 4))
- coordinates = np.round(np.sort(coordinates) * 200)
- coordinates[:, 2:4] += 1
- coordinates[99, :] = [0, 0, 201, 201]
- img = tf.ones((128, 202, 202, 3))
-
- boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
- boxlist = box_list_ops.to_normalized_coordinates(boxlist,
- tf.shape(img)[1],
- tf.shape(img)[2])
- boxlist = box_list_ops.to_absolute_coordinates(boxlist,
- tf.shape(img)[1],
- tf.shape(img)[2])
-
- with self.test_session() as sess:
- out = sess.run(boxlist.get())
- self.assertAllClose(out, coordinates)
-
- def test_convert_to_absolute_and_back(self):
- coordinates = np.random.uniform(size=(100, 4))
- coordinates = np.sort(coordinates)
- coordinates[99, :] = [0, 0, 1, 1]
- img = tf.ones((128, 202, 202, 3))
-
- boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
- boxlist = box_list_ops.to_absolute_coordinates(boxlist,
- tf.shape(img)[1],
- tf.shape(img)[2])
- boxlist = box_list_ops.to_normalized_coordinates(boxlist,
- tf.shape(img)[1],
- tf.shape(img)[2])
-
- with self.test_session() as sess:
- out = sess.run(boxlist.get())
- self.assertAllClose(out, coordinates)
-
- def test_to_absolute_coordinates_maximum_coordinate_check(self):
- coordinates = tf.constant([[0, 0, 1.2, 1.2],
- [0.25, 0.25, 0.75, 0.75]], tf.float32)
- img = tf.ones((128, 100, 100, 3))
- boxlist = box_list.BoxList(coordinates)
- absolute_boxlist = box_list_ops.to_absolute_coordinates(
- boxlist,
- tf.shape(img)[1],
- tf.shape(img)[2],
- maximum_normalized_coordinate=1.1)
-
- with self.test_session() as sess:
- with self.assertRaisesOpError('assertion failed'):
- sess.run(absolute_boxlist.get())
-
-
-class BoxRefinementTest(tf.test.TestCase):
-
- def test_box_voting(self):
- candidates = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4], [0.6, 0.6, 0.8, 0.8]], tf.float32))
- candidates.add_field('ExtraField', tf.constant([1, 2]))
- pool = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
- [0.6, 0.6, 0.8, 0.8]], tf.float32))
- pool.add_field('scores', tf.constant([0.75, 0.25, 0.3]))
- averaged_boxes = box_list_ops.box_voting(candidates, pool)
- expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]]
- expected_scores = [0.5, 0.3]
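- # Worked example: at box_voting's default 0.5 IOU match threshold the
- # first candidate matches pool boxes 0 and 1, so its coordinates become
- # the score-weighted average, e.g. ymax = (0.75 * 0.4 + 0.25 * 0.5) /
- # (0.75 + 0.25) = 0.425, and its new score is the mean (0.75 + 0.25) / 2.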
- with self.test_session() as sess:
- boxes_out, scores_out, extra_field_out = sess.run(
- [averaged_boxes.get(), averaged_boxes.get_field('scores'),
- averaged_boxes.get_field('ExtraField')])
-
- self.assertAllClose(expected_boxes, boxes_out)
- self.assertAllClose(expected_scores, scores_out)
- self.assertAllEqual(extra_field_out, [1, 2])
-
- def test_box_voting_fails_with_negative_scores(self):
- candidates = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
- pool = box_list.BoxList(tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
- pool.add_field('scores', tf.constant([-0.2]))
- averaged_boxes = box_list_ops.box_voting(candidates, pool)
-
- with self.test_session() as sess:
- with self.assertRaisesOpError('Scores must be non negative'):
- sess.run([averaged_boxes.get()])
-
- def test_box_voting_fails_when_unmatched(self):
- candidates = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
- pool = box_list.BoxList(tf.constant([[0.6, 0.6, 0.8, 0.8]], tf.float32))
- pool.add_field('scores', tf.constant([0.2]))
- averaged_boxes = box_list_ops.box_voting(candidates, pool)
-
- with self.test_session() as sess:
- with self.assertRaisesOpError('Each box in selected_boxes must match '
- 'with at least one box in pool_boxes.'):
- sess.run([averaged_boxes.get()])
-
- def test_refine_boxes(self):
- pool = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
- [0.6, 0.6, 0.8, 0.8]], tf.float32))
- pool.add_field('ExtraField', tf.constant([1, 2, 3]))
- pool.add_field('scores', tf.constant([0.75, 0.25, 0.3]))
- refined_boxes = box_list_ops.refine_boxes(pool, 0.5, 10)
-
- expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]]
- expected_scores = [0.5, 0.3]
- with self.test_session() as sess:
- boxes_out, scores_out, extra_field_out = sess.run(
- [refined_boxes.get(), refined_boxes.get_field('scores'),
- refined_boxes.get_field('ExtraField')])
-
- self.assertAllClose(expected_boxes, boxes_out)
- self.assertAllClose(expected_scores, scores_out)
- self.assertAllEqual(extra_field_out, [1, 3])
-
- def test_refine_boxes_multi_class(self):
- pool = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
- [0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]], tf.float32))
- pool.add_field('classes', tf.constant([0, 0, 1, 1]))
- pool.add_field('scores', tf.constant([0.75, 0.25, 0.3, 0.2]))
- refined_boxes = box_list_ops.refine_boxes_multi_class(pool, 3, 0.5, 10)
-
- expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8],
- [0.2, 0.2, 0.3, 0.3]]
- expected_scores = [0.5, 0.3, 0.2]
- with self.test_session() as sess:
- boxes_out, scores_out, extra_field_out = sess.run(
- [refined_boxes.get(), refined_boxes.get_field('scores'),
- refined_boxes.get_field('classes')])
-
- self.assertAllClose(expected_boxes, boxes_out)
- self.assertAllClose(expected_scores, scores_out)
- self.assertAllEqual(extra_field_out, [0, 1, 1])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list_test.py
deleted file mode 100644
index edc00ebbc40227713739e2583fe9fc067e9449e2..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list_test.py
+++ /dev/null
@@ -1,134 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.box_list."""
-
-import tensorflow as tf
-
-from object_detection.core import box_list
-
-
-class BoxListTest(tf.test.TestCase):
- """Tests for BoxList class."""
-
- def test_num_boxes(self):
- data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32)
- expected_num_boxes = 3
-
- boxes = box_list.BoxList(data)
- with self.test_session() as sess:
- num_boxes_output = sess.run(boxes.num_boxes())
- self.assertEqual(num_boxes_output, expected_num_boxes)
-
- def test_get_correct_center_coordinates_and_sizes(self):
- boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
- boxes = box_list.BoxList(tf.constant(boxes))
- centers_sizes = boxes.get_center_coordinates_and_sizes()
- expected_centers_sizes = [[15, 0.35], [12.5, 0.25], [10, 0.3], [5, 0.3]]
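- # get_center_coordinates_and_sizes returns [ycenter, xcenter, height,
- # width], each listing all boxes: the first box [10, 10, 20, 15] has
- # ycenter 15, xcenter 12.5, height 10 and width 5.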
- with self.test_session() as sess:
- centers_sizes_out = sess.run(centers_sizes)
- self.assertAllClose(centers_sizes_out, expected_centers_sizes)
-
- def test_create_box_list_with_dynamic_shape(self):
- data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32)
- indices = tf.reshape(tf.where(tf.greater([1, 0, 1], 0)), [-1])
- data = tf.gather(data, indices)
- assert data.get_shape().as_list() == [None, 4]
- expected_num_boxes = 2
-
- boxes = box_list.BoxList(data)
- with self.test_session() as sess:
- num_boxes_output = sess.run(boxes.num_boxes())
- self.assertEqual(num_boxes_output, expected_num_boxes)
-
- def test_transpose_coordinates(self):
- boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
- boxes = box_list.BoxList(tf.constant(boxes))
- boxes.transpose_coordinates()
- expected_corners = [[10.0, 10.0, 15.0, 20.0], [0.1, 0.2, 0.4, 0.5]]
- with self.test_session() as sess:
- corners_out = sess.run(boxes.get())
- self.assertAllClose(corners_out, expected_corners)
-
- def test_box_list_invalid_inputs(self):
- data0 = tf.constant([[[0, 0, 1, 1], [3, 4, 5, 5]]], tf.float32)
- data1 = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.float32)
- data2 = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.int32)
-
- with self.assertRaises(ValueError):
- _ = box_list.BoxList(data0)
- with self.assertRaises(ValueError):
- _ = box_list.BoxList(data1)
- with self.assertRaises(ValueError):
- _ = box_list.BoxList(data2)
-
- def test_num_boxes_static(self):
- box_corners = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
- boxes = box_list.BoxList(tf.constant(box_corners))
- self.assertEqual(boxes.num_boxes_static(), 2)
- self.assertEqual(type(boxes.num_boxes_static()), int)
-
- def test_num_boxes_static_for_uninferrable_shape(self):
- placeholder = tf.placeholder(tf.float32, shape=[None, 4])
- boxes = box_list.BoxList(placeholder)
- self.assertIsNone(boxes.num_boxes_static())
-
- def test_as_tensor_dict(self):
- boxlist = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
- boxlist.add_field('classes', tf.constant([0, 1]))
- boxlist.add_field('scores', tf.constant([0.75, 0.2]))
- tensor_dict = boxlist.as_tensor_dict()
-
- expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
- expected_classes = [0, 1]
- expected_scores = [0.75, 0.2]
-
- with self.test_session() as sess:
- tensor_dict_out = sess.run(tensor_dict)
- self.assertAllEqual(3, len(tensor_dict_out))
- self.assertAllClose(expected_boxes, tensor_dict_out['boxes'])
- self.assertAllEqual(expected_classes, tensor_dict_out['classes'])
- self.assertAllClose(expected_scores, tensor_dict_out['scores'])
-
- def test_as_tensor_dict_with_features(self):
- boxlist = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
- boxlist.add_field('classes', tf.constant([0, 1]))
- boxlist.add_field('scores', tf.constant([0.75, 0.2]))
- tensor_dict = boxlist.as_tensor_dict(['boxes', 'classes', 'scores'])
-
- expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
- expected_classes = [0, 1]
- expected_scores = [0.75, 0.2]
-
- with self.test_session() as sess:
- tensor_dict_out = sess.run(tensor_dict)
- self.assertAllEqual(3, len(tensor_dict_out))
- self.assertAllClose(expected_boxes, tensor_dict_out['boxes'])
- self.assertAllEqual(expected_classes, tensor_dict_out['classes'])
- self.assertAllClose(expected_scores, tensor_dict_out['scores'])
-
- def test_as_tensor_dict_missing_field(self):
- boxlist = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
- boxlist.add_field('classes', tf.constant([0, 1]))
- boxlist.add_field('scores', tf.constant([0.75, 0.2]))
- with self.assertRaises(ValueError):
- boxlist.as_tensor_dict(['foo', 'bar'])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_predictor.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_predictor.py
deleted file mode 100644
index 78d8242372549fecbdd5442fda2a520850308972..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_predictor.py
+++ /dev/null
@@ -1,963 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Box predictor for object detectors.
-
-Box predictors are classes that take a high level
-image feature map as input and produce two predictions,
-(1) a tensor encoding box locations, and
-(2) a tensor encoding classes for each box.
-
-These components are passed directly to loss functions
-in our detection models.
-
-These modules are separated from the main model since the same
-few box predictor architectures are shared across many models.
-"""
-from abc import abstractmethod
-import math
-import tensorflow as tf
-from object_detection.utils import ops
-from object_detection.utils import shape_utils
-from object_detection.utils import static_shape
-
-slim = tf.contrib.slim
-
-BOX_ENCODINGS = 'box_encodings'
-CLASS_PREDICTIONS_WITH_BACKGROUND = 'class_predictions_with_background'
-MASK_PREDICTIONS = 'mask_predictions'
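-
- # A minimal sketch of how these predictors are driven (hypothetical
- # predictor subclass and feature maps, shown for orientation only):
- #
- #   predictor = SomeBoxPredictor(is_training=True, num_classes=90)
- #   predictions = predictor.predict(
- #       [features_8x8, features_4x4],  # [batch, h_i, w_i, c_i] floats
- #       num_predictions_per_location=[3, 3])
- #   predictions[BOX_ENCODINGS]  # one entry per input feature map
- #   predictions[CLASS_PREDICTIONS_WITH_BACKGROUND]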
-
-
-class BoxPredictor(object):
- """BoxPredictor."""
-
- def __init__(self, is_training, num_classes):
- """Constructor.
-
- Args:
- is_training: Indicates whether the BoxPredictor is in training mode.
- num_classes: number of classes. Note that num_classes *does not*
- include the background category, so if groundtruth labels take values
- in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
- assigned classification targets can range from {0,... K}).
- """
- self._is_training = is_training
- self._num_classes = num_classes
-
- @property
- def num_classes(self):
- return self._num_classes
-
- def predict(self, image_features, num_predictions_per_location,
- scope=None, **params):
- """Computes encoded object locations and corresponding confidences.
-
- Takes a list of high level image feature maps as input and produces a list
- of box encodings and a list of class scores, where each element in the
- output lists corresponds to a feature map in the input list.
-
- Args:
- image_features: A list of float tensors of shape [batch_size, height_i,
- width_i, channels_i] containing features for a batch of images.
- num_predictions_per_location: A list of integers representing the number
- of box predictions to be made per spatial location for each feature map.
- scope: Variable and Op scope name.
- **params: Additional keyword arguments for specific implementations of
- BoxPredictor.
-
- Returns:
- A dictionary containing at least the following tensors.
- box_encodings: A list of float tensors. Each entry in the list
- corresponds to a feature map in the input `image_features` list. All
- tensors in the list have one of the two following shapes:
- a. [batch_size, num_anchors_i, q, code_size] representing the location
- of the objects, where q is 1 or the number of classes.
- b. [batch_size, num_anchors_i, code_size].
- class_predictions_with_background: A list of float tensors of shape
- [batch_size, num_anchors_i, num_classes + 1] representing the class
- predictions for the proposals. Each entry in the list corresponds to a
- feature map in the input `image_features` list.
-
- Raises:
- ValueError: If length of `image_features` is not equal to length of
- `num_predictions_per_location`.
- """
- if len(image_features) != len(num_predictions_per_location):
- raise ValueError('image_features and num_predictions_per_location must '
- 'be of the same length, found: {} vs {}'.format(
- len(image_features),
- len(num_predictions_per_location)))
- if scope is not None:
- with tf.variable_scope(scope):
- return self._predict(image_features, num_predictions_per_location,
- **params)
- return self._predict(image_features, num_predictions_per_location,
- **params)
-
- # TODO(rathodv): num_predictions_per_location could be moved to constructor.
- # This is currently only used by ConvolutionalBoxPredictor.
- @abstractmethod
- def _predict(self, image_features, num_predictions_per_location, **params):
- """Implementations must override this method.
-
- Args:
- image_features: A list of float tensors of shape [batch_size, height_i,
- width_i, channels_i] containing features for a batch of images.
- num_predictions_per_location: A list of integers representing the number
- of box predictions to be made per spatial location for each feature map.
- **params: Additional keyword arguments for specific implementations of
- BoxPredictor.
-
- Returns:
- A dictionary containing at least the following tensors.
- box_encodings: A list of float tensors. Each entry in the list
- corresponds to a feature map in the input `image_features` list. All
- tensors in the list have one of the two following shapes:
- a. [batch_size, num_anchors_i, q, code_size] representing the location
- of the objects, where q is 1 or the number of classes.
- b. [batch_size, num_anchors_i, code_size].
- class_predictions_with_background: A list of float tensors of shape
- [batch_size, num_anchors_i, num_classes + 1] representing the class
- predictions for the proposals. Each entry in the list corresponds to a
- feature map in the input `image_features` list.
- """
- pass
-
-
-class RfcnBoxPredictor(BoxPredictor):
- """RFCN Box Predictor.
-
- Applies a position sensitive ROI pooling on position sensitive feature maps to
- predict classes and refined locations. See https://arxiv.org/abs/1605.06409
- for details.
-
- This is used for the second stage of the RFCN meta architecture. Notice that
- locations are *not* shared across classes, thus for each anchor, a separate
- prediction is made for each class.
- """
-
- def __init__(self,
- is_training,
- num_classes,
- conv_hyperparams_fn,
- num_spatial_bins,
- depth,
- crop_size,
- box_code_size):
- """Constructor.
-
- Args:
- is_training: Indicates whether the BoxPredictor is in training mode.
- num_classes: number of classes. Note that num_classes *does not*
- include the background category, so if groundtruth labels take values
- in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
- assigned classification targets can range from {0,... K}).
- conv_hyperparams_fn: A function to construct tf-slim arg_scope with
- hyperparameters for convolutional layers.
- num_spatial_bins: A list of two integers `[spatial_bins_y,
- spatial_bins_x]`.
- depth: Target depth to reduce the input feature maps to.
- crop_size: A list of two integers `[crop_height, crop_width]`.
- box_code_size: Size of encoding for each box.
- """
- super(RfcnBoxPredictor, self).__init__(is_training, num_classes)
- self._conv_hyperparams_fn = conv_hyperparams_fn
- self._num_spatial_bins = num_spatial_bins
- self._depth = depth
- self._crop_size = crop_size
- self._box_code_size = box_code_size
-
- @property
- def num_classes(self):
- return self._num_classes
-
- def _predict(self, image_features, num_predictions_per_location,
- proposal_boxes):
- """Computes encoded object locations and corresponding confidences.
-
- Args:
- image_features: A list of float tensors of shape [batch_size, height_i,
- width_i, channels_i] containing features for a batch of images.
- num_predictions_per_location: A list of integers representing the number
- of box predictions to be made per spatial location for each feature map.
- Currently, this must be set to [1], or an error will be raised.
- proposal_boxes: A float tensor of shape [batch_size, num_proposals,
- box_code_size].
-
- Returns:
- box_encodings: A list of float tensors of shape
- [batch_size, num_anchors_i, q, code_size] representing the location of
- the objects, where q is 1 or the number of classes. Each entry in the
- list corresponds to a feature map in the input `image_features` list.
- class_predictions_with_background: A list of float tensors of shape
- [batch_size, num_anchors_i, num_classes + 1] representing the class
- predictions for the proposals. Each entry in the list corresponds to a
- feature map in the input `image_features` list.
-
- Raises:
- ValueError: if num_predictions_per_location is not 1 or if
- len(image_features) is not 1.
- """
- if (len(num_predictions_per_location) != 1 or
- num_predictions_per_location[0] != 1):
- raise ValueError('Currently RfcnBoxPredictor only supports '
- 'predicting a single box per class per location.')
- if len(image_features) != 1:
- raise ValueError('length of `image_features` must be 1. Found {}'.
- format(len(image_features)))
- image_feature = image_features[0]
- num_predictions_per_location = num_predictions_per_location[0]
- batch_size = tf.shape(proposal_boxes)[0]
- num_boxes = tf.shape(proposal_boxes)[1]
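- # get_box_indices maps every proposal row to the index of the image it
- # came from, producing [0, ..., 0, 1, ..., 1, ...] of length
- # batch_size * num_boxes, as required by the box_ind argument of the
- # position-sensitive crops below.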
- def get_box_indices(proposals):
- proposals_shape = proposals.get_shape().as_list()
- if any(dim is None for dim in proposals_shape):
- proposals_shape = tf.shape(proposals)
- ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
- multiplier = tf.expand_dims(
- tf.range(start=0, limit=proposals_shape[0]), 1)
- return tf.reshape(ones_mat * multiplier, [-1])
-
- net = image_feature
- with slim.arg_scope(self._conv_hyperparams_fn()):
- net = slim.conv2d(net, self._depth, [1, 1], scope='reduce_depth')
- # Location predictions.
- location_feature_map_depth = (self._num_spatial_bins[0] *
- self._num_spatial_bins[1] *
- self.num_classes *
- self._box_code_size)
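- # E.g. (illustrative numbers, not defaults): num_spatial_bins=[3, 3],
- # 90 classes and a 4-d box code give 3 * 3 * 90 * 4 = 3240 channels.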
- location_feature_map = slim.conv2d(net, location_feature_map_depth,
- [1, 1], activation_fn=None,
- scope='refined_locations')
- box_encodings = ops.position_sensitive_crop_regions(
- location_feature_map,
- boxes=tf.reshape(proposal_boxes, [-1, self._box_code_size]),
- box_ind=get_box_indices(proposal_boxes),
- crop_size=self._crop_size,
- num_spatial_bins=self._num_spatial_bins,
- global_pool=True)
- box_encodings = tf.squeeze(box_encodings, squeeze_dims=[1, 2])
- box_encodings = tf.reshape(box_encodings,
- [batch_size * num_boxes, 1, self.num_classes,
- self._box_code_size])
-
- # Class predictions.
- total_classes = self.num_classes + 1 # Account for background class.
- class_feature_map_depth = (self._num_spatial_bins[0] *
- self._num_spatial_bins[1] *
- total_classes)
- class_feature_map = slim.conv2d(net, class_feature_map_depth, [1, 1],
- activation_fn=None,
- scope='class_predictions')
- class_predictions_with_background = ops.position_sensitive_crop_regions(
- class_feature_map,
- boxes=tf.reshape(proposal_boxes, [-1, self._box_code_size]),
- box_ind=get_box_indices(proposal_boxes),
- crop_size=self._crop_size,
- num_spatial_bins=self._num_spatial_bins,
- global_pool=True)
- class_predictions_with_background = tf.squeeze(
- class_predictions_with_background, squeeze_dims=[1, 2])
- class_predictions_with_background = tf.reshape(
- class_predictions_with_background,
- [batch_size * num_boxes, 1, total_classes])
-
- return {BOX_ENCODINGS: [box_encodings],
- CLASS_PREDICTIONS_WITH_BACKGROUND:
- [class_predictions_with_background]}
-
-
-# TODO(rathodv): Change the implementation to return lists of predictions.
-class MaskRCNNBoxPredictor(BoxPredictor):
- """Mask R-CNN Box Predictor.
-
- See Mask R-CNN: He, K., Gkioxari, G., Dollar, P., & Girshick, R. (2017).
- Mask R-CNN. arXiv preprint arXiv:1703.06870.
-
- This is used for the second stage of the Mask R-CNN detector where proposals
- cropped from an image are arranged along the batch dimension of the input
- image_features tensor. Notice that locations are *not* shared across classes,
- thus for each anchor, a separate prediction is made for each class.
-
- In addition to predicting boxes and classes, optionally this class allows
- predicting masks and/or keypoints inside detection boxes.
-
- Currently this box predictor makes per-class predictions; that is, each
- anchor makes a separate box prediction for each class.
- """
-
- def __init__(self,
- is_training,
- num_classes,
- fc_hyperparams_fn,
- use_dropout,
- dropout_keep_prob,
- box_code_size,
- conv_hyperparams_fn=None,
- predict_instance_masks=False,
- mask_height=14,
- mask_width=14,
- mask_prediction_num_conv_layers=2,
- mask_prediction_conv_depth=256,
- masks_are_class_agnostic=False,
- predict_keypoints=False,
- share_box_across_classes=False):
- """Constructor.
-
- Args:
- is_training: Indicates whether the BoxPredictor is in training mode.
- num_classes: number of classes. Note that num_classes *does not*
- include the background category, so if groundtruth labels take values
- in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
- assigned classification targets can range from {0,... K}).
- fc_hyperparams_fn: A function to generate tf-slim arg_scope with
- hyperparameters for fully connected ops.
- use_dropout: Option to use dropout or not. Note that a single dropout
- op is applied here prior to both box and class predictions, which stands
- in contrast to the ConvolutionalBoxPredictor below.
- dropout_keep_prob: Keep probability for dropout.
- This is only used if use_dropout is True.
- box_code_size: Size of encoding for each box.
- conv_hyperparams_fn: A function to generate tf-slim arg_scope with
- hyperparameters for convolution ops.
- predict_instance_masks: Whether to predict object masks inside detection
- boxes.
- mask_height: Desired output mask height. The default value is 14.
- mask_width: Desired output mask width. The default value is 14.
- mask_prediction_num_conv_layers: Number of convolution layers applied to
- the image_features in mask prediction branch.
- mask_prediction_conv_depth: The depth for the first conv2d_transpose op
- applied to the image_features in the mask prediction branch. If set
- to 0, the depth of the convolution layers will be automatically chosen
- based on the number of object classes and the number of channels in the
- image features.
- masks_are_class_agnostic: Boolean determining if the mask-head is
- class-agnostic or not.
- predict_keypoints: Whether to predict keypoints inside detection boxes.
- share_box_across_classes: Whether to share boxes across classes rather
- than use a different box for each class.
-
- Raises:
- ValueError: If predict_instance_masks is true but conv_hyperparams is not
- set.
- ValueError: If predict_keypoints is true since it is not implemented yet.
- ValueError: If mask_prediction_num_conv_layers is smaller than two.
- """
- super(MaskRCNNBoxPredictor, self).__init__(is_training, num_classes)
- self._fc_hyperparams_fn = fc_hyperparams_fn
- self._use_dropout = use_dropout
- self._box_code_size = box_code_size
- self._dropout_keep_prob = dropout_keep_prob
- self._conv_hyperparams_fn = conv_hyperparams_fn
- self._predict_instance_masks = predict_instance_masks
- self._mask_height = mask_height
- self._mask_width = mask_width
- self._mask_prediction_num_conv_layers = mask_prediction_num_conv_layers
- self._mask_prediction_conv_depth = mask_prediction_conv_depth
- self._masks_are_class_agnostic = masks_are_class_agnostic
- self._predict_keypoints = predict_keypoints
- self._share_box_across_classes = share_box_across_classes
- if self._predict_keypoints:
- raise ValueError('Keypoint prediction is unimplemented.')
- if ((self._predict_instance_masks or self._predict_keypoints) and
- self._conv_hyperparams_fn is None):
- raise ValueError('`conv_hyperparams` must be provided when predicting '
- 'masks.')
- if self._mask_prediction_num_conv_layers < 2:
- raise ValueError(
- 'Mask prediction should consist of at least 2 conv layers')
-
- @property
- def num_classes(self):
- return self._num_classes
-
- @property
- def predicts_instance_masks(self):
- return self._predict_instance_masks
-
- def _predict_boxes_and_classes(self, image_features):
- """Predicts boxes and class scores.
-
- Args:
- image_features: A float tensor of shape [batch_size, height, width,
- channels] containing features for a batch of images.
-
- Returns:
- box_encodings: A float tensor of shape
- [batch_size, 1, num_classes, code_size] representing the location of the
- objects.
- class_predictions_with_background: A float tensor of shape
- [batch_size, 1, num_classes + 1] representing the class predictions for
- the proposals.
- """
- spatial_averaged_image_features = tf.reduce_mean(image_features, [1, 2],
- keep_dims=True,
- name='AvgPool')
- flattened_image_features = slim.flatten(spatial_averaged_image_features)
- if self._use_dropout:
- flattened_image_features = slim.dropout(flattened_image_features,
- keep_prob=self._dropout_keep_prob,
- is_training=self._is_training)
- number_of_boxes = 1
- if not self._share_box_across_classes:
- number_of_boxes = self._num_classes
-
- with slim.arg_scope(self._fc_hyperparams_fn()):
- box_encodings = slim.fully_connected(
- flattened_image_features,
- number_of_boxes * self._box_code_size,
- activation_fn=None,
- scope='BoxEncodingPredictor')
- class_predictions_with_background = slim.fully_connected(
- flattened_image_features,
- self._num_classes + 1,
- activation_fn=None,
- scope='ClassPredictor')
- box_encodings = tf.reshape(
- box_encodings, [-1, 1, number_of_boxes, self._box_code_size])
- class_predictions_with_background = tf.reshape(
- class_predictions_with_background, [-1, 1, self._num_classes + 1])
- return box_encodings, class_predictions_with_background
-
- def _get_mask_predictor_conv_depth(self, num_feature_channels, num_classes,
- class_weight=3.0, feature_weight=2.0):
- """Computes the depth of the mask predictor convolutions.
-
- Computes the depth of the mask predictor convolutions given feature channels
- and number of classes by performing a weighted average of the two in
- log space to compute the number of convolution channels. The weights that
- are used for computing the weighted average do not need to sum to 1.
-
- Args:
- num_feature_channels: An integer containing the number of feature
- channels.
- num_classes: An integer containing the number of classes.
- class_weight: Class weight used in computing the weighted average.
- feature_weight: Feature weight used in computing the weighted average.
-
- Returns:
- An integer containing the number of convolution channels used by mask
- predictor.
- """
- num_feature_channels_log = math.log(float(num_feature_channels), 2.0)
- num_classes_log = math.log(float(num_classes), 2.0)
- weighted_num_feature_channels_log = (
- num_feature_channels_log * feature_weight)
- weighted_num_classes_log = num_classes_log * class_weight
- total_weight = feature_weight + class_weight
- num_conv_channels_log = round(
- (weighted_num_feature_channels_log + weighted_num_classes_log) /
- total_weight)
- return int(math.pow(2.0, num_conv_channels_log))
-
- def _predict_masks(self, image_features):
- """Performs mask prediction.
-
- Args:
- image_features: A float tensor of shape [batch_size, height, width,
- channels] containing features for a batch of images.
-
- Returns:
- instance_masks: A float tensor of shape
- [batch_size, 1, num_classes, image_height, image_width].
- """
- num_conv_channels = self._mask_prediction_conv_depth
- if num_conv_channels == 0:
- num_feature_channels = image_features.get_shape().as_list()[3]
- num_conv_channels = self._get_mask_predictor_conv_depth(
- num_feature_channels, self.num_classes)
- with slim.arg_scope(self._conv_hyperparams_fn()):
- upsampled_features = tf.image.resize_bilinear(
- image_features,
- [self._mask_height, self._mask_width],
- align_corners=True)
- for _ in range(self._mask_prediction_num_conv_layers - 1):
- upsampled_features = slim.conv2d(
- upsampled_features,
- num_outputs=num_conv_channels,
- kernel_size=[3, 3])
- num_masks = 1 if self._masks_are_class_agnostic else self.num_classes
- mask_predictions = slim.conv2d(upsampled_features,
- num_outputs=num_masks,
- activation_fn=None,
- kernel_size=[3, 3])
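- # mask_predictions is [batch, mask_height, mask_width, num_masks]; the
- # transpose and expand_dims below rearrange it into the documented
- # [batch, 1, num_masks, mask_height, mask_width] layout.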
- return tf.expand_dims(
- tf.transpose(mask_predictions, perm=[0, 3, 1, 2]),
- axis=1,
- name='MaskPredictor')
-
- def _predict(self, image_features, num_predictions_per_location,
- predict_boxes_and_classes=True, predict_auxiliary_outputs=False):
- """Optionally computes encoded object locations, confidences, and masks.
-
- Flattens image_features and applies fully connected ops (with no
- non-linearity) to predict box encodings and class predictions. In this
- setting, anchors are not spatially arranged in any way and are assumed to
- have been folded into the batch dimension. Thus we output 1 for the
- anchors dimension.
-
- Also optionally predicts instance masks.
- The mask prediction head is based on the Mask RCNN paper with the following
- modifications: We replace the deconvolution layer with a bilinear resize
- and a convolution.
-
- Args:
- image_features: A list of float tensors of shape [batch_size, height_i,
- width_i, channels_i] containing features for a batch of images.
- num_predictions_per_location: A list of integers representing the number
- of box predictions to be made per spatial location for each feature map.
- Currently, this must be set to [1], or an error will be raised.
- predict_boxes_and_classes: If true, the function will perform box
- refinement and classification.
- predict_auxiliary_outputs: If true, the function will perform other
- predictions such as mask, keypoint, boundaries, etc. if any.
-
- Returns:
- A dictionary containing the following tensors.
- box_encodings: A float tensor of shape
- [batch_size, 1, num_classes, code_size] representing the
- location of the objects.
- class_predictions_with_background: A float tensor of shape
- [batch_size, 1, num_classes + 1] representing the class
- predictions for the proposals.
- If predict_masks is True the dictionary also contains:
- instance_masks: A float tensor of shape
- [batch_size, 1, num_classes, image_height, image_width]
- If predict_keypoints is True the dictionary also contains:
- keypoints: [batch_size, 1, num_keypoints, 2]
-
- Raises:
- ValueError: If num_predictions_per_location is not 1 or if both
- predict_boxes_and_classes and predict_auxiliary_outputs are false or if
- len(image_features) is not 1.
- """
- if (len(num_predictions_per_location) != 1 or
- num_predictions_per_location[0] != 1):
- raise ValueError('Currently MaskRCNNBoxPredictor only supports '
- 'predicting a single box per class per location.')
- if not predict_boxes_and_classes and not predict_auxiliary_outputs:
- raise ValueError('Should perform at least one prediction.')
- if len(image_features) != 1:
- raise ValueError('length of `image_features` must be 1. Found {}'.
- format(len(image_features)))
- image_feature = image_features[0]
- num_predictions_per_location = num_predictions_per_location[0]
- predictions_dict = {}
-
- if predict_boxes_and_classes:
- (box_encodings, class_predictions_with_background
- ) = self._predict_boxes_and_classes(image_feature)
- predictions_dict[BOX_ENCODINGS] = box_encodings
- predictions_dict[
- CLASS_PREDICTIONS_WITH_BACKGROUND] = class_predictions_with_background
-
- if self._predict_instance_masks and predict_auxiliary_outputs:
- predictions_dict[MASK_PREDICTIONS] = self._predict_masks(image_feature)
-
- return predictions_dict
-
-
-class _NoopVariableScope(object):
- """A dummy class that does not push any scope."""
-
- def __enter__(self):
- return None
-
- def __exit__(self, exc_type, exc_value, traceback):
- return False
-
-
-class ConvolutionalBoxPredictor(BoxPredictor):
- """Convolutional Box Predictor.
-
- Optionally add an intermediate 1x1 convolutional layer after features and
- predict in parallel branches box_encodings and
- class_predictions_with_background.
-
- Currently this box predictor assumes that predictions are "shared" across
- classes --- that is each anchor makes box predictions which do not depend
- on class.
- """
-
- def __init__(self,
- is_training,
- num_classes,
- conv_hyperparams_fn,
- min_depth,
- max_depth,
- num_layers_before_predictor,
- use_dropout,
- dropout_keep_prob,
- kernel_size,
- box_code_size,
- apply_sigmoid_to_scores=False,
- class_prediction_bias_init=0.0,
- use_depthwise=False):
- """Constructor.
-
- Args:
- is_training: Indicates whether the BoxPredictor is in training mode.
- num_classes: number of classes. Note that num_classes *does not*
- include the background category, so if groundtruth labels take values
- in {0, 1, ..., K-1}, num_classes=K (and not K+1, even though the
- assigned classification targets can range over {0, ..., K}).
- conv_hyperparams_fn: A function to generate tf-slim arg_scope with
- hyperparameters for convolution ops.
- min_depth: Minimum feature depth prior to predicting box encodings
- and class predictions.
- max_depth: Maximum feature depth prior to predicting box encodings
- and class predictions. If max_depth is set to 0, no additional
- feature map will be inserted before location and class predictions.
- num_layers_before_predictor: Number of additional conv layers before
- the predictor.
- use_dropout: Whether to use dropout for class prediction.
- dropout_keep_prob: Keep probability for dropout.
- This is only used if use_dropout is True.
- kernel_size: Size of final convolution kernel. If the
- spatial resolution of the feature map is smaller than the kernel size,
- then the kernel size is automatically set to be
- min(feature_width, feature_height).
- box_code_size: Size of encoding for each box.
- apply_sigmoid_to_scores: if True, apply a sigmoid to the output
- class_predictions.
- class_prediction_bias_init: constant value to initialize bias of the last
- conv2d layer before class prediction.
- use_depthwise: Whether to use depthwise convolutions for prediction
- steps. Default is False.
-
- Raises:
- ValueError: if min_depth > max_depth.
- """
- super(ConvolutionalBoxPredictor, self).__init__(is_training, num_classes)
- if min_depth > max_depth:
- raise ValueError('min_depth should be less than or equal to max_depth')
- self._conv_hyperparams_fn = conv_hyperparams_fn
- self._min_depth = min_depth
- self._max_depth = max_depth
- self._num_layers_before_predictor = num_layers_before_predictor
- self._use_dropout = use_dropout
- self._kernel_size = kernel_size
- self._box_code_size = box_code_size
- self._dropout_keep_prob = dropout_keep_prob
- self._apply_sigmoid_to_scores = apply_sigmoid_to_scores
- self._class_prediction_bias_init = class_prediction_bias_init
- self._use_depthwise = use_depthwise
-
- def _predict(self, image_features, num_predictions_per_location_list):
- """Computes encoded object locations and corresponding confidences.
-
- Args:
- image_features: A list of float tensors of shape [batch_size, height_i,
- width_i, channels_i] containing features for a batch of images.
- num_predictions_per_location_list: A list of integers representing the
- number of box predictions to be made per spatial location for each
- feature map.
-
- Returns:
- box_encodings: A list of float tensors of shape
- [batch_size, num_anchors_i, q, code_size] representing the location of
- the objects, where q is 1 or the number of classes. Each entry in the
- list corresponds to a feature map in the input `image_features` list.
- class_predictions_with_background: A list of float tensors of shape
- [batch_size, num_anchors_i, num_classes + 1] representing the class
- predictions for the proposals. Each entry in the list corresponds to a
- feature map in the input `image_features` list.
- """
- box_encodings_list = []
- class_predictions_list = []
- # TODO(rathodv): Come up with a better way to generate scope names
- # in box predictor once we have time to retrain all models in the zoo.
- # The following lines create scope names to be backwards compatible with the
- # existing checkpoints.
- box_predictor_scopes = [_NoopVariableScope()]
- if len(image_features) > 1:
- box_predictor_scopes = [
- tf.variable_scope('BoxPredictor_{}'.format(i))
- for i in range(len(image_features))
- ]
-
- for (image_feature,
- num_predictions_per_location, box_predictor_scope) in zip(
- image_features, num_predictions_per_location_list,
- box_predictor_scopes):
- with box_predictor_scope:
- # Add a slot for the background class.
- num_class_slots = self.num_classes + 1
- net = image_feature
- with slim.arg_scope(self._conv_hyperparams_fn()), \
- slim.arg_scope([slim.dropout], is_training=self._is_training):
- # Add additional conv layers before the class predictor.
- features_depth = static_shape.get_depth(image_feature.get_shape())
- depth = max(min(features_depth, self._max_depth), self._min_depth)
- tf.logging.info('depth of additional conv before box predictor: {}'.
- format(depth))
- if depth > 0 and self._num_layers_before_predictor > 0:
- for i in range(self._num_layers_before_predictor):
- net = slim.conv2d(
- net, depth, [1, 1], scope='Conv2d_%d_1x1_%d' % (i, depth))
- with slim.arg_scope([slim.conv2d], activation_fn=None,
- normalizer_fn=None, normalizer_params=None):
- if self._use_depthwise:
- box_encodings = slim.separable_conv2d(
- net, None, [self._kernel_size, self._kernel_size],
- padding='SAME', depth_multiplier=1, stride=1,
- rate=1, scope='BoxEncodingPredictor_depthwise')
- box_encodings = slim.conv2d(
- box_encodings,
- num_predictions_per_location * self._box_code_size, [1, 1],
- scope='BoxEncodingPredictor')
- else:
- box_encodings = slim.conv2d(
- net, num_predictions_per_location * self._box_code_size,
- [self._kernel_size, self._kernel_size],
- scope='BoxEncodingPredictor')
- if self._use_dropout:
- net = slim.dropout(net, keep_prob=self._dropout_keep_prob)
- if self._use_depthwise:
- class_predictions_with_background = slim.separable_conv2d(
- net, None, [self._kernel_size, self._kernel_size],
- padding='SAME', depth_multiplier=1, stride=1,
- rate=1, scope='ClassPredictor_depthwise')
- class_predictions_with_background = slim.conv2d(
- class_predictions_with_background,
- num_predictions_per_location * num_class_slots,
- [1, 1], scope='ClassPredictor')
- else:
- class_predictions_with_background = slim.conv2d(
- net, num_predictions_per_location * num_class_slots,
- [self._kernel_size, self._kernel_size],
- scope='ClassPredictor',
- biases_initializer=tf.constant_initializer(
- self._class_prediction_bias_init))
- if self._apply_sigmoid_to_scores:
- class_predictions_with_background = tf.sigmoid(
- class_predictions_with_background)
-
- combined_feature_map_shape = (
- shape_utils.combined_static_and_dynamic_shape(image_feature))
- box_encodings = tf.reshape(
- box_encodings, tf.stack([combined_feature_map_shape[0],
- combined_feature_map_shape[1] *
- combined_feature_map_shape[2] *
- num_predictions_per_location,
- 1, self._box_code_size]))
- box_encodings_list.append(box_encodings)
- class_predictions_with_background = tf.reshape(
- class_predictions_with_background,
- tf.stack([combined_feature_map_shape[0],
- combined_feature_map_shape[1] *
- combined_feature_map_shape[2] *
- num_predictions_per_location,
- num_class_slots]))
- class_predictions_list.append(class_predictions_with_background)
- return {
- BOX_ENCODINGS: box_encodings_list,
- CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_list
- }
-
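-# A minimal usage sketch for ConvolutionalBoxPredictor (see the accompanying
-# box_predictor_test.py); `conv_hyperparams_fn` is assumed to be a tf-slim
-# arg_scope builder.
-#
-#   predictor = ConvolutionalBoxPredictor(
-#       is_training=False, num_classes=0,
-#       conv_hyperparams_fn=conv_hyperparams_fn,
-#       min_depth=0, max_depth=32, num_layers_before_predictor=1,
-#       use_dropout=True, dropout_keep_prob=0.8, kernel_size=1,
-#       box_code_size=4)
-#   predictions = predictor.predict(
-#       [tf.random_uniform([4, 8, 8, 64])],
-#       num_predictions_per_location=[5], scope='BoxPredictor')
-#   # An 8x8 map with 5 predictions per location yields 320 anchors, so
-#   # predictions[BOX_ENCODINGS][0] has shape [4, 320, 1, 4] and
-#   # predictions[CLASS_PREDICTIONS_WITH_BACKGROUND][0] has shape [4, 320, 1].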
-
-# TODO(rathodv): Replace with slim.arg_scope_func_key once its available
-# externally.
-def _arg_scope_func_key(op):
- """Returns a key that can be used to index arg_scope dictionary."""
- return getattr(op, '_key_op', str(op))
-
-
-# TODO(rathodv): Merge the implementation with ConvolutionalBoxPredictor above
-# since they are very similar.
-class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
- """Convolutional Box Predictor with weight sharing.
-
- Defines the box predictor as defined in
- https://arxiv.org/abs/1708.02002. This class differs from
- ConvolutionalBoxPredictor in that it shares weights and biases while
- predicting from different feature maps. However, batch_norm parameters are not
- shared because the statistics of the activations vary among the different
- feature maps.
-
- Also note that separate multi-layer towers are constructed for the box
- encoding and class predictors respectively.
- """
-
- def __init__(self,
- is_training,
- num_classes,
- conv_hyperparams_fn,
- depth,
- num_layers_before_predictor,
- box_code_size,
- kernel_size=3,
- class_prediction_bias_init=0.0,
- use_dropout=False,
- dropout_keep_prob=0.8):
- """Constructor.
-
- Args:
- is_training: Indicates whether the BoxPredictor is in training mode.
- num_classes: number of classes. Note that num_classes *does not*
- include the background category, so if groundtruth labels take values
- in {0, 1, ..., K-1}, num_classes=K (and not K+1, even though the
- assigned classification targets can range over {0, ..., K}).
- conv_hyperparams_fn: A function to generate tf-slim arg_scope with
- hyperparameters for convolution ops.
- depth: depth of conv layers.
- num_layers_before_predictor: Number of additional conv layers before
- the predictor.
- box_code_size: Size of encoding for each box.
- kernel_size: Size of final convolution kernel.
- class_prediction_bias_init: constant value to initialize bias of the last
- conv2d layer before class prediction.
- use_dropout: Whether to apply dropout to the class prediction head.
- dropout_keep_prob: Probability of keeping activations.
- """
- super(WeightSharedConvolutionalBoxPredictor, self).__init__(is_training,
- num_classes)
- self._conv_hyperparams_fn = conv_hyperparams_fn
- self._depth = depth
- self._num_layers_before_predictor = num_layers_before_predictor
- self._box_code_size = box_code_size
- self._kernel_size = kernel_size
- self._class_prediction_bias_init = class_prediction_bias_init
- self._use_dropout = use_dropout
- self._dropout_keep_prob = dropout_keep_prob
-
- def _predict(self, image_features, num_predictions_per_location_list):
- """Computes encoded object locations and corresponding confidences.
-
- Args:
- image_features: A list of float tensors of shape [batch_size, height_i,
- width_i, channels] containing features for a batch of images. Note that
- all tensors in the list must have the same number of channels.
- num_predictions_per_location_list: A list of integers representing the
- number of box predictions to be made per spatial location for each
- feature map. Note that all values must be the same since the weights are
- shared.
-
- Returns:
- box_encodings: A list of float tensors of shape
- [batch_size, num_anchors_i, code_size] representing the location of
- the objects. Each entry in the list corresponds to a feature map in the
- input `image_features` list.
- class_predictions_with_background: A list of float tensors of shape
- [batch_size, num_anchors_i, num_classes + 1] representing the class
- predictions for the proposals. Each entry in the list corresponds to a
- feature map in the input `image_features` list.
-
- Raises:
- ValueError: If the image feature maps do not have the same number of
- channels or if the number of predictions per location differs between
- the feature maps.
- """
- if len(set(num_predictions_per_location_list)) > 1:
- raise ValueError('num predictions per location must be the same for all '
- 'feature maps, found: {}'.format(
- num_predictions_per_location_list))
- feature_channels = [
- image_feature.shape[3].value for image_feature in image_features
- ]
- if len(set(feature_channels)) > 1:
- raise ValueError('all feature maps must have the same number of '
- 'channels, found: {}'.format(feature_channels))
- box_encodings_list = []
- class_predictions_list = []
- for feature_index, (image_feature,
- num_predictions_per_location) in enumerate(
- zip(image_features,
- num_predictions_per_location_list)):
- # Add a slot for the background class.
- with tf.variable_scope('WeightSharedConvolutionalBoxPredictor',
- reuse=tf.AUTO_REUSE):
- num_class_slots = self.num_classes + 1
- box_encodings_net = image_feature
- class_predictions_net = image_feature
- with slim.arg_scope(self._conv_hyperparams_fn()) as sc:
- apply_batch_norm = _arg_scope_func_key(slim.batch_norm) in sc
- for i in range(self._num_layers_before_predictor):
- box_encodings_net = slim.conv2d(
- box_encodings_net,
- self._depth,
- [self._kernel_size, self._kernel_size],
- stride=1,
- padding='SAME',
- activation_fn=None,
- normalizer_fn=(tf.identity if apply_batch_norm else None),
- scope='BoxPredictionTower/conv2d_{}'.format(i))
- if apply_batch_norm:
- box_encodings_net = slim.batch_norm(
- box_encodings_net,
- scope='BoxPredictionTower/conv2d_{}/BatchNorm/feature_{}'.
- format(i, feature_index))
- box_encodings_net = tf.nn.relu6(box_encodings_net)
- box_encodings = slim.conv2d(
- box_encodings_net,
- num_predictions_per_location * self._box_code_size,
- [self._kernel_size, self._kernel_size],
- activation_fn=None, stride=1, padding='SAME',
- normalizer_fn=None,
- scope='BoxPredictor')
-
- for i in range(self._num_layers_before_predictor):
- class_predictions_net = slim.conv2d(
- class_predictions_net,
- self._depth,
- [self._kernel_size, self._kernel_size],
- stride=1,
- padding='SAME',
- activation_fn=None,
- normalizer_fn=(tf.identity if apply_batch_norm else None),
- scope='ClassPredictionTower/conv2d_{}'.format(i))
- if apply_batch_norm:
- class_predictions_net = slim.batch_norm(
- class_predictions_net,
- scope='ClassPredictionTower/conv2d_{}/BatchNorm/feature_{}'
- .format(i, feature_index))
- class_predictions_net = tf.nn.relu6(class_predictions_net)
- if self._use_dropout:
- class_predictions_net = slim.dropout(
- class_predictions_net, keep_prob=self._dropout_keep_prob)
- class_predictions_with_background = slim.conv2d(
- class_predictions_net,
- num_predictions_per_location * num_class_slots,
- [self._kernel_size, self._kernel_size],
- activation_fn=None, stride=1, padding='SAME',
- normalizer_fn=None,
- biases_initializer=tf.constant_initializer(
- self._class_prediction_bias_init),
- scope='ClassPredictor')
-
- combined_feature_map_shape = (
- shape_utils.combined_static_and_dynamic_shape(image_feature))
- box_encodings = tf.reshape(
- box_encodings, tf.stack([combined_feature_map_shape[0],
- combined_feature_map_shape[1] *
- combined_feature_map_shape[2] *
- num_predictions_per_location,
- self._box_code_size]))
- box_encodings_list.append(box_encodings)
- class_predictions_with_background = tf.reshape(
- class_predictions_with_background,
- tf.stack([combined_feature_map_shape[0],
- combined_feature_map_shape[1] *
- combined_feature_map_shape[2] *
- num_predictions_per_location,
- num_class_slots]))
- class_predictions_list.append(class_predictions_with_background)
- return {
- BOX_ENCODINGS: box_encodings_list,
- CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_list
- }
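-
-
-# A minimal usage sketch for WeightSharedConvolutionalBoxPredictor (see the
-# accompanying box_predictor_test.py); `conv_hyperparams_fn` is assumed to be
-# a tf-slim arg_scope builder, and the two feature maps are assumed to have
-# the same channel depth. The same tower weights are applied to every feature
-# map; only the batch norm statistics differ per map.
-#
-#   predictor = WeightSharedConvolutionalBoxPredictor(
-#       is_training=False, num_classes=6,
-#       conv_hyperparams_fn=conv_hyperparams_fn,
-#       depth=32, num_layers_before_predictor=1, box_code_size=4)
-#   predictions = predictor.predict(
-#       [feature_map1, feature_map2],  # e.g. two [4, 8, 8, 64] tensors
-#       num_predictions_per_location=[5, 5], scope='BoxPredictor')
-#   # Concatenating predictions[BOX_ENCODINGS] along axis 1 gives shape
-#   # [4, 640, 4]; the class predictions concatenate to [4, 640, 7].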
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_predictor_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_predictor_test.py
deleted file mode 100644
index 49680596f8d607ba52c0791a49df67a5d5ebe293..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_predictor_test.py
+++ /dev/null
@@ -1,724 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.box_predictor."""
-import numpy as np
-import tensorflow as tf
-
-from google.protobuf import text_format
-from object_detection.builders import hyperparams_builder
-from object_detection.core import box_predictor
-from object_detection.protos import hyperparams_pb2
-from object_detection.utils import test_case
-
-
-class MaskRCNNBoxPredictorTest(tf.test.TestCase):
-
- def _build_arg_scope_with_hyperparams(self,
- op_type=hyperparams_pb2.Hyperparams.FC):
- hyperparams = hyperparams_pb2.Hyperparams()
- hyperparams_text_proto = """
- activation: NONE
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- text_format.Merge(hyperparams_text_proto, hyperparams)
- hyperparams.op = op_type
- return hyperparams_builder.build(hyperparams, is_training=True)
-
- def test_get_boxes_with_five_classes(self):
- image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
- mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
- is_training=False,
- num_classes=5,
- fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
- use_dropout=False,
- dropout_keep_prob=0.5,
- box_code_size=4,
- )
- box_predictions = mask_box_predictor.predict(
- [image_features], num_predictions_per_location=[1],
- scope='BoxPredictor')
- box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
- class_predictions_with_background = box_predictions[
- box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- (box_encodings_shape,
- class_predictions_with_background_shape) = sess.run(
- [tf.shape(box_encodings),
- tf.shape(class_predictions_with_background)])
- self.assertAllEqual(box_encodings_shape, [2, 1, 5, 4])
- self.assertAllEqual(class_predictions_with_background_shape, [2, 1, 6])
-
- def test_get_boxes_with_five_classes_share_box_across_classes(self):
- image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
- mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
- is_training=False,
- num_classes=5,
- fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
- use_dropout=False,
- dropout_keep_prob=0.5,
- box_code_size=4,
- share_box_across_classes=True
- )
- box_predictions = mask_box_predictor.predict(
- [image_features], num_predictions_per_location=[1],
- scope='BoxPredictor')
- box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
- class_predictions_with_background = box_predictions[
- box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- (box_encodings_shape,
- class_predictions_with_background_shape) = sess.run(
- [tf.shape(box_encodings),
- tf.shape(class_predictions_with_background)])
- self.assertAllEqual(box_encodings_shape, [2, 1, 1, 4])
- self.assertAllEqual(class_predictions_with_background_shape, [2, 1, 6])
-
- def test_value_error_on_predict_instance_masks_with_no_conv_hyperparms(self):
- with self.assertRaises(ValueError):
- box_predictor.MaskRCNNBoxPredictor(
- is_training=False,
- num_classes=5,
- fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
- use_dropout=False,
- dropout_keep_prob=0.5,
- box_code_size=4,
- predict_instance_masks=True)
-
- def test_get_instance_masks(self):
- image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
- mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
- is_training=False,
- num_classes=5,
- fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
- use_dropout=False,
- dropout_keep_prob=0.5,
- box_code_size=4,
- conv_hyperparams_fn=self._build_arg_scope_with_hyperparams(
- op_type=hyperparams_pb2.Hyperparams.CONV),
- predict_instance_masks=True)
- box_predictions = mask_box_predictor.predict(
- [image_features],
- num_predictions_per_location=[1],
- scope='BoxPredictor',
- predict_boxes_and_classes=True,
- predict_auxiliary_outputs=True)
- mask_predictions = box_predictions[box_predictor.MASK_PREDICTIONS]
- self.assertListEqual([2, 1, 5, 14, 14],
- mask_predictions.get_shape().as_list())
-
- def test_do_not_return_instance_masks_without_request(self):
- image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
- mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
- is_training=False,
- num_classes=5,
- fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
- use_dropout=False,
- dropout_keep_prob=0.5,
- box_code_size=4)
- box_predictions = mask_box_predictor.predict(
- [image_features], num_predictions_per_location=[1],
- scope='BoxPredictor')
- self.assertEqual(len(box_predictions), 2)
- self.assertTrue(box_predictor.BOX_ENCODINGS in box_predictions)
- self.assertTrue(box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND
- in box_predictions)
-
- def test_value_error_on_predict_keypoints(self):
- with self.assertRaises(ValueError):
- box_predictor.MaskRCNNBoxPredictor(
- is_training=False,
- num_classes=5,
- fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
- use_dropout=False,
- dropout_keep_prob=0.5,
- box_code_size=4,
- predict_keypoints=True)
-
-
-class RfcnBoxPredictorTest(tf.test.TestCase):
-
- def _build_arg_scope_with_conv_hyperparams(self):
- conv_hyperparams = hyperparams_pb2.Hyperparams()
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
- return hyperparams_builder.build(conv_hyperparams, is_training=True)
-
- def test_get_correct_box_encoding_and_class_prediction_shapes(self):
- image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
- proposal_boxes = tf.random_normal([4, 2, 4], dtype=tf.float32)
- rfcn_box_predictor = box_predictor.RfcnBoxPredictor(
- is_training=False,
- num_classes=2,
- conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
- num_spatial_bins=[3, 3],
- depth=4,
- crop_size=[12, 12],
- box_code_size=4
- )
- box_predictions = rfcn_box_predictor.predict(
- [image_features], num_predictions_per_location=[1],
- scope='BoxPredictor',
- proposal_boxes=proposal_boxes)
- box_encodings = tf.concat(
- box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
- class_predictions_with_background = tf.concat(
- box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
- axis=1)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- (box_encodings_shape,
- class_predictions_shape) = sess.run(
- [tf.shape(box_encodings),
- tf.shape(class_predictions_with_background)])
- self.assertAllEqual(box_encodings_shape, [8, 1, 2, 4])
- self.assertAllEqual(class_predictions_shape, [8, 1, 3])
-
-
-class ConvolutionalBoxPredictorTest(test_case.TestCase):
-
- def _build_arg_scope_with_conv_hyperparams(self):
- conv_hyperparams = hyperparams_pb2.Hyperparams()
- conv_hyperparams_text_proto = """
- activation: RELU_6
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
- return hyperparams_builder.build(conv_hyperparams, is_training=True)
-
- def test_get_boxes_for_five_aspect_ratios_per_location(self):
- def graph_fn(image_features):
- conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
- is_training=False,
- num_classes=0,
- conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
- min_depth=0,
- max_depth=32,
- num_layers_before_predictor=1,
- use_dropout=True,
- dropout_keep_prob=0.8,
- kernel_size=1,
- box_code_size=4
- )
- box_predictions = conv_box_predictor.predict(
- [image_features], num_predictions_per_location=[5],
- scope='BoxPredictor')
- box_encodings = tf.concat(
- box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
- objectness_predictions = tf.concat(
- box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
- axis=1)
- return (box_encodings, objectness_predictions)
- image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
- (box_encodings, objectness_predictions) = self.execute(graph_fn,
- [image_features])
- self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4])
- self.assertAllEqual(objectness_predictions.shape, [4, 320, 1])
-
- def test_get_boxes_for_one_aspect_ratio_per_location(self):
- def graph_fn(image_features):
- conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
- is_training=False,
- num_classes=0,
- conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
- min_depth=0,
- max_depth=32,
- num_layers_before_predictor=1,
- use_dropout=True,
- dropout_keep_prob=0.8,
- kernel_size=1,
- box_code_size=4
- )
- box_predictions = conv_box_predictor.predict(
- [image_features], num_predictions_per_location=[1],
- scope='BoxPredictor')
- box_encodings = tf.concat(
- box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
- objectness_predictions = tf.concat(box_predictions[
- box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
- return (box_encodings, objectness_predictions)
- image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
- (box_encodings, objectness_predictions) = self.execute(graph_fn,
- [image_features])
- self.assertAllEqual(box_encodings.shape, [4, 64, 1, 4])
- self.assertAllEqual(objectness_predictions.shape, [4, 64, 1])
-
- def test_get_multi_class_predictions_for_five_aspect_ratios_per_location(
- self):
- num_classes_without_background = 6
- image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
- def graph_fn(image_features):
- conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
- is_training=False,
- num_classes=num_classes_without_background,
- conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
- min_depth=0,
- max_depth=32,
- num_layers_before_predictor=1,
- use_dropout=True,
- dropout_keep_prob=0.8,
- kernel_size=1,
- box_code_size=4
- )
- box_predictions = conv_box_predictor.predict(
- [image_features],
- num_predictions_per_location=[5],
- scope='BoxPredictor')
- box_encodings = tf.concat(
- box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
- class_predictions_with_background = tf.concat(
- box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
- axis=1)
- return (box_encodings, class_predictions_with_background)
- (box_encodings,
- class_predictions_with_background) = self.execute(graph_fn,
- [image_features])
- self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4])
- self.assertAllEqual(class_predictions_with_background.shape,
- [4, 320, num_classes_without_background+1])
-
- def test_get_predictions_with_feature_maps_of_dynamic_shape(
- self):
- image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
- conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
- is_training=False,
- num_classes=0,
- conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
- min_depth=0,
- max_depth=32,
- num_layers_before_predictor=1,
- use_dropout=True,
- dropout_keep_prob=0.8,
- kernel_size=1,
- box_code_size=4
- )
- box_predictions = conv_box_predictor.predict(
- [image_features], num_predictions_per_location=[5],
- scope='BoxPredictor')
- box_encodings = tf.concat(
- box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
- objectness_predictions = tf.concat(
- box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
- axis=1)
- init_op = tf.global_variables_initializer()
-
- resolution = 32
- expected_num_anchors = resolution*resolution*5
- with self.test_session() as sess:
- sess.run(init_op)
- (box_encodings_shape,
- objectness_predictions_shape) = sess.run(
- [tf.shape(box_encodings), tf.shape(objectness_predictions)],
- feed_dict={image_features:
- np.random.rand(4, resolution, resolution, 64)})
- actual_variable_set = set(
- [var.op.name for var in tf.trainable_variables()])
- self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4])
- self.assertAllEqual(objectness_predictions_shape,
- [4, expected_num_anchors, 1])
- expected_variable_set = set([
- 'BoxPredictor/Conv2d_0_1x1_32/biases',
- 'BoxPredictor/Conv2d_0_1x1_32/weights',
- 'BoxPredictor/BoxEncodingPredictor/biases',
- 'BoxPredictor/BoxEncodingPredictor/weights',
- 'BoxPredictor/ClassPredictor/biases',
- 'BoxPredictor/ClassPredictor/weights'])
- self.assertEqual(expected_variable_set, actual_variable_set)
-
- def test_use_depthwise_convolution(self):
- image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
- conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
- is_training=False,
- num_classes=0,
- conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
- min_depth=0,
- max_depth=32,
- num_layers_before_predictor=1,
- dropout_keep_prob=0.8,
- kernel_size=1,
- box_code_size=4,
- use_dropout=True,
- use_depthwise=True
- )
- box_predictions = conv_box_predictor.predict(
- [image_features], num_predictions_per_location=[5],
- scope='BoxPredictor')
- box_encodings = tf.concat(
- box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
- objectness_predictions = tf.concat(
- box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
- axis=1)
- init_op = tf.global_variables_initializer()
-
- resolution = 32
- expected_num_anchors = resolution*resolution*5
- with self.test_session() as sess:
- sess.run(init_op)
- (box_encodings_shape,
- objectness_predictions_shape) = sess.run(
- [tf.shape(box_encodings), tf.shape(objectness_predictions)],
- feed_dict={image_features:
- np.random.rand(4, resolution, resolution, 64)})
- actual_variable_set = set(
- [var.op.name for var in tf.trainable_variables()])
- self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4])
- self.assertAllEqual(objectness_predictions_shape,
- [4, expected_num_anchors, 1])
- expected_variable_set = set([
- 'BoxPredictor/Conv2d_0_1x1_32/biases',
- 'BoxPredictor/Conv2d_0_1x1_32/weights',
- 'BoxPredictor/BoxEncodingPredictor_depthwise/biases',
- 'BoxPredictor/BoxEncodingPredictor_depthwise/depthwise_weights',
- 'BoxPredictor/BoxEncodingPredictor/biases',
- 'BoxPredictor/BoxEncodingPredictor/weights',
- 'BoxPredictor/ClassPredictor_depthwise/biases',
- 'BoxPredictor/ClassPredictor_depthwise/depthwise_weights',
- 'BoxPredictor/ClassPredictor/biases',
- 'BoxPredictor/ClassPredictor/weights'])
- self.assertEqual(expected_variable_set, actual_variable_set)
-
-
-class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
-
- def _build_arg_scope_with_conv_hyperparams(self):
- conv_hyperparams = hyperparams_pb2.Hyperparams()
- conv_hyperparams_text_proto = """
- activation: RELU_6
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- random_normal_initializer {
- stddev: 0.01
- mean: 0.0
- }
- }
- batch_norm {
- train: true,
- }
- """
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
- return hyperparams_builder.build(conv_hyperparams, is_training=True)
-
- def _build_conv_arg_scope_no_batch_norm(self):
- conv_hyperparams = hyperparams_pb2.Hyperparams()
- conv_hyperparams_text_proto = """
- activation: RELU_6
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- random_normal_initializer {
- stddev: 0.01
- mean: 0.0
- }
- }
- """
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
- return hyperparams_builder.build(conv_hyperparams, is_training=True)
-
- def test_get_boxes_for_five_aspect_ratios_per_location(self):
-
- def graph_fn(image_features):
- conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
- is_training=False,
- num_classes=0,
- conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
- depth=32,
- num_layers_before_predictor=1,
- box_code_size=4)
- box_predictions = conv_box_predictor.predict(
- [image_features], num_predictions_per_location=[5],
- scope='BoxPredictor')
- box_encodings = tf.concat(
- box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
- objectness_predictions = tf.concat(box_predictions[
- box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
- return (box_encodings, objectness_predictions)
- image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
- (box_encodings, objectness_predictions) = self.execute(
- graph_fn, [image_features])
- self.assertAllEqual(box_encodings.shape, [4, 320, 4])
- self.assertAllEqual(objectness_predictions.shape, [4, 320, 1])
-
- def test_bias_predictions_to_background_with_sigmoid_score_conversion(self):
-
- def graph_fn(image_features):
- conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
- is_training=True,
- num_classes=2,
- conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
- depth=32,
- num_layers_before_predictor=1,
- class_prediction_bias_init=-4.6,
- box_code_size=4)
- box_predictions = conv_box_predictor.predict(
- [image_features], num_predictions_per_location=[5],
- scope='BoxPredictor')
- class_predictions = tf.concat(box_predictions[
- box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
- return (tf.nn.sigmoid(class_predictions),)
- image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
- class_predictions = self.execute(graph_fn, [image_features])
- self.assertAlmostEqual(np.mean(class_predictions), 0.01, places=3)
-
- def test_get_multi_class_predictions_for_five_aspect_ratios_per_location(
- self):
-
- num_classes_without_background = 6
- def graph_fn(image_features):
- conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
- is_training=False,
- num_classes=num_classes_without_background,
- conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
- depth=32,
- num_layers_before_predictor=1,
- box_code_size=4)
- box_predictions = conv_box_predictor.predict(
- [image_features],
- num_predictions_per_location=[5],
- scope='BoxPredictor')
- box_encodings = tf.concat(
- box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
- class_predictions_with_background = tf.concat(box_predictions[
- box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
- return (box_encodings, class_predictions_with_background)
-
- image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
- (box_encodings, class_predictions_with_background) = self.execute(
- graph_fn, [image_features])
- self.assertAllEqual(box_encodings.shape, [4, 320, 4])
- self.assertAllEqual(class_predictions_with_background.shape,
- [4, 320, num_classes_without_background+1])
-
- def test_get_multi_class_predictions_from_two_feature_maps(
- self):
-
- num_classes_without_background = 6
- def graph_fn(image_features1, image_features2):
- conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
- is_training=False,
- num_classes=num_classes_without_background,
- conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
- depth=32,
- num_layers_before_predictor=1,
- box_code_size=4)
- box_predictions = conv_box_predictor.predict(
- [image_features1, image_features2],
- num_predictions_per_location=[5, 5],
- scope='BoxPredictor')
- box_encodings = tf.concat(
- box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
- class_predictions_with_background = tf.concat(
- box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
- axis=1)
- return (box_encodings, class_predictions_with_background)
-
- image_features1 = np.random.rand(4, 8, 8, 64).astype(np.float32)
- image_features2 = np.random.rand(4, 8, 8, 64).astype(np.float32)
- (box_encodings, class_predictions_with_background) = self.execute(
- graph_fn, [image_features1, image_features2])
- self.assertAllEqual(box_encodings.shape, [4, 640, 4])
- self.assertAllEqual(class_predictions_with_background.shape,
- [4, 640, num_classes_without_background+1])
-
- def test_predictions_from_multiple_feature_maps_share_weights_not_batchnorm(
- self):
- num_classes_without_background = 6
- def graph_fn(image_features1, image_features2):
- conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
- is_training=False,
- num_classes=num_classes_without_background,
- conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
- depth=32,
- num_layers_before_predictor=2,
- box_code_size=4)
- box_predictions = conv_box_predictor.predict(
- [image_features1, image_features2],
- num_predictions_per_location=[5, 5],
- scope='BoxPredictor')
- box_encodings = tf.concat(
- box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
- class_predictions_with_background = tf.concat(
- box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
- axis=1)
- return (box_encodings, class_predictions_with_background)
-
- with self.test_session(graph=tf.Graph()):
- graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32),
- tf.random_uniform([4, 16, 16, 3], dtype=tf.float32))
- actual_variable_set = set(
- [var.op.name for var in tf.trainable_variables()])
- expected_variable_set = set([
- # Box prediction tower
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'BoxPredictionTower/conv2d_0/weights'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'BoxPredictionTower/conv2d_0/BatchNorm/feature_0/beta'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'BoxPredictionTower/conv2d_0/BatchNorm/feature_1/beta'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'BoxPredictionTower/conv2d_1/weights'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'BoxPredictionTower/conv2d_1/BatchNorm/feature_0/beta'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'BoxPredictionTower/conv2d_1/BatchNorm/feature_1/beta'),
- # Box prediction head
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'BoxPredictor/weights'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'BoxPredictor/biases'),
- # Class prediction tower
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'ClassPredictionTower/conv2d_0/weights'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'ClassPredictionTower/conv2d_0/BatchNorm/feature_0/beta'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'ClassPredictionTower/conv2d_0/BatchNorm/feature_1/beta'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'ClassPredictionTower/conv2d_1/weights'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'ClassPredictionTower/conv2d_1/BatchNorm/feature_0/beta'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'ClassPredictionTower/conv2d_1/BatchNorm/feature_1/beta'),
- # Class prediction head
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'ClassPredictor/weights'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'ClassPredictor/biases')])
- self.assertEqual(expected_variable_set, actual_variable_set)
-
- def test_no_batchnorm_params_when_batchnorm_is_not_configured(self):
- num_classes_without_background = 6
- def graph_fn(image_features1, image_features2):
- conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
- is_training=False,
- num_classes=num_classes_without_background,
- conv_hyperparams_fn=self._build_conv_arg_scope_no_batch_norm(),
- depth=32,
- num_layers_before_predictor=2,
- box_code_size=4)
- box_predictions = conv_box_predictor.predict(
- [image_features1, image_features2],
- num_predictions_per_location=[5, 5],
- scope='BoxPredictor')
- box_encodings = tf.concat(
- box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
- class_predictions_with_background = tf.concat(
- box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
- axis=1)
- return (box_encodings, class_predictions_with_background)
-
- with self.test_session(graph=tf.Graph()):
- graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32),
- tf.random_uniform([4, 16, 16, 3], dtype=tf.float32))
- actual_variable_set = set(
- [var.op.name for var in tf.trainable_variables()])
- expected_variable_set = set([
- # Box prediction tower
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'BoxPredictionTower/conv2d_0/weights'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'BoxPredictionTower/conv2d_0/biases'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'BoxPredictionTower/conv2d_1/weights'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'BoxPredictionTower/conv2d_1/biases'),
- # Box prediction head
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'BoxPredictor/weights'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'BoxPredictor/biases'),
- # Class prediction tower
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'ClassPredictionTower/conv2d_0/weights'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'ClassPredictionTower/conv2d_0/biases'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'ClassPredictionTower/conv2d_1/weights'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'ClassPredictionTower/conv2d_1/biases'),
- # Class prediction head
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'ClassPredictor/weights'),
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
- 'ClassPredictor/biases')])
- self.assertEqual(expected_variable_set, actual_variable_set)
-
- def test_get_predictions_with_feature_maps_of_dynamic_shape(
- self):
- image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
- conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
- is_training=False,
- num_classes=0,
- conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
- depth=32,
- num_layers_before_predictor=1,
- box_code_size=4)
- box_predictions = conv_box_predictor.predict(
- [image_features], num_predictions_per_location=[5],
- scope='BoxPredictor')
- box_encodings = tf.concat(box_predictions[box_predictor.BOX_ENCODINGS],
- axis=1)
- objectness_predictions = tf.concat(box_predictions[
- box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
- init_op = tf.global_variables_initializer()
-
- resolution = 32
- expected_num_anchors = resolution*resolution*5
- with self.test_session() as sess:
- sess.run(init_op)
- (box_encodings_shape,
- objectness_predictions_shape) = sess.run(
- [tf.shape(box_encodings), tf.shape(objectness_predictions)],
- feed_dict={image_features:
- np.random.rand(4, resolution, resolution, 64)})
- self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 4])
- self.assertAllEqual(objectness_predictions_shape,
- [4, expected_num_anchors, 1])
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/data_decoder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/data_decoder.py
deleted file mode 100644
index 9ae18c1f957ea69432b08740451abb2af2548910..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/data_decoder.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Interface for data decoders.
-
-Data decoders decode the input data and return a dictionary of tensors keyed by
-the entries in core.reader.Fields.
-"""
-from abc import ABCMeta
-from abc import abstractmethod
-
-
-class DataDecoder(object):
- """Interface for data decoders."""
- __metaclass__ = ABCMeta
-
- @abstractmethod
- def decode(self, data):
- """Return a single image and associated labels.
-
- Args:
- data: a string tensor holding a serialized protocol buffer corresponding
- to data for a single image.
-
- Returns:
- tensor_dict: a dictionary containing tensors. Possible keys are defined in
- reader.Fields.
- """
- pass
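-
-
-# A minimal concrete decoder sketch (illustrative only: the feature key and
-# JPEG encoding are assumptions, and real decoders should key their outputs
-# by the entries in reader.Fields):
-#
-#   import tensorflow as tf
-#
-#   class ToyImageDecoder(DataDecoder):
-#
-#     def decode(self, data):
-#       features = tf.parse_single_example(
-#           data, {'image/encoded': tf.FixedLenFeature([], tf.string)})
-#       image = tf.image.decode_jpeg(features['image/encoded'], channels=3)
-#       return {'image': image}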
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/data_parser.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/data_parser.py
deleted file mode 100644
index 3dac4de28ec52da5697e0b2fee81a56ebb72e35c..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/data_parser.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Interface for data parsers.
-
- A data parser parses input data and returns a dictionary of numpy arrays
-keyed by the entries in standard_fields.py. Since the parser parses records
-to numpy arrays (materialized tensors) directly, it is used to read data for
-evaluation/visualization; to parse the data during training, DataDecoder should
-be used.
-"""
-from abc import ABCMeta
-from abc import abstractmethod
-
-
-class DataToNumpyParser(object):
- __metaclass__ = ABCMeta
-
- @abstractmethod
- def parse(self, input_data):
- """Parses input and returns a numpy array or a dictionary of numpy arrays.
-
- Args:
- input_data: an input data record to parse.
-
- Returns:
- A numpy array, a dictionary of numpy arrays, or None if the input
- cannot be parsed.
- """
- pass
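-
-
-# A minimal concrete parser sketch (illustrative only; the 'boxes' field is
-# an assumption):
-#
-#   import numpy as np
-#
-#   class ToyBoxParser(DataToNumpyParser):
-#
-#     def parse(self, input_data):
-#       if 'boxes' not in input_data:
-#         return None
-#       return {'boxes': np.asarray(input_data['boxes'], dtype=np.float32)}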
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/keypoint_ops.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/keypoint_ops.py
deleted file mode 100644
index e520845f92f10faf39c419c321c696e871f4558c..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/keypoint_ops.py
+++ /dev/null
@@ -1,282 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Keypoint operations.
-
-Keypoints are represented as tensors of shape [num_instances, num_keypoints, 2],
- where the last dimension holds the [y, x] coordinates of each keypoint.
-"""
-import numpy as np
-import tensorflow as tf
-
-
-def scale(keypoints, y_scale, x_scale, scope=None):
- """Scales keypoint coordinates in x and y dimensions.
-
- Args:
- keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- y_scale: (float) scalar tensor
- x_scale: (float) scalar tensor
- scope: name scope.
-
- Returns:
- new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- """
- with tf.name_scope(scope, 'Scale'):
- y_scale = tf.cast(y_scale, tf.float32)
- x_scale = tf.cast(x_scale, tf.float32)
- new_keypoints = keypoints * [[[y_scale, x_scale]]]
- return new_keypoints
-
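-# Worked example (matching keypoint_ops_test below): scaling keypoint
-# [100.0, 200.0] by y_scale=1/100 and x_scale=1/200 yields [1.0, 1.0].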
-
-def clip_to_window(keypoints, window, scope=None):
- """Clips keypoints to a window.
-
- This op clips any input keypoints to a window.
-
- Args:
- keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
- window to which the op should clip the keypoints.
- scope: name scope.
-
- Returns:
- new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- """
- with tf.name_scope(scope, 'ClipToWindow'):
- y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
- win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
- y = tf.maximum(tf.minimum(y, win_y_max), win_y_min)
- x = tf.maximum(tf.minimum(x, win_x_max), win_x_min)
- new_keypoints = tf.concat([y, x], 2)
- return new_keypoints
-
-
-def prune_outside_window(keypoints, window, scope=None):
- """Prunes keypoints that fall outside a given window.
-
- This function replaces keypoints that fall outside the given window with nan.
- See also clip_to_window which clips any keypoints that fall outside the given
- window.
-
- Args:
- keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
- window outside of which the op should prune the keypoints.
- scope: name scope.
-
- Returns:
- new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- """
- with tf.name_scope(scope, 'PruneOutsideWindow'):
- y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
- win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
-
- valid_indices = tf.logical_and(
- tf.logical_and(y >= win_y_min, y <= win_y_max),
- tf.logical_and(x >= win_x_min, x <= win_x_max))
-
- new_y = tf.where(valid_indices, y, np.nan * tf.ones_like(y))
- new_x = tf.where(valid_indices, x, np.nan * tf.ones_like(x))
- new_keypoints = tf.concat([new_y, new_x], 2)
-
- return new_keypoints
-
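-# Worked example (matching keypoint_ops_test below): with window
-# [0.25, 0.25, 0.75, 0.75], a keypoint at [0.5, 0.0] falls outside the
-# window and is replaced by [nan, nan]; [0.25, 0.5] is kept unchanged.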
-
-def change_coordinate_frame(keypoints, window, scope=None):
- """Changes coordinate frame of the keypoints to be relative to window's frame.
-
- Given a window of the form [y_min, x_min, y_max, x_max], changes keypoint
- coordinates from keypoints of shape [num_instances, num_keypoints, 2]
- to be relative to this window.
-
- An example use case is data augmentation, where we are given groundtruth
- keypoints and would like to randomly crop the image to some window. In this
- case we need to change the coordinate frame of each groundtruth keypoint to be
- relative to this new window.
-
- Args:
- keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
- window we should change the coordinate frame to.
- scope: name scope.
-
- Returns:
- new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- """
- with tf.name_scope(scope, 'ChangeCoordinateFrame'):
- win_height = window[2] - window[0]
- win_width = window[3] - window[1]
- new_keypoints = scale(keypoints - [window[0], window[1]], 1.0 / win_height,
- 1.0 / win_width)
- return new_keypoints
-
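-# Worked example (matching keypoint_ops_test below): with window
-# [0.25, 0.25, 0.75, 0.75] the window is 0.5 x 0.5, so a keypoint at
-# [y, x] = [0.5, 0.0] maps to ([0.5, 0.0] - [0.25, 0.25]) / 0.5 = [0.5, -0.5].
-# Coordinates outside the window can therefore fall outside [0, 1].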
-
-def to_normalized_coordinates(keypoints, height, width,
- check_range=True, scope=None):
- """Converts absolute keypoint coordinates to normalized coordinates in [0, 1].
-
- Usually one uses the dynamic shape of the image or conv-layer tensor:
- keypoints = keypoint_ops.to_normalized_coordinates(keypoints,
- tf.shape(images)[1],
- tf.shape(images)[2])
-
- This function raises an assertion error at graph execution time when
- the maximum coordinate is smaller than 1.01 (which means that coordinates are
- already normalized). The value 1.01 is to deal with small rounding errors.
-
- Args:
- keypoints: A tensor of shape [num_instances, num_keypoints, 2].
- height: Maximum value for y coordinate of absolute keypoint coordinates.
- width: Maximum value for x coordinate of absolute keypoint coordinates.
- check_range: If True, checks if the coordinates are normalized.
- scope: name scope.
-
- Returns:
- tensor of shape [num_instances, num_keypoints, 2] with normalized
- coordinates in [0, 1].
- """
- with tf.name_scope(scope, 'ToNormalizedCoordinates'):
- height = tf.cast(height, tf.float32)
- width = tf.cast(width, tf.float32)
-
- if check_range:
- max_val = tf.reduce_max(keypoints)
- max_assert = tf.Assert(tf.greater(max_val, 1.01),
- ['max value is lower than 1.01: ', max_val])
- with tf.control_dependencies([max_assert]):
- width = tf.identity(width)
-
- return scale(keypoints, 1.0 / height, 1.0 / width)
-
-
-def to_absolute_coordinates(keypoints, height, width,
- check_range=True, scope=None):
- """Converts normalized keypoint coordinates to absolute pixel coordinates.
-
- This function raises an assertion error when the maximum keypoint
- coordinate value is larger than 1.01 (in which case coordinates are already
- absolute).
-
- Args:
- keypoints: A tensor of shape [num_instances, num_keypoints, 2]
- height: Maximum value for y coordinate of absolute keypoint coordinates.
- width: Maximum value for x coordinate of absolute keypoint coordinates.
- check_range: If True, checks if the coordinates are normalized or not.
- scope: name scope.
-
- Returns:
- tensor of shape [num_instances, num_keypoints, 2] with absolute coordinates
- in terms of the image size.
-
- """
- with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
- height = tf.cast(height, tf.float32)
- width = tf.cast(width, tf.float32)
-
- # Ensure range of input keypoints is correct.
- if check_range:
- max_val = tf.reduce_max(keypoints)
- max_assert = tf.Assert(tf.greater_equal(1.01, max_val),
- ['maximum keypoint coordinate value is larger '
- 'than 1.01: ', max_val])
- with tf.control_dependencies([max_assert]):
- width = tf.identity(width)
-
- return scale(keypoints, height, width)
-
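-# Worked example: a normalized keypoint [0.25, 0.5] with height=40 and
-# width=60 maps to absolute coordinates [10.0, 30.0].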
-
-def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
- """Flips the keypoints horizontally around the flip_point.
-
- This operation flips the x coordinate for each keypoint around the flip_point
- and also permutes the keypoints in a manner specified by flip_permutation.
-
- Args:
- keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- flip_point: (float) scalar tensor representing the x coordinate to flip the
- keypoints around.
- flip_permutation: rank 1 int32 tensor containing the keypoint flip
- permutation. This specifies the mapping from original keypoint indices
- to the flipped keypoint indices. This is used primarily for keypoints
- that are not reflection invariant. E.g. Suppose there are 3 keypoints
- representing ['head', 'right_eye', 'left_eye'], then a logical choice for
- flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
- and 'right_eye' after a horizontal flip.
- scope: name scope.
-
- Returns:
- new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- """
- with tf.name_scope(scope, 'FlipHorizontal'):
- keypoints = tf.transpose(keypoints, [1, 0, 2])
- keypoints = tf.gather(keypoints, flip_permutation)
- v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
- u = flip_point * 2.0 - u
- new_keypoints = tf.concat([v, u], 2)
- new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
- return new_keypoints
-
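-# Worked example: with keypoints ['head', 'right_eye', 'left_eye'],
-# flip_point=0.5 and flip_permutation=[0, 2, 1], a keypoint at
-# [y, x] = [0.3, 0.2] maps to [0.3, 0.8] (x -> 2 * flip_point - x), and the
-# two eye rows are swapped.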
-
-def flip_vertical(keypoints, flip_point, flip_permutation, scope=None):
- """Flips the keypoints vertically around the flip_point.
-
- This operation flips the y coordinate for each keypoint around the flip_point
- and also permutes the keypoints in a manner specified by flip_permutation.
-
- Args:
- keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- flip_point: (float) scalar tensor representing the y coordinate to flip the
- keypoints around.
- flip_permutation: rank 1 int32 tensor containing the keypoint flip
- permutation. This specifies the mapping from original keypoint indices
- to the flipped keypoint indices. This is used primarily for keypoints
- that are not reflection invariant. E.g. Suppose there are 3 keypoints
- representing ['head', 'right_eye', 'left_eye'], then a logical choice for
- flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
- and 'right_eye' after the flip.
- scope: name scope.
-
- Returns:
- new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- """
- with tf.name_scope(scope, 'FlipVertical'):
- keypoints = tf.transpose(keypoints, [1, 0, 2])
- keypoints = tf.gather(keypoints, flip_permutation)
- v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
- v = flip_point * 2.0 - v
- new_keypoints = tf.concat([v, u], 2)
- new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
- return new_keypoints
-
-
-def rot90(keypoints, scope=None):
- """Rotates the keypoints counter-clockwise by 90 degrees.
-
- Args:
- keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- scope: name scope.
-
- Returns:
- new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- """
- with tf.name_scope(scope, 'Rot90'):
- keypoints = tf.transpose(keypoints, [1, 0, 2])
- v, u = tf.split(value=keypoints[:, :, ::-1], num_or_size_splits=2, axis=2)
- v = 1.0 - v
- new_keypoints = tf.concat([v, u], 2)
- new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
- return new_keypoints
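-
-
-# Worked example: rot90 maps a normalized keypoint [y, x] to [1 - x, y],
-# e.g. [0.25, 0.75] -> [0.25, 0.25].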
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/keypoint_ops_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/keypoint_ops_test.py
deleted file mode 100644
index 1c09c55aa2c834e566dd8d6cd57b9a254bf26efe..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/keypoint_ops_test.py
+++ /dev/null
@@ -1,200 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.keypoint_ops."""
-import numpy as np
-import tensorflow as tf
-
-from object_detection.core import keypoint_ops
-
-
-class KeypointOpsTest(tf.test.TestCase):
- """Tests for common keypoint operations."""
-
- def test_scale(self):
- keypoints = tf.constant([
- [[0.0, 0.0], [100.0, 200.0]],
- [[50.0, 120.0], [100.0, 140.0]]
- ])
- y_scale = tf.constant(1.0 / 100)
- x_scale = tf.constant(1.0 / 200)
-
- expected_keypoints = tf.constant([
- [[0., 0.], [1.0, 1.0]],
- [[0.5, 0.6], [1.0, 0.7]]
- ])
- output = keypoint_ops.scale(keypoints, y_scale, x_scale)
-
- with self.test_session() as sess:
- output_, expected_keypoints_ = sess.run([output, expected_keypoints])
- self.assertAllClose(output_, expected_keypoints_)
-
- def test_clip_to_window(self):
- keypoints = tf.constant([
- [[0.25, 0.5], [0.75, 0.75]],
- [[0.5, 0.0], [1.0, 1.0]]
- ])
- window = tf.constant([0.25, 0.25, 0.75, 0.75])
-
- expected_keypoints = tf.constant([
- [[0.25, 0.5], [0.75, 0.75]],
- [[0.5, 0.25], [0.75, 0.75]]
- ])
- output = keypoint_ops.clip_to_window(keypoints, window)
-
- with self.test_session() as sess:
- output_, expected_keypoints_ = sess.run([output, expected_keypoints])
- self.assertAllClose(output_, expected_keypoints_)
-
- def test_prune_outside_window(self):
- keypoints = tf.constant([
- [[0.25, 0.5], [0.75, 0.75]],
- [[0.5, 0.0], [1.0, 1.0]]
- ])
- window = tf.constant([0.25, 0.25, 0.75, 0.75])
-
- expected_keypoints = tf.constant([[[0.25, 0.5], [0.75, 0.75]],
- [[np.nan, np.nan], [np.nan, np.nan]]])
- output = keypoint_ops.prune_outside_window(keypoints, window)
-
- with self.test_session() as sess:
- output_, expected_keypoints_ = sess.run([output, expected_keypoints])
- self.assertAllClose(output_, expected_keypoints_)
-
- def test_change_coordinate_frame(self):
- keypoints = tf.constant([
- [[0.25, 0.5], [0.75, 0.75]],
- [[0.5, 0.0], [1.0, 1.0]]
- ])
- window = tf.constant([0.25, 0.25, 0.75, 0.75])
-
- expected_keypoints = tf.constant([
- [[0, 0.5], [1.0, 1.0]],
- [[0.5, -0.5], [1.5, 1.5]]
- ])
- output = keypoint_ops.change_coordinate_frame(keypoints, window)
-
- with self.test_session() as sess:
- output_, expected_keypoints_ = sess.run([output, expected_keypoints])
- self.assertAllClose(output_, expected_keypoints_)
-
- def test_to_normalized_coordinates(self):
- keypoints = tf.constant([
- [[10., 30.], [30., 45.]],
- [[20., 0.], [40., 60.]]
- ])
- output = keypoint_ops.to_normalized_coordinates(
- keypoints, 40, 60)
- expected_keypoints = tf.constant([
- [[0.25, 0.5], [0.75, 0.75]],
- [[0.5, 0.0], [1.0, 1.0]]
- ])
-
- with self.test_session() as sess:
- output_, expected_keypoints_ = sess.run([output, expected_keypoints])
- self.assertAllClose(output_, expected_keypoints_)
-
- def test_to_normalized_coordinates_already_normalized(self):
- keypoints = tf.constant([
- [[0.25, 0.5], [0.75, 0.75]],
- [[0.5, 0.0], [1.0, 1.0]]
- ])
- output = keypoint_ops.to_normalized_coordinates(
- keypoints, 40, 60)
-
- with self.test_session() as sess:
- with self.assertRaisesOpError('assertion failed'):
- sess.run(output)
-
- def test_to_absolute_coordinates(self):
- keypoints = tf.constant([
- [[0.25, 0.5], [0.75, 0.75]],
- [[0.5, 0.0], [1.0, 1.0]]
- ])
- output = keypoint_ops.to_absolute_coordinates(
- keypoints, 40, 60)
- expected_keypoints = tf.constant([
- [[10., 30.], [30., 45.]],
- [[20., 0.], [40., 60.]]
- ])
-
- with self.test_session() as sess:
- output_, expected_keypoints_ = sess.run([output, expected_keypoints])
- self.assertAllClose(output_, expected_keypoints_)
-
- def test_to_absolute_coordinates_already_absolute(self):
- keypoints = tf.constant([
- [[10., 30.], [30., 45.]],
- [[20., 0.], [40., 60.]]
- ])
- output = keypoint_ops.to_absolute_coordinates(
- keypoints, 40, 60)
-
- with self.test_session() as sess:
- with self.assertRaisesOpError('assertion failed'):
- sess.run(output)
-
- def test_flip_horizontal(self):
- keypoints = tf.constant([
- [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
- [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]
- ])
- flip_permutation = [0, 2, 1]
-
- expected_keypoints = tf.constant([
- [[0.1, 0.9], [0.3, 0.7], [0.2, 0.8]],
- [[0.4, 0.6], [0.6, 0.4], [0.5, 0.5]],
- ])
- output = keypoint_ops.flip_horizontal(keypoints, 0.5, flip_permutation)
-
- with self.test_session() as sess:
- output_, expected_keypoints_ = sess.run([output, expected_keypoints])
- self.assertAllClose(output_, expected_keypoints_)
-
- def test_flip_vertical(self):
- keypoints = tf.constant([
- [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
- [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]
- ])
- flip_permutation = [0, 2, 1]
-
- expected_keypoints = tf.constant([
- [[0.9, 0.1], [0.7, 0.3], [0.8, 0.2]],
- [[0.6, 0.4], [0.4, 0.6], [0.5, 0.5]],
- ])
- output = keypoint_ops.flip_vertical(keypoints, 0.5, flip_permutation)
-
- with self.test_session() as sess:
- output_, expected_keypoints_ = sess.run([output, expected_keypoints])
- self.assertAllClose(output_, expected_keypoints_)
-
- def test_rot90(self):
- keypoints = tf.constant([
- [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
- [[0.4, 0.6], [0.5, 0.6], [0.6, 0.7]]
- ])
- expected_keypoints = tf.constant([
- [[0.9, 0.1], [0.8, 0.2], [0.7, 0.3]],
- [[0.4, 0.4], [0.4, 0.5], [0.3, 0.6]],
- ])
- output = keypoint_ops.rot90(keypoints)
-
- with self.test_session() as sess:
- output_, expected_keypoints_ = sess.run([output, expected_keypoints])
- self.assertAllClose(output_, expected_keypoints_)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/losses.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/losses.py
deleted file mode 100644
index 5471c955fdcef7530c04557dba8b8cbb54936cef..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/losses.py
+++ /dev/null
@@ -1,641 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Classification and regression loss functions for object detection.
-
-Localization losses:
- * WeightedL2LocalizationLoss
- * WeightedSmoothL1LocalizationLoss
- * WeightedIOULocalizationLoss
-
-Classification losses:
- * WeightedSigmoidClassificationLoss
- * SigmoidFocalClassificationLoss
- * WeightedSoftmaxClassificationLoss
- * WeightedSoftmaxClassificationAgainstLogitsLoss
- * BootstrappedSigmoidClassificationLoss
-"""
-from abc import ABCMeta
-from abc import abstractmethod
-
-import tensorflow as tf
-
-from object_detection.core import box_list
-from object_detection.core import box_list_ops
-from object_detection.utils import ops
-
-slim = tf.contrib.slim
-
-
-class Loss(object):
- """Abstract base class for loss functions."""
- __metaclass__ = ABCMeta
-
- def __call__(self,
- prediction_tensor,
- target_tensor,
- ignore_nan_targets=False,
- scope=None,
- **params):
- """Call the loss function.
-
- Args:
- prediction_tensor: an N-d tensor of shape [batch, anchors, ...]
- representing predicted quantities.
- target_tensor: an N-d tensor of shape [batch, anchors, ...] representing
- regression or classification targets.
- ignore_nan_targets: whether to ignore nan targets in the loss computation.
- E.g. can be used if the target tensor is missing groundtruth data that
- shouldn't be factored into the loss.
- scope: Op scope name. Defaults to 'Loss' if None.
- **params: Additional keyword arguments for specific implementations of
- the Loss.
-
- Returns:
- loss: a tensor representing the value of the loss function.
- """
- with tf.name_scope(scope, 'Loss',
- [prediction_tensor, target_tensor, params]) as scope:
- if ignore_nan_targets:
- target_tensor = tf.where(tf.is_nan(target_tensor),
- prediction_tensor,
- target_tensor)
- return self._compute_loss(prediction_tensor, target_tensor, **params)
-
- @abstractmethod
- def _compute_loss(self, prediction_tensor, target_tensor, **params):
- """Method to be overridden by implementations.
-
- Args:
- prediction_tensor: a tensor representing predicted quantities
- target_tensor: a tensor representing regression or classification targets
- **params: Additional keyword arguments for specific implementations of
- the Loss.
-
- Returns:
- loss: an N-d tensor of shape [batch, anchors, ...] containing the loss per
- anchor
- """
- pass
-
-
-class WeightedL2LocalizationLoss(Loss):
- """L2 localization loss function with anchorwise output support.
-
- Loss[b,a] = .5 * ||weights[b,a] * (prediction[b,a,:] - target[b,a,:])||^2
- """
-
- def _compute_loss(self, prediction_tensor, target_tensor, weights):
- """Compute loss function.
-
- Args:
- prediction_tensor: A float tensor of shape [batch_size, num_anchors,
- code_size] representing the (encoded) predicted locations of objects.
- target_tensor: A float tensor of shape [batch_size, num_anchors,
- code_size] representing the regression targets
- weights: a float tensor of shape [batch_size, num_anchors]
-
- Returns:
- loss: a float tensor of shape [batch_size, num_anchors] tensor
- representing the value of the loss function.
- """
- weighted_diff = (prediction_tensor - target_tensor) * tf.expand_dims(
- weights, 2)
- square_diff = 0.5 * tf.square(weighted_diff)
- return tf.reduce_sum(square_diff, 2)
-
-
-class WeightedSmoothL1LocalizationLoss(Loss):
- """Smooth L1 localization loss function aka Huber Loss..
-
- The smooth L1_loss is defined elementwise as .5 x^2 if |x| <= delta and
- 0.5 x^2 + delta * (|x|-delta) otherwise, where x is the difference between
- predictions and target.
-
- See also Equation (3) in the Fast R-CNN paper by Ross Girshick (ICCV 2015)
- """
-
- def __init__(self, delta=1.0):
- """Constructor.
-
- Args:
- delta: delta for smooth L1 loss.
- """
- self._delta = delta
-
- def _compute_loss(self, prediction_tensor, target_tensor, weights):
- """Compute loss function.
-
- Args:
- prediction_tensor: A float tensor of shape [batch_size, num_anchors,
- code_size] representing the (encoded) predicted locations of objects.
- target_tensor: A float tensor of shape [batch_size, num_anchors,
- code_size] representing the regression targets
- weights: a float tensor of shape [batch_size, num_anchors]
-
- Returns:
- loss: a float tensor of shape [batch_size, num_anchors] tensor
- representing the value of the loss function.
- """
- return tf.reduce_sum(tf.losses.huber_loss(
- target_tensor,
- prediction_tensor,
- delta=self._delta,
- weights=tf.expand_dims(weights, axis=2),
- loss_collection=None,
- reduction=tf.losses.Reduction.NONE
- ), axis=2)
-
-
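To make the piecewise definition in the class docstring concrete, here is a small NumPy sketch of the elementwise smooth L1 (Huber) penalty it describes; `smooth_l1` is a hypothetical name used for illustration only.

```python
import numpy as np

def smooth_l1(x, delta=1.0):
    """Elementwise smooth L1: quadratic near zero, linear in the tails."""
    abs_x = np.abs(x)
    return np.where(abs_x <= delta,
                    0.5 * np.square(x),
                    0.5 * delta ** 2 + delta * (abs_x - delta))

print(smooth_l1(np.array([0.3, 1.0, 2.5])))
# [0.045 0.5   2.   ] -- i.e. 0.5*0.3^2, 0.5*1^2, 0.5 + 1.0*(2.5 - 1.0)
```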
-class WeightedIOULocalizationLoss(Loss):
- """IOU localization loss function.
-
-  Computes the IOU for corresponding pairs of predicted/groundtruth boxes
-  and assigns each pair a loss of 1 - IOU. A weighted sum over all pairs is
-  then returned as the total loss.
- """
-
- def _compute_loss(self, prediction_tensor, target_tensor, weights):
- """Compute loss function.
-
- Args:
- prediction_tensor: A float tensor of shape [batch_size, num_anchors, 4]
- representing the decoded predicted boxes
- target_tensor: A float tensor of shape [batch_size, num_anchors, 4]
- representing the decoded target boxes
- weights: a float tensor of shape [batch_size, num_anchors]
-
- Returns:
- loss: a float tensor of shape [batch_size, num_anchors] tensor
- representing the value of the loss function.
- """
- predicted_boxes = box_list.BoxList(tf.reshape(prediction_tensor, [-1, 4]))
- target_boxes = box_list.BoxList(tf.reshape(target_tensor, [-1, 4]))
- per_anchor_iou_loss = 1.0 - box_list_ops.matched_iou(predicted_boxes,
- target_boxes)
- return tf.reshape(weights, [-1]) * per_anchor_iou_loss
-
-
-class WeightedSigmoidClassificationLoss(Loss):
- """Sigmoid cross entropy classification loss function."""
-
- def _compute_loss(self,
- prediction_tensor,
- target_tensor,
- weights,
- class_indices=None):
- """Compute loss function.
-
- Args:
- prediction_tensor: A float tensor of shape [batch_size, num_anchors,
- num_classes] representing the predicted logits for each class
- target_tensor: A float tensor of shape [batch_size, num_anchors,
- num_classes] representing one-hot encoded classification targets
- weights: a float tensor of shape [batch_size, num_anchors]
- class_indices: (Optional) A 1-D integer tensor of class indices.
- If provided, computes loss only for the specified class indices.
-
- Returns:
- loss: a float tensor of shape [batch_size, num_anchors, num_classes]
- representing the value of the loss function.
- """
- weights = tf.expand_dims(weights, 2)
- if class_indices is not None:
- weights *= tf.reshape(
- ops.indices_to_dense_vector(class_indices,
- tf.shape(prediction_tensor)[2]),
- [1, 1, -1])
- per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
- labels=target_tensor, logits=prediction_tensor))
- return per_entry_cross_ent * weights
-
-
-class SigmoidFocalClassificationLoss(Loss):
- """Sigmoid focal cross entropy loss.
-
-  Focal loss down-weights well-classified examples and focuses on the hard
- examples. See https://arxiv.org/pdf/1708.02002.pdf for the loss definition.
- """
-
- def __init__(self, gamma=2.0, alpha=0.25):
- """Constructor.
-
- Args:
- gamma: exponent of the modulating factor (1 - p_t) ^ gamma.
- alpha: optional alpha weighting factor to balance positives vs negatives.
- """
- self._alpha = alpha
- self._gamma = gamma
-
- def _compute_loss(self,
- prediction_tensor,
- target_tensor,
- weights,
- class_indices=None):
- """Compute loss function.
-
- Args:
- prediction_tensor: A float tensor of shape [batch_size, num_anchors,
- num_classes] representing the predicted logits for each class
- target_tensor: A float tensor of shape [batch_size, num_anchors,
- num_classes] representing one-hot encoded classification targets
- weights: a float tensor of shape [batch_size, num_anchors]
- class_indices: (Optional) A 1-D integer tensor of class indices.
- If provided, computes loss only for the specified class indices.
-
- Returns:
- loss: a float tensor of shape [batch_size, num_anchors, num_classes]
- representing the value of the loss function.
- """
- weights = tf.expand_dims(weights, 2)
- if class_indices is not None:
- weights *= tf.reshape(
- ops.indices_to_dense_vector(class_indices,
- tf.shape(prediction_tensor)[2]),
- [1, 1, -1])
- per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
- labels=target_tensor, logits=prediction_tensor))
- prediction_probabilities = tf.sigmoid(prediction_tensor)
- p_t = ((target_tensor * prediction_probabilities) +
- ((1 - target_tensor) * (1 - prediction_probabilities)))
- modulating_factor = 1.0
- if self._gamma:
- modulating_factor = tf.pow(1.0 - p_t, self._gamma)
- alpha_weight_factor = 1.0
- if self._alpha is not None:
- alpha_weight_factor = (target_tensor * self._alpha +
- (1 - target_tensor) * (1 - self._alpha))
- focal_cross_entropy_loss = (modulating_factor * alpha_weight_factor *
- per_entry_cross_ent)
- return focal_cross_entropy_loss * weights
-
-
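A quick numeric sketch of the pieces computed above may help: the modulating factor (1 - p_t)^gamma collapses the loss for confidently correct examples while leaving hard ones nearly untouched. `sigmoid_focal_loss` below is our illustrative name, assuming the binary per-entry case with no `weights` or `class_indices` masking.

```python
import numpy as np

def sigmoid_focal_loss(logits, targets, gamma=2.0, alpha=0.25):
    """NumPy sketch of the per-entry focal loss above (binary case)."""
    p = 1.0 / (1.0 + np.exp(-logits))
    ce = -(targets * np.log(p) + (1 - targets) * np.log(1 - p))  # sigmoid cross entropy
    p_t = targets * p + (1 - targets) * (1 - p)                  # probability of the true class
    alpha_t = targets * alpha + (1 - targets) * (1 - alpha)
    return alpha_t * (1.0 - p_t) ** gamma * ce

# An easy positive (logit 4.0) contributes ~1e-6; a hard one (logit -1.0) ~0.18:
print(sigmoid_focal_loss(np.array([4.0, -1.0]), np.array([1.0, 1.0])))
```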
-class WeightedSoftmaxClassificationLoss(Loss):
- """Softmax loss function."""
-
- def __init__(self, logit_scale=1.0):
- """Constructor.
-
- Args:
- logit_scale: When this value is high, the prediction is "diffused" and
- when this value is low, the prediction is made peakier.
- (default 1.0)
-
- """
- self._logit_scale = logit_scale
-
- def _compute_loss(self, prediction_tensor, target_tensor, weights):
- """Compute loss function.
-
- Args:
- prediction_tensor: A float tensor of shape [batch_size, num_anchors,
- num_classes] representing the predicted logits for each class
- target_tensor: A float tensor of shape [batch_size, num_anchors,
- num_classes] representing one-hot encoded classification targets
- weights: a float tensor of shape [batch_size, num_anchors]
-
- Returns:
- loss: a float tensor of shape [batch_size, num_anchors]
- representing the value of the loss function.
- """
- num_classes = prediction_tensor.get_shape().as_list()[-1]
- prediction_tensor = tf.divide(
- prediction_tensor, self._logit_scale, name='scale_logit')
- per_row_cross_ent = (tf.nn.softmax_cross_entropy_with_logits(
- labels=tf.reshape(target_tensor, [-1, num_classes]),
- logits=tf.reshape(prediction_tensor, [-1, num_classes])))
- return tf.reshape(per_row_cross_ent, tf.shape(weights)) * weights
-
-
-class WeightedSoftmaxClassificationAgainstLogitsLoss(Loss):
- """Softmax loss function against logits.
-
- Targets are expected to be provided in logits space instead of "one hot" or
- "probability distribution" space.
- """
-
- def __init__(self, logit_scale=1.0):
- """Constructor.
-
- Args:
- logit_scale: When this value is high, the target is "diffused" and
- when this value is low, the target is made peakier.
- (default 1.0)
-
- """
- self._logit_scale = logit_scale
-
- def _scale_and_softmax_logits(self, logits):
- """Scale logits then apply softmax."""
- scaled_logits = tf.divide(logits, self._logit_scale, name='scale_logits')
- return tf.nn.softmax(scaled_logits, name='convert_scores')
-
- def _compute_loss(self, prediction_tensor, target_tensor, weights):
- """Compute loss function.
-
- Args:
- prediction_tensor: A float tensor of shape [batch_size, num_anchors,
- num_classes] representing the predicted logits for each class
- target_tensor: A float tensor of shape [batch_size, num_anchors,
- num_classes] representing logit classification targets
- weights: a float tensor of shape [batch_size, num_anchors]
-
- Returns:
- loss: a float tensor of shape [batch_size, num_anchors]
- representing the value of the loss function.
- """
- num_classes = prediction_tensor.get_shape().as_list()[-1]
- target_tensor = self._scale_and_softmax_logits(target_tensor)
- prediction_tensor = tf.divide(prediction_tensor, self._logit_scale,
- name='scale_logits')
-
- per_row_cross_ent = (tf.nn.softmax_cross_entropy_with_logits(
- labels=tf.reshape(target_tensor, [-1, num_classes]),
- logits=tf.reshape(prediction_tensor, [-1, num_classes])))
- return tf.reshape(per_row_cross_ent, tf.shape(weights)) * weights
-
-
-class BootstrappedSigmoidClassificationLoss(Loss):
- """Bootstrapped sigmoid cross entropy classification loss function.
-
- This loss uses a convex combination of training labels and the current model's
- predictions as training targets in the classification loss. The idea is that
- as the model improves over time, its predictions can be trusted more and we
- can use these predictions to mitigate the damage of noisy/incorrect labels,
- because incorrect labels are likely to be eventually highly inconsistent with
- other stimuli predicted to have the same label by the model.
-
- In "soft" bootstrapping, we use all predicted class probabilities, whereas in
- "hard" bootstrapping, we use the single class favored by the model.
-
- See also Training Deep Neural Networks On Noisy Labels with Bootstrapping by
- Reed et al. (ICLR 2015).
- """
-
- def __init__(self, alpha, bootstrap_type='soft'):
- """Constructor.
-
- Args:
- alpha: a float32 scalar tensor between 0 and 1 representing interpolation
- weight
- bootstrap_type: set to either 'hard' or 'soft' (default)
-
- Raises:
- ValueError: if bootstrap_type is not either 'hard' or 'soft'
- """
- if bootstrap_type != 'hard' and bootstrap_type != 'soft':
- raise ValueError('Unrecognized bootstrap_type: must be one of '
- '\'hard\' or \'soft.\'')
- self._alpha = alpha
- self._bootstrap_type = bootstrap_type
-
- def _compute_loss(self, prediction_tensor, target_tensor, weights):
- """Compute loss function.
-
- Args:
- prediction_tensor: A float tensor of shape [batch_size, num_anchors,
- num_classes] representing the predicted logits for each class
- target_tensor: A float tensor of shape [batch_size, num_anchors,
- num_classes] representing one-hot encoded classification targets
- weights: a float tensor of shape [batch_size, num_anchors]
-
- Returns:
- loss: a float tensor of shape [batch_size, num_anchors, num_classes]
- representing the value of the loss function.
- """
- if self._bootstrap_type == 'soft':
- bootstrap_target_tensor = self._alpha * target_tensor + (
- 1.0 - self._alpha) * tf.sigmoid(prediction_tensor)
- else:
- bootstrap_target_tensor = self._alpha * target_tensor + (
- 1.0 - self._alpha) * tf.cast(
- tf.sigmoid(prediction_tensor) > 0.5, tf.float32)
- per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
- labels=bootstrap_target_tensor, logits=prediction_tensor))
- return per_entry_cross_ent * tf.expand_dims(weights, 2)
-
-
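To illustrate the convex combination described above, here is a hedged NumPy sketch of how soft and hard bootstrapping construct training targets; `bootstrap_targets` is a name made up for this example.

```python
import numpy as np

def bootstrap_targets(targets, logits, alpha, bootstrap_type='soft'):
    """Sketch of the bootstrapped target construction used in _compute_loss."""
    probs = 1.0 / (1.0 + np.exp(-logits))
    if bootstrap_type == 'soft':
        model_term = probs                              # full predicted probabilities
    else:
        model_term = (probs > 0.5).astype(np.float64)   # the model's hard decision
    return alpha * targets + (1.0 - alpha) * model_term

labels = np.array([1.0, 0.0])
logits = np.array([2.0, 3.0])  # the model strongly disagrees with the second label
print(bootstrap_targets(labels, logits, alpha=0.8))
# ~[0.976 0.191]: the possibly-noisy 0-label is pulled toward the model's belief
```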
-class HardExampleMiner(object):
- """Hard example mining for regions in a list of images.
-
- Implements hard example mining to select a subset of regions to be
- back-propagated. For each image, selects the regions with highest losses,
- subject to the condition that a newly selected region cannot have
- an IOU > iou_threshold with any of the previously selected regions.
- This can be achieved by re-using a greedy non-maximum suppression algorithm.
- A constraint on the number of negatives mined per positive region can also be
- enforced.
-
- Reference papers: "Training Region-based Object Detectors with Online
- Hard Example Mining" (CVPR 2016) by Srivastava et al., and
- "SSD: Single Shot MultiBox Detector" (ECCV 2016) by Liu et al.
- """
-
- def __init__(self,
- num_hard_examples=64,
- iou_threshold=0.7,
- loss_type='both',
- cls_loss_weight=0.05,
- loc_loss_weight=0.06,
- max_negatives_per_positive=None,
- min_negatives_per_image=0):
- """Constructor.
-
- The hard example mining implemented by this class can replicate the behavior
- in the two aforementioned papers (Srivastava et al., and Liu et al).
-    To replicate the OHEM paper (Srivastava et al.), num_hard_examples is set
-    to a fixed parameter (64 by default) and iou_threshold is set to 0.7 for
-    running non-max-suppression on the predicted boxes prior to hard mining.
- In order to replicate the SSD paper (Liu et al), num_hard_examples should
- be set to None, max_negatives_per_positive should be 3 and iou_threshold
- should be 1.0 (in order to effectively turn off NMS).
-
- Args:
- num_hard_examples: maximum number of hard examples to be
- selected per image (prior to enforcing max negative to positive ratio
- constraint). If set to None, all examples obtained after NMS are
- considered.
- iou_threshold: minimum intersection over union for an example
- to be discarded during NMS.
-      loss_type: use only classification losses ('cls'),
-        localization losses ('loc') or both losses ('both', default).
-        In the last case, cls_loss_weight and loc_loss_weight are used to
-        compute a weighted sum of the two losses.
- cls_loss_weight: weight for classification loss.
- loc_loss_weight: weight for location loss.
-      max_negatives_per_positive: maximum number of negatives to retain for
-        each positive anchor. By default, max_negatives_per_positive is None,
-        which means that we do not enforce a prespecified negative:positive
-        ratio. Note also that max_negatives_per_positive can be a float
-        (and will be converted to a float even if it is passed in otherwise).
-      min_negatives_per_image: minimum number of negative anchors to sample for
-        a given image. Setting this to a positive number allows sampling
-        negatives in an image without any positive anchors, so that training is
-        not biased towards having at least one detection per image.
- """
- self._num_hard_examples = num_hard_examples
- self._iou_threshold = iou_threshold
- self._loss_type = loss_type
- self._cls_loss_weight = cls_loss_weight
- self._loc_loss_weight = loc_loss_weight
- self._max_negatives_per_positive = max_negatives_per_positive
- self._min_negatives_per_image = min_negatives_per_image
- if self._max_negatives_per_positive is not None:
- self._max_negatives_per_positive = float(self._max_negatives_per_positive)
- self._num_positives_list = None
- self._num_negatives_list = None
-
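As a usage sketch of the constructor settings described above, the two paper-replication configurations would look roughly as follows (values taken straight from the docstring; variable names are illustrative only):

```python
# OHEM-style mining (Srivastava et al.): fixed budget of 64 hard examples,
# with NMS at IOU 0.7 run on predicted boxes before mining.
ohem_miner = HardExampleMiner(
    num_hard_examples=64,
    iou_threshold=0.7,
    loss_type='both')

# SSD-style mining (Liu et al.): no fixed budget, at most 3 negatives per
# positive, and iou_threshold=1.0 to effectively disable NMS.
ssd_miner = HardExampleMiner(
    num_hard_examples=None,
    iou_threshold=1.0,
    loss_type='cls',
    max_negatives_per_positive=3)
```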
- def __call__(self,
- location_losses,
- cls_losses,
- decoded_boxlist_list,
- match_list=None):
- """Computes localization and classification losses after hard mining.
-
- Args:
- location_losses: a float tensor of shape [num_images, num_anchors]
- representing anchorwise localization losses.
- cls_losses: a float tensor of shape [num_images, num_anchors]
- representing anchorwise classification losses.
- decoded_boxlist_list: a list of decoded BoxList representing location
- predictions for each image.
- match_list: an optional list of matcher.Match objects encoding the match
- between anchors and groundtruth boxes for each image of the batch,
- with rows of the Match objects corresponding to groundtruth boxes
- and columns corresponding to anchors. Match objects in match_list are
- used to reference which anchors are positive, negative or ignored. If
- self._max_negatives_per_positive exists, these are then used to enforce
- a prespecified negative to positive ratio.
-
- Returns:
- mined_location_loss: a float scalar with sum of localization losses from
- selected hard examples.
- mined_cls_loss: a float scalar with sum of classification losses from
- selected hard examples.
- Raises:
- ValueError: if location_losses, cls_losses and decoded_boxlist_list do
- not have compatible shapes (i.e., they must correspond to the same
- number of images).
- ValueError: if match_list is specified but its length does not match
- len(decoded_boxlist_list).
- """
- mined_location_losses = []
- mined_cls_losses = []
- location_losses = tf.unstack(location_losses)
- cls_losses = tf.unstack(cls_losses)
- num_images = len(decoded_boxlist_list)
- if not match_list:
- match_list = num_images * [None]
- if not len(location_losses) == len(decoded_boxlist_list) == len(cls_losses):
- raise ValueError('location_losses, cls_losses and decoded_boxlist_list '
- 'do not have compatible shapes.')
- if not isinstance(match_list, list):
- raise ValueError('match_list must be a list.')
- if len(match_list) != len(decoded_boxlist_list):
- raise ValueError('match_list must either be None or have '
- 'length=len(decoded_boxlist_list).')
- num_positives_list = []
- num_negatives_list = []
- for ind, detection_boxlist in enumerate(decoded_boxlist_list):
- box_locations = detection_boxlist.get()
- match = match_list[ind]
- image_losses = cls_losses[ind]
- if self._loss_type == 'loc':
- image_losses = location_losses[ind]
- elif self._loss_type == 'both':
- image_losses *= self._cls_loss_weight
- image_losses += location_losses[ind] * self._loc_loss_weight
- if self._num_hard_examples is not None:
- num_hard_examples = self._num_hard_examples
- else:
- num_hard_examples = detection_boxlist.num_boxes()
- selected_indices = tf.image.non_max_suppression(
- box_locations, image_losses, num_hard_examples, self._iou_threshold)
- if self._max_negatives_per_positive is not None and match:
- (selected_indices, num_positives,
- num_negatives) = self._subsample_selection_to_desired_neg_pos_ratio(
- selected_indices, match, self._max_negatives_per_positive,
- self._min_negatives_per_image)
- num_positives_list.append(num_positives)
- num_negatives_list.append(num_negatives)
- mined_location_losses.append(
- tf.reduce_sum(tf.gather(location_losses[ind], selected_indices)))
- mined_cls_losses.append(
- tf.reduce_sum(tf.gather(cls_losses[ind], selected_indices)))
- location_loss = tf.reduce_sum(tf.stack(mined_location_losses))
- cls_loss = tf.reduce_sum(tf.stack(mined_cls_losses))
- if match and self._max_negatives_per_positive:
- self._num_positives_list = num_positives_list
- self._num_negatives_list = num_negatives_list
- return (location_loss, cls_loss)
-
- def summarize(self):
- """Summarize the number of positives and negatives after mining."""
- if self._num_positives_list and self._num_negatives_list:
- avg_num_positives = tf.reduce_mean(tf.to_float(self._num_positives_list))
- avg_num_negatives = tf.reduce_mean(tf.to_float(self._num_negatives_list))
- tf.summary.scalar('HardExampleMiner/NumPositives', avg_num_positives)
- tf.summary.scalar('HardExampleMiner/NumNegatives', avg_num_negatives)
-
- def _subsample_selection_to_desired_neg_pos_ratio(self,
- indices,
- match,
- max_negatives_per_positive,
- min_negatives_per_image=0):
- """Subsample a collection of selected indices to a desired neg:pos ratio.
-
-    This function takes a subset of M indices (indexing into a large anchor
-    collection of N anchors where M < N) which are labeled and split into
-    positive and negative indices.
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/matcher.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/matcher.py
deleted file mode 100644
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/matcher.py
+++ /dev/null
-class Match(object):
-  """Class to store results from the matcher.
-
-  This class is used to store the results from the matcher. It provides
-  convenient methods to query the matching results.
-  """
-
-  def __init__(self, match_results, use_matmul_gather=False):
-    """Constructs a Match object.
-
-    Args:
-      match_results: Integer tensor of shape [N] with (1) match_results[i]>=0,
-        meaning that column i is matched with row match_results[i].
-        (2) match_results[i]=-1, meaning that column i is not matched.
-        (3) match_results[i]=-2, meaning that column i is ignored.
- use_matmul_gather: Use matrix multiplication based gather instead of
- standard tf.gather. (Default: False).
-
- Raises:
-      ValueError: if match_results does not have rank 1 or is not an
-        int32 tensor.
- """
- if match_results.shape.ndims != 1:
- raise ValueError('match_results should have rank 1')
- if match_results.dtype != tf.int32:
-      raise ValueError('match_results should be an int32 tensor')
- self._match_results = match_results
- self._gather_op = tf.gather
- if use_matmul_gather:
- self._gather_op = ops.matmul_gather_on_zeroth_axis
-
- @property
- def match_results(self):
- """The accessor for match results.
-
- Returns:
- the tensor which encodes the match results.
- """
- return self._match_results
-
- def matched_column_indices(self):
- """Returns column indices that match to some row.
-
- The indices returned by this op are always sorted in increasing order.
-
- Returns:
- column_indices: int32 tensor of shape [K] with column indices.
- """
- return self._reshape_and_cast(tf.where(tf.greater(self._match_results, -1)))
-
- def matched_column_indicator(self):
- """Returns column indices that are matched.
-
- Returns:
- column_indices: int32 tensor of shape [K] with column indices.
- """
- return tf.greater_equal(self._match_results, 0)
-
- def num_matched_columns(self):
- """Returns number (int32 scalar tensor) of matched columns."""
- return tf.size(self.matched_column_indices())
-
- def unmatched_column_indices(self):
- """Returns column indices that do not match any row.
-
- The indices returned by this op are always sorted in increasing order.
-
- Returns:
- column_indices: int32 tensor of shape [K] with column indices.
- """
- return self._reshape_and_cast(tf.where(tf.equal(self._match_results, -1)))
-
- def unmatched_column_indicator(self):
- """Returns column indices that are unmatched.
-
- Returns:
- column_indices: int32 tensor of shape [K] with column indices.
- """
- return tf.equal(self._match_results, -1)
-
- def num_unmatched_columns(self):
- """Returns number (int32 scalar tensor) of unmatched columns."""
- return tf.size(self.unmatched_column_indices())
-
- def ignored_column_indices(self):
- """Returns column indices that are ignored (neither Matched nor Unmatched).
-
- The indices returned by this op are always sorted in increasing order.
-
- Returns:
- column_indices: int32 tensor of shape [K] with column indices.
- """
- return self._reshape_and_cast(tf.where(self.ignored_column_indicator()))
-
- def ignored_column_indicator(self):
- """Returns boolean column indicator where True means the colum is ignored.
-
- Returns:
- column_indicator: boolean vector which is True for all ignored column
- indices.
- """
- return tf.equal(self._match_results, -2)
-
- def num_ignored_columns(self):
- """Returns number (int32 scalar tensor) of matched columns."""
- return tf.size(self.ignored_column_indices())
-
- def unmatched_or_ignored_column_indices(self):
- """Returns column indices that are unmatched or ignored.
-
- The indices returned by this op are always sorted in increasing order.
-
- Returns:
- column_indices: int32 tensor of shape [K] with column indices.
- """
- return self._reshape_and_cast(tf.where(tf.greater(0, self._match_results)))
-
- def matched_row_indices(self):
- """Returns row indices that match some column.
-
- The indices returned by this op are ordered so as to be in correspondence
-    with the output of matched_column_indices(). For example if
-    self.matched_column_indices() is [0,2], and self.matched_row_indices() is
- [7, 3], then we know that column 0 was matched to row 7 and column 2 was
- matched to row 3.
-
- Returns:
- row_indices: int32 tensor of shape [K] with row indices.
- """
- return self._reshape_and_cast(
- self._gather_op(self._match_results, self.matched_column_indices()))
-
- def _reshape_and_cast(self, t):
- return tf.cast(tf.reshape(t, [-1]), tf.int32)
-
- def gather_based_on_match(self, input_tensor, unmatched_value,
- ignored_value):
- """Gathers elements from `input_tensor` based on match results.
-
- For columns that are matched to a row, gathered_tensor[col] is set to
- input_tensor[match_results[col]]. For columns that are unmatched,
- gathered_tensor[col] is set to unmatched_value. Finally, for columns that
- are ignored gathered_tensor[col] is set to ignored_value.
-
- Note that the input_tensor.shape[1:] must match with unmatched_value.shape
- and ignored_value.shape
-
- Args:
- input_tensor: Tensor to gather values from.
- unmatched_value: Constant tensor value for unmatched columns.
- ignored_value: Constant tensor value for ignored columns.
-
- Returns:
- gathered_tensor: A tensor containing values gathered from input_tensor.
- The shape of the gathered tensor is [match_results.shape[0]] +
- input_tensor.shape[1:].
- """
- input_tensor = tf.concat([tf.stack([ignored_value, unmatched_value]),
- input_tensor], axis=0)
- gather_indices = tf.maximum(self.match_results + 2, 0)
- gathered_tensor = self._gather_op(input_tensor, gather_indices)
- return gathered_tensor
-
-
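The shift-by-two gather above is compact but easy to misread, so here is an equivalent NumPy restatement using the same values as `test_scalar_gather_based_on_match` in the test file below (the `_np` helper name is ours):

```python
import numpy as np

def gather_based_on_match_np(match_results, input_tensor,
                             unmatched_value, ignored_value):
    """NumPy sketch of the shift-by-two gather: rows 0 and 1 of the augmented
    tensor hold the ignored (-2) and unmatched (-1) fill values."""
    augmented = np.concatenate(
        [np.stack([ignored_value, unmatched_value]), input_tensor], axis=0)
    gather_indices = np.maximum(match_results + 2, 0)
    return augmented[gather_indices]

match_results = np.array([3, 1, -1, 0, -1, 5, -2])
inputs = np.arange(8, dtype=np.float32)
print(gather_based_on_match_np(match_results, inputs, 100., 200.))
# [  3.   1. 100.   0. 100.   5. 200.] -- same values as matcher_test below
```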
-class Matcher(object):
- """Abstract base class for matcher.
- """
- __metaclass__ = ABCMeta
-
- def __init__(self, use_matmul_gather=False):
- """Constructs a Matcher.
-
- Args:
- use_matmul_gather: Force constructed match objects to use matrix
- multiplication based gather instead of standard tf.gather.
- (Default: False).
- """
- self._use_matmul_gather = use_matmul_gather
-
- def match(self, similarity_matrix, scope=None, **params):
- """Computes matches among row and column indices and returns the result.
-
- Computes matches among the row and column indices based on the similarity
- matrix and optional arguments.
-
- Args:
- similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
- where higher value means more similar.
- scope: Op scope name. Defaults to 'Match' if None.
- **params: Additional keyword arguments for specific implementations of
- the Matcher.
-
- Returns:
- A Match object with the results of matching.
- """
- with tf.name_scope(scope, 'Match', [similarity_matrix, params]) as scope:
- return Match(self._match(similarity_matrix, **params),
- self._use_matmul_gather)
-
- @abstractmethod
- def _match(self, similarity_matrix, **params):
- """Method to be overridden by implementations.
-
- Args:
- similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
- where higher value means more similar.
- **params: Additional keyword arguments for specific implementations of
- the Matcher.
-
- Returns:
- match_results: Integer tensor of shape [M]: match_results[i]>=0 means
- that column i is matched to row match_results[i], match_results[i]=-1
- means that the column is not matched. match_results[i]=-2 means that
- the column is ignored (usually this happens when there is a very weak
- match which one neither wants as positive nor negative example).
- """
- pass
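To show how the `_match` contract (>=0 matched, -1 unmatched, -2 ignored) is typically satisfied, here is a hypothetical minimal subclass; `GreedyThresholdMatcher` is not part of this codebase, just a sketch against the interface above.

```python
import tensorflow as tf

class GreedyThresholdMatcher(Matcher):
  """Illustrative-only matcher: each column takes its argmax row when the best
  similarity clears `matched_threshold`, and is marked unmatched (-1) otherwise.
  """

  def __init__(self, matched_threshold=0.5, use_matmul_gather=False):
    super(GreedyThresholdMatcher, self).__init__(use_matmul_gather)
    self._matched_threshold = matched_threshold

  def _match(self, similarity_matrix, **params):
    best_rows = tf.argmax(similarity_matrix, axis=0, output_type=tf.int32)
    best_vals = tf.reduce_max(similarity_matrix, axis=0)
    # -2 (ignore) is never produced here; a real matcher might emit it for
    # similarities falling between a low and a high threshold.
    return tf.where(best_vals >= self._matched_threshold,
                    best_rows, -tf.ones_like(best_rows))
```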
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/matcher_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/matcher_test.py
deleted file mode 100644
index 05607834a1dd116e2e0beeb79a508d6196fad235..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/matcher_test.py
+++ /dev/null
@@ -1,192 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.matcher."""
-import numpy as np
-import tensorflow as tf
-
-from object_detection.core import matcher
-
-
-class MatchTest(tf.test.TestCase):
-
- def test_get_correct_matched_columnIndices(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- match = matcher.Match(match_results)
- expected_column_indices = [0, 1, 3, 5]
- matched_column_indices = match.matched_column_indices()
- self.assertEquals(matched_column_indices.dtype, tf.int32)
- with self.test_session() as sess:
- matched_column_indices = sess.run(matched_column_indices)
- self.assertAllEqual(matched_column_indices, expected_column_indices)
-
- def test_get_correct_counts(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- match = matcher.Match(match_results)
- exp_num_matched_columns = 4
- exp_num_unmatched_columns = 2
- exp_num_ignored_columns = 1
- num_matched_columns = match.num_matched_columns()
- num_unmatched_columns = match.num_unmatched_columns()
- num_ignored_columns = match.num_ignored_columns()
- self.assertEquals(num_matched_columns.dtype, tf.int32)
- self.assertEquals(num_unmatched_columns.dtype, tf.int32)
- self.assertEquals(num_ignored_columns.dtype, tf.int32)
- with self.test_session() as sess:
- (num_matched_columns_out, num_unmatched_columns_out,
- num_ignored_columns_out) = sess.run(
- [num_matched_columns, num_unmatched_columns, num_ignored_columns])
- self.assertAllEqual(num_matched_columns_out, exp_num_matched_columns)
- self.assertAllEqual(num_unmatched_columns_out, exp_num_unmatched_columns)
- self.assertAllEqual(num_ignored_columns_out, exp_num_ignored_columns)
-
- def testGetCorrectUnmatchedColumnIndices(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- match = matcher.Match(match_results)
- expected_column_indices = [2, 4]
- unmatched_column_indices = match.unmatched_column_indices()
- self.assertEquals(unmatched_column_indices.dtype, tf.int32)
- with self.test_session() as sess:
- unmatched_column_indices = sess.run(unmatched_column_indices)
- self.assertAllEqual(unmatched_column_indices, expected_column_indices)
-
- def testGetCorrectMatchedRowIndices(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- match = matcher.Match(match_results)
- expected_row_indices = [3, 1, 0, 5]
- matched_row_indices = match.matched_row_indices()
- self.assertEquals(matched_row_indices.dtype, tf.int32)
- with self.test_session() as sess:
- matched_row_inds = sess.run(matched_row_indices)
- self.assertAllEqual(matched_row_inds, expected_row_indices)
-
- def test_get_correct_ignored_column_indices(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- match = matcher.Match(match_results)
- expected_column_indices = [6]
- ignored_column_indices = match.ignored_column_indices()
- self.assertEquals(ignored_column_indices.dtype, tf.int32)
- with self.test_session() as sess:
- ignored_column_indices = sess.run(ignored_column_indices)
- self.assertAllEqual(ignored_column_indices, expected_column_indices)
-
- def test_get_correct_matched_column_indicator(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- match = matcher.Match(match_results)
- expected_column_indicator = [True, True, False, True, False, True, False]
- matched_column_indicator = match.matched_column_indicator()
- self.assertEquals(matched_column_indicator.dtype, tf.bool)
- with self.test_session() as sess:
- matched_column_indicator = sess.run(matched_column_indicator)
- self.assertAllEqual(matched_column_indicator, expected_column_indicator)
-
- def test_get_correct_unmatched_column_indicator(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- match = matcher.Match(match_results)
- expected_column_indicator = [False, False, True, False, True, False, False]
- unmatched_column_indicator = match.unmatched_column_indicator()
- self.assertEquals(unmatched_column_indicator.dtype, tf.bool)
- with self.test_session() as sess:
- unmatched_column_indicator = sess.run(unmatched_column_indicator)
- self.assertAllEqual(unmatched_column_indicator, expected_column_indicator)
-
- def test_get_correct_ignored_column_indicator(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- match = matcher.Match(match_results)
- expected_column_indicator = [False, False, False, False, False, False, True]
- ignored_column_indicator = match.ignored_column_indicator()
- self.assertEquals(ignored_column_indicator.dtype, tf.bool)
- with self.test_session() as sess:
- ignored_column_indicator = sess.run(ignored_column_indicator)
- self.assertAllEqual(ignored_column_indicator, expected_column_indicator)
-
- def test_get_correct_unmatched_ignored_column_indices(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- match = matcher.Match(match_results)
- expected_column_indices = [2, 4, 6]
- unmatched_ignored_column_indices = (match.
- unmatched_or_ignored_column_indices())
- self.assertEquals(unmatched_ignored_column_indices.dtype, tf.int32)
- with self.test_session() as sess:
- unmatched_ignored_column_indices = sess.run(
- unmatched_ignored_column_indices)
- self.assertAllEqual(unmatched_ignored_column_indices,
- expected_column_indices)
-
- def test_all_columns_accounted_for(self):
- # Note: deliberately setting to small number so not always
- # all possibilities appear (matched, unmatched, ignored)
- num_matches = 10
- match_results = tf.random_uniform(
- [num_matches], minval=-2, maxval=5, dtype=tf.int32)
- match = matcher.Match(match_results)
- matched_column_indices = match.matched_column_indices()
- unmatched_column_indices = match.unmatched_column_indices()
- ignored_column_indices = match.ignored_column_indices()
- with self.test_session() as sess:
- matched, unmatched, ignored = sess.run([
- matched_column_indices, unmatched_column_indices,
- ignored_column_indices
- ])
- all_indices = np.hstack((matched, unmatched, ignored))
- all_indices_sorted = np.sort(all_indices)
- self.assertAllEqual(all_indices_sorted,
- np.arange(num_matches, dtype=np.int32))
-
- def test_scalar_gather_based_on_match(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- input_tensor = tf.constant([0, 1, 2, 3, 4, 5, 6, 7], dtype=tf.float32)
- expected_gathered_tensor = [3, 1, 100, 0, 100, 5, 200]
- match = matcher.Match(match_results)
- gathered_tensor = match.gather_based_on_match(input_tensor,
- unmatched_value=100.,
- ignored_value=200.)
- self.assertEquals(gathered_tensor.dtype, tf.float32)
- with self.test_session():
- gathered_tensor_out = gathered_tensor.eval()
- self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
-
- def test_multidimensional_gather_based_on_match(self):
- match_results = tf.constant([1, -1, -2])
- input_tensor = tf.constant([[0, 0.5, 0, 0.5], [0, 0, 0.5, 0.5]],
- dtype=tf.float32)
- expected_gathered_tensor = [[0, 0, 0.5, 0.5], [0, 0, 0, 0], [0, 0, 0, 0]]
- match = matcher.Match(match_results)
- gathered_tensor = match.gather_based_on_match(input_tensor,
- unmatched_value=tf.zeros(4),
- ignored_value=tf.zeros(4))
- self.assertEquals(gathered_tensor.dtype, tf.float32)
- with self.test_session():
- gathered_tensor_out = gathered_tensor.eval()
- self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
-
- def test_multidimensional_gather_based_on_match_with_matmul_gather_op(self):
- match_results = tf.constant([1, -1, -2])
- input_tensor = tf.constant([[0, 0.5, 0, 0.5], [0, 0, 0.5, 0.5]],
- dtype=tf.float32)
- expected_gathered_tensor = [[0, 0, 0.5, 0.5], [0, 0, 0, 0], [0, 0, 0, 0]]
- match = matcher.Match(match_results, use_matmul_gather=True)
- gathered_tensor = match.gather_based_on_match(input_tensor,
- unmatched_value=tf.zeros(4),
- ignored_value=tf.zeros(4))
- self.assertEquals(gathered_tensor.dtype, tf.float32)
- with self.test_session() as sess:
-      self.assertTrue(
-          all(op.name != 'Gather' for op in sess.graph.get_operations()))
- gathered_tensor_out = gathered_tensor.eval()
- self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/minibatch_sampler.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/minibatch_sampler.py
deleted file mode 100644
index dc622221ae526360d0a5f85f914bc2c53365911c..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/minibatch_sampler.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Base minibatch sampler module.
-
-The job of the minibatch_sampler is to subsample a minibatch based on some
-criterion.
-
-The main function call is:
- subsample(indicator, batch_size, **params).
-Indicator is a 1-d boolean tensor where True denotes which examples can be
-sampled. It returns a boolean indicator where True denotes that an example has
-been sampled.
-
-Subclasses should implement the subsample method and can make use of the
-subsample_indicator static method.
-"""
-
-from abc import ABCMeta
-from abc import abstractmethod
-
-import tensorflow as tf
-
-from object_detection.utils import ops
-
-
-class MinibatchSampler(object):
- """Abstract base class for subsampling minibatches."""
- __metaclass__ = ABCMeta
-
- def __init__(self):
- """Constructs a minibatch sampler."""
- pass
-
- @abstractmethod
- def subsample(self, indicator, batch_size, **params):
- """Returns subsample of entries in indicator.
-
- Args:
- indicator: boolean tensor of shape [N] whose True entries can be sampled.
- batch_size: desired batch size.
- **params: additional keyword arguments for specific implementations of
- the MinibatchSampler.
-
- Returns:
- sample_indicator: boolean tensor of shape [N] whose True entries have been
-        sampled. If sum(indicator) >= batch_size, sum(sample_indicator) = batch_size.
- """
- pass
-
- @staticmethod
- def subsample_indicator(indicator, num_samples):
- """Subsample indicator vector.
-
- Given a boolean indicator vector with M elements set to `True`, the function
- assigns all but `num_samples` of these previously `True` elements to
- `False`. If `num_samples` is greater than M, the original indicator vector
- is returned.
-
- Args:
- indicator: a 1-dimensional boolean tensor indicating which elements
- are allowed to be sampled and which are not.
- num_samples: int32 scalar tensor
-
- Returns:
- a boolean tensor with the same shape as input (indicator) tensor
- """
- indices = tf.where(indicator)
- indices = tf.random_shuffle(indices)
- indices = tf.reshape(indices, [-1])
-
- num_samples = tf.minimum(tf.size(indices), num_samples)
- selected_indices = tf.slice(indices, [0], tf.reshape(num_samples, [1]))
-
- selected_indicator = ops.indices_to_dense_vector(selected_indices,
- tf.shape(indicator)[0])
-
- return tf.equal(selected_indicator, 1)
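A short usage sketch, in the TF1 graph/session style this codebase assumes, mirroring the cases exercised in the test file below:

```python
import tensorflow as tf
from object_detection.core import minibatch_sampler

# Keep at most 3 of the True entries, chosen uniformly at random.
indicator = tf.constant([True, False, True, False, True, True, False])
sampled = minibatch_sampler.MinibatchSampler.subsample_indicator(indicator, 3)

with tf.Session() as sess:
  print(sess.run(sampled))  # e.g. [ True False False False  True  True False]
```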
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/minibatch_sampler_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/minibatch_sampler_test.py
deleted file mode 100644
index 7420ae5d03ca5318d2fd5df4dd4a5cee400189b1..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/minibatch_sampler_test.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for google3.research.vale.object_detection.minibatch_sampler."""
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.core import minibatch_sampler
-
-
-class MinibatchSamplerTest(tf.test.TestCase):
-
- def test_subsample_indicator_when_more_true_elements_than_num_samples(self):
- np_indicator = [True, False, True, False, True, True, False]
- indicator = tf.constant(np_indicator)
- samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
- indicator, 3)
- with self.test_session() as sess:
- samples_out = sess.run(samples)
-      self.assertEqual(np.sum(samples_out), 3)
- self.assertAllEqual(samples_out,
- np.logical_and(samples_out, np_indicator))
-
- def test_subsample_when_more_true_elements_than_num_samples_no_shape(self):
- np_indicator = [True, False, True, False, True, True, False]
- indicator = tf.placeholder(tf.bool)
- feed_dict = {indicator: np_indicator}
-
- samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
- indicator, 3)
- with self.test_session() as sess:
- samples_out = sess.run(samples, feed_dict=feed_dict)
-      self.assertEqual(np.sum(samples_out), 3)
- self.assertAllEqual(samples_out,
- np.logical_and(samples_out, np_indicator))
-
- def test_subsample_indicator_when_less_true_elements_than_num_samples(self):
- np_indicator = [True, False, True, False, True, True, False]
- indicator = tf.constant(np_indicator)
- samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
- indicator, 5)
- with self.test_session() as sess:
- samples_out = sess.run(samples)
-      self.assertEqual(np.sum(samples_out), 4)
- self.assertAllEqual(samples_out,
- np.logical_and(samples_out, np_indicator))
-
- def test_subsample_indicator_when_num_samples_is_zero(self):
- np_indicator = [True, False, True, False, True, True, False]
- indicator = tf.constant(np_indicator)
- samples_none = minibatch_sampler.MinibatchSampler.subsample_indicator(
- indicator, 0)
- with self.test_session() as sess:
- samples_none_out = sess.run(samples_none)
- self.assertAllEqual(
- np.zeros_like(samples_none_out, dtype=bool),
- samples_none_out)
-
- def test_subsample_indicator_when_indicator_all_false(self):
- indicator_empty = tf.zeros([0], dtype=tf.bool)
- samples_empty = minibatch_sampler.MinibatchSampler.subsample_indicator(
- indicator_empty, 4)
- with self.test_session() as sess:
- samples_empty_out = sess.run(samples_empty)
- self.assertEqual(0, samples_empty_out.size)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/model.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/model.py
deleted file mode 100644
index 081136f9c6a64ca8b56b2a98b9113a81bdc791f8..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/model.py
+++ /dev/null
@@ -1,305 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Abstract detection model.
-
-This file defines a generic base class for detection models. Programs that are
-designed to work with arbitrary detection models should only depend on this
-class. We intend for the functions in this class to follow tensor-in/tensor-out
-design, thus all functions have tensors or lists/dictionaries holding tensors as
-inputs and outputs.
-
-Abstractly, detection models predict output tensors given input images
-which can be passed to a loss function at training time or passed to a
-postprocessing function at eval time. The computation graphs at a high level
-consequently look as follows:
-
-Training time:
-inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor)
-
-Evaluation time:
-inputs (images tensor) -> preprocess -> predict -> postprocess
- -> outputs (boxes tensor, scores tensor, classes tensor, num_detections tensor)
-
-DetectionModels must thus implement four functions: (1) preprocess, (2) predict,
-(3) postprocess and (4) loss. DetectionModels should make no assumptions about
-the input size or aspect ratio --- they are responsible for doing any
-resize/reshaping necessary (see docstring for the preprocess function).
-Output classes are always integers in the range [0, num_classes). Any mapping
-of these integers to semantic labels is to be handled outside of this class.
-
-Images are resized in the `preprocess` method. All of `preprocess`, `predict`,
-and `postprocess` should be reentrant.
-
-The `preprocess` method runs `image_resizer_fn` that returns resized_images and
-`true_image_shapes`. Since `image_resizer_fn` can pad the images with zeros,
-true_image_shapes indicate the slices that contain the image without padding.
-This is useful for padding images to be a fixed size for batching.
-
-The `postprocess` method uses the true image shapes to clip predictions that lie
-outside of images.
-
-By default, DetectionModels produce bounding box detections; however, we support
-a handful of auxiliary annotations associated with each bounding box, namely,
-instance masks and keypoints.
-"""
-from abc import ABCMeta
-from abc import abstractmethod
-
-from object_detection.core import standard_fields as fields
-
-
-class DetectionModel(object):
- """Abstract base class for detection models."""
- __metaclass__ = ABCMeta
-
- def __init__(self, num_classes):
- """Constructor.
-
- Args:
- num_classes: number of classes. Note that num_classes *does not* include
- background categories that might be implicitly predicted in various
- implementations.
- """
- self._num_classes = num_classes
- self._groundtruth_lists = {}
-
- @property
- def num_classes(self):
- return self._num_classes
-
- def groundtruth_lists(self, field):
- """Access list of groundtruth tensors.
-
- Args:
- field: a string key, options are
- fields.BoxListFields.{boxes,classes,masks,keypoints}
-
- Returns:
- a list of tensors holding groundtruth information (see also
- provide_groundtruth function below), with one entry for each image in the
- batch.
- Raises:
- RuntimeError: if the field has not been provided via provide_groundtruth.
- """
- if field not in self._groundtruth_lists:
- raise RuntimeError('Groundtruth tensor %s has not been provided' % field)
- return self._groundtruth_lists[field]
-
- def groundtruth_has_field(self, field):
- """Determines whether the groundtruth includes the given field.
-
- Args:
- field: a string key, options are
- fields.BoxListFields.{boxes,classes,masks,keypoints}
-
- Returns:
- True if the groundtruth includes the given field, False otherwise.
- """
- return field in self._groundtruth_lists
-
- @abstractmethod
- def preprocess(self, inputs):
- """Input preprocessing.
-
- To be overridden by implementations.
-
- This function is responsible for any scaling/shifting of input values that
- is necessary prior to running the detector on an input image.
- It is also responsible for any resizing and padding that might be necessary
- as images are assumed to arrive in arbitrary sizes. While this function
- could conceivably be part of the predict method (below), it is often
- convenient to keep these separate --- for example, we may want to preprocess
- on one device, place onto a queue, and let another device (e.g., the GPU)
- handle prediction.
-
- A few important notes about the preprocess function:
- + We assume that this operation does not have any trainable variables nor
- does it affect the groundtruth annotations in any way (thus data
- augmentation operations such as random cropping should be performed
- externally).
- + There is no assumption that the batch size in this function is the same as
- the batch size in the predict function. In fact, we recommend calling the
- preprocess function prior to calling any batching operations (which should
- happen outside of the model) and thus assuming that batch sizes are equal
- to 1 in the preprocess function.
- + There is also no explicit assumption that the output resolutions
- must be fixed across inputs --- this is to support "fully convolutional"
- settings in which input images can have different shapes/resolutions.
-
- Args:
- inputs: a [batch, height_in, width_in, channels] float32 tensor
- representing a batch of images with values between 0 and 255.0.
-
- Returns:
- preprocessed_inputs: a [batch, height_out, width_out, channels] float32
- tensor representing a batch of images.
- true_image_shapes: int32 tensor of shape [batch, 3] where each row is
- of the form [height, width, channels] indicating the shapes
- of true images in the resized images, as resized images can be padded
- with zeros.
- """
- pass
-
- @abstractmethod
- def predict(self, preprocessed_inputs, true_image_shapes):
- """Predict prediction tensors from inputs tensor.
-
- Outputs of this function can be passed to loss or postprocess functions.
-
- Args:
- preprocessed_inputs: a [batch, height, width, channels] float32 tensor
- representing a batch of images.
- true_image_shapes: int32 tensor of shape [batch, 3] where each row is
- of the form [height, width, channels] indicating the shapes
- of true images in the resized images, as resized images can be padded
- with zeros.
-
- Returns:
- prediction_dict: a dictionary holding prediction tensors to be
- passed to the Loss or Postprocess functions.
- """
- pass
-
- @abstractmethod
- def postprocess(self, prediction_dict, true_image_shapes, **params):
- """Convert predicted output tensors to final detections.
-
- Outputs adhere to the following conventions:
- * Classes are integers in [0, num_classes); background classes are removed
- and the first non-background class is mapped to 0. If the model produces
- class-agnostic detections, then no output is produced for classes.
- * Boxes are to be interpreted as being in [y_min, x_min, y_max, x_max]
- format and normalized relative to the image window.
- * `num_detections` is provided for settings where detections are padded to a
- fixed number of boxes.
- * We do not specifically assume any kind of probabilistic interpretation
- of the scores --- the only important thing is their relative ordering.
- Thus implementations of the postprocess function are free to output
- logits, probabilities, calibrated probabilities, or anything else.
-
- Args:
- prediction_dict: a dictionary holding prediction tensors.
- true_image_shapes: int32 tensor of shape [batch, 3] where each row is
- of the form [height, width, channels] indicating the shapes
- of true images in the resized images, as resized images can be padded
- with zeros.
- **params: Additional keyword arguments for specific implementations of
- DetectionModel.
-
- Returns:
- detections: a dictionary containing the following fields
- detection_boxes: [batch, max_detections, 4]
- detection_scores: [batch, max_detections]
- detection_classes: [batch, max_detections]
- (If a model is producing class-agnostic detections, this field may be
- missing)
- instance_masks: [batch, max_detections, image_height, image_width]
- (optional)
- keypoints: [batch, max_detections, num_keypoints, 2] (optional)
- num_detections: [batch]
- """
- pass
-
- @abstractmethod
- def loss(self, prediction_dict, true_image_shapes):
- """Compute scalar loss tensors with respect to provided groundtruth.
-
- Calling this function requires that groundtruth tensors have been
- provided via the provide_groundtruth function.
-
- Args:
- prediction_dict: a dictionary holding predicted tensors.
- true_image_shapes: int32 tensor of shape [batch, 3] where each row is
- of the form [height, width, channels] indicating the shapes
- of true images in the resized images, as resized images can be padded
- with zeros.
-
- Returns:
- a dictionary mapping strings (loss names) to scalar tensors representing
- loss values.
- """
- pass
-
- def provide_groundtruth(self,
- groundtruth_boxes_list,
- groundtruth_classes_list,
- groundtruth_masks_list=None,
- groundtruth_keypoints_list=None,
- groundtruth_weights_list=None,
- groundtruth_is_crowd_list=None):
- """Provide groundtruth tensors.
-
- Args:
- groundtruth_boxes_list: a list of 2-D tf.float32 tensors of shape
- [num_boxes, 4] containing coordinates of the groundtruth boxes.
- Groundtruth boxes are provided in [y_min, x_min, y_max, x_max]
- format and assumed to be normalized and clipped
- relative to the image window with y_min <= y_max and x_min <= x_max.
- groundtruth_classes_list: a list of 2-D tf.float32 one-hot (or k-hot)
- tensors of shape [num_boxes, num_classes] containing the class targets
- with the 0th index assumed to map to the first non-background class.
- groundtruth_masks_list: a list of 3-D tf.float32 tensors of
- shape [num_boxes, height_in, width_in] containing instance
- masks with values in {0, 1}. If None, no masks are provided.
- Mask resolution `height_in`x`width_in` must agree with the resolution
- of the input image tensor provided to the `preprocess` function.
- groundtruth_keypoints_list: a list of 3-D tf.float32 tensors of
- shape [num_boxes, num_keypoints, 2] containing keypoints.
- Keypoints are assumed to be provided in normalized coordinates and
- missing keypoints should be encoded as NaN.
- groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
- [num_boxes] containing weights for groundtruth boxes.
- groundtruth_is_crowd_list: A list of 1-D tf.bool tensors of shape
- [num_boxes] containing is_crowd annotations.
- """
- self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list
- self._groundtruth_lists[
- fields.BoxListFields.classes] = groundtruth_classes_list
- if groundtruth_weights_list:
- self._groundtruth_lists[
- fields.BoxListFields.weights] = groundtruth_weights_list
- if groundtruth_masks_list:
- self._groundtruth_lists[
- fields.BoxListFields.masks] = groundtruth_masks_list
- if groundtruth_keypoints_list:
- self._groundtruth_lists[
- fields.BoxListFields.keypoints] = groundtruth_keypoints_list
- if groundtruth_is_crowd_list:
- self._groundtruth_lists[
- fields.BoxListFields.is_crowd] = groundtruth_is_crowd_list
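-
- # A hypothetical call, shown for illustration only (the tensors below are
- # assumptions, not part of this module): one image, one groundtruth box
- # covering the top-left quadrant, one-hot targets over three classes.
- #
- # model.provide_groundtruth(
- # groundtruth_boxes_list=[tf.constant([[0., 0., 0.5, 0.5]])],
- # groundtruth_classes_list=[tf.constant([[1., 0., 0.]])])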
-
- @abstractmethod
- def restore_map(self, fine_tune_checkpoint_type='detection'):
- """Returns a map of variables to load from a foreign checkpoint.
-
- Returns a map of variable names to load from a checkpoint to variables in
- the model graph. This enables the model to initialize based on weights from
- another task. For example, the feature extractor variables from a
- classification model can be used to bootstrap training of an object
- detector. When loading from an object detection model, the checkpoint model
- should have the same parameters as this detection model, with the exception
- of the num_classes parameter.
-
- Args:
- fine_tune_checkpoint_type: whether to restore from a full detection
- checkpoint (with compatible variable names) or to restore from a
- classification checkpoint for initialization prior to training.
- Valid values: `detection`, `classification`. Default 'detection'.
-
- Returns:
- A dict mapping variable names (to load from a checkpoint) to variables in
- the model graph.
- """
- pass
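-
-
-# A hedged usage sketch for restore_map in TF1-style code (`model`, `sess` and
-# the checkpoint path are assumptions of the sketch, not defined here):
-#
-# var_map = model.restore_map(fine_tune_checkpoint_type='classification')
-# saver = tf.train.Saver(var_map)
-# saver.restore(sess, classification_checkpoint_path)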
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/post_processing.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/post_processing.py
deleted file mode 100644
index bbc61f66fe7e61b2a3d243fa3285a204374af0c1..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/post_processing.py
+++ /dev/null
@@ -1,425 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Post-processing operations on detected boxes."""
-
-import tensorflow as tf
-
-from object_detection.core import box_list
-from object_detection.core import box_list_ops
-from object_detection.core import standard_fields as fields
-from object_detection.utils import shape_utils
-
-
-def multiclass_non_max_suppression(boxes,
- scores,
- score_thresh,
- iou_thresh,
- max_size_per_class,
- max_total_size=0,
- clip_window=None,
- change_coordinate_frame=False,
- masks=None,
- boundaries=None,
- additional_fields=None,
- scope=None):
- """Multi-class version of non maximum suppression.
-
- This op greedily selects a subset of detection bounding boxes, pruning
- away boxes that have high IOU (intersection over union) overlap (> thresh)
- with already selected boxes. It operates independently for each class for
- which scores are provided (via the scores field of the input box_list),
- pruning boxes with score less than a provided threshold prior to
- applying NMS.
-
- Please note that this operation is performed on *all* classes; therefore, any
- background classes should be removed prior to calling this function.
-
- Args:
- boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either
- number of classes or 1 depending on whether a separate box is predicted
- per class.
- scores: A [k, num_classes] float32 tensor containing the scores for each of
- the k detections.
- score_thresh: scalar threshold for score (low scoring boxes are removed).
- iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
- with previously selected boxes are removed).
- max_size_per_class: maximum number of retained boxes per class.
- max_total_size: maximum number of boxes retained over all classes. By
- default returns all boxes retained after capping boxes per class.
- clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
- representing the window to clip and normalize boxes to before performing
- non-max suppression.
- change_coordinate_frame: Whether to normalize coordinates after clipping
- relative to clip_window (this can only be set to True if a clip_window
- is provided)
- masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
- containing box masks. `q` can be either number of classes or 1 depending
- on whether a separate mask is predicted per class.
- boundaries: (optional) a [k, q, boundary_height, boundary_width] float32
- tensor containing box boundaries. `q` can be either number of classes or 1
- depending on whether a separate boundary is predicted per class.
- additional_fields: (optional) If not None, a dictionary that maps keys to
- tensors whose first dimensions are all of size `k`. After non-maximum
- suppression, all tensors corresponding to the selected boxes will be
- added to resulting BoxList.
- scope: name scope.
-
- Returns:
- a BoxList holding M boxes with a rank-1 scores field representing
- corresponding scores for each box with scores sorted in decreasing order
- and a rank-1 classes field representing a class label for each box.
-
- Raises:
- ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
- a valid scores field.
- """
- if not 0 <= iou_thresh <= 1.0:
- raise ValueError('iou_thresh must be between 0 and 1')
- if scores.shape.ndims != 2:
- raise ValueError('scores field must be of rank 2')
- if scores.shape[1].value is None:
- raise ValueError('scores must have statically defined second '
- 'dimension')
- if boxes.shape.ndims != 3:
- raise ValueError('boxes must be of rank 3.')
- if not (boxes.shape[1].value == scores.shape[1].value or
- boxes.shape[1].value == 1):
- raise ValueError('second dimension of boxes must be either 1 or equal '
- 'to the second dimension of scores')
- if boxes.shape[2].value != 4:
- raise ValueError('last dimension of boxes must be of size 4.')
- if change_coordinate_frame and clip_window is None:
- raise ValueError('if change_coordinate_frame is True, then a clip_window '
- 'must be specified.')
-
- with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
- num_boxes = tf.shape(boxes)[0]
- num_scores = tf.shape(scores)[0]
- num_classes = scores.get_shape()[1]
-
- length_assert = tf.Assert(
- tf.equal(num_boxes, num_scores),
- ['Incorrect scores field length: actual vs expected.',
- num_scores, num_boxes])
-
- selected_boxes_list = []
- per_class_boxes_list = tf.unstack(boxes, axis=1)
- if masks is not None:
- per_class_masks_list = tf.unstack(masks, axis=1)
- if boundaries is not None:
- per_class_boundaries_list = tf.unstack(boundaries, axis=1)
- boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
- else [0] * num_classes.value)
- for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
- per_class_boxes = per_class_boxes_list[boxes_idx]
- boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
- with tf.control_dependencies([length_assert]):
- class_scores = tf.reshape(
- tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])), [-1])
- boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
- class_scores)
- if masks is not None:
- per_class_masks = per_class_masks_list[boxes_idx]
- boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
- per_class_masks)
- if boundaries is not None:
- per_class_boundaries = per_class_boundaries_list[boxes_idx]
- boxlist_and_class_scores.add_field(fields.BoxListFields.boundaries,
- per_class_boundaries)
- if additional_fields is not None:
- for key, tensor in additional_fields.items():
- boxlist_and_class_scores.add_field(key, tensor)
- boxlist_filtered = box_list_ops.filter_greater_than(
- boxlist_and_class_scores, score_thresh)
- if clip_window is not None:
- boxlist_filtered = box_list_ops.clip_to_window(
- boxlist_filtered, clip_window)
- if change_coordinate_frame:
- boxlist_filtered = box_list_ops.change_coordinate_frame(
- boxlist_filtered, clip_window)
- max_selection_size = tf.minimum(max_size_per_class,
- boxlist_filtered.num_boxes())
- selected_indices = tf.image.non_max_suppression(
- boxlist_filtered.get(),
- boxlist_filtered.get_field(fields.BoxListFields.scores),
- max_selection_size,
- iou_threshold=iou_thresh)
- nms_result = box_list_ops.gather(boxlist_filtered, selected_indices)
- nms_result.add_field(
- fields.BoxListFields.classes, (tf.zeros_like(
- nms_result.get_field(fields.BoxListFields.scores)) + class_idx))
- selected_boxes_list.append(nms_result)
- selected_boxes = box_list_ops.concatenate(selected_boxes_list)
- sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
- fields.BoxListFields.scores)
- if max_total_size:
- max_total_size = tf.minimum(max_total_size,
- sorted_boxes.num_boxes())
- sorted_boxes = box_list_ops.gather(sorted_boxes,
- tf.range(max_total_size))
- return sorted_boxes
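-
-
-# Illustrative call with boxes shared across classes (shapes and thresholds
-# are assumptions of this sketch, not values mandated by the module):
-#
-# boxes = tf.random_uniform([300, 1, 4])  # [k, q, 4] with q == 1 (shared)
-# scores = tf.random_uniform([300, 90])   # [k, num_classes]
-# nmsed_boxlist = multiclass_non_max_suppression(
-#     boxes, scores, score_thresh=0.3, iou_thresh=0.6,
-#     max_size_per_class=100, max_total_size=100)
-# # nmsed_boxlist is a BoxList; nmsed_boxlist.get() yields the kept [M, 4] boxes.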
-
-
-def batch_multiclass_non_max_suppression(boxes,
- scores,
- score_thresh,
- iou_thresh,
- max_size_per_class,
- max_total_size=0,
- clip_window=None,
- change_coordinate_frame=False,
- num_valid_boxes=None,
- masks=None,
- additional_fields=None,
- scope=None,
- parallel_iterations=32):
- """Multi-class version of non maximum suppression that operates on a batch.
-
- This op is similar to `multiclass_non_max_suppression` but operates on a batch
- of boxes and scores. See documentation for `multiclass_non_max_suppression`
- for details.
-
- Args:
- boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing
- detections. If `q` is 1 then same boxes are used for all classes
- otherwise, if `q` is equal to number of classes, class-specific boxes
- are used.
- scores: A [batch_size, num_anchors, num_classes] float32 tensor containing
- the scores for each of the `num_anchors` detections.
- score_thresh: scalar threshold for score (low scoring boxes are removed).
- iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
- with previously selected boxes are removed).
- max_size_per_class: maximum number of retained boxes per class.
- max_total_size: maximum number of boxes retained over all classes. By
- default returns all boxes retained after capping boxes per class.
- clip_window: A float32 tensor of shape [batch_size, 4] where each entry is
- of the form [y_min, x_min, y_max, x_max] representing the window to clip
- boxes to before performing non-max suppression. This argument can also be
- a tensor of shape [4], in which case the same clip window is applied to
- all images in the batch. If clip_window is None, all boxes are used to
- perform non-max suppression.
- change_coordinate_frame: Whether to normalize coordinates after clipping
- relative to clip_window (this can only be set to True if a clip_window
- is provided)
- num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
- [batch_size] representing the number of valid boxes to be considered
- for each image in the batch. This parameter allows for ignoring zero
- paddings.
- masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
- float32 tensor containing box masks. `q` can be either number of classes
- or 1 depending on whether a separate mask is predicted per class.
- additional_fields: (optional) If not None, a dictionary that maps keys to
- tensors whose dimensions are [batch_size, num_anchors, ...].
- scope: tf scope name.
- parallel_iterations: (optional) number of batch items to process in
- parallel.
-
- Returns:
- 'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
- containing the non-max suppressed boxes.
- 'nmsed_scores': A [batch_size, max_detections] float32 tensor containing
- the scores for the boxes.
- 'nmsed_classes': A [batch_size, max_detections] float32 tensor
- containing the class for boxes.
- 'nmsed_masks': (optional) a
- [batch_size, max_detections, mask_height, mask_width] float32 tensor
- containing masks for each selected box. This is set to None if input
- `masks` is None.
- 'nmsed_additional_fields': (optional) a dictionary of
- [batch_size, max_detections, ...] float32 tensors corresponding to the
- tensors specified in the input `additional_fields`. This is not returned
- if input `additional_fields` is None.
- 'num_detections': A [batch_size] int32 tensor indicating the number of
- valid detections per batch item. Only the top num_detections[i] entries in
- nmsed_boxes[i], nmsed_scores[i] and nmsed_classes[i] are valid. The rest of
- the entries are zero paddings.
-
- Raises:
- ValueError: if `q` in boxes.shape is not 1 or not equal to number of
- classes as inferred from scores.shape.
- """
- q = boxes.shape[2].value
- num_classes = scores.shape[2].value
- if q != 1 and q != num_classes:
- raise ValueError('third dimension of boxes must be either 1 or equal '
- 'to the third dimension of scores')
- if change_coordinate_frame and clip_window is None:
- raise ValueError('if change_coordinate_frame is True, then a clip_window '
- 'must be specified.')
- original_masks = masks
- original_additional_fields = additional_fields
- with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
- boxes_shape = boxes.shape
- batch_size = boxes_shape[0].value
- num_anchors = boxes_shape[1].value
-
- if batch_size is None:
- batch_size = tf.shape(boxes)[0]
- if num_anchors is None:
- num_anchors = tf.shape(boxes)[1]
-
- # If num_valid_boxes isn't provided, create it and mark all boxes as
- # valid.
- if num_valid_boxes is None:
- num_valid_boxes = tf.ones([batch_size], dtype=tf.int32) * num_anchors
-
- # If masks aren't provided, create dummy masks so that only one copy of
- # _single_image_nms_fn is needed; the dummy masks are discarded after map_fn.
- if masks is None:
- masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0])
- masks = tf.zeros(masks_shape)
-
- if clip_window is None:
- clip_window = tf.stack([
- tf.reduce_min(boxes[:, :, :, 0]),
- tf.reduce_min(boxes[:, :, :, 1]),
- tf.reduce_max(boxes[:, :, :, 2]),
- tf.reduce_max(boxes[:, :, :, 3])
- ])
- if clip_window.shape.ndims == 1:
- clip_window = tf.tile(tf.expand_dims(clip_window, 0), [batch_size, 1])
-
- if additional_fields is None:
- additional_fields = {}
-
- def _single_image_nms_fn(args):
- """Runs NMS on a single image and returns padded output.
-
- Args:
- args: A list of tensors consisting of the following:
- per_image_boxes - A [num_anchors, q, 4] float32 tensor containing
- detections. If `q` is 1 then same boxes are used for all classes
- otherwise, if `q` is equal to number of classes, class-specific
- boxes are used.
- per_image_scores - A [num_anchors, num_classes] float32 tensor
- containing the scores for each of the `num_anchors` detections.
- per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
- tensor containing box masks. `q` can be either number of classes
- or 1 depending on whether a separate mask is predicted per class.
- per_image_clip_window - A 1D float32 tensor of the form
- [ymin, xmin, ymax, xmax] representing the window to clip the boxes
- to.
- per_image_additional_fields - (optional) A variable number of float32
- tensors each with size [num_anchors, ...].
- per_image_num_valid_boxes - A scalar tensor of type `int32` giving the
- number of valid boxes in this image. This parameter allows for
- ignoring zero paddings.
-
- Returns:
- 'nmsed_boxes': A [max_detections, 4] float32 tensor containing the
- non-max suppressed boxes.
- 'nmsed_scores': A [max_detections] float32 tensor containing the scores
- for the boxes.
- 'nmsed_classes': A [max_detections] float32 tensor containing the class
- for boxes.
- 'nmsed_masks': (optional) a [max_detections, mask_height, mask_width]
- float32 tensor containing masks for each selected box. This is set to
- None if input `masks` is None.
- 'nmsed_additional_fields': (optional) A variable number of float32
- tensors each with size [max_detections, ...] corresponding to the
- input `per_image_additional_fields`.
- 'num_detections': A scalar int32 tensor indicating the number of valid
- detections in this image. Only the top num_detections entries in
- nmsed_boxes, nmsed_scores and nmsed_classes are valid; the rest are
- zero paddings.
- """
- per_image_boxes = args[0]
- per_image_scores = args[1]
- per_image_masks = args[2]
- per_image_clip_window = args[3]
- per_image_additional_fields = {
- key: value
- for key, value in zip(additional_fields, args[4:-1])
- }
- per_image_num_valid_boxes = args[-1]
- per_image_boxes = tf.reshape(
- tf.slice(per_image_boxes, 3 * [0],
- tf.stack([per_image_num_valid_boxes, -1, -1])), [-1, q, 4])
- per_image_scores = tf.reshape(
- tf.slice(per_image_scores, [0, 0],
- tf.stack([per_image_num_valid_boxes, -1])),
- [-1, num_classes])
- per_image_masks = tf.reshape(
- tf.slice(per_image_masks, 4 * [0],
- tf.stack([per_image_num_valid_boxes, -1, -1, -1])),
- [-1, q, per_image_masks.shape[2].value,
- per_image_masks.shape[3].value])
- if per_image_additional_fields is not None:
- for key, tensor in per_image_additional_fields.items():
- additional_field_shape = tensor.get_shape()
- additional_field_dim = len(additional_field_shape)
- per_image_additional_fields[key] = tf.reshape(
- tf.slice(per_image_additional_fields[key],
- additional_field_dim * [0],
- tf.stack([per_image_num_valid_boxes] +
- (additional_field_dim - 1) * [-1])),
- [-1] + [dim.value for dim in additional_field_shape[1:]])
- nmsed_boxlist = multiclass_non_max_suppression(
- per_image_boxes,
- per_image_scores,
- score_thresh,
- iou_thresh,
- max_size_per_class,
- max_total_size,
- clip_window=per_image_clip_window,
- change_coordinate_frame=change_coordinate_frame,
- masks=per_image_masks,
- additional_fields=per_image_additional_fields)
- padded_boxlist = box_list_ops.pad_or_clip_box_list(nmsed_boxlist,
- max_total_size)
- num_detections = nmsed_boxlist.num_boxes()
- nmsed_boxes = padded_boxlist.get()
- nmsed_scores = padded_boxlist.get_field(fields.BoxListFields.scores)
- nmsed_classes = padded_boxlist.get_field(fields.BoxListFields.classes)
- nmsed_masks = padded_boxlist.get_field(fields.BoxListFields.masks)
- nmsed_additional_fields = [
- padded_boxlist.get_field(key) for key in per_image_additional_fields
- ]
- return ([nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks] +
- nmsed_additional_fields + [num_detections])
-
- num_additional_fields = 0
- if additional_fields is not None:
- num_additional_fields = len(additional_fields)
- num_nmsed_outputs = 4 + num_additional_fields
-
- batch_outputs = shape_utils.static_or_dynamic_map_fn(
- _single_image_nms_fn,
- elems=([boxes, scores, masks, clip_window] +
- list(additional_fields.values()) + [num_valid_boxes]),
- dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]),
- parallel_iterations=parallel_iterations)
-
- batch_nmsed_boxes = batch_outputs[0]
- batch_nmsed_scores = batch_outputs[1]
- batch_nmsed_classes = batch_outputs[2]
- batch_nmsed_masks = batch_outputs[3]
- batch_nmsed_additional_fields = {
- key: value
- for key, value in zip(additional_fields, batch_outputs[4:-1])
- }
- batch_num_detections = batch_outputs[-1]
-
- if original_masks is None:
- batch_nmsed_masks = None
-
- if original_additional_fields is None:
- batch_nmsed_additional_fields = None
-
- return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes,
- batch_nmsed_masks, batch_nmsed_additional_fields,
- batch_num_detections)
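-
-
-# Illustrative batched call (batch size, anchor count and thresholds are
-# assumptions of this sketch):
-#
-# boxes = tf.random_uniform([8, 300, 1, 4])  # [batch, num_anchors, q=1, 4]
-# scores = tf.random_uniform([8, 300, 90])   # [batch, num_anchors, classes]
-# (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
-#  nmsed_additional_fields, num_detections
-# ) = batch_multiclass_non_max_suppression(
-#     boxes, scores, score_thresh=0.3, iou_thresh=0.6,
-#     max_size_per_class=100, max_total_size=100)
-# # Only the first num_detections[i] entries of each row i are valid.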
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/post_processing_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/post_processing_test.py
deleted file mode 100644
index 9674139967f933192026c2245a82bf0026a732fe..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/post_processing_test.py
+++ /dev/null
@@ -1,1078 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for tensorflow_models.object_detection.core.post_processing."""
-import numpy as np
-import tensorflow as tf
-from object_detection.core import post_processing
-from object_detection.core import standard_fields as fields
-
-
-class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
-
- def test_with_invalid_scores_size(self):
- boxes = tf.constant([[[0, 0, 1, 1]],
- [[0, 0.1, 1, 1.1]],
- [[0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101]]], tf.float32)
- scores = tf.constant([[.9], [.75], [.6], [.95], [.5]])
- iou_thresh = .5
- score_thresh = 0.6
- max_output_size = 3
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_output_size)
- with self.test_session() as sess:
- with self.assertRaisesWithPredicateMatch(
- tf.errors.InvalidArgumentError, 'Incorrect scores field length'):
- sess.run(nms.get())
-
- def test_multiclass_nms_select_with_shared_boxes(self):
- boxes = tf.constant([[[0, 0, 1, 1]],
- [[0, 0.1, 1, 1.1]],
- [[0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101]],
- [[0, 1000, 1, 1002]],
- [[0, 1000, 1, 1002.1]]], tf.float32)
- scores = tf.constant([[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = [[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 1000, 1, 1002],
- [0, 100, 1, 101]]
- exp_nms_scores = [.95, .9, .85, .3]
- exp_nms_classes = [0, 0, 1, 0]
-
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_output_size)
- with self.test_session() as sess:
- nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
- [nms.get(), nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes)])
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
-
- def test_multiclass_nms_select_with_shared_boxes_given_keypoints(self):
- boxes = tf.constant([[[0, 0, 1, 1]],
- [[0, 0.1, 1, 1.1]],
- [[0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101]],
- [[0, 1000, 1, 1002]],
- [[0, 1000, 1, 1002.1]]], tf.float32)
- scores = tf.constant([[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]])
- num_keypoints = 6
- keypoints = tf.tile(
- tf.reshape(tf.range(8), [8, 1, 1]),
- [1, num_keypoints, 2])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = [[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 1000, 1, 1002],
- [0, 100, 1, 101]]
- exp_nms_scores = [.95, .9, .85, .3]
- exp_nms_classes = [0, 0, 1, 0]
- exp_nms_keypoints_tensor = tf.tile(
- tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]),
- [1, num_keypoints, 2])
-
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_output_size,
- additional_fields={
- fields.BoxListFields.keypoints: keypoints})
-
- with self.test_session() as sess:
- (nms_corners_output,
- nms_scores_output,
- nms_classes_output,
- nms_keypoints,
- exp_nms_keypoints) = sess.run([
- nms.get(),
- nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes),
- nms.get_field(fields.BoxListFields.keypoints),
- exp_nms_keypoints_tensor
- ])
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
- self.assertAllEqual(nms_keypoints, exp_nms_keypoints)
-
- def test_multiclass_nms_with_shared_boxes_given_keypoint_heatmaps(self):
- boxes = tf.constant([[[0, 0, 1, 1]],
- [[0, 0.1, 1, 1.1]],
- [[0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101]],
- [[0, 1000, 1, 1002]],
- [[0, 1000, 1, 1002.1]]], tf.float32)
-
- scores = tf.constant([[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]])
-
- num_boxes = tf.shape(boxes)[0]
- heatmap_height = 5
- heatmap_width = 5
- num_keypoints = 17
- keypoint_heatmaps = tf.ones(
- [num_boxes, heatmap_height, heatmap_width, num_keypoints],
- dtype=tf.float32)
-
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
- exp_nms_corners = [[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 1000, 1, 1002],
- [0, 100, 1, 101]]
-
- exp_nms_scores = [.95, .9, .85, .3]
- exp_nms_classes = [0, 0, 1, 0]
- exp_nms_keypoint_heatmaps = np.ones(
- (4, heatmap_height, heatmap_width, num_keypoints), dtype=np.float32)
-
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_output_size,
- additional_fields={
- fields.BoxListFields.keypoint_heatmaps: keypoint_heatmaps})
-
- with self.test_session() as sess:
- (nms_corners_output,
- nms_scores_output,
- nms_classes_output,
- nms_keypoint_heatmaps) = sess.run(
- [nms.get(),
- nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes),
- nms.get_field(fields.BoxListFields.keypoint_heatmaps)])
-
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
- self.assertAllEqual(nms_keypoint_heatmaps, exp_nms_keypoint_heatmaps)
-
- def test_multiclass_nms_with_additional_fields(self):
- boxes = tf.constant([[[0, 0, 1, 1]],
- [[0, 0.1, 1, 1.1]],
- [[0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101]],
- [[0, 1000, 1, 1002]],
- [[0, 1000, 1, 1002.1]]], tf.float32)
-
- scores = tf.constant([[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]])
-
- coarse_boxes_key = 'coarse_boxes'
- coarse_boxes = tf.constant([[0.1, 0.1, 1.1, 1.1],
- [0.1, 0.2, 1.1, 1.2],
- [0.1, -0.2, 1.1, 1.0],
- [0.1, 10.1, 1.1, 11.1],
- [0.1, 10.2, 1.1, 11.2],
- [0.1, 100.1, 1.1, 101.1],
- [0.1, 1000.1, 1.1, 1002.1],
- [0.1, 1000.1, 1.1, 1002.2]], tf.float32)
-
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = np.array([[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 1000, 1, 1002],
- [0, 100, 1, 101]], dtype=np.float32)
-
- exp_nms_coarse_corners = np.array([[0.1, 10.1, 1.1, 11.1],
- [0.1, 0.1, 1.1, 1.1],
- [0.1, 1000.1, 1.1, 1002.1],
- [0.1, 100.1, 1.1, 101.1]],
- dtype=np.float32)
-
- exp_nms_scores = [.95, .9, .85, .3]
- exp_nms_classes = [0, 0, 1, 0]
-
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_output_size,
- additional_fields={coarse_boxes_key: coarse_boxes})
-
- with self.test_session() as sess:
- (nms_corners_output,
- nms_scores_output,
- nms_classes_output,
- nms_coarse_corners) = sess.run(
- [nms.get(),
- nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes),
- nms.get_field(coarse_boxes_key)])
-
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
- self.assertAllEqual(nms_coarse_corners, exp_nms_coarse_corners)
-
- def test_multiclass_nms_select_with_shared_boxes_given_masks(self):
- boxes = tf.constant([[[0, 0, 1, 1]],
- [[0, 0.1, 1, 1.1]],
- [[0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101]],
- [[0, 1000, 1, 1002]],
- [[0, 1000, 1, 1002.1]]], tf.float32)
- scores = tf.constant([[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]])
- num_classes = 2
- mask_height = 3
- mask_width = 3
- masks = tf.tile(
- tf.reshape(tf.range(8), [8, 1, 1, 1]),
- [1, num_classes, mask_height, mask_width])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = [[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 1000, 1, 1002],
- [0, 100, 1, 101]]
- exp_nms_scores = [.95, .9, .85, .3]
- exp_nms_classes = [0, 0, 1, 0]
- exp_nms_masks_tensor = tf.tile(
- tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]),
- [1, mask_height, mask_width])
-
- nms = post_processing.multiclass_non_max_suppression(boxes, scores,
- score_thresh,
- iou_thresh,
- max_output_size,
- masks=masks)
- with self.test_session() as sess:
- (nms_corners_output,
- nms_scores_output,
- nms_classes_output,
- nms_masks,
- exp_nms_masks) = sess.run([nms.get(),
- nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes),
- nms.get_field(fields.BoxListFields.masks),
- exp_nms_masks_tensor])
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
- self.assertAllEqual(nms_masks, exp_nms_masks)
-
- def test_multiclass_nms_select_with_clip_window(self):
- boxes = tf.constant([[[0, 0, 10, 10]],
- [[1, 1, 11, 11]]], tf.float32)
- scores = tf.constant([[.9], [.75]])
- clip_window = tf.constant([5, 4, 8, 7], tf.float32)
- score_thresh = 0.0
- iou_thresh = 0.5
- max_output_size = 100
-
- exp_nms_corners = [[5, 4, 8, 7]]
- exp_nms_scores = [.9]
- exp_nms_classes = [0]
-
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_output_size,
- clip_window=clip_window)
- with self.test_session() as sess:
- nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
- [nms.get(), nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes)])
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
-
- def test_multiclass_nms_select_with_clip_window_change_coordinate_frame(self):
- boxes = tf.constant([[[0, 0, 10, 10]],
- [[1, 1, 11, 11]]], tf.float32)
- scores = tf.constant([[.9], [.75]])
- clip_window = tf.constant([5, 4, 8, 7], tf.float32)
- score_thresh = 0.0
- iou_thresh = 0.5
- max_output_size = 100
-
- exp_nms_corners = [[0, 0, 1, 1]]
- exp_nms_scores = [.9]
- exp_nms_classes = [0]
-
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_output_size,
- clip_window=clip_window, change_coordinate_frame=True)
- with self.test_session() as sess:
- nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
- [nms.get(), nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes)])
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
-
- def test_multiclass_nms_select_with_per_class_cap(self):
- boxes = tf.constant([[[0, 0, 1, 1]],
- [[0, 0.1, 1, 1.1]],
- [[0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101]],
- [[0, 1000, 1, 1002]],
- [[0, 1000, 1, 1002.1]]], tf.float32)
- scores = tf.constant([[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]])
- score_thresh = 0.1
- iou_thresh = .5
- max_size_per_class = 2
-
- exp_nms_corners = [[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 1000, 1, 1002]]
- exp_nms_scores = [.95, .9, .85]
- exp_nms_classes = [0, 0, 1]
-
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_size_per_class)
- with self.test_session() as sess:
- nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
- [nms.get(), nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes)])
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
-
- def test_multiclass_nms_select_with_total_cap(self):
- boxes = tf.constant([[[0, 0, 1, 1]],
- [[0, 0.1, 1, 1.1]],
- [[0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101]],
- [[0, 1000, 1, 1002]],
- [[0, 1000, 1, 1002.1]]], tf.float32)
- scores = tf.constant([[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]])
- score_thresh = 0.1
- iou_thresh = .5
- max_size_per_class = 4
- max_total_size = 2
-
- exp_nms_corners = [[0, 10, 1, 11],
- [0, 0, 1, 1]]
- exp_nms_scores = [.95, .9]
- exp_nms_classes = [0, 0]
-
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_size_per_class,
- max_total_size)
- with self.test_session() as sess:
- nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
- [nms.get(), nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes)])
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
-
- def test_multiclass_nms_threshold_then_select_with_shared_boxes(self):
- boxes = tf.constant([[[0, 0, 1, 1]],
- [[0, 0.1, 1, 1.1]],
- [[0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101]],
- [[0, 1000, 1, 1002]],
- [[0, 1000, 1, 1002.1]]], tf.float32)
- scores = tf.constant([[.9], [.75], [.6], [.95], [.5], [.3], [.01], [.01]])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 3
-
- exp_nms = [[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 100, 1, 101]]
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_output_size)
- with self.test_session() as sess:
- nms_output = sess.run(nms.get())
- self.assertAllClose(nms_output, exp_nms)
-
- def test_multiclass_nms_select_with_separate_boxes(self):
- boxes = tf.constant([[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]],
- tf.float32)
- scores = tf.constant([[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = [[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 999, 2, 1004],
- [0, 100, 1, 101]]
- exp_nms_scores = [.95, .9, .85, .3]
- exp_nms_classes = [0, 0, 1, 0]
-
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_output_size)
- with self.test_session() as sess:
- nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
- [nms.get(), nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes)])
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
-
- def test_batch_multiclass_nms_with_batch_size_1(self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = [[[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 999, 2, 1004],
- [0, 100, 1, 101]]]
- exp_nms_scores = [[.95, .9, .85, .3]]
- exp_nms_classes = [[0, 0, 1, 0]]
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size)
-
- self.assertIsNone(nmsed_masks)
- self.assertIsNone(nmsed_additional_fields)
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections])
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertEqual(num_detections, [4])
-
- def test_batch_multiclass_nms_with_batch_size_2(self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = np.array([[[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 0, 0, 0],
- [0, 0, 0, 0]],
- [[0, 999, 2, 1004],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101],
- [0, 0, 0, 0]]])
- exp_nms_scores = np.array([[.95, .9, 0, 0],
- [.85, .5, .3, 0]])
- exp_nms_classes = np.array([[0, 0, 0, 0],
- [1, 0, 0, 0]])
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size)
-
- self.assertIsNone(nmsed_masks)
- self.assertIsNone(nmsed_additional_fields)
- # Check static shapes
- self.assertAllEqual(nmsed_boxes.shape.as_list(),
- exp_nms_corners.shape)
- self.assertAllEqual(nmsed_scores.shape.as_list(),
- exp_nms_scores.shape)
- self.assertAllEqual(nmsed_classes.shape.as_list(),
- exp_nms_classes.shape)
- self.assertEqual(num_detections.shape.as_list(), [2])
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections])
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertAllClose(num_detections, [2, 3])
-
- def test_batch_multiclass_nms_with_per_batch_clip_window(self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- clip_window = tf.constant([0., 0., 200., 200.])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = np.array([[[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 0, 0, 0],
- [0, 0, 0, 0]],
- [[0, 10.1, 1, 11.1],
- [0, 100, 1, 101],
- [0, 0, 0, 0],
- [0, 0, 0, 0]]])
- exp_nms_scores = np.array([[.95, .9, 0, 0],
- [.5, .3, 0, 0]])
- exp_nms_classes = np.array([[0, 0, 0, 0],
- [0, 0, 0, 0]])
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- clip_window=clip_window)
-
- self.assertIsNone(nmsed_masks)
- self.assertIsNone(nmsed_additional_fields)
- # Check static shapes
- self.assertAllEqual(nmsed_boxes.shape.as_list(),
- exp_nms_corners.shape)
- self.assertAllEqual(nmsed_scores.shape.as_list(),
- exp_nms_scores.shape)
- self.assertAllEqual(nmsed_classes.shape.as_list(),
- exp_nms_classes.shape)
- self.assertEqual(num_detections.shape.as_list(), [2])
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections])
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertAllClose(num_detections, [2, 2])
-
- def test_batch_multiclass_nms_with_per_image_clip_window(self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- clip_window = tf.constant([[0., 0., 5., 5.],
- [0., 0., 200., 200.]])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = np.array([[[0, 0, 1, 1],
- [0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0]],
- [[0, 10.1, 1, 11.1],
- [0, 100, 1, 101],
- [0, 0, 0, 0],
- [0, 0, 0, 0]]])
- exp_nms_scores = np.array([[.9, 0., 0., 0.],
- [.5, .3, 0, 0]])
- exp_nms_classes = np.array([[0, 0, 0, 0],
- [0, 0, 0, 0]])
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- clip_window=clip_window)
-
- self.assertIsNone(nmsed_masks)
- self.assertIsNone(nmsed_additional_fields)
- # Check static shapes
- self.assertAllEqual(nmsed_boxes.shape.as_list(),
- exp_nms_corners.shape)
- self.assertAllEqual(nmsed_scores.shape.as_list(),
- exp_nms_scores.shape)
- self.assertAllEqual(nmsed_classes.shape.as_list(),
- exp_nms_classes.shape)
- self.assertEqual(num_detections.shape.as_list(), [2])
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections])
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertAllClose(num_detections, [1, 2])
-
- def test_batch_multiclass_nms_with_masks(self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
- [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
- [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
- [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
- [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
- [[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
- [[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
- [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]],
- tf.float32)
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = np.array([[[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 0, 0, 0],
- [0, 0, 0, 0]],
- [[0, 999, 2, 1004],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101],
- [0, 0, 0, 0]]])
- exp_nms_scores = np.array([[.95, .9, 0, 0],
- [.85, .5, .3, 0]])
- exp_nms_classes = np.array([[0, 0, 0, 0],
- [1, 0, 0, 0]])
- exp_nms_masks = np.array([[[[6, 7], [8, 9]],
- [[0, 1], [2, 3]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]],
- [[[13, 14], [15, 16]],
- [[8, 9], [10, 11]],
- [[10, 11], [12, 13]],
- [[0, 0], [0, 0]]]])
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- masks=masks)
-
- self.assertIsNone(nmsed_additional_fields)
- # Check static shapes
- self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape)
- self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape)
- self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape)
- self.assertAllEqual(nmsed_masks.shape.as_list(), exp_nms_masks.shape)
- self.assertEqual(num_detections.shape.as_list(), [2])
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- nmsed_masks, num_detections])
-
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertAllClose(num_detections, [2, 3])
- self.assertAllClose(nmsed_masks, exp_nms_masks)
-
- def test_batch_multiclass_nms_with_additional_fields(self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- additional_fields = {
- 'keypoints': tf.constant(
- [[[[6, 7], [8, 9]],
- [[0, 1], [2, 3]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]],
- [[[13, 14], [15, 16]],
- [[8, 9], [10, 11]],
- [[10, 11], [12, 13]],
- [[0, 0], [0, 0]]]],
- tf.float32)
- }
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = np.array([[[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 0, 0, 0],
- [0, 0, 0, 0]],
- [[0, 999, 2, 1004],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101],
- [0, 0, 0, 0]]])
- exp_nms_scores = np.array([[.95, .9, 0, 0],
- [.85, .5, .3, 0]])
- exp_nms_classes = np.array([[0, 0, 0, 0],
- [1, 0, 0, 0]])
- exp_nms_additional_fields = {
- 'keypoints': np.array([[[[0, 0], [0, 0]],
- [[6, 7], [8, 9]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]],
- [[[10, 11], [12, 13]],
- [[13, 14], [15, 16]],
- [[8, 9], [10, 11]],
- [[0, 0], [0, 0]]]])
- }
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- additional_fields=additional_fields)
-
- self.assertIsNone(nmsed_masks)
- # Check static shapes
- self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape)
- self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape)
- self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape)
- self.assertEqual(len(nmsed_additional_fields),
- len(exp_nms_additional_fields))
- for key in exp_nms_additional_fields:
- self.assertAllEqual(nmsed_additional_fields[key].shape.as_list(),
- exp_nms_additional_fields[key].shape)
- self.assertEqual(num_detections.shape.as_list(), [2])
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- nmsed_additional_fields, num_detections])
-
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- for key in exp_nms_additional_fields:
- self.assertAllClose(nmsed_additional_fields[key],
- exp_nms_additional_fields[key])
- self.assertAllClose(num_detections, [2, 3])
-
- def test_batch_multiclass_nms_with_dynamic_batch_size(self):
- boxes_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2, 4))
- scores_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2))
- masks_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2, 2, 2))
-
- boxes = np.array([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]])
- scores = np.array([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- masks = np.array([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
- [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
- [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
- [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
- [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
- [[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
- [[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
- [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = np.array([[[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 0, 0, 0],
- [0, 0, 0, 0]],
- [[0, 999, 2, 1004],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101],
- [0, 0, 0, 0]]])
- exp_nms_scores = np.array([[.95, .9, 0, 0],
- [.85, .5, .3, 0]])
- exp_nms_classes = np.array([[0, 0, 0, 0],
- [1, 0, 0, 0]])
- exp_nms_masks = np.array([[[[6, 7], [8, 9]],
- [[0, 1], [2, 3]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]],
- [[[13, 14], [15, 16]],
- [[8, 9], [10, 11]],
- [[10, 11], [12, 13]],
- [[0, 0], [0, 0]]]])
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes_placeholder, scores_placeholder, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- masks=masks_placeholder)
-
- self.assertIsNone(nmsed_additional_fields)
- # Check static shapes
- self.assertAllEqual(nmsed_boxes.shape.as_list(), [None, 4, 4])
- self.assertAllEqual(nmsed_scores.shape.as_list(), [None, 4])
- self.assertAllEqual(nmsed_classes.shape.as_list(), [None, 4])
- self.assertAllEqual(nmsed_masks.shape.as_list(), [None, 4, 2, 2])
- self.assertEqual(num_detections.shape.as_list(), [None])
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- nmsed_masks, num_detections],
- feed_dict={boxes_placeholder: boxes,
- scores_placeholder: scores,
- masks_placeholder: masks})
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertAllClose(num_detections, [2, 3])
- self.assertAllClose(nmsed_masks, exp_nms_masks)
-
- def test_batch_multiclass_nms_with_masks_and_num_valid_boxes(self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
- [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
- [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
- [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
- [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
- [[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
- [[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
- [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]],
- tf.float32)
- num_valid_boxes = tf.constant([1, 1], tf.int32)
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = [[[0, 0, 1, 1],
- [0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0]],
- [[0, 10.1, 1, 11.1],
- [0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0]]]
- exp_nms_scores = [[.9, 0, 0, 0],
- [.5, 0, 0, 0]]
- exp_nms_classes = [[0, 0, 0, 0],
- [0, 0, 0, 0]]
- exp_nms_masks = [[[[0, 1], [2, 3]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]],
- [[[8, 9], [10, 11]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]]]
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- num_valid_boxes=num_valid_boxes, masks=masks)
-
- self.assertIsNone(nmsed_additional_fields)
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- nmsed_masks, num_detections])
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertAllClose(num_detections, [1, 1])
- self.assertAllClose(nmsed_masks, exp_nms_masks)
-
- def test_batch_multiclass_nms_with_additional_fields_and_num_valid_boxes(
- self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- additional_fields = {
- 'keypoints': tf.constant(
- [[[[6, 7], [8, 9]],
- [[0, 1], [2, 3]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]],
- [[[13, 14], [15, 16]],
- [[8, 9], [10, 11]],
- [[10, 11], [12, 13]],
- [[0, 0], [0, 0]]]],
- tf.float32)
- }
- num_valid_boxes = tf.constant([1, 1], tf.int32)
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = [[[0, 0, 1, 1],
- [0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0]],
- [[0, 10.1, 1, 11.1],
- [0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0]]]
- exp_nms_scores = [[.9, 0, 0, 0],
- [.5, 0, 0, 0]]
- exp_nms_classes = [[0, 0, 0, 0],
- [0, 0, 0, 0]]
- exp_nms_additional_fields = {
- 'keypoints': np.array([[[[6, 7], [8, 9]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]],
- [[[13, 14], [15, 16]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]]])
- }
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- num_valid_boxes=num_valid_boxes,
- additional_fields=additional_fields)
-
- self.assertIsNone(nmsed_masks)
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- nmsed_additional_fields, num_detections])
-
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- for key in exp_nms_additional_fields:
- self.assertAllClose(nmsed_additional_fields[key],
- exp_nms_additional_fields[key])
- self.assertAllClose(num_detections, [1, 1])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/prefetcher.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/prefetcher.py
deleted file mode 100644
index e690c599fa74e024d9b7ec857628cdbfb0e3ee81..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/prefetcher.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Provides functions to prefetch tensors to feed into models."""
-import tensorflow as tf
-
-
-def prefetch(tensor_dict, capacity):
- """Creates a prefetch queue for tensors.
-
- Creates a FIFO queue to asynchronously enqueue tensor_dicts and returns a
- dequeue op that evaluates to a tensor_dict. This function is useful in
- prefetching preprocessed tensors so that the data is readily available for
- consumers.
-
- Example input pipeline when you don't need batching:
- ----------------------------------------------------
- key, string_tensor = slim.parallel_reader.parallel_read(...)
- tensor_dict = decoder.decode(string_tensor)
- tensor_dict = preprocessor.preprocess(tensor_dict, ...)
- prefetch_queue = prefetcher.prefetch(tensor_dict, capacity=20)
- tensor_dict = prefetch_queue.dequeue()
- outputs = Model(tensor_dict)
- ...
- ----------------------------------------------------
-
- For input pipelines with batching, refer to core/batcher.py
-
- Args:
- tensor_dict: a dictionary of tensors to prefetch.
- capacity: the size of the prefetch queue.
-
- Returns:
- a FIFO prefetcher queue
- """
- names = list(tensor_dict.keys())
- dtypes = [t.dtype for t in tensor_dict.values()]
- shapes = [t.get_shape() for t in tensor_dict.values()]
- prefetch_queue = tf.PaddingFIFOQueue(capacity, dtypes=dtypes,
- shapes=shapes,
- names=names,
- name='prefetch_queue')
- enqueue_op = prefetch_queue.enqueue(tensor_dict)
- tf.train.queue_runner.add_queue_runner(tf.train.queue_runner.QueueRunner(
- prefetch_queue, [enqueue_op]))
- tf.summary.scalar('queue/%s/fraction_of_%d_full' % (prefetch_queue.name,
- capacity),
- tf.to_float(prefetch_queue.size()) * (1. / capacity))
- return prefetch_queue
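-
-
-# Illustrative sketch (added; not part of the original file): draining the
-# prefetch queue inside a session. The tensor_dict below is a hypothetical
-# stand-in for whatever the surrounding input pipeline produces.
-#
-#   tensor_dict = {'image': tf.random_normal([4, 32, 32, 3])}
-#   queue = prefetch(tensor_dict, capacity=20)
-#   dequeued = queue.dequeue()
-#   with tf.Session() as sess:
-#     coord = tf.train.Coordinator()
-#     threads = tf.train.start_queue_runners(sess=sess, coord=coord)
-#     batch = sess.run(dequeued)
-#     coord.request_stop()
-#     coord.join(threads)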
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/prefetcher_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/prefetcher_test.py
deleted file mode 100644
index 63f557e3318c25d02434bc1dd0763f1df35b18ac..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/prefetcher_test.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.prefetcher."""
-import tensorflow as tf
-
-from object_detection.core import prefetcher
-
-slim = tf.contrib.slim
-
-
-class PrefetcherTest(tf.test.TestCase):
-
- def test_prefetch_tensors_with_fully_defined_shapes(self):
- with self.test_session() as sess:
- batch_size = 10
- image_size = 32
- num_batches = 5
- examples = tf.Variable(tf.constant(0, dtype=tf.int64))
- counter = examples.count_up_to(num_batches)
- image = tf.random_normal([batch_size, image_size,
- image_size, 3],
- dtype=tf.float32,
- name='images')
- label = tf.random_uniform([batch_size, 1], 0, 10,
- dtype=tf.int32, name='labels')
-
- prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter,
- 'image': image,
- 'label': label},
- capacity=100)
- tensor_dict = prefetch_queue.dequeue()
-
- self.assertAllEqual(tensor_dict['image'].get_shape().as_list(),
- [batch_size, image_size, image_size, 3])
- self.assertAllEqual(tensor_dict['label'].get_shape().as_list(),
- [batch_size, 1])
-
-      tf.global_variables_initializer().run()
- with slim.queues.QueueRunners(sess):
- for _ in range(num_batches):
- results = sess.run(tensor_dict)
-          self.assertEqual(results['image'].shape,
-                           (batch_size, image_size, image_size, 3))
-          self.assertEqual(results['label'].shape, (batch_size, 1))
- with self.assertRaises(tf.errors.OutOfRangeError):
- sess.run(tensor_dict)
-
- def test_prefetch_tensors_with_partially_defined_shapes(self):
- with self.test_session() as sess:
- batch_size = 10
- image_size = 32
- num_batches = 5
- examples = tf.Variable(tf.constant(0, dtype=tf.int64))
- counter = examples.count_up_to(num_batches)
- image = tf.random_normal([batch_size,
- tf.Variable(image_size),
- tf.Variable(image_size), 3],
- dtype=tf.float32,
- name='image')
- image.set_shape([batch_size, None, None, 3])
- label = tf.random_uniform([batch_size, tf.Variable(1)], 0,
- 10, dtype=tf.int32, name='label')
- label.set_shape([batch_size, None])
-
- prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter,
- 'image': image,
- 'label': label},
- capacity=100)
- tensor_dict = prefetch_queue.dequeue()
-
- self.assertAllEqual(tensor_dict['image'].get_shape().as_list(),
- [batch_size, None, None, 3])
- self.assertAllEqual(tensor_dict['label'].get_shape().as_list(),
- [batch_size, None])
-
-      tf.global_variables_initializer().run()
- with slim.queues.QueueRunners(sess):
- for _ in range(num_batches):
- results = sess.run(tensor_dict)
-          self.assertEqual(results['image'].shape,
-                           (batch_size, image_size, image_size, 3))
-          self.assertEqual(results['label'].shape, (batch_size, 1))
- with self.assertRaises(tf.errors.OutOfRangeError):
- sess.run(tensor_dict)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/preprocessor.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/preprocessor.py
deleted file mode 100644
index 0fcdfcc69c273c634a9c7183e159f912e099c6c1..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/preprocessor.py
+++ /dev/null
@@ -1,3176 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Preprocess images and bounding boxes for detection.
-
-We perform two sets of operations in preprocessing stage:
-(a) operations that are applied to both training and testing data,
-(b) operations that are applied only to training data for the purpose of
- data augmentation.
-
-A preprocessing function receives a set of inputs,
-e.g. an image and bounding boxes,
-performs an operation on them, and returns them.
-Some examples are: randomly cropping the image, randomly mirroring the image,
- randomly changing the brightness, contrast, hue and
- randomly jittering the bounding boxes.
-
-The preprocess function receives a tensor_dict which is a dictionary that maps
-different field names to their tensors. For example,
-tensor_dict[fields.InputDataFields.image] holds the image tensor.
-The image is a rank 4 tensor: [1, height, width, channels] with
-dtype=tf.float32. The groundtruth_boxes is a rank 2 tensor: [N, 4] where
-each row holds a box in [ymin, xmin, ymax, xmax] form.
-Boxes are in normalized coordinates, meaning
-their coordinate values range in [0, 1].
-
-To preprocess multiple images with the same operations in cases where
-nondeterministic operations are used, a preprocessor_cache.PreprocessorCache
-object can be passed into the preprocess function or individual operations.
-All nondeterministic operations except random_jitter_boxes support caching.
-E.g.
-Let tensor_dict{1,2,3,4,5} be copies of the same inputs.
-Let preprocess_options contain nondeterministic operation(s) excluding
-random_jitter_boxes.
-
-cache1 = preprocessor_cache.PreprocessorCache()
-cache2 = preprocessor_cache.PreprocessorCache()
-a = preprocess(tensor_dict1, preprocess_options, preprocess_vars_cache=cache1)
-b = preprocess(tensor_dict2, preprocess_options, preprocess_vars_cache=cache1)
-c = preprocess(tensor_dict3, preprocess_options, preprocess_vars_cache=cache2)
-d = preprocess(tensor_dict4, preprocess_options, preprocess_vars_cache=cache2)
-e = preprocess(tensor_dict5, preprocess_options)
-
-Then the corresponding tensors of object pairs (a,b) and (c,d)
-are guaranteed to be equal element-wise, but the equality of any other object
-pair cannot be determined.
-
-Important Note: In tensor_dict, the image is a rank 4 tensor, but preprocessing
-functions receive a rank 3 tensor for processing the image. Thus, inside the
-preprocess function we squeeze the image to become a rank 3 tensor and then
-we pass it to the functions. At the end of the preprocess we expand the image
-back to rank 4.
-"""
-
-import functools
-import inspect
-import sys
-import tensorflow as tf
-
-from tensorflow.python.ops import control_flow_ops
-
-from object_detection.core import box_list
-from object_detection.core import box_list_ops
-from object_detection.core import keypoint_ops
-from object_detection.core import preprocessor_cache
-from object_detection.core import standard_fields as fields
-from object_detection.utils import shape_utils
-
-
-def _apply_with_random_selector(x,
- func,
- num_cases,
- preprocess_vars_cache=None,
- key=''):
- """Computes func(x, sel), with sel sampled from [0...num_cases-1].
-
- If both preprocess_vars_cache AND key are the same between two calls, sel will
- be the same value in both calls.
-
- Args:
- x: input Tensor.
- func: Python function to apply.
- num_cases: Python int32, number of cases to sample sel from.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
- key: variable identifier for preprocess_vars_cache.
-
- Returns:
- The result of func(x, sel), where func receives the value of the
- selector as a python integer, but sel is sampled dynamically.
- """
- generator_func = functools.partial(
- tf.random_uniform, [], maxval=num_cases, dtype=tf.int32)
- rand_sel = _get_or_create_preprocess_rand_vars(
- generator_func, preprocessor_cache.PreprocessorCache.SELECTOR,
- preprocess_vars_cache, key)
-
- # Pass the real x only to one of the func calls.
- return control_flow_ops.merge([func(
- control_flow_ops.switch(x, tf.equal(rand_sel, case))[1], case)
- for case in range(num_cases)])[0]
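-
-
-# Illustrative sketch (added; not from the original module): using the
-# selector to pick one of two color-distortion orderings for an image; the
-# lambda receives the sampled case as a Python int.
-#
-#   image = _apply_with_random_selector(
-#       image,
-#       lambda x, ordering: random_distort_color(x, color_ordering=ordering),
-#       num_cases=2)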
-
-
-def _apply_with_random_selector_tuples(x,
- func,
- num_cases,
- preprocess_vars_cache=None,
- key=''):
- """Computes func(x, sel), with sel sampled from [0...num_cases-1].
-
- If both preprocess_vars_cache AND key are the same between two calls, sel will
- be the same value in both calls.
-
- Args:
- x: A tuple of input tensors.
- func: Python function to apply.
- num_cases: Python int32, number of cases to sample sel from.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
- key: variable identifier for preprocess_vars_cache.
-
- Returns:
- The result of func(x, sel), where func receives the value of the
- selector as a python integer, but sel is sampled dynamically.
- """
- num_inputs = len(x)
- generator_func = functools.partial(
- tf.random_uniform, [], maxval=num_cases, dtype=tf.int32)
- rand_sel = _get_or_create_preprocess_rand_vars(
- generator_func, preprocessor_cache.PreprocessorCache.SELECTOR_TUPLES,
- preprocess_vars_cache, key)
-
- # Pass the real x only to one of the func calls.
-  tuples = [list() for _ in x]
- for case in range(num_cases):
- new_x = [control_flow_ops.switch(t, tf.equal(rand_sel, case))[1] for t in x]
- output = func(tuple(new_x), case)
- for j in range(num_inputs):
- tuples[j].append(output[j])
-
- for i in range(num_inputs):
- tuples[i] = control_flow_ops.merge(tuples[i])[0]
- return tuple(tuples)
-
-
-def _get_or_create_preprocess_rand_vars(generator_func,
- function_id,
- preprocess_vars_cache,
- key=''):
- """Returns a tensor stored in preprocess_vars_cache or using generator_func.
-
- If the tensor was previously generated and appears in the PreprocessorCache,
- the previously generated tensor will be returned. Otherwise, a new tensor
- is generated using generator_func and stored in the cache.
-
- Args:
- generator_func: A 0-argument function that generates a tensor.
- function_id: identifier for the preprocessing function used.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
- key: identifier for the variable stored.
- Returns:
- The generated tensor.
- """
- if preprocess_vars_cache is not None:
- var = preprocess_vars_cache.get(function_id, key)
- if var is None:
- var = generator_func()
- preprocess_vars_cache.update(function_id, key, var)
- else:
- var = generator_func()
- return var
-
-
-def _random_integer(minval, maxval, seed):
- """Returns a random 0-D tensor between minval and maxval.
-
- Args:
- minval: minimum value of the random tensor.
- maxval: maximum value of the random tensor.
- seed: random seed.
-
- Returns:
- A random 0-D tensor between minval and maxval.
- """
- return tf.random_uniform(
- [], minval=minval, maxval=maxval, dtype=tf.int32, seed=seed)
-
-
-# TODO(mttang): This method is needed because the current
-# tf.image.rgb_to_grayscale method does not support quantization. Replace with
-# tf.image.rgb_to_grayscale after quantization support is added.
-def _rgb_to_grayscale(images, name=None):
- """Converts one or more images from RGB to Grayscale.
-
- Outputs a tensor of the same `DType` and rank as `images`. The size of the
- last dimension of the output is 1, containing the Grayscale value of the
- pixels.
-
- Args:
- images: The RGB tensor to convert. Last dimension must have size 3 and
- should contain RGB values.
- name: A name for the operation (optional).
-
- Returns:
- The converted grayscale image(s).
- """
- with tf.name_scope(name, 'rgb_to_grayscale', [images]) as name:
- images = tf.convert_to_tensor(images, name='images')
-    # Remember the original dtype so we can convert back if needed
- orig_dtype = images.dtype
- flt_image = tf.image.convert_image_dtype(images, tf.float32)
-
- # Reference for converting between RGB and grayscale.
- # https://en.wikipedia.org/wiki/Luma_%28video%29
- rgb_weights = [0.2989, 0.5870, 0.1140]
- rank_1 = tf.expand_dims(tf.rank(images) - 1, 0)
- gray_float = tf.reduce_sum(
- flt_image * rgb_weights, rank_1, keep_dims=True)
- gray_float.set_shape(images.get_shape()[:-1].concatenate([1]))
- return tf.image.convert_image_dtype(gray_float, orig_dtype, name=name)
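-
-
-# Worked example (added for clarity): for a float32 pixel [100, 150, 200],
-# the luma weights give 0.2989 * 100 + 0.5870 * 150 + 0.1140 * 200 = 140.74,
-# so the grayscale output carries a single channel holding that value.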
-
-
-def normalize_image(image, original_minval, original_maxval, target_minval,
- target_maxval):
- """Normalizes pixel values in the image.
-
- Moves the pixel values from the current [original_minval, original_maxval]
-  range to the [target_minval, target_maxval] range.
-
- Args:
- image: rank 3 float32 tensor containing 1
- image -> [height, width, channels].
- original_minval: current image minimum value.
- original_maxval: current image maximum value.
- target_minval: target image minimum value.
- target_maxval: target image maximum value.
-
- Returns:
- image: image which is the same shape as input image.
- """
- with tf.name_scope('NormalizeImage', values=[image]):
- original_minval = float(original_minval)
- original_maxval = float(original_maxval)
- target_minval = float(target_minval)
- target_maxval = float(target_maxval)
- image = tf.to_float(image)
- image = tf.subtract(image, original_minval)
- image = tf.multiply(image, (target_maxval - target_minval) /
- (original_maxval - original_minval))
- image = tf.add(image, target_minval)
- return image
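-
-
-# Worked example (added for clarity): mapping [0, 255] to [-1, 1], a pixel
-# value of 128 becomes (128 - 0) * (1 - (-1)) / (255 - 0) + (-1)
-# = 128 * 2 / 255 - 1 ~= 0.004.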
-
-
-def retain_boxes_above_threshold(boxes,
- labels,
- label_scores,
- multiclass_scores=None,
- masks=None,
- keypoints=None,
- threshold=0.0):
- """Retains boxes whose label score is above a given threshold.
-
- If the label score for a box is missing (represented by NaN), the box is
- retained. The boxes that don't pass the threshold will not appear in the
- returned tensor.
-
- Args:
- boxes: float32 tensor of shape [num_instance, 4] representing boxes
- location in normalized coordinates.
- labels: rank 1 int32 tensor of shape [num_instance] containing the object
- classes.
- label_scores: float32 tensor of shape [num_instance] representing the
- score for each box.
- multiclass_scores: (optional) float32 tensor of shape
- [num_instances, num_classes] representing the score for each box for each
- class.
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks are of
- the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
- coordinates.
- threshold: scalar python float.
-
- Returns:
- retained_boxes: [num_retained_instance, 4]
-    retained_labels: [num_retained_instance]
- retained_label_scores: [num_retained_instance]
-
- If multiclass_scores, masks, or keypoints are not None, the function also
- returns:
-
- retained_multiclass_scores: [num_retained_instance, num_classes]
- retained_masks: [num_retained_instance, height, width]
- retained_keypoints: [num_retained_instance, num_keypoints, 2]
- """
- with tf.name_scope('RetainBoxesAboveThreshold',
- values=[boxes, labels, label_scores]):
- indices = tf.where(
- tf.logical_or(label_scores > threshold, tf.is_nan(label_scores)))
- indices = tf.squeeze(indices, axis=1)
- retained_boxes = tf.gather(boxes, indices)
- retained_labels = tf.gather(labels, indices)
- retained_label_scores = tf.gather(label_scores, indices)
- result = [retained_boxes, retained_labels, retained_label_scores]
-
- if multiclass_scores is not None:
- retained_multiclass_scores = tf.gather(multiclass_scores, indices)
- result.append(retained_multiclass_scores)
-
- if masks is not None:
- retained_masks = tf.gather(masks, indices)
- result.append(retained_masks)
-
- if keypoints is not None:
- retained_keypoints = tf.gather(keypoints, indices)
- result.append(retained_keypoints)
-
- return result
-
-
-def _flip_boxes_left_right(boxes):
- """Left-right flip the boxes.
-
- Args:
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
-
- Returns:
- Flipped boxes.
- """
- ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
- flipped_xmin = tf.subtract(1.0, xmax)
- flipped_xmax = tf.subtract(1.0, xmin)
- flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], 1)
- return flipped_boxes
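-
-
-# Worked example (added for clarity): a box [ymin, xmin, ymax, xmax] of
-# [0.1, 0.2, 0.5, 0.6] flips to [0.1, 1 - 0.6, 0.5, 1 - 0.2]
-# = [0.1, 0.4, 0.5, 0.8]; the y coordinates are untouched.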
-
-
-def _flip_boxes_up_down(boxes):
- """Up-down flip the boxes.
-
- Args:
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
-
- Returns:
- Flipped boxes.
- """
- ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
- flipped_ymin = tf.subtract(1.0, ymax)
- flipped_ymax = tf.subtract(1.0, ymin)
- flipped_boxes = tf.concat([flipped_ymin, xmin, flipped_ymax, xmax], 1)
- return flipped_boxes
-
-
-def _rot90_boxes(boxes):
- """Rotate boxes counter-clockwise by 90 degrees.
-
- Args:
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
-
- Returns:
- Rotated boxes.
- """
- ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
- rotated_ymin = tf.subtract(1.0, xmax)
- rotated_ymax = tf.subtract(1.0, xmin)
- rotated_xmin = ymin
- rotated_xmax = ymax
- rotated_boxes = tf.concat(
- [rotated_ymin, rotated_xmin, rotated_ymax, rotated_xmax], 1)
- return rotated_boxes
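-
-
-# Worked example (added for clarity): rotating [0.1, 0.2, 0.5, 0.6]
-# counter-clockwise by 90 degrees yields
-# [1 - 0.6, 0.1, 1 - 0.2, 0.5] = [0.4, 0.1, 0.8, 0.5].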
-
-
-def _flip_masks_left_right(masks):
- """Left-right flip masks.
-
- Args:
- masks: rank 3 float32 tensor with shape
- [num_instances, height, width] representing instance masks.
-
- Returns:
- flipped masks: rank 3 float32 tensor with shape
- [num_instances, height, width] representing instance masks.
- """
- return masks[:, :, ::-1]
-
-
-def _flip_masks_up_down(masks):
- """Up-down flip masks.
-
- Args:
- masks: rank 3 float32 tensor with shape
- [num_instances, height, width] representing instance masks.
-
- Returns:
- flipped masks: rank 3 float32 tensor with shape
- [num_instances, height, width] representing instance masks.
- """
- return masks[:, ::-1, :]
-
-
-def _rot90_masks(masks):
- """Rotate masks counter-clockwise by 90 degrees.
-
- Args:
- masks: rank 3 float32 tensor with shape
- [num_instances, height, width] representing instance masks.
-
- Returns:
- rotated masks: rank 3 float32 tensor with shape
- [num_instances, height, width] representing instance masks.
- """
- masks = tf.transpose(masks, [0, 2, 1])
- return masks[:, ::-1, :]
-
-
-def random_horizontal_flip(image,
- boxes=None,
- masks=None,
- keypoints=None,
- keypoint_flip_permutation=None,
- seed=None,
- preprocess_vars_cache=None):
- """Randomly flips the image and detections horizontally.
-
- The probability of flipping the image is 50%.
-
- Args:
- image: rank 3 float32 tensor with shape [height, width, channels].
- boxes: (optional) rank 2 float32 tensor with shape [N, 4]
- containing the bounding boxes.
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
- permutation.
- seed: random seed
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: image which is the same shape as input image.
-
- If boxes, masks, keypoints, and keypoint_flip_permutation are not None,
- the function also returns the following tensors.
-
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
-
- Raises:
- ValueError: if keypoints are provided but keypoint_flip_permutation is not.
- """
-
- def _flip_image(image):
- # flip image
- image_flipped = tf.image.flip_left_right(image)
- return image_flipped
-
- if keypoints is not None and keypoint_flip_permutation is None:
- raise ValueError(
-        'keypoints are provided but keypoint_flip_permutation is not provided')
-
- with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
- result = []
- # random variable defining whether to do flip or not
- generator_func = functools.partial(tf.random_uniform, [], seed=seed)
- do_a_flip_random = _get_or_create_preprocess_rand_vars(
- generator_func,
- preprocessor_cache.PreprocessorCache.HORIZONTAL_FLIP,
- preprocess_vars_cache)
- do_a_flip_random = tf.greater(do_a_flip_random, 0.5)
-
- # flip image
- image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
- result.append(image)
-
- # flip boxes
- if boxes is not None:
- boxes = tf.cond(do_a_flip_random, lambda: _flip_boxes_left_right(boxes),
- lambda: boxes)
- result.append(boxes)
-
- # flip masks
- if masks is not None:
- masks = tf.cond(do_a_flip_random, lambda: _flip_masks_left_right(masks),
- lambda: masks)
- result.append(masks)
-
- # flip keypoints
- if keypoints is not None and keypoint_flip_permutation is not None:
- permutation = keypoint_flip_permutation
- keypoints = tf.cond(
- do_a_flip_random,
- lambda: keypoint_ops.flip_horizontal(keypoints, 0.5, permutation),
- lambda: keypoints)
- result.append(keypoints)
-
- return tuple(result)
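-
-
-# Illustrative sketch (added; not from the original module): flipping an
-# image whose keypoints are ordered [nose, left_eye, right_eye]. The
-# permutation swaps the left/right keypoints so their semantics survive the
-# flip; the keypoint names are hypothetical.
-#
-#   image, boxes, keypoints = random_horizontal_flip(
-#       image, boxes=boxes, keypoints=keypoints,
-#       keypoint_flip_permutation=[0, 2, 1])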
-
-
-def random_vertical_flip(image,
- boxes=None,
- masks=None,
- keypoints=None,
- keypoint_flip_permutation=None,
- seed=None,
- preprocess_vars_cache=None):
- """Randomly flips the image and detections vertically.
-
- The probability of flipping the image is 50%.
-
- Args:
- image: rank 3 float32 tensor with shape [height, width, channels].
- boxes: (optional) rank 2 float32 tensor with shape [N, 4]
- containing the bounding boxes.
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
- permutation.
- seed: random seed
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: image which is the same shape as input image.
-
- If boxes, masks, keypoints, and keypoint_flip_permutation are not None,
- the function also returns the following tensors.
-
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
-
- Raises:
- ValueError: if keypoints are provided but keypoint_flip_permutation is not.
- """
-
- def _flip_image(image):
- # flip image
- image_flipped = tf.image.flip_up_down(image)
- return image_flipped
-
- if keypoints is not None and keypoint_flip_permutation is None:
- raise ValueError(
-        'keypoints are provided but keypoint_flip_permutation is not provided')
-
- with tf.name_scope('RandomVerticalFlip', values=[image, boxes]):
- result = []
- # random variable defining whether to do flip or not
- generator_func = functools.partial(tf.random_uniform, [], seed=seed)
- do_a_flip_random = _get_or_create_preprocess_rand_vars(
- generator_func, preprocessor_cache.PreprocessorCache.VERTICAL_FLIP,
- preprocess_vars_cache)
- do_a_flip_random = tf.greater(do_a_flip_random, 0.5)
-
- # flip image
- image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
- result.append(image)
-
- # flip boxes
- if boxes is not None:
- boxes = tf.cond(do_a_flip_random, lambda: _flip_boxes_up_down(boxes),
- lambda: boxes)
- result.append(boxes)
-
- # flip masks
- if masks is not None:
- masks = tf.cond(do_a_flip_random, lambda: _flip_masks_up_down(masks),
- lambda: masks)
- result.append(masks)
-
- # flip keypoints
- if keypoints is not None and keypoint_flip_permutation is not None:
- permutation = keypoint_flip_permutation
- keypoints = tf.cond(
- do_a_flip_random,
- lambda: keypoint_ops.flip_vertical(keypoints, 0.5, permutation),
- lambda: keypoints)
- result.append(keypoints)
-
- return tuple(result)
-
-
-def random_rotation90(image,
- boxes=None,
- masks=None,
- keypoints=None,
- seed=None,
- preprocess_vars_cache=None):
- """Randomly rotates the image and detections 90 degrees counter-clockwise.
-
- The probability of rotating the image is 50%. This can be combined with
- random_horizontal_flip and random_vertical_flip to produce an output with a
- uniform distribution of the eight possible 90 degree rotation / reflection
- combinations.
-
- Args:
- image: rank 3 float32 tensor with shape [height, width, channels].
- boxes: (optional) rank 2 float32 tensor with shape [N, 4]
- containing the bounding boxes.
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- seed: random seed
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: image which is the same shape as input image.
-
- If boxes, masks, and keypoints, are not None,
- the function also returns the following tensors.
-
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
- """
-
- def _rot90_image(image):
-    # rotate the image 90 degrees counter-clockwise
- image_rotated = tf.image.rot90(image)
- return image_rotated
-
- with tf.name_scope('RandomRotation90', values=[image, boxes]):
- result = []
-
- # random variable defining whether to rotate by 90 degrees or not
- generator_func = functools.partial(tf.random_uniform, [], seed=seed)
- do_a_rot90_random = _get_or_create_preprocess_rand_vars(
- generator_func, preprocessor_cache.PreprocessorCache.ROTATION90,
- preprocess_vars_cache)
- do_a_rot90_random = tf.greater(do_a_rot90_random, 0.5)
-
-    # rotate image
- image = tf.cond(do_a_rot90_random, lambda: _rot90_image(image),
- lambda: image)
- result.append(image)
-
-    # rotate boxes
- if boxes is not None:
- boxes = tf.cond(do_a_rot90_random, lambda: _rot90_boxes(boxes),
- lambda: boxes)
- result.append(boxes)
-
-    # rotate masks
- if masks is not None:
- masks = tf.cond(do_a_rot90_random, lambda: _rot90_masks(masks),
- lambda: masks)
- result.append(masks)
-
-    # rotate keypoints
- if keypoints is not None:
- keypoints = tf.cond(
- do_a_rot90_random,
- lambda: keypoint_ops.rot90(keypoints),
- lambda: keypoints)
- result.append(keypoints)
-
- return tuple(result)
-
-
-def random_pixel_value_scale(image,
- minval=0.9,
- maxval=1.1,
- seed=None,
- preprocess_vars_cache=None):
- """Scales each value in the pixels of the image.
-
-  This function scales each pixel independently of the others.
-  For each value in the image tensor, it draws a random number between
-  minval and maxval and multiplies the value by it.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 255].
- minval: lower ratio of scaling pixel values.
- maxval: upper ratio of scaling pixel values.
- seed: random seed.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: image which is the same shape as input image.
- """
- with tf.name_scope('RandomPixelValueScale', values=[image]):
- generator_func = functools.partial(
- tf.random_uniform, tf.shape(image),
- minval=minval, maxval=maxval,
- dtype=tf.float32, seed=seed)
- color_coef = _get_or_create_preprocess_rand_vars(
- generator_func,
- preprocessor_cache.PreprocessorCache.PIXEL_VALUE_SCALE,
- preprocess_vars_cache)
-
- image = tf.multiply(image, color_coef)
- image = tf.clip_by_value(image, 0.0, 255.0)
-
- return image
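-
-
-# Worked example (added for clarity): with the default range [0.9, 1.1], a
-# pixel of value 200 may come back anywhere in [180, 220]; every pixel draws
-# its own factor, so the perturbation is per-value rather than global.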
-
-
-def random_image_scale(image,
- masks=None,
- min_scale_ratio=0.5,
- max_scale_ratio=2.0,
- seed=None,
- preprocess_vars_cache=None):
- """Scales the image size.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels].
- masks: (optional) rank 3 float32 tensor containing masks with
- size [height, width, num_masks]. The value is set to None if there are no
- masks.
- min_scale_ratio: minimum scaling ratio.
- max_scale_ratio: maximum scaling ratio.
- seed: random seed.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: image which is the same rank as input image.
-    masks: If masks is not None, resized masks which are the same rank as the
-           input masks will be returned.
- """
- with tf.name_scope('RandomImageScale', values=[image]):
- result = []
- image_shape = tf.shape(image)
- image_height = image_shape[0]
- image_width = image_shape[1]
- generator_func = functools.partial(
- tf.random_uniform, [],
- minval=min_scale_ratio, maxval=max_scale_ratio,
- dtype=tf.float32, seed=seed)
- size_coef = _get_or_create_preprocess_rand_vars(
- generator_func, preprocessor_cache.PreprocessorCache.IMAGE_SCALE,
- preprocess_vars_cache)
-
- image_newysize = tf.to_int32(
- tf.multiply(tf.to_float(image_height), size_coef))
- image_newxsize = tf.to_int32(
- tf.multiply(tf.to_float(image_width), size_coef))
- image = tf.image.resize_images(
- image, [image_newysize, image_newxsize], align_corners=True)
- result.append(image)
-    if masks is not None:
- masks = tf.image.resize_nearest_neighbor(
- masks, [image_newysize, image_newxsize], align_corners=True)
- result.append(masks)
- return tuple(result)
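-
-
-# Note (added for clarity): masks are resized with nearest-neighbor rather
-# than bilinear interpolation so that (near-)binary masks stay binary
-# instead of acquiring fractional values along instance boundaries.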
-
-
-def random_rgb_to_gray(image,
- probability=0.1,
- seed=None,
- preprocess_vars_cache=None):
- """Changes the image from RGB to Grayscale with the given probability.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 255].
- probability: the probability of returning a grayscale image.
- The probability should be a number between [0, 1].
- seed: random seed.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: image which is the same shape as input image.
- """
- def _image_to_gray(image):
- image_gray1 = _rgb_to_grayscale(image)
- image_gray3 = tf.image.grayscale_to_rgb(image_gray1)
- return image_gray3
-
- with tf.name_scope('RandomRGBtoGray', values=[image]):
- # random variable defining whether to change to grayscale or not
- generator_func = functools.partial(tf.random_uniform, [], seed=seed)
- do_gray_random = _get_or_create_preprocess_rand_vars(
- generator_func, preprocessor_cache.PreprocessorCache.RGB_TO_GRAY,
- preprocess_vars_cache)
-
- image = tf.cond(
- tf.greater(do_gray_random, probability), lambda: image,
- lambda: _image_to_gray(image))
-
- return image
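-
-
-# Note (added for clarity): the uniform draw above is compared against
-# `probability`, so the grayscale branch is taken when the draw falls at or
-# below it, i.e. roughly `probability` of the time (10% by default).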
-
-
-def random_adjust_brightness(image,
- max_delta=0.2,
- seed=None,
- preprocess_vars_cache=None):
- """Randomly adjusts brightness.
-
- Makes sure the output image is still between 0 and 255.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 255].
-    max_delta: how much to change the brightness; a delta is drawn uniformly
-               from [-max_delta, max_delta). Should be a value in [0, 1).
- seed: random seed.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
-    image: image which is the same shape as input image.
- """
- with tf.name_scope('RandomAdjustBrightness', values=[image]):
- generator_func = functools.partial(tf.random_uniform, [],
- -max_delta, max_delta, seed=seed)
- delta = _get_or_create_preprocess_rand_vars(
- generator_func,
- preprocessor_cache.PreprocessorCache.ADJUST_BRIGHTNESS,
- preprocess_vars_cache)
-
- image = tf.image.adjust_brightness(image / 255, delta) * 255
- image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0)
- return image
-
-
-def random_adjust_contrast(image,
- min_delta=0.8,
- max_delta=1.25,
- seed=None,
- preprocess_vars_cache=None):
- """Randomly adjusts contrast.
-
- Makes sure the output image is still between 0 and 255.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 255].
- min_delta: see max_delta.
-    max_delta: how much to change the contrast. The contrast of the image is
-               multiplied by a factor drawn uniformly between min_delta and
-               max_delta.
- seed: random seed.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: image which is the same shape as input image.
- """
- with tf.name_scope('RandomAdjustContrast', values=[image]):
- generator_func = functools.partial(tf.random_uniform, [],
- min_delta, max_delta, seed=seed)
- contrast_factor = _get_or_create_preprocess_rand_vars(
- generator_func,
- preprocessor_cache.PreprocessorCache.ADJUST_CONTRAST,
- preprocess_vars_cache)
- image = tf.image.adjust_contrast(image / 255, contrast_factor) * 255
- image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0)
- return image
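-
-
-# Note (added for clarity): tf.image.adjust_contrast computes
-# (x - mean) * contrast_factor + mean per channel, so a factor near the
-# default upper bound of 1.25 stretches values away from the channel mean
-# while a factor near 0.8 compresses them toward it; the final clip keeps
-# the result in [0, 255].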
-
-
-def random_adjust_hue(image,
- max_delta=0.02,
- seed=None,
- preprocess_vars_cache=None):
- """Randomly adjusts hue.
-
- Makes sure the output image is still between 0 and 255.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 255].
-    max_delta: change hue randomly with a delta drawn uniformly from
-               [-max_delta, max_delta).
- seed: random seed.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: image which is the same shape as input image.
- """
- with tf.name_scope('RandomAdjustHue', values=[image]):
- generator_func = functools.partial(tf.random_uniform, [],
- -max_delta, max_delta, seed=seed)
- delta = _get_or_create_preprocess_rand_vars(
- generator_func, preprocessor_cache.PreprocessorCache.ADJUST_HUE,
- preprocess_vars_cache)
- image = tf.image.adjust_hue(image / 255, delta) * 255
- image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0)
- return image
-
-
-def random_adjust_saturation(image,
- min_delta=0.8,
- max_delta=1.25,
- seed=None,
- preprocess_vars_cache=None):
- """Randomly adjusts saturation.
-
- Makes sure the output image is still between 0 and 255.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 255].
- min_delta: see max_delta.
-    max_delta: how much to change the saturation. The saturation of the image
-               is multiplied by a factor drawn uniformly between min_delta and
-               max_delta.
- seed: random seed.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: image which is the same shape as input image.
- """
- with tf.name_scope('RandomAdjustSaturation', values=[image]):
- generator_func = functools.partial(tf.random_uniform, [],
- min_delta, max_delta, seed=seed)
- saturation_factor = _get_or_create_preprocess_rand_vars(
- generator_func,
- preprocessor_cache.PreprocessorCache.ADJUST_SATURATION,
- preprocess_vars_cache)
- image = tf.image.adjust_saturation(image / 255, saturation_factor) * 255
- image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0)
- return image
-
-
-def random_distort_color(image, color_ordering=0, preprocess_vars_cache=None):
- """Randomly distorts color.
-
- Randomly distorts color using a combination of brightness, hue, contrast and
- saturation changes. Makes sure the output image is still between 0 and 255.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 255].
- color_ordering: Python int, a type of distortion (valid values: 0, 1).
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: image which is the same shape as input image.
-
- Raises:
- ValueError: if color_ordering is not in {0, 1}.
- """
- with tf.name_scope('RandomDistortColor', values=[image]):
- if color_ordering == 0:
- image = random_adjust_brightness(
- image, max_delta=32. / 255.,
- preprocess_vars_cache=preprocess_vars_cache)
- image = random_adjust_saturation(
- image, min_delta=0.5, max_delta=1.5,
- preprocess_vars_cache=preprocess_vars_cache)
- image = random_adjust_hue(
- image, max_delta=0.2,
- preprocess_vars_cache=preprocess_vars_cache)
- image = random_adjust_contrast(
- image, min_delta=0.5, max_delta=1.5,
- preprocess_vars_cache=preprocess_vars_cache)
-
- elif color_ordering == 1:
- image = random_adjust_brightness(
- image, max_delta=32. / 255.,
- preprocess_vars_cache=preprocess_vars_cache)
- image = random_adjust_contrast(
- image, min_delta=0.5, max_delta=1.5,
- preprocess_vars_cache=preprocess_vars_cache)
- image = random_adjust_saturation(
- image, min_delta=0.5, max_delta=1.5,
- preprocess_vars_cache=preprocess_vars_cache)
- image = random_adjust_hue(
- image, max_delta=0.2,
- preprocess_vars_cache=preprocess_vars_cache)
- else:
- raise ValueError('color_ordering must be in {0, 1}')
- return image
-
-
-def random_jitter_boxes(boxes, ratio=0.05, seed=None):
- """Randomly jitter boxes in image.
-
- Args:
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- ratio: The ratio of the box width and height that the corners can jitter.
- For example if the width is 100 pixels and ratio is 0.05,
- the corners can jitter up to 5 pixels in the x direction.
- seed: random seed.
-
- Returns:
- boxes: boxes which is the same shape as input boxes.
- """
- def random_jitter_box(box, ratio, seed):
- """Randomly jitter box.
-
- Args:
- box: bounding box [1, 1, 4].
- ratio: max ratio between jittered box and original box,
- a number between [0, 0.5].
- seed: random seed.
-
- Returns:
- jittered_box: jittered box.
- """
- rand_numbers = tf.random_uniform(
- [1, 1, 4], minval=-ratio, maxval=ratio, dtype=tf.float32, seed=seed)
- box_width = tf.subtract(box[0, 0, 3], box[0, 0, 1])
- box_height = tf.subtract(box[0, 0, 2], box[0, 0, 0])
- hw_coefs = tf.stack([box_height, box_width, box_height, box_width])
- hw_rand_coefs = tf.multiply(hw_coefs, rand_numbers)
- jittered_box = tf.add(box, hw_rand_coefs)
- jittered_box = tf.clip_by_value(jittered_box, 0.0, 1.0)
- return jittered_box
-
- with tf.name_scope('RandomJitterBoxes', values=[boxes]):
-    # boxes are [N, 4]. Let's first make them [N, 1, 1, 4].
- boxes_shape = tf.shape(boxes)
- boxes = tf.expand_dims(boxes, 1)
- boxes = tf.expand_dims(boxes, 2)
-
- distorted_boxes = tf.map_fn(
- lambda x: random_jitter_box(x, ratio, seed), boxes, dtype=tf.float32)
-
- distorted_boxes = tf.reshape(distorted_boxes, boxes_shape)
-
- return distorted_boxes
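-
-
-# Worked example (added for clarity): with ratio=0.05, a box of normalized
-# height 0.2 and width 0.4 can have each corner coordinate perturbed by up
-# to +/-0.01 vertically and +/-0.02 horizontally before clipping to [0, 1].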
-
-
-def _strict_random_crop_image(image,
- boxes,
- labels,
- label_scores=None,
- multiclass_scores=None,
- masks=None,
- keypoints=None,
- min_object_covered=1.0,
- aspect_ratio_range=(0.75, 1.33),
- area_range=(0.1, 1.0),
- overlap_thresh=0.3,
- preprocess_vars_cache=None):
- """Performs random crop.
-
- Note: boxes will be clipped to the crop. Keypoint coordinates that are
- outside the crop will be set to NaN, which is consistent with the original
- keypoint encoding for non-existing keypoints. This function always crops
-  the image and is intended to be used by the `random_crop_image` function,
-  which sometimes returns the image unchanged.
-
- Args:
- image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes with shape
- [num_instances, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- labels: rank 1 int32 tensor containing the object classes.
- label_scores: (optional) float32 tensor of shape [num_instances]
- representing the score for each box.
- multiclass_scores: (optional) float32 tensor of shape
- [num_instances, num_classes] representing the score for each box for each
- class.
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- min_object_covered: the cropped image must cover at least this fraction of
- at least one of the input bounding boxes.
- aspect_ratio_range: allowed range for aspect ratio of cropped image.
- area_range: allowed range for area ratio between cropped image and the
- original image.
- overlap_thresh: minimum overlap thresh with new cropped
- image to keep the box.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: image which is the same rank as input image.
- boxes: boxes which is the same rank as input boxes.
- Boxes are in normalized form.
- labels: new labels.
-
- If label_scores, multiclass_scores, masks, or keypoints is not None, the
- function also returns:
- label_scores: rank 1 float32 tensor with shape [num_instances].
- multiclass_scores: rank 2 float32 tensor with shape
- [num_instances, num_classes]
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
- """
- with tf.name_scope('RandomCropImage', values=[image, boxes]):
- image_shape = tf.shape(image)
-
-    # boxes are [N, 4]. Let's first make them [N, 1, 4].
- boxes_expanded = tf.expand_dims(
- tf.clip_by_value(
- boxes, clip_value_min=0.0, clip_value_max=1.0), 1)
-
- generator_func = functools.partial(
- tf.image.sample_distorted_bounding_box,
- image_shape,
- bounding_boxes=boxes_expanded,
- min_object_covered=min_object_covered,
- aspect_ratio_range=aspect_ratio_range,
- area_range=area_range,
- max_attempts=100,
- use_image_if_no_bounding_boxes=True)
-
- # for ssd cropping, each value of min_object_covered has its own
- # cached random variable
- sample_distorted_bounding_box = _get_or_create_preprocess_rand_vars(
- generator_func,
- preprocessor_cache.PreprocessorCache.STRICT_CROP_IMAGE,
- preprocess_vars_cache, key=min_object_covered)
-
- im_box_begin, im_box_size, im_box = sample_distorted_bounding_box
-
- new_image = tf.slice(image, im_box_begin, im_box_size)
- new_image.set_shape([None, None, image.get_shape()[2]])
-
- # [1, 4]
- im_box_rank2 = tf.squeeze(im_box, squeeze_dims=[0])
- # [4]
- im_box_rank1 = tf.squeeze(im_box)
-
- boxlist = box_list.BoxList(boxes)
- boxlist.add_field('labels', labels)
-
- if label_scores is not None:
- boxlist.add_field('label_scores', label_scores)
-
- if multiclass_scores is not None:
- boxlist.add_field('multiclass_scores', multiclass_scores)
-
- im_boxlist = box_list.BoxList(im_box_rank2)
-
- # remove boxes that are outside cropped image
- boxlist, inside_window_ids = box_list_ops.prune_completely_outside_window(
- boxlist, im_box_rank1)
-
- # remove boxes whose overlap with the cropped image is below overlap_thresh
- overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes(
- boxlist, im_boxlist, overlap_thresh)
-
- # change the coordinate of the remaining boxes
- new_labels = overlapping_boxlist.get_field('labels')
- new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
- im_box_rank1)
- new_boxes = new_boxlist.get()
- new_boxes = tf.clip_by_value(
- new_boxes, clip_value_min=0.0, clip_value_max=1.0)
-
- result = [new_image, new_boxes, new_labels]
-
- if label_scores is not None:
- new_label_scores = overlapping_boxlist.get_field('label_scores')
- result.append(new_label_scores)
-
- if multiclass_scores is not None:
- new_multiclass_scores = overlapping_boxlist.get_field('multiclass_scores')
- result.append(new_multiclass_scores)
-
- if masks is not None:
- masks_of_boxes_inside_window = tf.gather(masks, inside_window_ids)
- masks_of_boxes_completely_inside_window = tf.gather(
- masks_of_boxes_inside_window, keep_ids)
- masks_box_begin = [0, im_box_begin[0], im_box_begin[1]]
- masks_box_size = [-1, im_box_size[0], im_box_size[1]]
- new_masks = tf.slice(
- masks_of_boxes_completely_inside_window,
- masks_box_begin, masks_box_size)
- result.append(new_masks)
-
- if keypoints is not None:
- keypoints_of_boxes_inside_window = tf.gather(keypoints, inside_window_ids)
- keypoints_of_boxes_completely_inside_window = tf.gather(
- keypoints_of_boxes_inside_window, keep_ids)
- new_keypoints = keypoint_ops.change_coordinate_frame(
- keypoints_of_boxes_completely_inside_window, im_box_rank1)
- new_keypoints = keypoint_ops.prune_outside_window(new_keypoints,
- [0.0, 0.0, 1.0, 1.0])
- result.append(new_keypoints)
-
- return tuple(result)
-
-
-def random_crop_image(image,
- boxes,
- labels,
- label_scores=None,
- multiclass_scores=None,
- masks=None,
- keypoints=None,
- min_object_covered=1.0,
- aspect_ratio_range=(0.75, 1.33),
- area_range=(0.1, 1.0),
- overlap_thresh=0.3,
- random_coef=0.0,
- seed=None,
- preprocess_vars_cache=None):
- """Randomly crops the image.
-
- Given the input image and its bounding boxes, this op randomly
- crops a subimage. Given a user-provided set of input constraints,
- the crop window is resampled until it satisfies these constraints.
- If within 100 trials it is unable to find a valid crop, the original
- image is returned. See the Args section for a description of the input
- constraints. Both input boxes and returned boxes are in normalized
- form (i.e., they lie in the unit square [0, 1]).
- This function will return the original image with probability random_coef.
-
- Note: boxes will be clipped to the crop. Keypoint coordinates that are
- outside the crop will be set to NaN, which is consistent with the original
- keypoint encoding for non-existing keypoints.
-
- Args:
- image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes with shape
- [num_instances, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- labels: rank 1 int32 tensor containing the object classes.
- label_scores: (optional) float32 tensor of shape [num_instances]
- representing the score for each box.
- multiclass_scores: (optional) float32 tensor of shape
- [num_instances, num_classes] representing the score for each box for each
- class.
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- min_object_covered: the cropped image must cover at least this fraction of
- at least one of the input bounding boxes.
- aspect_ratio_range: allowed range for aspect ratio of cropped image.
- area_range: allowed range for area ratio between cropped image and the
- original image.
- overlap_thresh: minimum overlap a box must have with the new cropped
- image in order to be kept.
- random_coef: a random coefficient that defines the chance of getting the
- original image. If random_coef is 0, we will always get the
- cropped image, and if it is 1.0, we will always get the
- original image.
- seed: random seed.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: Image shape will be [new_height, new_width, channels].
- boxes: boxes which is the same rank as input boxes. Boxes are in normalized
- form.
- labels: new labels.
-
- If label_scores, multiclass_scores, masks, or keypoints is not None, the
- function also returns:
- label_scores: rank 1 float32 tensor with shape [num_instances].
- multiclass_scores: rank 2 float32 tensor with shape
- [num_instances, num_classes]
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
- """
-
- def strict_random_crop_image_fn():
- return _strict_random_crop_image(
- image,
- boxes,
- labels,
- label_scores=label_scores,
- multiclass_scores=multiclass_scores,
- masks=masks,
- keypoints=keypoints,
- min_object_covered=min_object_covered,
- aspect_ratio_range=aspect_ratio_range,
- area_range=area_range,
- overlap_thresh=overlap_thresh,
- preprocess_vars_cache=preprocess_vars_cache)
-
- # Avoids tf.cond to speed up Faster R-CNN training on borg. See b/140057645.
- if random_coef < sys.float_info.min:
- result = strict_random_crop_image_fn()
- else:
- generator_func = functools.partial(tf.random_uniform, [], seed=seed)
- do_a_crop_random = _get_or_create_preprocess_rand_vars(
- generator_func, preprocessor_cache.PreprocessorCache.CROP_IMAGE,
- preprocess_vars_cache)
- do_a_crop_random = tf.greater(do_a_crop_random, random_coef)
-
- outputs = [image, boxes, labels]
-
- if label_scores is not None:
- outputs.append(label_scores)
- if multiclass_scores is not None:
- outputs.append(multiclass_scores)
- if masks is not None:
- outputs.append(masks)
- if keypoints is not None:
- outputs.append(keypoints)
-
- result = tf.cond(do_a_crop_random, strict_random_crop_image_fn,
- lambda: tuple(outputs))
- return result
-
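-# Example usage (illustrative sketch; assumes `image`, `boxes` and `labels`
-# tensors with the shapes documented above, and that PreprocessorCache takes
-# no constructor arguments). Sharing one cache across calls replays the same
-# random crop deterministically:
-#
-#   cache = preprocessor_cache.PreprocessorCache()
-#   new_image, new_boxes, new_labels = random_crop_image(
-#       image, boxes, labels,
-#       min_object_covered=0.5,
-#       overlap_thresh=0.3,
-#       random_coef=0.15,
-#       preprocess_vars_cache=cache)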
-
-def random_pad_image(image,
- boxes,
- min_image_size=None,
- max_image_size=None,
- pad_color=None,
- seed=None,
- preprocess_vars_cache=None):
- """Randomly pads the image.
-
- This function randomly pads the image with pad_color (the mean color of
- the input image by default). The final size of the padded image will be
- between min_image_size and max_image_size.
- If min_image_size is smaller than the input image size, min_image_size will
- be set to the input image size, and likewise for max_image_size. The input
- image will be located at a uniformly random location inside the padded
- image. The relative location of the boxes to the original image will remain
- the same.
-
- Args:
- image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- min_image_size: a tensor of size [min_height, min_width], type tf.int32.
- If passed as None, will be set to image size
- [height, width].
- max_image_size: a tensor of size [max_height, max_width], type tf.int32.
- If passed as None, will be set to twice the
- image [height * 2, width * 2].
- pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
- If set as None, it will be set to the average color of the input
- image.
- seed: random seed.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: Image shape will be [new_height, new_width, channels].
- boxes: boxes which is the same rank as input boxes. Boxes are in normalized
- form.
- """
- if pad_color is None:
- pad_color = tf.reduce_mean(image, axis=[0, 1])
-
- image_shape = tf.shape(image)
- image_height = image_shape[0]
- image_width = image_shape[1]
-
- if max_image_size is None:
- max_image_size = tf.stack([image_height * 2, image_width * 2])
- max_image_size = tf.maximum(max_image_size,
- tf.stack([image_height, image_width]))
-
- if min_image_size is None:
- min_image_size = tf.stack([image_height, image_width])
- min_image_size = tf.maximum(min_image_size,
- tf.stack([image_height, image_width]))
-
- target_height = tf.cond(
- max_image_size[0] > min_image_size[0],
- lambda: _random_integer(min_image_size[0], max_image_size[0], seed),
- lambda: max_image_size[0])
-
- target_width = tf.cond(
- max_image_size[1] > min_image_size[1],
- lambda: _random_integer(min_image_size[1], max_image_size[1], seed),
- lambda: max_image_size[1])
-
- offset_height = tf.cond(
- target_height > image_height,
- lambda: _random_integer(0, target_height - image_height, seed),
- lambda: tf.constant(0, dtype=tf.int32))
-
- offset_width = tf.cond(
- target_width > image_width,
- lambda: _random_integer(0, target_width - image_width, seed),
- lambda: tf.constant(0, dtype=tf.int32))
-
- gen_func = lambda: (target_height, target_width, offset_height, offset_width)
- params = _get_or_create_preprocess_rand_vars(
- gen_func, preprocessor_cache.PreprocessorCache.PAD_IMAGE,
- preprocess_vars_cache)
- target_height, target_width, offset_height, offset_width = params
-
- new_image = tf.image.pad_to_bounding_box(
- image,
- offset_height=offset_height,
- offset_width=offset_width,
- target_height=target_height,
- target_width=target_width)
-
- # Setting color of the padded pixels
- image_ones = tf.ones_like(image)
- image_ones_padded = tf.image.pad_to_bounding_box(
- image_ones,
- offset_height=offset_height,
- offset_width=offset_width,
- target_height=target_height,
- target_width=target_width)
- image_color_padded = (1.0 - image_ones_padded) * pad_color
- new_image += image_color_padded
-
- # setting boxes
- new_window = tf.to_float(
- tf.stack([
- -offset_height, -offset_width, target_height - offset_height,
- target_width - offset_width
- ]))
- new_window /= tf.to_float(
- tf.stack([image_height, image_width, image_height, image_width]))
- boxlist = box_list.BoxList(boxes)
- new_boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window)
- new_boxes = new_boxlist.get()
-
- return new_image, new_boxes
-
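-# Example usage (illustrative sketch): padding an image to at most twice its
-# size. With min_image_size and max_image_size left as None this is already
-# the default behavior; the explicit tensors below only make the bounds
-# visible:
-#
-#   shape = tf.shape(image)
-#   padded_image, padded_boxes = random_pad_image(
-#       image, boxes,
-#       min_image_size=tf.stack([shape[0], shape[1]]),
-#       max_image_size=tf.stack([2 * shape[0], 2 * shape[1]]))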
-
-def random_crop_pad_image(image,
- boxes,
- labels,
- label_scores=None,
- multiclass_scores=None,
- min_object_covered=1.0,
- aspect_ratio_range=(0.75, 1.33),
- area_range=(0.1, 1.0),
- overlap_thresh=0.3,
- random_coef=0.0,
- min_padded_size_ratio=(1.0, 1.0),
- max_padded_size_ratio=(2.0, 2.0),
- pad_color=None,
- seed=None,
- preprocess_vars_cache=None):
- """Randomly crops and pads the image.
-
- Given an input image and its bounding boxes, this op first randomly crops
- the image and then randomly pads the image with background values. Parameters
- min_padded_size_ratio and max_padded_size_ratio, determine the range of the
- final output image size. Specifically, the final image size will have a size
- in the range of min_padded_size_ratio * tf.shape(image) and
- max_padded_size_ratio * tf.shape(image). Note that these ratios are with
- respect to the size of the original image, so we can't capture the same
- effect easily by independently applying RandomCropImage
- followed by RandomPadImage.
-
- Args:
- image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- labels: rank 1 int32 tensor containing the object classes.
- label_scores: rank 1 float32 tensor containing the label scores.
- multiclass_scores: (optional) float32 tensor of shape
- [num_instances, num_classes] representing the score for each box for each
- class.
- min_object_covered: the cropped image must cover at least this fraction of
- at least one of the input bounding boxes.
- aspect_ratio_range: allowed range for aspect ratio of cropped image.
- area_range: allowed range for area ratio between cropped image and the
- original image.
- overlap_thresh: minimum overlap a box must have with the new cropped
- image in order to be kept.
- random_coef: a random coefficient that defines the chance of getting the
- original image. If random_coef is 0, we will always get the
- cropped image, and if it is 1.0, we will always get the
- original image.
- min_padded_size_ratio: min ratio of padded image height and width to the
- input image's height and width.
- max_padded_size_ratio: max ratio of padded image height and width to the
- input image's height and width.
- pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
- If set as None, it will be set to the average color of the randomly
- cropped image.
- seed: random seed.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- padded_image: padded image.
- padded_boxes: boxes which is the same rank as input boxes. Boxes are in
- normalized form.
- cropped_labels: cropped labels.
- If label_scores is not None, the function also returns:
- cropped_label_scores: cropped label scores.
- If multiclass_scores is not None, the function also returns:
- cropped_multiclass_scores: cropped multiclass scores.
-
- """
- image_size = tf.shape(image)
- image_height = image_size[0]
- image_width = image_size[1]
- result = random_crop_image(
- image=image,
- boxes=boxes,
- labels=labels,
- label_scores=label_scores,
- multiclass_scores=multiclass_scores,
- min_object_covered=min_object_covered,
- aspect_ratio_range=aspect_ratio_range,
- area_range=area_range,
- overlap_thresh=overlap_thresh,
- random_coef=random_coef,
- seed=seed,
- preprocess_vars_cache=preprocess_vars_cache)
-
- cropped_image, cropped_boxes, cropped_labels = result[:3]
-
- min_image_size = tf.to_int32(
- tf.to_float(tf.stack([image_height, image_width])) *
- min_padded_size_ratio)
- max_image_size = tf.to_int32(
- tf.to_float(tf.stack([image_height, image_width])) *
- max_padded_size_ratio)
-
- padded_image, padded_boxes = random_pad_image(
- cropped_image,
- cropped_boxes,
- min_image_size=min_image_size,
- max_image_size=max_image_size,
- pad_color=pad_color,
- seed=seed,
- preprocess_vars_cache=preprocess_vars_cache)
-
- cropped_padded_output = (padded_image, padded_boxes, cropped_labels)
-
- index = 3
- if label_scores is not None:
- cropped_label_scores = result[index]
- cropped_padded_output += (cropped_label_scores,)
- index += 1
-
- if multiclass_scores is not None:
- cropped_multiclass_scores = result[index]
- cropped_padded_output += (cropped_multiclass_scores,)
-
- return cropped_padded_output
-
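-# Example usage (illustrative sketch): crop, then pad back up so the final
-# size lies between 1x and 2x the *original* image size -- an effect that,
-# as noted in the docstring, chaining random_crop_image and random_pad_image
-# independently cannot reproduce:
-#
-#   padded_image, padded_boxes, cropped_labels = random_crop_pad_image(
-#       image, boxes, labels,
-#       min_padded_size_ratio=(1.0, 1.0),
-#       max_padded_size_ratio=(2.0, 2.0))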
-
-def random_crop_to_aspect_ratio(image,
- boxes,
- labels,
- label_scores=None,
- multiclass_scores=None,
- masks=None,
- keypoints=None,
- aspect_ratio=1.0,
- overlap_thresh=0.3,
- seed=None,
- preprocess_vars_cache=None):
- """Randomly crops an image to the specified aspect ratio.
-
- Randomly crops a portion of the image such that the crop is of the
- specified aspect ratio, and the crop is as large as possible. If the specified
- aspect ratio is larger than the aspect ratio of the image, this op will
- randomly remove rows from the top and bottom of the image. If the specified
- aspect ratio is less than the aspect ratio of the image, this op will randomly
- remove cols from the left and right of the image. If the specified aspect
- ratio is the same as the aspect ratio of the image, this op will return the
- image.
-
- Args:
- image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- labels: rank 1 int32 tensor containing the object classes.
- label_scores: (optional) float32 tensor of shape [num_instances]
- representing the score for each box.
- multiclass_scores: (optional) float32 tensor of shape
- [num_instances, num_classes] representing the score for each box for each
- class.
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- aspect_ratio: the aspect ratio of cropped image.
- overlap_thresh: minimum overlap a box must have with the new cropped
- image in order to be kept.
- seed: random seed.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: image which is the same rank as input image.
- boxes: boxes which is the same rank as input boxes.
- Boxes are in normalized form.
- labels: new labels.
-
- If label_scores, masks, keypoints, or multiclass_scores is not None, the
- function also returns:
- label_scores: rank 1 float32 tensor with shape [num_instances].
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
- multiclass_scores: rank 2 float32 tensor with shape
- [num_instances, num_classes]
-
- Raises:
- ValueError: If image is not a 3D tensor.
- """
- if len(image.get_shape()) != 3:
- raise ValueError('Image should be 3D tensor')
-
- with tf.name_scope('RandomCropToAspectRatio', values=[image]):
- image_shape = tf.shape(image)
- orig_height = image_shape[0]
- orig_width = image_shape[1]
- orig_aspect_ratio = tf.to_float(orig_width) / tf.to_float(orig_height)
- new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32)
- def target_height_fn():
- return tf.to_int32(tf.round(tf.to_float(orig_width) / new_aspect_ratio))
-
- target_height = tf.cond(orig_aspect_ratio >= new_aspect_ratio,
- lambda: orig_height, target_height_fn)
-
- def target_width_fn():
- return tf.to_int32(tf.round(tf.to_float(orig_height) * new_aspect_ratio))
-
- target_width = tf.cond(orig_aspect_ratio <= new_aspect_ratio,
- lambda: orig_width, target_width_fn)
-
- # either offset_height = 0 and offset_width is randomly chosen from
- # [0, orig_width - target_width], or else offset_width = 0 and
- # offset_height is randomly chosen from [0, orig_height - target_height]
- offset_height = _random_integer(0, orig_height - target_height + 1, seed)
- offset_width = _random_integer(0, orig_width - target_width + 1, seed)
-
- generator_func = lambda: (offset_height, offset_width)
- offset_height, offset_width = _get_or_create_preprocess_rand_vars(
- generator_func,
- preprocessor_cache.PreprocessorCache.CROP_TO_ASPECT_RATIO,
- preprocess_vars_cache)
-
- new_image = tf.image.crop_to_bounding_box(
- image, offset_height, offset_width, target_height, target_width)
-
- im_box = tf.stack([
- tf.to_float(offset_height) / tf.to_float(orig_height),
- tf.to_float(offset_width) / tf.to_float(orig_width),
- tf.to_float(offset_height + target_height) / tf.to_float(orig_height),
- tf.to_float(offset_width + target_width) / tf.to_float(orig_width)
- ])
-
- boxlist = box_list.BoxList(boxes)
- boxlist.add_field('labels', labels)
-
- if label_scores is not None:
- boxlist.add_field('label_scores', label_scores)
-
- if multiclass_scores is not None:
- boxlist.add_field('multiclass_scores', multiclass_scores)
-
- im_boxlist = box_list.BoxList(tf.expand_dims(im_box, 0))
-
- # remove boxes whose overlap with the image is less than overlap_thresh
- overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes(
- boxlist, im_boxlist, overlap_thresh)
-
- # change the coordinate of the remaining boxes
- new_labels = overlapping_boxlist.get_field('labels')
- new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
- im_box)
- new_boxlist = box_list_ops.clip_to_window(new_boxlist,
- tf.constant([0.0, 0.0, 1.0, 1.0],
- tf.float32))
- new_boxes = new_boxlist.get()
-
- result = [new_image, new_boxes, new_labels]
-
- if label_scores is not None:
- new_label_scores = overlapping_boxlist.get_field('label_scores')
- result.append(new_label_scores)
-
- if multiclass_scores is not None:
- new_multiclass_scores = overlapping_boxlist.get_field('multiclass_scores')
- result.append(new_multiclass_scores)
-
- if masks is not None:
- masks_inside_window = tf.gather(masks, keep_ids)
- masks_box_begin = tf.stack([0, offset_height, offset_width])
- masks_box_size = tf.stack([-1, target_height, target_width])
- new_masks = tf.slice(masks_inside_window, masks_box_begin, masks_box_size)
- result.append(new_masks)
-
- if keypoints is not None:
- keypoints_inside_window = tf.gather(keypoints, keep_ids)
- new_keypoints = keypoint_ops.change_coordinate_frame(
- keypoints_inside_window, im_box)
- new_keypoints = keypoint_ops.prune_outside_window(new_keypoints,
- [0.0, 0.0, 1.0, 1.0])
- result.append(new_keypoints)
-
- return tuple(result)
-
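-# Example usage (illustrative sketch): aspect_ratio is width / height, so
-# cropping a landscape image to a 4:3 window looks like:
-#
-#   new_image, new_boxes, new_labels = random_crop_to_aspect_ratio(
-#       image, boxes, labels, aspect_ratio=4.0 / 3.0)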
-
-def random_pad_to_aspect_ratio(image,
- boxes,
- masks=None,
- keypoints=None,
- aspect_ratio=1.0,
- min_padded_size_ratio=(1.0, 1.0),
- max_padded_size_ratio=(2.0, 2.0),
- seed=None,
- preprocess_vars_cache=None):
- """Randomly zero pads an image to the specified aspect ratio.
-
- Pads the image so that the resulting image will have the specified aspect
- ratio without scaling less than the min_padded_size_ratio or more than the
- max_padded_size_ratio. If the min_padded_size_ratio or max_padded_size_ratio
- is lower than what is possible to maintain the aspect ratio, then this method
- will use the least padding to achieve the specified aspect ratio.
-
- Args:
- image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- aspect_ratio: aspect ratio of the final image.
- min_padded_size_ratio: min ratio of padded image height and width to the
- input image's height and width.
- max_padded_size_ratio: max ratio of padded image height and width to the
- input image's height and width.
- seed: random seed.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: image which is the same rank as input image.
- boxes: boxes which is the same rank as input boxes.
- Boxes are in normalized form.
-
- If masks or keypoints is not None, the function also returns:
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
-
- Raises:
- ValueError: If image is not a 3D tensor.
- """
- if len(image.get_shape()) != 3:
- raise ValueError('Image should be 3D tensor')
-
- with tf.name_scope('RandomPadToAspectRatio', values=[image]):
- image_shape = tf.shape(image)
- image_height = tf.to_float(image_shape[0])
- image_width = tf.to_float(image_shape[1])
- image_aspect_ratio = image_width / image_height
- new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32)
- target_height = tf.cond(
- image_aspect_ratio <= new_aspect_ratio,
- lambda: image_height,
- lambda: image_width / new_aspect_ratio)
- target_width = tf.cond(
- image_aspect_ratio >= new_aspect_ratio,
- lambda: image_width,
- lambda: image_height * new_aspect_ratio)
-
- min_height = tf.maximum(
- min_padded_size_ratio[0] * image_height, target_height)
- min_width = tf.maximum(
- min_padded_size_ratio[1] * image_width, target_width)
- max_height = tf.maximum(
- max_padded_size_ratio[0] * image_height, target_height)
- max_width = tf.maximum(
- max_padded_size_ratio[1] * image_width, target_width)
-
- max_scale = tf.minimum(max_height / target_height, max_width / target_width)
- min_scale = tf.minimum(
- max_scale,
- tf.maximum(min_height / target_height, min_width / target_width))
-
- generator_func = functools.partial(tf.random_uniform, [],
- min_scale, max_scale, seed=seed)
- scale = _get_or_create_preprocess_rand_vars(
- generator_func,
- preprocessor_cache.PreprocessorCache.PAD_TO_ASPECT_RATIO,
- preprocess_vars_cache)
-
- target_height = tf.round(scale * target_height)
- target_width = tf.round(scale * target_width)
-
- new_image = tf.image.pad_to_bounding_box(
- image, 0, 0, tf.to_int32(target_height), tf.to_int32(target_width))
-
- im_box = tf.stack([
- 0.0,
- 0.0,
- target_height / image_height,
- target_width / image_width
- ])
- boxlist = box_list.BoxList(boxes)
- new_boxlist = box_list_ops.change_coordinate_frame(boxlist, im_box)
- new_boxes = new_boxlist.get()
-
- result = [new_image, new_boxes]
-
- if masks is not None:
- new_masks = tf.expand_dims(masks, -1)
- new_masks = tf.image.pad_to_bounding_box(new_masks, 0, 0,
- tf.to_int32(target_height),
- tf.to_int32(target_width))
- new_masks = tf.squeeze(new_masks, [-1])
- result.append(new_masks)
-
- if keypoints is not None:
- new_keypoints = keypoint_ops.change_coordinate_frame(keypoints, im_box)
- result.append(new_keypoints)
-
- return tuple(result)
-
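-# Worked example (illustrative): with aspect_ratio=1.0, a 300x400 image is
-# first padded to a 400x400 square; the scale drawn from
-# [min_scale, max_scale] then grows both target dimensions together, so the
-# result keeps the requested aspect ratio:
-#
-#   padded_image, padded_boxes = random_pad_to_aspect_ratio(
-#       image, boxes, aspect_ratio=1.0)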
-
-def random_black_patches(image,
- max_black_patches=10,
- probability=0.5,
- size_to_image_ratio=0.1,
- random_seed=None,
- preprocess_vars_cache=None):
- """Randomly adds some black patches to the image.
-
- This op adds up to max_black_patches square black patches of a fixed size
- to the image where size is specified via the size_to_image_ratio parameter.
-
- Args:
- image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- max_black_patches: number of times that the function tries to add a
- black box to the image.
- probability: the probability of adding a black patch at each try.
- size_to_image_ratio: Determines the ratio of the size of the black patches
- to the size of the image.
- box_size = size_to_image_ratio *
- min(image_width, image_height)
- random_seed: random seed.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: image with possibly some black patches added.
- """
- def add_black_patch_to_image(image, idx):
- """Function for adding one patch to the image.
-
- Args:
- image: image
- idx: index of the current patch attempt; used as a key for the cached
- random variables.
-
- Returns:
- image with a randomly added black box
- """
- image_shape = tf.shape(image)
- image_height = image_shape[0]
- image_width = image_shape[1]
- box_size = tf.to_int32(
- tf.multiply(
- tf.minimum(tf.to_float(image_height), tf.to_float(image_width)),
- size_to_image_ratio))
-
- generator_func = functools.partial(tf.random_uniform, [], minval=0.0,
- maxval=(1.0 - size_to_image_ratio),
- seed=random_seed)
- normalized_y_min = _get_or_create_preprocess_rand_vars(
- generator_func,
- preprocessor_cache.PreprocessorCache.ADD_BLACK_PATCH,
- preprocess_vars_cache, key=str(idx) + 'y')
- normalized_x_min = _get_or_create_preprocess_rand_vars(
- generator_func,
- preprocessor_cache.PreprocessorCache.ADD_BLACK_PATCH,
- preprocess_vars_cache, key=str(idx) + 'x')
-
- y_min = tf.to_int32(normalized_y_min * tf.to_float(image_height))
- x_min = tf.to_int32(normalized_x_min * tf.to_float(image_width))
- black_box = tf.ones([box_size, box_size, 3], dtype=tf.float32)
- mask = 1.0 - tf.image.pad_to_bounding_box(black_box, y_min, x_min,
- image_height, image_width)
- image = tf.multiply(image, mask)
- return image
-
- with tf.name_scope('RandomBlackPatchInImage', values=[image]):
- for idx in range(max_black_patches):
- generator_func = functools.partial(tf.random_uniform, [],
- minval=0.0, maxval=1.0,
- dtype=tf.float32, seed=random_seed)
- random_prob = _get_or_create_preprocess_rand_vars(
- generator_func,
- preprocessor_cache.PreprocessorCache.BLACK_PATCHES,
- preprocess_vars_cache, key=idx)
- image = tf.cond(
- tf.greater(random_prob, probability), lambda: image,
- functools.partial(add_black_patch_to_image, image=image, idx=idx))
- return image
-
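-# Example usage (illustrative sketch): each of the max_black_patches attempts
-# independently succeeds with chance `probability`; a successful attempt
-# zeroes a square of side size_to_image_ratio * min(height, width):
-#
-#   occluded_image = random_black_patches(
-#       image, max_black_patches=10, probability=0.5, size_to_image_ratio=0.1)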
-
-def image_to_float(image):
- """Used in Faster R-CNN. Casts image pixel values to float.
-
- Args:
- image: input image, which might be in tf.uint8 or some other format
-
- Returns:
- image: image in tf.float32 format.
- """
- with tf.name_scope('ImageToFloat', values=[image]):
- image = tf.to_float(image)
- return image
-
-
-def random_resize_method(image, target_size, preprocess_vars_cache=None):
- """Uses a random resize method to resize the image to target size.
-
- Args:
- image: a rank 3 tensor.
- target_size: a list of [target_height, target_width]
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- resized image.
- """
-
- resized_image = _apply_with_random_selector(
- image,
- lambda x, method: tf.image.resize_images(x, target_size, method),
- num_cases=4,
- preprocess_vars_cache=preprocess_vars_cache,
- key=preprocessor_cache.PreprocessorCache.RESIZE_METHOD)
-
- return resized_image
-
-
-def _compute_new_static_size(image, min_dimension, max_dimension):
- """Compute new static shape for resize_to_range method."""
- image_shape = image.get_shape().as_list()
- orig_height = image_shape[0]
- orig_width = image_shape[1]
- num_channels = image_shape[2]
- orig_min_dim = min(orig_height, orig_width)
- # Calculates the larger of the possible sizes
- large_scale_factor = min_dimension / float(orig_min_dim)
- # Scaling orig_(height|width) by large_scale_factor will make the smaller
- # dimension equal to min_dimension, save for floating point rounding errors.
- # For reasonably-sized images, taking the nearest integer will reliably
- # eliminate this error.
- large_height = int(round(orig_height * large_scale_factor))
- large_width = int(round(orig_width * large_scale_factor))
- large_size = [large_height, large_width]
- if max_dimension:
- # Calculates the smaller of the possible sizes, use that if the larger
- # is too big.
- orig_max_dim = max(orig_height, orig_width)
- small_scale_factor = max_dimension / float(orig_max_dim)
- # Scaling orig_(height|width) by small_scale_factor will make the larger
- # dimension equal to max_dimension, save for floating point rounding
- # errors. For reasonably-sized images, taking the nearest integer will
- # reliably eliminate this error.
- small_height = int(round(orig_height * small_scale_factor))
- small_width = int(round(orig_width * small_scale_factor))
- small_size = [small_height, small_width]
- new_size = large_size
- if max(large_size) > max_dimension:
- new_size = small_size
- else:
- new_size = large_size
- return tf.constant(new_size + [num_channels])
-
-
-def _compute_new_dynamic_size(image, min_dimension, max_dimension):
- """Compute new dynamic shape for resize_to_range method."""
- image_shape = tf.shape(image)
- orig_height = tf.to_float(image_shape[0])
- orig_width = tf.to_float(image_shape[1])
- num_channels = image_shape[2]
- orig_min_dim = tf.minimum(orig_height, orig_width)
- # Calculates the larger of the possible sizes
- min_dimension = tf.constant(min_dimension, dtype=tf.float32)
- large_scale_factor = min_dimension / orig_min_dim
- # Scaling orig_(height|width) by large_scale_factor will make the smaller
- # dimension equal to min_dimension, save for floating point rounding errors.
- # For reasonably-sized images, taking the nearest integer will reliably
- # eliminate this error.
- large_height = tf.to_int32(tf.round(orig_height * large_scale_factor))
- large_width = tf.to_int32(tf.round(orig_width * large_scale_factor))
- large_size = tf.stack([large_height, large_width])
- if max_dimension:
- # Calculates the smaller of the possible sizes, use that if the larger
- # is too big.
- orig_max_dim = tf.maximum(orig_height, orig_width)
- max_dimension = tf.constant(max_dimension, dtype=tf.float32)
- small_scale_factor = max_dimension / orig_max_dim
- # Scaling orig_(height|width) by small_scale_factor will make the larger
- # dimension equal to max_dimension, save for floating point rounding
- # errors. For reasonably-sized images, taking the nearest integer will
- # reliably eliminate this error.
- small_height = tf.to_int32(tf.round(orig_height * small_scale_factor))
- small_width = tf.to_int32(tf.round(orig_width * small_scale_factor))
- small_size = tf.stack([small_height, small_width])
- new_size = tf.cond(
- tf.to_float(tf.reduce_max(large_size)) > max_dimension,
- lambda: small_size, lambda: large_size)
- else:
- new_size = large_size
- return tf.stack(tf.unstack(new_size) + [num_channels])
-
-
-def resize_to_range(image,
- masks=None,
- min_dimension=None,
- max_dimension=None,
- method=tf.image.ResizeMethod.BILINEAR,
- align_corners=False,
- pad_to_max_dimension=False,
- per_channel_pad_value=(0, 0, 0)):
- """Resizes an image so its dimensions are within the provided value.
-
- The output size can be described by two cases:
- 1. If the image can be rescaled so its minimum dimension is equal to the
- provided value without the other dimension exceeding max_dimension,
- then do so.
- 2. Otherwise, resize so the largest dimension is equal to max_dimension.
-
- Args:
- image: A 3D tensor of shape [height, width, channels]
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks.
- min_dimension: (optional) (scalar) desired size of the smaller image
- dimension.
- max_dimension: (optional) (scalar) maximum allowed size
- of the larger image dimension.
- method: (optional) interpolation method used in resizing. Defaults to
- BILINEAR.
- align_corners: bool. If true, exactly align all 4 corners of the input
- and output. Defaults to False.
- pad_to_max_dimension: Whether to resize the image and pad it with zeros
- so the resulting image is of the spatial size
- [max_dimension, max_dimension]. If masks are included they are padded
- similarly.
- per_channel_pad_value: A tuple of per-channel scalar value to use for
- padding. By default pads zeros.
-
- Returns:
- Note that the position of the resized_image_shape changes based on whether
- masks are present.
- resized_image: A 3D tensor of shape [new_height, new_width, channels],
- where the image has been resized (with bilinear interpolation) so that
- min(new_height, new_width) == min_dimension or
- max(new_height, new_width) == max_dimension.
- resized_masks: If masks is not None, also outputs masks. A 3D tensor of
- shape [num_instances, new_height, new_width].
- resized_image_shape: A 1D tensor of shape [3] containing shape of the
- resized image.
-
- Raises:
- ValueError: if the image is not a 3D tensor.
- """
- if len(image.get_shape()) != 3:
- raise ValueError('Image should be 3D tensor')
-
- with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
- if image.get_shape().is_fully_defined():
- new_size = _compute_new_static_size(image, min_dimension, max_dimension)
- else:
- new_size = _compute_new_dynamic_size(image, min_dimension, max_dimension)
- new_image = tf.image.resize_images(
- image, new_size[:-1], method=method, align_corners=align_corners)
-
- if pad_to_max_dimension:
- channels = tf.unstack(new_image, axis=2)
- if len(channels) != len(per_channel_pad_value):
- raise ValueError('Number of channels must be equal to the length of '
- 'per-channel pad value.')
- new_image = tf.stack(
- [
- tf.pad(
- channels[i], [[0, max_dimension - new_size[0]],
- [0, max_dimension - new_size[1]]],
- constant_values=per_channel_pad_value[i])
- for i in range(len(channels))
- ],
- axis=2)
- new_image.set_shape([max_dimension, max_dimension, 3])
-
- result = [new_image]
- if masks is not None:
- new_masks = tf.expand_dims(masks, 3)
- new_masks = tf.image.resize_images(
- new_masks,
- new_size[:-1],
- method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
- align_corners=align_corners)
- new_masks = tf.squeeze(new_masks, 3)
- if pad_to_max_dimension:
- new_masks = tf.image.pad_to_bounding_box(
- new_masks, 0, 0, max_dimension, max_dimension)
- result.append(new_masks)
-
- result.append(new_size)
- return result
-
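-# Worked example (illustrative): with min_dimension=600 and
-# max_dimension=1024, a 400x800 image would scale by 600/400 = 1.5 to
-# 600x1200, which exceeds max_dimension, so case 2 applies and it is resized
-# by 1024/800 = 1.28 to 512x1024 instead:
-#
-#   resized_image, resized_shape = resize_to_range(
-#       image, min_dimension=600, max_dimension=1024)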
-
-# TODO(alirezafathi): Make sure the static shapes are preserved.
-def resize_to_min_dimension(image, masks=None, min_dimension=600):
- """Resizes image and masks given the min size maintaining the aspect ratio.
-
- If one of the image dimensions is smaller that min_dimension, it will scale
- the image such that its smallest dimension is equal to min_dimension.
- Otherwise, will keep the image size as is.
-
- Args:
- image: a tensor of size [height, width, channels].
- masks: (optional) a tensor of size [num_instances, height, width].
- min_dimension: minimum image dimension.
-
- Returns:
- Note that the position of the resized_image_shape changes based on whether
- masks are present.
- resized_image: A tensor of size [new_height, new_width, channels].
- resized_masks: If masks is not None, also outputs masks. A 3D tensor of
- shape [num_instances, new_height, new_width]
- resized_image_shape: A 1D tensor of shape [3] containing the shape of the
- resized image.
-
- Raises:
- ValueError: if the image is not a 3D tensor.
- """
- if len(image.get_shape()) != 3:
- raise ValueError('Image should be 3D tensor')
-
- with tf.name_scope('ResizeGivenMinDimension', values=[image, min_dimension]):
- image_height = tf.shape(image)[0]
- image_width = tf.shape(image)[1]
- num_channels = tf.shape(image)[2]
- min_image_dimension = tf.minimum(image_height, image_width)
- min_target_dimension = tf.maximum(min_image_dimension, min_dimension)
- target_ratio = tf.to_float(min_target_dimension) / tf.to_float(
- min_image_dimension)
- target_height = tf.to_int32(tf.to_float(image_height) * target_ratio)
- target_width = tf.to_int32(tf.to_float(image_width) * target_ratio)
- image = tf.image.resize_bilinear(
- tf.expand_dims(image, axis=0),
- size=[target_height, target_width],
- align_corners=True)
- result = [tf.squeeze(image, axis=0)]
-
- if masks is not None:
- masks = tf.image.resize_nearest_neighbor(
- tf.expand_dims(masks, axis=3),
- size=[target_height, target_width],
- align_corners=True)
- result.append(tf.squeeze(masks, axis=3))
-
- result.append(tf.stack([target_height, target_width, num_channels]))
- return result
-
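-# Worked example (illustrative): with min_dimension=600, a 480x640 image is
-# scaled by 600/480 = 1.25 to 600x800, while a 700x900 image already
-# satisfies the minimum and is returned at its original size:
-#
-#   resized_image, resized_shape = resize_to_min_dimension(
-#       image, min_dimension=600)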
-
-def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
- """Scales boxes from normalized to pixel coordinates.
-
- Args:
- image: A 3D float32 tensor of shape [height, width, channels].
- boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding
- boxes in normalized coordinates. Each row is of the form
- [ymin, xmin, ymax, xmax].
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
- coordinates.
-
- Returns:
- image: unchanged input image.
- scaled_boxes: a 2D float32 tensor of shape [num_boxes, 4] containing the
- bounding boxes in pixel coordinates.
- scaled_keypoints: a 3D float32 tensor with shape
- [num_instances, num_keypoints, 2] containing the keypoints in pixel
- coordinates.
- """
- boxlist = box_list.BoxList(boxes)
- image_height = tf.shape(image)[0]
- image_width = tf.shape(image)[1]
- scaled_boxes = box_list_ops.scale(boxlist, image_height, image_width).get()
- result = [image, scaled_boxes]
- if keypoints is not None:
- scaled_keypoints = keypoint_ops.scale(keypoints, image_height, image_width)
- result.append(scaled_keypoints)
- return tuple(result)
-
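-# Worked example (illustrative): for a 100x200 image, the normalized box
-# [0.1, 0.2, 0.5, 0.9] scales to the pixel box [10.0, 40.0, 50.0, 180.0]:
-#
-#   image, pixel_boxes = scale_boxes_to_pixel_coordinates(image, boxes)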
-
-# TODO(alirezafathi): Investigate if instead the function should return None if
-# masks is None.
-# pylint: disable=g-doc-return-or-yield
-def resize_image(image,
- masks=None,
- new_height=600,
- new_width=1024,
- method=tf.image.ResizeMethod.BILINEAR,
- align_corners=False):
- """Resizes images to the given height and width.
-
- Args:
- image: A 3D tensor of shape [height, width, channels]
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks.
- new_height: (optional) (scalar) desired height of the image.
- new_width: (optional) (scalar) desired width of the image.
- method: (optional) interpolation method used in resizing. Defaults to
- BILINEAR.
- align_corners: bool. If true, exactly align all 4 corners of the input
- and output. Defaults to False.
-
- Returns:
- Note that the position of the resized_image_shape changes based on whether
- masks are present.
- resized_image: A tensor of size [new_height, new_width, channels].
- resized_masks: If masks is not None, also outputs masks. A 3D tensor of
- shape [num_instances, new_height, new_width]
- resized_image_shape: A 1D tensor of shape [3] containing the shape of the
- resized image.
- """
- with tf.name_scope(
- 'ResizeImage',
- values=[image, new_height, new_width, method, align_corners]):
- new_image = tf.image.resize_images(
- image, tf.stack([new_height, new_width]),
- method=method,
- align_corners=align_corners)
- image_shape = shape_utils.combined_static_and_dynamic_shape(image)
- result = [new_image]
- if masks is not None:
- num_instances = tf.shape(masks)[0]
- new_size = tf.stack([new_height, new_width])
- def resize_masks_branch():
- new_masks = tf.expand_dims(masks, 3)
- new_masks = tf.image.resize_nearest_neighbor(
- new_masks, new_size, align_corners=align_corners)
- new_masks = tf.squeeze(new_masks, axis=3)
- return new_masks
-
- def reshape_masks_branch():
- # The shape function will be computed for both branches of the
- # condition, regardless of which branch is actually taken. Make sure
- # that we don't trigger an assertion in the shape function when trying
- # to reshape a non empty tensor into an empty one.
- new_masks = tf.reshape(masks, [-1, new_size[0], new_size[1]])
- return new_masks
-
- masks = tf.cond(num_instances > 0, resize_masks_branch,
- reshape_masks_branch)
- result.append(masks)
-
- result.append(tf.stack([new_height, new_width, image_shape[2]]))
- return result
-
-
-def subtract_channel_mean(image, means=None):
- """Normalizes an image by subtracting a mean from each channel.
-
- Args:
- image: A 3D tensor of shape [height, width, channels]
- means: float list containing a mean for each channel
- Returns:
- normalized_images: a tensor of shape [height, width, channels]
- Raises:
- ValueError: if image is not a 3D tensor or if the number of means is not
- equal to the number of channels.
- """
- with tf.name_scope('SubtractChannelMean', values=[image, means]):
- if len(image.get_shape()) != 3:
- raise ValueError('Input must be of size [height, width, channels]')
- if len(means) != image.get_shape()[-1]:
- raise ValueError('len(means) must match the number of channels')
- return image - [[means]]
-
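-# Example usage (illustrative sketch): subtracting per-channel means from an
-# RGB image, e.g. the commonly used ImageNet channel means:
-#
-#   normalized_image = subtract_channel_mean(
-#       image, means=[123.68, 116.779, 103.939])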
-
-def one_hot_encoding(labels, num_classes=None):
- """One-hot encodes the multiclass labels.
-
- Example usage:
- labels = tf.constant([1, 4], dtype=tf.int32)
- one_hot = one_hot_encoding(labels, num_classes=5)
- one_hot.eval() # evaluates to [0, 1, 0, 0, 1]
-
- Args:
- labels: A tensor of shape [None] corresponding to the labels.
- num_classes: Number of classes in the dataset.
- Returns:
- onehot_labels: a tensor of shape [num_classes] corresponding to the one hot
- encoding of the labels.
- Raises:
- ValueError: if num_classes is not specified.
- """
- with tf.name_scope('OneHotEncoding', values=[labels]):
- if num_classes is None:
- raise ValueError('num_classes must be specified')
-
- labels = tf.one_hot(labels, num_classes, 1, 0)
- return tf.reduce_max(labels, 0)
-
-
-def rgb_to_gray(image):
- """Converts a 3 channel RGB image to a 1 channel grayscale image.
-
- Args:
- image: Rank 3 float32 tensor containing 1 image -> [height, width, 3]
- with pixel values varying between [0, 1].
-
- Returns:
- image: A single channel grayscale image -> [height, width, 1].
- """
- return _rgb_to_grayscale(image)
-
-
-def ssd_random_crop(image,
- boxes,
- labels,
- label_scores=None,
- multiclass_scores=None,
- masks=None,
- keypoints=None,
- min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
- aspect_ratio_range=((0.5, 2.0),) * 7,
- area_range=((0.1, 1.0),) * 7,
- overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
- random_coef=(0.15,) * 7,
- seed=None,
- preprocess_vars_cache=None):
- """Random crop preprocessing with default parameters as in SSD paper.
-
- Liu et al., SSD: Single shot multibox detector.
- For further information on random crop preprocessing refer to RandomCrop
- function above.
-
- Args:
- image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- labels: rank 1 int32 tensor containing the object classes.
- label_scores: rank 1 float32 tensor containing the scores.
- multiclass_scores: (optional) float32 tensor of shape
- [num_instances, num_classes] representing the score for each box for each
- class.
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- min_object_covered: the cropped image must cover at least this fraction of
- at least one of the input bounding boxes.
- aspect_ratio_range: allowed range for aspect ratio of cropped image.
- area_range: allowed range for area ratio between cropped image and the
- original image.
- overlap_thresh: minimum overlap a box must have with the new cropped
- image in order to be kept.
- random_coef: a random coefficient that defines the chance of getting the
- original image. If random_coef is 0, we will always get the
- cropped image, and if it is 1.0, we will always get the
- original image.
- seed: random seed.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: image which is the same rank as input image.
- boxes: boxes which is the same rank as input boxes.
- Boxes are in normalized form.
- labels: new labels.
-
- If label_scores, multiclass_scores, masks, or keypoints is not None, the
- function also returns:
- label_scores: rank 1 float32 tensor with shape [num_instances].
- multiclass_scores: rank 2 float32 tensor with shape
- [num_instances, num_classes]
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
- """
-
- def random_crop_selector(selected_result, index):
- """Applies random_crop_image to selected result.
-
- Args:
- selected_result: A tuple containing image, boxes, labels and, if not
- None, label_scores, multiclass_scores, masks and keypoints.
- index: The index that was randomly selected.
-
- Returns: A tuple containing image, boxes, labels and, if not None,
- label_scores, multiclass_scores, masks and keypoints.
- """
-
- i = 3
- image, boxes, labels = selected_result[:i]
- selected_label_scores = None
- selected_multiclass_scores = None
- selected_masks = None
- selected_keypoints = None
- if label_scores is not None:
- selected_label_scores = selected_result[i]
- i += 1
- if multiclass_scores is not None:
- selected_multiclass_scores = selected_result[i]
- i += 1
- if masks is not None:
- selected_masks = selected_result[i]
- i += 1
- if keypoints is not None:
- selected_keypoints = selected_result[i]
-
- return random_crop_image(
- image=image,
- boxes=boxes,
- labels=labels,
- label_scores=selected_label_scores,
- multiclass_scores=selected_multiclass_scores,
- masks=selected_masks,
- keypoints=selected_keypoints,
- min_object_covered=min_object_covered[index],
- aspect_ratio_range=aspect_ratio_range[index],
- area_range=area_range[index],
- overlap_thresh=overlap_thresh[index],
- random_coef=random_coef[index],
- seed=seed,
- preprocess_vars_cache=preprocess_vars_cache)
-
- result = _apply_with_random_selector_tuples(
- tuple(
- t for t in (image, boxes, labels, label_scores, multiclass_scores,
- masks, keypoints) if t is not None),
- random_crop_selector,
- num_cases=len(min_object_covered),
- preprocess_vars_cache=preprocess_vars_cache,
- key=preprocessor_cache.PreprocessorCache.SSD_CROP_SELECTOR_ID)
- return result
-
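-# Example usage (illustrative sketch): each call picks one index into the
-# parameter tuples uniformly at random, so the defaults choose among seven
-# crop settings with min_object_covered ranging from 0.0 to 1.0, mirroring
-# the sampling strategy of the SSD paper:
-#
-#   new_image, new_boxes, new_labels = ssd_random_crop(image, boxes, labels)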
-
-def ssd_random_crop_pad(image,
- boxes,
- labels,
- label_scores=None,
- multiclass_scores=None,
- min_object_covered=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
- aspect_ratio_range=((0.5, 2.0),) * 6,
- area_range=((0.1, 1.0),) * 6,
- overlap_thresh=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
- random_coef=(0.15,) * 6,
- min_padded_size_ratio=((1.0, 1.0),) * 6,
- max_padded_size_ratio=((2.0, 2.0),) * 6,
- pad_color=(None,) * 6,
- seed=None,
- preprocess_vars_cache=None):
- """Random crop preprocessing with default parameters as in SSD paper.
-
- Liu et al., SSD: Single shot multibox detector.
- For further information on random crop preprocessing refer to RandomCrop
- function above.
-
- Args:
- image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- labels: rank 1 int32 tensor containing the object classes.
- label_scores: float32 tensor of shape [num_instances] representing the
- score for each box.
- multiclass_scores: (optional) float32 tensor of shape
- [num_instances, num_classes] representing the score for each box for each
- class.
- min_object_covered: the cropped image must cover at least this fraction of
- at least one of the input bounding boxes.
- aspect_ratio_range: allowed range for aspect ratio of cropped image.
- area_range: allowed range for area ratio between cropped image and the
- original image.
- overlap_thresh: minimum overlap a box must have with the new cropped
- image in order to be kept.
- random_coef: a random coefficient that defines the chance of getting the
- original image. If random_coef is 0, we will always get the
- cropped image, and if it is 1.0, we will always get the
- original image.
- min_padded_size_ratio: min ratio of padded image height and width to the
- input image's height and width.
- max_padded_size_ratio: max ratio of padded image height and width to the
- input image's height and width.
- pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
- If set as None, it will be set to the average color of the randomly
- cropped image.
- seed: random seed.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: Image shape will be [new_height, new_width, channels].
- boxes: boxes which is the same rank as input boxes. Boxes are in normalized
- form.
- new_labels: new labels.
- new_label_scores: new label scores.
- If multiclass_scores is not None, the function also returns:
- new_multiclass_scores: new multiclass scores.
- """
-
- def random_crop_pad_selector(image_boxes_labels, index):
- """Random crop preprocessing helper."""
- i = 3
- image, boxes, labels = image_boxes_labels[:i]
- selected_label_scores = None
- selected_multiclass_scores = None
- if label_scores is not None:
- selected_label_scores = image_boxes_labels[i]
- i += 1
- if multiclass_scores is not None:
- selected_multiclass_scores = image_boxes_labels[i]
-
- return random_crop_pad_image(
- image,
- boxes,
- labels,
- label_scores=selected_label_scores,
- multiclass_scores=selected_multiclass_scores,
- min_object_covered=min_object_covered[index],
- aspect_ratio_range=aspect_ratio_range[index],
- area_range=area_range[index],
- overlap_thresh=overlap_thresh[index],
- random_coef=random_coef[index],
- min_padded_size_ratio=min_padded_size_ratio[index],
- max_padded_size_ratio=max_padded_size_ratio[index],
- pad_color=pad_color[index],
- seed=seed,
- preprocess_vars_cache=preprocess_vars_cache)
-
- return _apply_with_random_selector_tuples(
- tuple(t for t in (image, boxes, labels, label_scores, multiclass_scores)
- if t is not None),
- random_crop_pad_selector,
- num_cases=len(min_object_covered),
- preprocess_vars_cache=preprocess_vars_cache,
- key=preprocessor_cache.PreprocessorCache.SSD_CROP_PAD_SELECTOR_ID)
-
-
-def ssd_random_crop_fixed_aspect_ratio(
- image,
- boxes,
- labels,
- label_scores=None,
- multiclass_scores=None,
- masks=None,
- keypoints=None,
- min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
- aspect_ratio=1.0,
- area_range=((0.1, 1.0),) * 7,
- overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
- random_coef=(0.15,) * 7,
- seed=None,
- preprocess_vars_cache=None):
- """Random crop preprocessing with default parameters as in SSD paper.
-
- Liu et al., SSD: Single shot multibox detector.
- For further information on random crop preprocessing refer to RandomCrop
- function above.
-
- The only difference is that the aspect ratio of the crops is fixed.
-
- Args:
- image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- labels: rank 1 int32 tensor containing the object classes.
- label_scores: (optional) float32 tensor of shape [num_instances]
- representing the score for each box.
- multiclass_scores: (optional) float32 tensor of shape
- [num_instances, num_classes] representing the score for each box for each
- class.
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- min_object_covered: the cropped image must cover at least this fraction of
- at least one of the input bounding boxes.
- aspect_ratio: aspect ratio of the cropped image.
- area_range: allowed range for area ratio between cropped image and the
- original image.
- overlap_thresh: minimum overlap a box must have with the new cropped
- image in order to be kept.
- random_coef: a random coefficient that defines the chance of getting the
- original image. If random_coef is 0, we will always get the
- cropped image, and if it is 1.0, we will always get the
- original image.
- seed: random seed.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: image which is the same rank as the input image.
- boxes: boxes which are the same rank as the input boxes.
- Boxes are in normalized form.
- labels: new labels.
-
- If label_scores, multiclass_scores, masks, or keypoints is not None, the
- function also returns:
-
- label_scores: rank 1 float32 tensor with shape [num_instances]
- multiclass_scores: rank 2 float32 tensor with shape
- [num_instances, num_classes]
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
- """
- aspect_ratio_range = ((aspect_ratio, aspect_ratio),) * len(area_range)
-
- crop_result = ssd_random_crop(
- image,
- boxes,
- labels,
- label_scores=label_scores,
- multiclass_scores=multiclass_scores,
- masks=masks,
- keypoints=keypoints,
- min_object_covered=min_object_covered,
- aspect_ratio_range=aspect_ratio_range,
- area_range=area_range,
- overlap_thresh=overlap_thresh,
- random_coef=random_coef,
- seed=seed,
- preprocess_vars_cache=preprocess_vars_cache)
- i = 3
- new_image, new_boxes, new_labels = crop_result[:i]
- new_label_scores = None
- new_multiclass_scores = None
- new_masks = None
- new_keypoints = None
- if label_scores is not None:
- new_label_scores = crop_result[i]
- i += 1
- if multiclass_scores is not None:
- new_multiclass_scores = crop_result[i]
- i += 1
- if masks is not None:
- new_masks = crop_result[i]
- i += 1
- if keypoints is not None:
- new_keypoints = crop_result[i]
-
- result = random_crop_to_aspect_ratio(
- new_image,
- new_boxes,
- new_labels,
- label_scores=new_label_scores,
- multiclass_scores=new_multiclass_scores,
- masks=new_masks,
- keypoints=new_keypoints,
- aspect_ratio=aspect_ratio,
- seed=seed,
- preprocess_vars_cache=preprocess_vars_cache)
-
- return result
-
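- # Illustrative usage (assumed tensors, not part of the original module):
- # the SSD-style random crop runs first, then random_crop_to_aspect_ratio
- # re-crops the result to the fixed aspect_ratio.
- #
- #   image = tf.random_uniform([300, 300, 3])
- #   boxes = tf.constant([[0.0, 0.25, 0.75, 1.0]])
- #   labels = tf.constant([1], dtype=tf.int32)
- #   out_image, out_boxes, out_labels = ssd_random_crop_fixed_aspect_ratio(
- #       image, boxes, labels, aspect_ratio=1.0, seed=0)
-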
-
-def ssd_random_crop_pad_fixed_aspect_ratio(
- image,
- boxes,
- labels,
- label_scores=None,
- multiclass_scores=None,
- masks=None,
- keypoints=None,
- min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
- aspect_ratio=1.0,
- aspect_ratio_range=((0.5, 2.0),) * 7,
- area_range=((0.1, 1.0),) * 7,
- overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
- random_coef=(0.15,) * 7,
- min_padded_size_ratio=(1.0, 1.0),
- max_padded_size_ratio=(2.0, 2.0),
- seed=None,
- preprocess_vars_cache=None):
- """Random crop and pad preprocessing with default parameters as in SSD paper.
-
- Liu et al., SSD: Single shot multibox detector.
- For further information on random crop preprocessing refer to RandomCrop
- function above.
-
- The only difference is that after the initial crop, images are zero-padded
- to a fixed aspect ratio instead of being resized to that aspect ratio.
-
- Args:
- image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- labels: rank 1 int32 tensor containing the object classes.
- label_scores: (optional) float32 tensor of shape [num_instances]
- representing the score for each box.
- multiclass_scores: (optional) float32 tensor of shape
- [num_instances, num_classes] representing the score for each box for each
- class.
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- min_object_covered: the cropped image must cover at least this fraction of
- at least one of the input bounding boxes.
- aspect_ratio: the final aspect ratio to pad to.
- aspect_ratio_range: allowed range for aspect ratio of cropped image.
- area_range: allowed range for area ratio between cropped image and the
- original image.
- overlap_thresh: minimum overlap threshold with the new cropped image
- required to keep a box.
- random_coef: a random coefficient that defines the chance of getting the
- original image. If random_coef is 0, we will always get the
- cropped image, and if it is 1.0, we will always get the
- original image.
- min_padded_size_ratio: min ratio of padded image height and width to the
- input image's height and width.
- max_padded_size_ratio: max ratio of padded image height and width to the
- input image's height and width.
- seed: random seed.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- image: image which is the same rank as the input image.
- boxes: boxes which are the same rank as the input boxes.
- Boxes are in normalized form.
- labels: new labels.
-
- If label_scores, multiclass_scores, masks, or keypoints is not None, the
- function also returns:
-
- label_scores: rank 1 float32 tensor with shape [num_instances]
- multiclass_scores: rank 2 float32 tensor with shape
- [num_instances, num_classes]
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
- """
- crop_result = ssd_random_crop(
- image,
- boxes,
- labels,
- label_scores=label_scores,
- multiclass_scores=multiclass_scores,
- masks=masks,
- keypoints=keypoints,
- min_object_covered=min_object_covered,
- aspect_ratio_range=aspect_ratio_range,
- area_range=area_range,
- overlap_thresh=overlap_thresh,
- random_coef=random_coef,
- seed=seed,
- preprocess_vars_cache=preprocess_vars_cache)
- i = 3
- new_image, new_boxes, new_labels = crop_result[:i]
- new_label_scores = None
- new_multiclass_scores = None
- new_masks = None
- new_keypoints = None
- if label_scores is not None:
- new_label_scores = crop_result[i]
- i += 1
- if multiclass_scores is not None:
- new_multiclass_scores = crop_result[i]
- i += 1
- if masks is not None:
- new_masks = crop_result[i]
- i += 1
- if keypoints is not None:
- new_keypoints = crop_result[i]
-
- result = random_pad_to_aspect_ratio(
- new_image,
- new_boxes,
- masks=new_masks,
- keypoints=new_keypoints,
- aspect_ratio=aspect_ratio,
- min_padded_size_ratio=min_padded_size_ratio,
- max_padded_size_ratio=max_padded_size_ratio,
- seed=seed,
- preprocess_vars_cache=preprocess_vars_cache)
-
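- # random_pad_to_aspect_ratio returns (image, boxes[, masks][, keypoints]);
- # labels and the optional scores are not padded, so they are re-inserted
- # into the output tuple at the positions callers expect.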
- result = list(result)
- i = 3
- result.insert(2, new_labels)
- if new_label_scores is not None:
- result.insert(i, new_label_scores)
- i += 1
- if new_multiclass_scores is not None:
- result.insert(i, new_multiclass_scores)
- result = tuple(result)
-
- return result
-
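- # Illustrative call (assumed tensors, not part of the original module); the
- # crop is zero-padded, not resized, to reach aspect_ratio:
- #
- #   out = ssd_random_crop_pad_fixed_aspect_ratio(
- #       tf.random_uniform([200, 100, 3]),
- #       tf.constant([[0.1, 0.1, 0.9, 0.9]]),
- #       tf.constant([1], dtype=tf.int32),
- #       aspect_ratio=2.0,
- #       min_padded_size_ratio=(1.0, 1.0),
- #       max_padded_size_ratio=(2.0, 2.0))
-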
-
-def get_default_func_arg_map(include_label_scores=False,
- include_multiclass_scores=False,
- include_instance_masks=False,
- include_keypoints=False):
- """Returns the default mapping from a preprocessor function to its args.
-
- Args:
- include_label_scores: If True, preprocessing functions will modify the
- label scores, too.
- include_multiclass_scores: If True, preprocessing functions will modify the
- multiclass scores, too.
- include_instance_masks: If True, preprocessing functions will modify the
- instance masks, too.
- include_keypoints: If True, preprocessing functions will modify the
- keypoints, too.
-
- Returns:
- A map from preprocessing functions to the arguments they receive.
- """
- groundtruth_label_scores = None
- if include_label_scores:
- groundtruth_label_scores = fields.InputDataFields.groundtruth_label_scores
-
- multiclass_scores = None
- if include_multiclass_scores:
- multiclass_scores = fields.InputDataFields.multiclass_scores
-
- groundtruth_instance_masks = None
- if include_instance_masks:
- groundtruth_instance_masks = (
- fields.InputDataFields.groundtruth_instance_masks)
-
- groundtruth_keypoints = None
- if include_keypoints:
- groundtruth_keypoints = fields.InputDataFields.groundtruth_keypoints
-
- prep_func_arg_map = {
- normalize_image: (fields.InputDataFields.image,),
- random_horizontal_flip: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- groundtruth_instance_masks,
- groundtruth_keypoints,
- ),
- random_vertical_flip: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- groundtruth_instance_masks,
- groundtruth_keypoints,
- ),
- random_rotation90: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- groundtruth_instance_masks,
- groundtruth_keypoints,
- ),
- random_pixel_value_scale: (fields.InputDataFields.image,),
- random_image_scale: (
- fields.InputDataFields.image,
- groundtruth_instance_masks,
- ),
- random_rgb_to_gray: (fields.InputDataFields.image,),
- random_adjust_brightness: (fields.InputDataFields.image,),
- random_adjust_contrast: (fields.InputDataFields.image,),
- random_adjust_hue: (fields.InputDataFields.image,),
- random_adjust_saturation: (fields.InputDataFields.image,),
- random_distort_color: (fields.InputDataFields.image,),
- random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
- random_crop_image: (fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_classes,
- groundtruth_label_scores, multiclass_scores,
- groundtruth_instance_masks, groundtruth_keypoints),
- random_pad_image: (fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes),
- random_crop_pad_image: (fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_classes,
- groundtruth_label_scores,
- multiclass_scores),
- random_crop_to_aspect_ratio: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_classes,
- groundtruth_label_scores,
- multiclass_scores,
- groundtruth_instance_masks,
- groundtruth_keypoints,
- ),
- random_pad_to_aspect_ratio: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- groundtruth_instance_masks,
- groundtruth_keypoints,
- ),
- random_black_patches: (fields.InputDataFields.image,),
- retain_boxes_above_threshold: (
- fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_classes,
- groundtruth_label_scores,
- multiclass_scores,
- groundtruth_instance_masks,
- groundtruth_keypoints,
- ),
- image_to_float: (fields.InputDataFields.image,),
- random_resize_method: (fields.InputDataFields.image,),
- resize_to_range: (
- fields.InputDataFields.image,
- groundtruth_instance_masks,
- ),
- resize_to_min_dimension: (
- fields.InputDataFields.image,
- groundtruth_instance_masks,
- ),
- scale_boxes_to_pixel_coordinates: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- groundtruth_keypoints,
- ),
- resize_image: (
- fields.InputDataFields.image,
- groundtruth_instance_masks,
- ),
- subtract_channel_mean: (fields.InputDataFields.image,),
- one_hot_encoding: (fields.InputDataFields.groundtruth_image_classes,),
- rgb_to_gray: (fields.InputDataFields.image,),
- ssd_random_crop: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_classes,
- groundtruth_label_scores,
- multiclass_scores,
- groundtruth_instance_masks,
- groundtruth_keypoints
- ),
- ssd_random_crop_pad: (fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_classes,
- groundtruth_label_scores,
- multiclass_scores),
- ssd_random_crop_fixed_aspect_ratio: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_classes, groundtruth_label_scores,
- multiclass_scores, groundtruth_instance_masks, groundtruth_keypoints),
- ssd_random_crop_pad_fixed_aspect_ratio: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_classes,
- groundtruth_label_scores,
- multiclass_scores,
- groundtruth_instance_masks,
- groundtruth_keypoints,
- ),
- }
-
- return prep_func_arg_map
-
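- # Example pairing (a sketch mirroring the unit tests, not part of the
- # original module): a caller that wants instance masks preprocessed
- # alongside boxes builds the map with include_instance_masks=True and hands
- # it to preprocess() below:
- #
- #   arg_map = get_default_func_arg_map(include_instance_masks=True)
- #   tensor_dict = preprocess(tensor_dict,
- #                            [(random_horizontal_flip, {})],
- #                            func_arg_map=arg_map)
-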
-
-def preprocess(tensor_dict,
- preprocess_options,
- func_arg_map=None,
- preprocess_vars_cache=None):
- """Preprocess images and bounding boxes.
-
- Applies various types of preprocessing based on the preprocess_options
- list, e.g. "crop image" (affects image and possibly boxes) or
- "white balance image" (affects only the image). If preprocess_options is
- empty, no preprocessing is done.
-
- Args:
- tensor_dict: dictionary that contains images, boxes, and can contain other
- things as well.
- images -> rank 4 float32 tensor containing
- 1 image -> [1, height, width, 3],
- with pixel values varying between [0, 1].
- boxes-> rank 2 float32 tensor containing
- the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning
- their coordinates vary between [0, 1].
- Each row is in the form
- of [ymin, xmin, ymax, xmax].
- preprocess_options: a list of (function, kwargs) tuples, where each tuple
- pairs a preprocessing function with a dictionary of
- its arguments and their values.
- func_arg_map: mapping from preprocessing functions to arguments that they
- expect to receive and return.
- preprocess_vars_cache: PreprocessorCache object that records previously
- performed augmentations. Updated in-place. If this
- function is called multiple times with the same
- non-null cache, it will perform deterministically.
-
- Returns:
- tensor_dict: which contains the preprocessed images, bounding boxes, etc.
-
- Raises:
- ValueError: (a) If the functions passed to preprocess
- are not in func_arg_map.
- (b) If the arguments that a function needs
- do not exist in tensor_dict.
- (c) If image in tensor_dict is not rank 4
- """
- if func_arg_map is None:
- func_arg_map = get_default_func_arg_map()
-
- # Changes the images to image (rank 4 to rank 3) since the preprocessing
- # functions receive a rank 3 tensor for the image.
- if fields.InputDataFields.image in tensor_dict:
- images = tensor_dict[fields.InputDataFields.image]
- if len(images.get_shape()) != 4:
- raise ValueError('images in tensor_dict should be rank 4')
- image = tf.squeeze(images, axis=[0])
- tensor_dict[fields.InputDataFields.image] = image
-
- # Preprocess inputs based on preprocess_options
- for option in preprocess_options:
- func, params = option
- if func not in func_arg_map:
- raise ValueError('The function %s does not exist in func_arg_map' %
- (func.__name__))
- arg_names = func_arg_map[func]
- for a in arg_names:
- if a is not None and a not in tensor_dict:
- raise ValueError('The function %s requires argument %s' %
- (func.__name__, a))
-
- def get_arg(key):
- return tensor_dict[key] if key is not None else None
-
- args = [get_arg(a) for a in arg_names]
- if (preprocess_vars_cache is not None and
- 'preprocess_vars_cache' in inspect.getargspec(func).args):
- params['preprocess_vars_cache'] = preprocess_vars_cache
- results = func(*args, **params)
- if not isinstance(results, (list, tuple)):
- results = (results,)
- # Removes None args since the return values will not contain those.
- arg_names = [arg_name for arg_name in arg_names if arg_name is not None]
- for res, arg_name in zip(results, arg_names):
- tensor_dict[arg_name] = res
-
- # Changes the image back to images (rank 3 to rank 4) to be compatible
- # with what we received in the first place.
- if fields.InputDataFields.image in tensor_dict:
- image = tensor_dict[fields.InputDataFields.image]
- images = tf.expand_dims(image, 0)
- tensor_dict[fields.InputDataFields.image] = images
-
- return tensor_dict
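-
- # End-to-end sketch (illustrative values; the field names are the real
- # standard_fields constants used above):
- #
- #   tensor_dict = {
- #       fields.InputDataFields.image:
- #           tf.random_uniform([1, 4, 4, 3]),  # rank 4, as required
- #       fields.InputDataFields.groundtruth_boxes:
- #           tf.constant([[0.0, 0.25, 0.75, 1.0]]),
- #   }
- #   options = [(normalize_image, {'original_minval': 0,
- #                                 'original_maxval': 255,
- #                                 'target_minval': 0, 'target_maxval': 1}),
- #              (random_horizontal_flip, {})]
- #   tensor_dict = preprocess(tensor_dict, options)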
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/preprocessor_cache.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/preprocessor_cache.py
deleted file mode 100644
index 2822a2bab209f37738b0c807765624114973de4d..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/preprocessor_cache.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Records previous preprocessing operations and allows them to be repeated.
-
-Used with object_detection.core.preprocessor. Passing a PreprocessorCache
-into individual data augmentation functions or the general preprocess() function
-will store all randomly generated variables in the PreprocessorCache. When
-a preprocessor function is called multiple times with the same
-PreprocessorCache object, that function will perform the same augmentation
-on all calls.
-"""
-
-from collections import defaultdict
-
-
-class PreprocessorCache(object):
- """Dictionary wrapper storing random variables generated during preprocessing.
- """
-
- # Constant keys representing different preprocessing functions
- ROTATION90 = 'rotation90'
- HORIZONTAL_FLIP = 'horizontal_flip'
- VERTICAL_FLIP = 'vertical_flip'
- PIXEL_VALUE_SCALE = 'pixel_value_scale'
- IMAGE_SCALE = 'image_scale'
- RGB_TO_GRAY = 'rgb_to_gray'
- ADJUST_BRIGHTNESS = 'adjust_brightness'
- ADJUST_CONTRAST = 'adjust_contrast'
- ADJUST_HUE = 'adjust_hue'
- ADJUST_SATURATION = 'adjust_saturation'
- DISTORT_COLOR = 'distort_color'
- STRICT_CROP_IMAGE = 'strict_crop_image'
- CROP_IMAGE = 'crop_image'
- PAD_IMAGE = 'pad_image'
- CROP_TO_ASPECT_RATIO = 'crop_to_aspect_ratio'
- RESIZE_METHOD = 'resize_method'
- PAD_TO_ASPECT_RATIO = 'pad_to_aspect_ratio'
- BLACK_PATCHES = 'black_patches'
- ADD_BLACK_PATCH = 'add_black_patch'
- SELECTOR = 'selector'
- SELECTOR_TUPLES = 'selector_tuples'
- SSD_CROP_SELECTOR_ID = 'ssd_crop_selector_id'
- SSD_CROP_PAD_SELECTOR_ID = 'ssd_crop_pad_selector_id'
-
- # 23 permitted function ids
- _VALID_FNS = [ROTATION90, HORIZONTAL_FLIP, VERTICAL_FLIP, PIXEL_VALUE_SCALE,
- IMAGE_SCALE, RGB_TO_GRAY, ADJUST_BRIGHTNESS, ADJUST_CONTRAST,
- ADJUST_HUE, ADJUST_SATURATION, DISTORT_COLOR, STRICT_CROP_IMAGE,
- CROP_IMAGE, PAD_IMAGE, CROP_TO_ASPECT_RATIO, RESIZE_METHOD,
- PAD_TO_ASPECT_RATIO, BLACK_PATCHES, ADD_BLACK_PATCH, SELECTOR,
- SELECTOR_TUPLES, SSD_CROP_SELECTOR_ID, SSD_CROP_PAD_SELECTOR_ID]
-
- def __init__(self):
- self._history = defaultdict(dict)
-
- def clear(self):
- """Resets cache."""
- # Reassign a defaultdict so get() keeps returning None for unseen keys
- # after a reset, matching the constructor.
- self._history = defaultdict(dict)
-
- def get(self, function_id, key):
- """Gets stored value given a function id and key.
-
- Args:
- function_id: identifier for the preprocessing function used.
- key: identifier for the variable stored.
- Returns:
- value: the corresponding value, expected to be a tensor or
- nested structure of tensors.
- Raises:
- ValueError: if function_id is not one of the 23 valid function ids.
- """
- if function_id not in self._VALID_FNS:
- raise ValueError('Function id not recognized: %s.' % str(function_id))
- return self._history[function_id].get(key)
-
- def update(self, function_id, key, value):
- """Adds a value to the dictionary.
-
- Args:
- function_id: identifier for the preprocessing function used.
- key: identifier for the variable stored.
- value: the value to store, expected to be a tensor or nested structure
- of tensors.
- Raises:
- ValueError: if function_id is not one of the 23 valid function ids.
- """
- if function_id not in self._VALID_FNS:
- raise ValueError('Function id not recognized: %s.' % str(function_id))
- self._history[function_id][key] = value
-
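- # Deterministic replay sketch (mirrors the pattern used in
- # preprocessor_test._testPreprocessorCache; tensors are illustrative):
- #
- #   cache = PreprocessorCache()
- #   out1 = preprocessor.preprocess(dict(tensor_dict), options,
- #                                  preprocess_vars_cache=cache)
- #   out2 = preprocessor.preprocess(dict(tensor_dict), options,
- #                                  preprocess_vars_cache=cache)
- #   # out1 and out2 apply identical random augmentations.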
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/preprocessor_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/preprocessor_test.py
deleted file mode 100644
index 588a3f90cb1ec5aa104bd8519ddd5fb5b30dd3be..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/preprocessor_test.py
+++ /dev/null
@@ -1,2814 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.preprocessor."""
-
-import numpy as np
-import six
-
-import tensorflow as tf
-
-from object_detection.core import preprocessor
-from object_detection.core import preprocessor_cache
-from object_detection.core import standard_fields as fields
-
-if six.PY2:
- import mock # pylint: disable=g-import-not-at-top
-else:
- from unittest import mock # pylint: disable=g-import-not-at-top
-
-
-class PreprocessorTest(tf.test.TestCase):
-
- def createColorfulTestImage(self):
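- """Builds a [1, 200, 400, 3] uint8 image with red, yellow, magenta, and gray quadrants."""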
- ch255 = tf.fill([1, 100, 200, 1], tf.constant(255, dtype=tf.uint8))
- ch128 = tf.fill([1, 100, 200, 1], tf.constant(128, dtype=tf.uint8))
- ch0 = tf.fill([1, 100, 200, 1], tf.constant(0, dtype=tf.uint8))
- imr = tf.concat([ch255, ch0, ch0], 3)
- img = tf.concat([ch255, ch255, ch0], 3)
- imb = tf.concat([ch255, ch0, ch255], 3)
- imw = tf.concat([ch128, ch128, ch128], 3)
- imu = tf.concat([imr, img], 2)
- imd = tf.concat([imb, imw], 2)
- im = tf.concat([imu, imd], 1)
- return im
-
- def createTestImages(self):
- images_r = tf.constant([[[128, 128, 128, 128], [0, 0, 128, 128],
- [0, 128, 128, 128], [192, 192, 128, 128]]],
- dtype=tf.uint8)
- images_r = tf.expand_dims(images_r, 3)
- images_g = tf.constant([[[0, 0, 128, 128], [0, 0, 128, 128],
- [0, 128, 192, 192], [192, 192, 128, 192]]],
- dtype=tf.uint8)
- images_g = tf.expand_dims(images_g, 3)
- images_b = tf.constant([[[128, 128, 192, 0], [0, 0, 128, 192],
- [0, 128, 128, 0], [192, 192, 192, 128]]],
- dtype=tf.uint8)
- images_b = tf.expand_dims(images_b, 3)
- images = tf.concat([images_r, images_g, images_b], 3)
- return images
-
- def createEmptyTestBoxes(self):
- boxes = tf.constant([[]], dtype=tf.float32)
- return boxes
-
- def createTestBoxes(self):
- boxes = tf.constant(
- [[0.0, 0.25, 0.75, 1.0], [0.25, 0.5, 0.75, 1.0]], dtype=tf.float32)
- return boxes
-
- def createTestLabelScores(self):
- return tf.constant([1.0, 0.5], dtype=tf.float32)
-
- def createTestLabelScoresWithMissingScore(self):
- return tf.constant([0.5, np.nan], dtype=tf.float32)
-
- def createTestMasks(self):
- mask = np.array([
- [[255.0, 0.0, 0.0],
- [255.0, 0.0, 0.0],
- [255.0, 0.0, 0.0]],
- [[255.0, 255.0, 0.0],
- [255.0, 255.0, 0.0],
- [255.0, 255.0, 0.0]]])
- return tf.constant(mask, dtype=tf.float32)
-
- def createTestKeypoints(self):
- keypoints = np.array([
- [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
- [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
- ])
- return tf.constant(keypoints, dtype=tf.float32)
-
- def createTestKeypointsInsideCrop(self):
- keypoints = np.array([
- [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
- [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
- ])
- return tf.constant(keypoints, dtype=tf.float32)
-
- def createTestKeypointsOutsideCrop(self):
- keypoints = np.array([
- [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
- [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
- ])
- return tf.constant(keypoints, dtype=tf.float32)
-
- def createKeypointFlipPermutation(self):
- return np.array([0, 2, 1], dtype=np.int32)
-
- def createTestLabels(self):
- labels = tf.constant([1, 2], dtype=tf.int32)
- return labels
-
- def createTestBoxesOutOfImage(self):
- boxes = tf.constant(
- [[-0.1, 0.25, 0.75, 1], [0.25, 0.5, 0.75, 1.1]], dtype=tf.float32)
- return boxes
-
- def createTestMultiClassScores(self):
- return tf.constant([[1.0, 0.0], [0.5, 0.5]], dtype=tf.float32)
-
- def expectedImagesAfterNormalization(self):
- images_r = tf.constant([[[0, 0, 0, 0], [-1, -1, 0, 0],
- [-1, 0, 0, 0], [0.5, 0.5, 0, 0]]],
- dtype=tf.float32)
- images_r = tf.expand_dims(images_r, 3)
- images_g = tf.constant([[[-1, -1, 0, 0], [-1, -1, 0, 0],
- [-1, 0, 0.5, 0.5], [0.5, 0.5, 0, 0.5]]],
- dtype=tf.float32)
- images_g = tf.expand_dims(images_g, 3)
- images_b = tf.constant([[[0, 0, 0.5, -1], [-1, -1, 0, 0.5],
- [-1, 0, 0, -1], [0.5, 0.5, 0.5, 0]]],
- dtype=tf.float32)
- images_b = tf.expand_dims(images_b, 3)
- images = tf.concat([images_r, images_g, images_b], 3)
- return images
-
- def expectedMaxImageAfterColorScale(self):
- images_r = tf.constant([[[0.1, 0.1, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1],
- [-0.9, 0.1, 0.1, 0.1], [0.6, 0.6, 0.1, 0.1]]],
- dtype=tf.float32)
- images_r = tf.expand_dims(images_r, 3)
- images_g = tf.constant([[[-0.9, -0.9, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1],
- [-0.9, 0.1, 0.6, 0.6], [0.6, 0.6, 0.1, 0.6]]],
- dtype=tf.float32)
- images_g = tf.expand_dims(images_g, 3)
- images_b = tf.constant([[[0.1, 0.1, 0.6, -0.9], [-0.9, -0.9, 0.1, 0.6],
- [-0.9, 0.1, 0.1, -0.9], [0.6, 0.6, 0.6, 0.1]]],
- dtype=tf.float32)
- images_b = tf.expand_dims(images_b, 3)
- images = tf.concat([images_r, images_g, images_b], 3)
- return images
-
- def expectedMinImageAfterColorScale(self):
- images_r = tf.constant([[[-0.1, -0.1, -0.1, -0.1], [-1, -1, -0.1, -0.1],
- [-1, -0.1, -0.1, -0.1], [0.4, 0.4, -0.1, -0.1]]],
- dtype=tf.float32)
- images_r = tf.expand_dims(images_r, 3)
- images_g = tf.constant([[[-1, -1, -0.1, -0.1], [-1, -1, -0.1, -0.1],
- [-1, -0.1, 0.4, 0.4], [0.4, 0.4, -0.1, 0.4]]],
- dtype=tf.float32)
- images_g = tf.expand_dims(images_g, 3)
- images_b = tf.constant([[[-0.1, -0.1, 0.4, -1], [-1, -1, -0.1, 0.4],
- [-1, -0.1, -0.1, -1], [0.4, 0.4, 0.4, -0.1]]],
- dtype=tf.float32)
- images_b = tf.expand_dims(images_b, 3)
- images = tf.concat([images_r, images_g, images_b], 3)
- return images
-
- def expectedImagesAfterLeftRightFlip(self):
- images_r = tf.constant([[[0, 0, 0, 0], [0, 0, -1, -1],
- [0, 0, 0, -1], [0, 0, 0.5, 0.5]]],
- dtype=tf.float32)
- images_r = tf.expand_dims(images_r, 3)
- images_g = tf.constant([[[0, 0, -1, -1], [0, 0, -1, -1],
- [0.5, 0.5, 0, -1], [0.5, 0, 0.5, 0.5]]],
- dtype=tf.float32)
- images_g = tf.expand_dims(images_g, 3)
- images_b = tf.constant([[[-1, 0.5, 0, 0], [0.5, 0, -1, -1],
- [-1, 0, 0, -1], [0, 0.5, 0.5, 0.5]]],
- dtype=tf.float32)
- images_b = tf.expand_dims(images_b, 3)
- images = tf.concat([images_r, images_g, images_b], 3)
- return images
-
- def expectedImagesAfterUpDownFlip(self):
- images_r = tf.constant([[[0.5, 0.5, 0, 0], [-1, 0, 0, 0],
- [-1, -1, 0, 0], [0, 0, 0, 0]]],
- dtype=tf.float32)
- images_r = tf.expand_dims(images_r, 3)
- images_g = tf.constant([[[0.5, 0.5, 0, 0.5], [-1, 0, 0.5, 0.5],
- [-1, -1, 0, 0], [-1, -1, 0, 0]]],
- dtype=tf.float32)
- images_g = tf.expand_dims(images_g, 3)
- images_b = tf.constant([[[0.5, 0.5, 0.5, 0], [-1, 0, 0, -1],
- [-1, -1, 0, 0.5], [0, 0, 0.5, -1]]],
- dtype=tf.float32)
- images_b = tf.expand_dims(images_b, 3)
- images = tf.concat([images_r, images_g, images_b], 3)
- return images
-
- def expectedImagesAfterRot90(self):
- images_r = tf.constant([[[0, 0, 0, 0], [0, 0, 0, 0],
- [0, -1, 0, 0.5], [0, -1, -1, 0.5]]],
- dtype=tf.float32)
- images_r = tf.expand_dims(images_r, 3)
- images_g = tf.constant([[[0, 0, 0.5, 0.5], [0, 0, 0.5, 0],
- [-1, -1, 0, 0.5], [-1, -1, -1, 0.5]]],
- dtype=tf.float32)
- images_g = tf.expand_dims(images_g, 3)
- images_b = tf.constant([[[-1, 0.5, -1, 0], [0.5, 0, 0, 0.5],
- [0, -1, 0, 0.5], [0, -1, -1, 0.5]]],
- dtype=tf.float32)
- images_b = tf.expand_dims(images_b, 3)
- images = tf.concat([images_r, images_g, images_b], 3)
- return images
-
- def expectedBoxesAfterLeftRightFlip(self):
- boxes = tf.constant([[0.0, 0.0, 0.75, 0.75], [0.25, 0.0, 0.75, 0.5]],
- dtype=tf.float32)
- return boxes
-
- def expectedBoxesAfterUpDownFlip(self):
- boxes = tf.constant([[0.25, 0.25, 1.0, 1.0], [0.25, 0.5, 0.75, 1.0]],
- dtype=tf.float32)
- return boxes
-
- def expectedBoxesAfterRot90(self):
- boxes = tf.constant(
- [[0.0, 0.0, 0.75, 0.75], [0.0, 0.25, 0.5, 0.75]], dtype=tf.float32)
- return boxes
-
- def expectedMasksAfterLeftRightFlip(self):
- mask = np.array([
- [[0.0, 0.0, 255.0],
- [0.0, 0.0, 255.0],
- [0.0, 0.0, 255.0]],
- [[0.0, 255.0, 255.0],
- [0.0, 255.0, 255.0],
- [0.0, 255.0, 255.0]]])
- return tf.constant(mask, dtype=tf.float32)
-
- def expectedMasksAfterUpDownFlip(self):
- mask = np.array([
- [[255.0, 0.0, 0.0],
- [255.0, 0.0, 0.0],
- [255.0, 0.0, 0.0]],
- [[255.0, 255.0, 0.0],
- [255.0, 255.0, 0.0],
- [255.0, 255.0, 0.0]]])
- return tf.constant(mask, dtype=tf.float32)
-
- def expectedMasksAfterRot90(self):
- mask = np.array([
- [[0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0],
- [255.0, 255.0, 255.0]],
- [[0.0, 0.0, 0.0],
- [255.0, 255.0, 255.0],
- [255.0, 255.0, 255.0]]])
- return tf.constant(mask, dtype=tf.float32)
-
- def expectedLabelScoresAfterThresholding(self):
- return tf.constant([1.0], dtype=tf.float32)
-
- def expectedBoxesAfterThresholding(self):
- return tf.constant([[0.0, 0.25, 0.75, 1.0]], dtype=tf.float32)
-
- def expectedLabelsAfterThresholding(self):
- return tf.constant([1], dtype=tf.float32)
-
- def expectedMultiClassScoresAfterThresholding(self):
- return tf.constant([[1.0, 0.0]], dtype=tf.float32)
-
- def expectedMasksAfterThresholding(self):
- mask = np.array([
- [[255.0, 0.0, 0.0],
- [255.0, 0.0, 0.0],
- [255.0, 0.0, 0.0]]])
- return tf.constant(mask, dtype=tf.float32)
-
- def expectedKeypointsAfterThresholding(self):
- keypoints = np.array([
- [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]]
- ])
- return tf.constant(keypoints, dtype=tf.float32)
-
- def expectedLabelScoresAfterThresholdingWithMissingScore(self):
- return tf.constant([np.nan], dtype=tf.float32)
-
- def expectedBoxesAfterThresholdingWithMissingScore(self):
- return tf.constant([[0.25, 0.5, 0.75, 1]], dtype=tf.float32)
-
- def expectedLabelsAfterThresholdingWithMissingScore(self):
- return tf.constant([2], dtype=tf.float32)
-
- def testRgbToGrayscale(self):
- images = self.createTestImages()
- grayscale_images = preprocessor._rgb_to_grayscale(images)
- expected_images = tf.image.rgb_to_grayscale(images)
- with self.test_session() as sess:
- (grayscale_images, expected_images) = sess.run(
- [grayscale_images, expected_images])
- self.assertAllEqual(expected_images, grayscale_images)
-
- def testNormalizeImage(self):
- preprocess_options = [(preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 256,
- 'target_minval': -1,
- 'target_maxval': 1
- })]
- images = self.createTestImages()
- tensor_dict = {fields.InputDataFields.image: images}
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
- images = tensor_dict[fields.InputDataFields.image]
- images_expected = self.expectedImagesAfterNormalization()
-
- with self.test_session() as sess:
- (images_, images_expected_) = sess.run(
- [images, images_expected])
- images_shape_ = images_.shape
- images_expected_shape_ = images_expected_.shape
- expected_shape = [1, 4, 4, 3]
- self.assertAllEqual(images_expected_shape_, images_shape_)
- self.assertAllEqual(images_shape_, expected_shape)
- self.assertAllClose(images_, images_expected_)
-
- def testRetainBoxesAboveThreshold(self):
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScores()
- (retained_boxes, retained_labels,
- retained_label_scores) = preprocessor.retain_boxes_above_threshold(
- boxes, labels, label_scores, threshold=0.6)
- with self.test_session() as sess:
- (retained_boxes_, retained_labels_, retained_label_scores_,
- expected_retained_boxes_, expected_retained_labels_,
- expected_retained_label_scores_) = sess.run([
- retained_boxes, retained_labels, retained_label_scores,
- self.expectedBoxesAfterThresholding(),
- self.expectedLabelsAfterThresholding(),
- self.expectedLabelScoresAfterThresholding()])
- self.assertAllClose(
- retained_boxes_, expected_retained_boxes_)
- self.assertAllClose(
- retained_labels_, expected_retained_labels_)
- self.assertAllClose(
- retained_label_scores_, expected_retained_label_scores_)
-
- def testRetainBoxesAboveThresholdWithMultiClassScores(self):
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScores()
- multiclass_scores = self.createTestMultiClassScores()
- (_, _, _,
- retained_multiclass_scores) = preprocessor.retain_boxes_above_threshold(
- boxes,
- labels,
- label_scores,
- multiclass_scores=multiclass_scores,
- threshold=0.6)
- with self.test_session() as sess:
- (retained_multiclass_scores_,
- expected_retained_multiclass_scores_) = sess.run([
- retained_multiclass_scores,
- self.expectedMultiClassScoresAfterThresholding()
- ])
-
- self.assertAllClose(retained_multiclass_scores_,
- expected_retained_multiclass_scores_)
-
- def testRetainBoxesAboveThresholdWithMasks(self):
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScores()
- masks = self.createTestMasks()
- _, _, _, retained_masks = preprocessor.retain_boxes_above_threshold(
- boxes, labels, label_scores, masks, threshold=0.6)
- with self.test_session() as sess:
- retained_masks_, expected_retained_masks_ = sess.run([
- retained_masks, self.expectedMasksAfterThresholding()])
-
- self.assertAllClose(
- retained_masks_, expected_retained_masks_)
-
- def testRetainBoxesAboveThresholdWithKeypoints(self):
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScores()
- keypoints = self.createTestKeypoints()
- (_, _, _, retained_keypoints) = preprocessor.retain_boxes_above_threshold(
- boxes, labels, label_scores, keypoints=keypoints, threshold=0.6)
- with self.test_session() as sess:
- (retained_keypoints_,
- expected_retained_keypoints_) = sess.run([
- retained_keypoints,
- self.expectedKeypointsAfterThresholding()])
-
- self.assertAllClose(
- retained_keypoints_, expected_retained_keypoints_)
-
- def testRetainBoxesAboveThresholdWithMissingScore(self):
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScoresWithMissingScore()
- (retained_boxes, retained_labels,
- retained_label_scores) = preprocessor.retain_boxes_above_threshold(
- boxes, labels, label_scores, threshold=0.6)
- with self.test_session() as sess:
- (retained_boxes_, retained_labels_, retained_label_scores_,
- expected_retained_boxes_, expected_retained_labels_,
- expected_retained_label_scores_) = sess.run([
- retained_boxes, retained_labels, retained_label_scores,
- self.expectedBoxesAfterThresholdingWithMissingScore(),
- self.expectedLabelsAfterThresholdingWithMissingScore(),
- self.expectedLabelScoresAfterThresholdingWithMissingScore()])
- self.assertAllClose(
- retained_boxes_, expected_retained_boxes_)
- self.assertAllClose(
- retained_labels_, expected_retained_labels_)
- self.assertAllClose(
- retained_label_scores_, expected_retained_label_scores_)
-
- def testFlipBoxesLeftRight(self):
- boxes = self.createTestBoxes()
- flipped_boxes = preprocessor._flip_boxes_left_right(boxes)
- expected_boxes = self.expectedBoxesAfterLeftRightFlip()
- with self.test_session() as sess:
- flipped_boxes, expected_boxes = sess.run([flipped_boxes, expected_boxes])
- self.assertAllEqual(flipped_boxes.flatten(), expected_boxes.flatten())
-
- def testFlipBoxesUpDown(self):
- boxes = self.createTestBoxes()
- flipped_boxes = preprocessor._flip_boxes_up_down(boxes)
- expected_boxes = self.expectedBoxesAfterUpDownFlip()
- with self.test_session() as sess:
- flipped_boxes, expected_boxes = sess.run([flipped_boxes, expected_boxes])
- self.assertAllEqual(flipped_boxes.flatten(), expected_boxes.flatten())
-
- def testRot90Boxes(self):
- boxes = self.createTestBoxes()
- rotated_boxes = preprocessor._rot90_boxes(boxes)
- expected_boxes = self.expectedBoxesAfterRot90()
- with self.test_session() as sess:
- rotated_boxes, expected_boxes = sess.run([rotated_boxes, expected_boxes])
- self.assertAllEqual(rotated_boxes.flatten(), expected_boxes.flatten())
-
- def testFlipMasksLeftRight(self):
- test_mask = self.createTestMasks()
- flipped_mask = preprocessor._flip_masks_left_right(test_mask)
- expected_mask = self.expectedMasksAfterLeftRightFlip()
- with self.test_session() as sess:
- flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask])
- self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten())
-
- def testFlipMasksUpDown(self):
- test_mask = self.createTestMasks()
- flipped_mask = preprocessor._flip_masks_up_down(test_mask)
- expected_mask = self.expectedMasksAfterUpDownFlip()
- with self.test_session() as sess:
- flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask])
- self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten())
-
- def testRot90Masks(self):
- test_mask = self.createTestMasks()
- rotated_mask = preprocessor._rot90_masks(test_mask)
- expected_mask = self.expectedMasksAfterRot90()
- with self.test_session() as sess:
- rotated_mask, expected_mask = sess.run([rotated_mask, expected_mask])
- self.assertAllEqual(rotated_mask.flatten(), expected_mask.flatten())
-
- def _testPreprocessorCache(self,
- preprocess_options,
- test_boxes=False,
- test_masks=False,
- test_keypoints=False,
- num_runs=4):
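- """Runs preprocessing num_runs times with one shared cache and checks
- that every run produces identical outputs."""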
- cache = preprocessor_cache.PreprocessorCache()
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- classes = self.createTestLabels()
- masks = self.createTestMasks()
- keypoints = self.createTestKeypoints()
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_instance_masks=test_masks, include_keypoints=test_keypoints)
- out = []
- for i in range(num_runs):
- tensor_dict = {
- fields.InputDataFields.image: images,
- }
- num_outputs = 1
- if test_boxes:
- tensor_dict[fields.InputDataFields.groundtruth_boxes] = boxes
- tensor_dict[fields.InputDataFields.groundtruth_classes] = classes
- num_outputs += 1
- if test_masks:
- tensor_dict[fields.InputDataFields.groundtruth_instance_masks] = masks
- num_outputs += 1
- if test_keypoints:
- tensor_dict[fields.InputDataFields.groundtruth_keypoints] = keypoints
- num_outputs += 1
- out.append(preprocessor.preprocess(
- tensor_dict, preprocess_options, preprocessor_arg_map, cache))
-
- with self.test_session() as sess:
- to_run = []
- for i in range(num_runs):
- to_run.append(out[i][fields.InputDataFields.image])
- if test_boxes:
- to_run.append(out[i][fields.InputDataFields.groundtruth_boxes])
- if test_masks:
- to_run.append(
- out[i][fields.InputDataFields.groundtruth_instance_masks])
- if test_keypoints:
- to_run.append(out[i][fields.InputDataFields.groundtruth_keypoints])
-
- out_array = sess.run(to_run)
- for i in range(num_outputs, len(out_array)):
- self.assertAllClose(out_array[i], out_array[i - num_outputs])
-
- def testRandomHorizontalFlip(self):
- preprocess_options = [(preprocessor.random_horizontal_flip, {})]
- images = self.expectedImagesAfterNormalization()
- boxes = self.createTestBoxes()
- tensor_dict = {fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes}
- images_expected1 = self.expectedImagesAfterLeftRightFlip()
- boxes_expected1 = self.expectedBoxesAfterLeftRightFlip()
- images_expected2 = images
- boxes_expected2 = boxes
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
- images = tensor_dict[fields.InputDataFields.image]
- boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
-
- boxes_diff1 = tf.squared_difference(boxes, boxes_expected1)
- boxes_diff2 = tf.squared_difference(boxes, boxes_expected2)
- boxes_diff = tf.multiply(boxes_diff1, boxes_diff2)
- boxes_diff_expected = tf.zeros_like(boxes_diff)
-
- images_diff1 = tf.squared_difference(images, images_expected1)
- images_diff2 = tf.squared_difference(images, images_expected2)
- images_diff = tf.multiply(images_diff1, images_diff2)
- images_diff_expected = tf.zeros_like(images_diff)
-
- with self.test_session() as sess:
- (images_diff_, images_diff_expected_, boxes_diff_,
- boxes_diff_expected_) = sess.run([images_diff, images_diff_expected,
- boxes_diff, boxes_diff_expected])
- self.assertAllClose(boxes_diff_, boxes_diff_expected_)
- self.assertAllClose(images_diff_, images_diff_expected_)
-
- def testRandomHorizontalFlipWithEmptyBoxes(self):
- preprocess_options = [(preprocessor.random_horizontal_flip, {})]
- images = self.expectedImagesAfterNormalization()
- boxes = self.createEmptyTestBoxes()
- tensor_dict = {fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes}
- images_expected1 = self.expectedImagesAfterLeftRightFlip()
- boxes_expected = self.createEmptyTestBoxes()
- images_expected2 = images
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
- images = tensor_dict[fields.InputDataFields.image]
- boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
-
- images_diff1 = tf.squared_difference(images, images_expected1)
- images_diff2 = tf.squared_difference(images, images_expected2)
- images_diff = tf.multiply(images_diff1, images_diff2)
- images_diff_expected = tf.zeros_like(images_diff)
-
- with self.test_session() as sess:
- (images_diff_, images_diff_expected_, boxes_,
- boxes_expected_) = sess.run([images_diff, images_diff_expected, boxes,
- boxes_expected])
- self.assertAllClose(boxes_, boxes_expected_)
- self.assertAllClose(images_diff_, images_diff_expected_)
-
- def testRandomHorizontalFlipWithCache(self):
- keypoint_flip_permutation = self.createKeypointFlipPermutation()
- preprocess_options = [
- (preprocessor.random_horizontal_flip,
- {'keypoint_flip_permutation': keypoint_flip_permutation})]
- self._testPreprocessorCache(preprocess_options,
- test_boxes=True,
- test_masks=True,
- test_keypoints=True)
-
- def testRunRandomHorizontalFlipWithMaskAndKeypoints(self):
- preprocess_options = [(preprocessor.random_horizontal_flip, {})]
- image_height = 3
- image_width = 3
- images = tf.random_uniform([1, image_height, image_width, 3])
- boxes = self.createTestBoxes()
- masks = self.createTestMasks()
- keypoints = self.createTestKeypoints()
- keypoint_flip_permutation = self.createKeypointFlipPermutation()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_instance_masks: masks,
- fields.InputDataFields.groundtruth_keypoints: keypoints
- }
- preprocess_options = [
- (preprocessor.random_horizontal_flip,
- {'keypoint_flip_permutation': keypoint_flip_permutation})]
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_instance_masks=True, include_keypoints=True)
- tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
- boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
- masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
- keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
- with self.test_session() as sess:
- boxes, masks, keypoints = sess.run([boxes, masks, keypoints])
- self.assertIsNotNone(boxes)
- self.assertIsNotNone(masks)
- self.assertIsNotNone(keypoints)
-
- def testRandomVerticalFlip(self):
- preprocess_options = [(preprocessor.random_vertical_flip, {})]
- images = self.expectedImagesAfterNormalization()
- boxes = self.createTestBoxes()
- tensor_dict = {fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes}
- images_expected1 = self.expectedImagesAfterUpDownFlip()
- boxes_expected1 = self.expectedBoxesAfterUpDownFlip()
- images_expected2 = images
- boxes_expected2 = boxes
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
- images = tensor_dict[fields.InputDataFields.image]
- boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
-
- boxes_diff1 = tf.squared_difference(boxes, boxes_expected1)
- boxes_diff2 = tf.squared_difference(boxes, boxes_expected2)
- boxes_diff = tf.multiply(boxes_diff1, boxes_diff2)
- boxes_diff_expected = tf.zeros_like(boxes_diff)
-
- images_diff1 = tf.squared_difference(images, images_expected1)
- images_diff2 = tf.squared_difference(images, images_expected2)
- images_diff = tf.multiply(images_diff1, images_diff2)
- images_diff_expected = tf.zeros_like(images_diff)
-
- with self.test_session() as sess:
- (images_diff_, images_diff_expected_, boxes_diff_,
- boxes_diff_expected_) = sess.run([images_diff, images_diff_expected,
- boxes_diff, boxes_diff_expected])
- self.assertAllClose(boxes_diff_, boxes_diff_expected_)
- self.assertAllClose(images_diff_, images_diff_expected_)
-
- def testRandomVerticalFlipWithEmptyBoxes(self):
- preprocess_options = [(preprocessor.random_vertical_flip, {})]
- images = self.expectedImagesAfterNormalization()
- boxes = self.createEmptyTestBoxes()
- tensor_dict = {fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes}
- images_expected1 = self.expectedImagesAfterUpDownFlip()
- boxes_expected = self.createEmptyTestBoxes()
- images_expected2 = images
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
- images = tensor_dict[fields.InputDataFields.image]
- boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
-
- images_diff1 = tf.squared_difference(images, images_expected1)
- images_diff2 = tf.squared_difference(images, images_expected2)
- images_diff = tf.multiply(images_diff1, images_diff2)
- images_diff_expected = tf.zeros_like(images_diff)
-
- with self.test_session() as sess:
- (images_diff_, images_diff_expected_, boxes_,
- boxes_expected_) = sess.run([images_diff, images_diff_expected, boxes,
- boxes_expected])
- self.assertAllClose(boxes_, boxes_expected_)
- self.assertAllClose(images_diff_, images_diff_expected_)
-
- def testRandomVerticalFlipWithCache(self):
- keypoint_flip_permutation = self.createKeypointFlipPermutation()
- preprocess_options = [
- (preprocessor.random_vertical_flip,
- {'keypoint_flip_permutation': keypoint_flip_permutation})]
- self._testPreprocessorCache(preprocess_options,
- test_boxes=True,
- test_masks=True,
- test_keypoints=True)
-
- def testRunRandomVerticalFlipWithMaskAndKeypoints(self):
- preprocess_options = [(preprocessor.random_vertical_flip, {})]
- image_height = 3
- image_width = 3
- images = tf.random_uniform([1, image_height, image_width, 3])
- boxes = self.createTestBoxes()
- masks = self.createTestMasks()
- keypoints = self.createTestKeypoints()
- keypoint_flip_permutation = self.createKeypointFlipPermutation()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_instance_masks: masks,
- fields.InputDataFields.groundtruth_keypoints: keypoints
- }
- preprocess_options = [
- (preprocessor.random_vertical_flip,
- {'keypoint_flip_permutation': keypoint_flip_permutation})]
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_instance_masks=True, include_keypoints=True)
- tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
- boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
- masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
- keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
- with self.test_session() as sess:
- boxes, masks, keypoints = sess.run([boxes, masks, keypoints])
- self.assertIsNotNone(boxes)
- self.assertIsNotNone(masks)
- self.assertIsNotNone(keypoints)
-
- def testRandomRotation90(self):
- preprocess_options = [(preprocessor.random_rotation90, {})]
- images = self.expectedImagesAfterNormalization()
- boxes = self.createTestBoxes()
- tensor_dict = {fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes}
- images_expected1 = self.expectedImagesAfterRot90()
- boxes_expected1 = self.expectedBoxesAfterRot90()
- images_expected2 = images
- boxes_expected2 = boxes
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
- images = tensor_dict[fields.InputDataFields.image]
- boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
-
- boxes_diff1 = tf.squared_difference(boxes, boxes_expected1)
- boxes_diff2 = tf.squared_difference(boxes, boxes_expected2)
- boxes_diff = tf.multiply(boxes_diff1, boxes_diff2)
- boxes_diff_expected = tf.zeros_like(boxes_diff)
-
- images_diff1 = tf.squared_difference(images, images_expected1)
- images_diff2 = tf.squared_difference(images, images_expected2)
- images_diff = tf.multiply(images_diff1, images_diff2)
- images_diff_expected = tf.zeros_like(images_diff)
-
- with self.test_session() as sess:
- (images_diff_, images_diff_expected_, boxes_diff_,
- boxes_diff_expected_) = sess.run([images_diff, images_diff_expected,
- boxes_diff, boxes_diff_expected])
- self.assertAllClose(boxes_diff_, boxes_diff_expected_)
- self.assertAllClose(images_diff_, images_diff_expected_)
-
- def testRandomRotation90WithEmptyBoxes(self):
- preprocess_options = [(preprocessor.random_rotation90, {})]
- images = self.expectedImagesAfterNormalization()
- boxes = self.createEmptyTestBoxes()
- tensor_dict = {fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes}
- images_expected1 = self.expectedImagesAfterRot90()
- boxes_expected = self.createEmptyTestBoxes()
- images_expected2 = images
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
- images = tensor_dict[fields.InputDataFields.image]
- boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
-
- images_diff1 = tf.squared_difference(images, images_expected1)
- images_diff2 = tf.squared_difference(images, images_expected2)
- images_diff = tf.multiply(images_diff1, images_diff2)
- images_diff_expected = tf.zeros_like(images_diff)
-
- with self.test_session() as sess:
- (images_diff_, images_diff_expected_, boxes_,
- boxes_expected_) = sess.run([images_diff, images_diff_expected, boxes,
- boxes_expected])
- self.assertAllClose(boxes_, boxes_expected_)
- self.assertAllClose(images_diff_, images_diff_expected_)
-
- def testRandomRotation90WithCache(self):
- preprocess_options = [(preprocessor.random_rotation90, {})]
- self._testPreprocessorCache(preprocess_options,
- test_boxes=True,
- test_masks=True,
- test_keypoints=True)
-
- def testRunRandomRotation90WithMaskAndKeypoints(self):
- preprocess_options = [(preprocessor.random_rotation90, {})]
- image_height = 3
- image_width = 3
- images = tf.random_uniform([1, image_height, image_width, 3])
- boxes = self.createTestBoxes()
- masks = self.createTestMasks()
- keypoints = self.createTestKeypoints()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_instance_masks: masks,
- fields.InputDataFields.groundtruth_keypoints: keypoints
- }
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_instance_masks=True, include_keypoints=True)
- tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
- boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
- masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
- keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
- with self.test_session() as sess:
- boxes, masks, keypoints = sess.run([boxes, masks, keypoints])
- self.assertIsNotNone(boxes)
- self.assertIsNotNone(masks)
- self.assertIsNotNone(keypoints)
-
- def testRandomPixelValueScale(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_pixel_value_scale, {}))
- images = self.createTestImages()
- tensor_dict = {fields.InputDataFields.image: images}
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- images_min = tf.to_float(images) * 0.9 / 255.0
- images_max = tf.to_float(images) * 1.1 / 255.0
- images = tensor_dict[fields.InputDataFields.image]
- values_greater = tf.greater_equal(images, images_min)
- values_less = tf.less_equal(images, images_max)
- values_true = tf.fill([1, 4, 4, 3], True)
- with self.test_session() as sess:
- (values_greater_, values_less_, values_true_) = sess.run(
- [values_greater, values_less, values_true])
- self.assertAllClose(values_greater_, values_true_)
- self.assertAllClose(values_less_, values_true_)
-
- def testRandomPixelValueScaleWithCache(self):
- preprocess_options = []
- preprocess_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocess_options.append((preprocessor.random_pixel_value_scale, {}))
- self._testPreprocessorCache(preprocess_options,
- test_boxes=True,
- test_masks=False,
- test_keypoints=False)
-
- def testRandomImageScale(self):
- preprocess_options = [(preprocessor.random_image_scale, {})]
- images_original = self.createTestImages()
- tensor_dict = {fields.InputDataFields.image: images_original}
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
- images_scaled = tensor_dict[fields.InputDataFields.image]
- images_original_shape = tf.shape(images_original)
- images_scaled_shape = tf.shape(images_scaled)
- with self.test_session() as sess:
- (images_original_shape_, images_scaled_shape_) = sess.run(
- [images_original_shape, images_scaled_shape])
- self.assertTrue(
- images_original_shape_[1] * 0.5 <= images_scaled_shape_[1])
- self.assertTrue(
- images_original_shape_[1] * 2.0 >= images_scaled_shape_[1])
- self.assertTrue(
- images_original_shape_[2] * 0.5 <= images_scaled_shape_[2])
- self.assertTrue(
- images_original_shape_[2] * 2.0 >= images_scaled_shape_[2])
-
- def testRandomImageScaleWithCache(self):
- preprocess_options = [(preprocessor.random_image_scale, {})]
- self._testPreprocessorCache(preprocess_options,
- test_boxes=False,
- test_masks=False,
- test_keypoints=False)
-
- def testRandomRGBtoGray(self):
- preprocess_options = [(preprocessor.random_rgb_to_gray, {})]
- images_original = self.createTestImages()
- tensor_dict = {fields.InputDataFields.image: images_original}
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
- images_gray = tensor_dict[fields.InputDataFields.image]
- images_gray_r, images_gray_g, images_gray_b = tf.split(
- value=images_gray, num_or_size_splits=3, axis=3)
- images_r, images_g, images_b = tf.split(
- value=images_original, num_or_size_splits=3, axis=3)
- images_r_diff1 = tf.squared_difference(tf.to_float(images_r),
- tf.to_float(images_gray_r))
- images_r_diff2 = tf.squared_difference(tf.to_float(images_gray_r),
- tf.to_float(images_gray_g))
- images_r_diff = tf.multiply(images_r_diff1, images_r_diff2)
- images_g_diff1 = tf.squared_difference(tf.to_float(images_g),
- tf.to_float(images_gray_g))
- images_g_diff2 = tf.squared_difference(tf.to_float(images_gray_g),
- tf.to_float(images_gray_b))
- images_g_diff = tf.multiply(images_g_diff1, images_g_diff2)
- images_b_diff1 = tf.squared_difference(tf.to_float(images_b),
- tf.to_float(images_gray_b))
- images_b_diff2 = tf.squared_difference(tf.to_float(images_gray_b),
- tf.to_float(images_gray_r))
- images_b_diff = tf.multiply(images_b_diff1, images_b_diff2)
- image_zero1 = tf.constant(0, dtype=tf.float32, shape=[1, 4, 4, 1])
- with self.test_session() as sess:
- (images_r_diff_, images_g_diff_, images_b_diff_, image_zero1_) = sess.run(
- [images_r_diff, images_g_diff, images_b_diff, image_zero1])
- self.assertAllClose(images_r_diff_, image_zero1_)
- self.assertAllClose(images_g_diff_, image_zero1_)
- self.assertAllClose(images_b_diff_, image_zero1_)
-
- def testRandomRGBtoGrayWithCache(self):
- preprocess_options = [(
- preprocessor.random_rgb_to_gray, {'probability': 0.5})]
- self._testPreprocessorCache(preprocess_options,
- test_boxes=False,
- test_masks=False,
- test_keypoints=False)
-
- def testRandomAdjustBrightness(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_adjust_brightness, {}))
- images_original = self.createTestImages()
- tensor_dict = {fields.InputDataFields.image: images_original}
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- images_bright = tensor_dict[fields.InputDataFields.image]
- image_original_shape = tf.shape(images_original)
- image_bright_shape = tf.shape(images_bright)
- with self.test_session() as sess:
- (image_original_shape_, image_bright_shape_) = sess.run(
- [image_original_shape, image_bright_shape])
- self.assertAllEqual(image_original_shape_, image_bright_shape_)
-
- def testRandomAdjustBrightnessWithCache(self):
- preprocess_options = []
- preprocess_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocess_options.append((preprocessor.random_adjust_brightness, {}))
- self._testPreprocessorCache(preprocess_options,
- test_boxes=False,
- test_masks=False,
- test_keypoints=False)
-
- def testRandomAdjustContrast(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_adjust_contrast, {}))
- images_original = self.createTestImages()
- tensor_dict = {fields.InputDataFields.image: images_original}
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- images_contrast = tensor_dict[fields.InputDataFields.image]
- image_original_shape = tf.shape(images_original)
- image_contrast_shape = tf.shape(images_contrast)
- with self.test_session() as sess:
- (image_original_shape_, image_contrast_shape_) = sess.run(
- [image_original_shape, image_contrast_shape])
- self.assertAllEqual(image_original_shape_, image_contrast_shape_)
-
- def testRandomAdjustContrastWithCache(self):
- preprocess_options = []
- preprocess_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocess_options.append((preprocessor.random_adjust_contrast, {}))
- self._testPreprocessorCache(preprocess_options,
- test_boxes=False,
- test_masks=False,
- test_keypoints=False)
-
- def testRandomAdjustHue(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_adjust_hue, {}))
- images_original = self.createTestImages()
- tensor_dict = {fields.InputDataFields.image: images_original}
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- images_hue = tensor_dict[fields.InputDataFields.image]
- image_original_shape = tf.shape(images_original)
- image_hue_shape = tf.shape(images_hue)
- with self.test_session() as sess:
- (image_original_shape_, image_hue_shape_) = sess.run(
- [image_original_shape, image_hue_shape])
- self.assertAllEqual(image_original_shape_, image_hue_shape_)
-
- def testRandomAdjustHueWithCache(self):
- preprocess_options = []
- preprocess_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocess_options.append((preprocessor.random_adjust_hue, {}))
- self._testPreprocessorCache(preprocess_options,
- test_boxes=False,
- test_masks=False,
- test_keypoints=False)
-
- def testRandomDistortColor(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_distort_color, {}))
- images_original = self.createTestImages()
- images_original_shape = tf.shape(images_original)
- tensor_dict = {fields.InputDataFields.image: images_original}
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- images_distorted_color = tensor_dict[fields.InputDataFields.image]
- images_distorted_color_shape = tf.shape(images_distorted_color)
- with self.test_session() as sess:
- (images_original_shape_, images_distorted_color_shape_) = sess.run(
- [images_original_shape, images_distorted_color_shape])
- self.assertAllEqual(images_original_shape_, images_distorted_color_shape_)
-
- def testRandomDistortColorWithCache(self):
- preprocess_options = []
- preprocess_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocess_options.append((preprocessor.random_distort_color, {}))
- self._testPreprocessorCache(preprocess_options,
- test_boxes=False,
- test_masks=False,
- test_keypoints=False)
-
- def testRandomJitterBoxes(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.random_jitter_boxes, {}))
- boxes = self.createTestBoxes()
- boxes_shape = tf.shape(boxes)
- tensor_dict = {fields.InputDataFields.groundtruth_boxes: boxes}
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- distorted_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
- distorted_boxes_shape = tf.shape(distorted_boxes)
-
- with self.test_session() as sess:
- (boxes_shape_, distorted_boxes_shape_) = sess.run(
- [boxes_shape, distorted_boxes_shape])
- self.assertAllEqual(boxes_shape_, distorted_boxes_shape_)
-
- def testRandomCropImage(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_crop_image, {}))
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
- distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- boxes_rank = tf.rank(boxes)
- distorted_boxes_rank = tf.rank(distorted_boxes)
- images_rank = tf.rank(images)
- distorted_images_rank = tf.rank(distorted_images)
- self.assertEqual(3, distorted_images.get_shape()[3])
-
- with self.test_session() as sess:
- (boxes_rank_, distorted_boxes_rank_, images_rank_,
- distorted_images_rank_) = sess.run([
- boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
- ])
- self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
- self.assertAllEqual(images_rank_, distorted_images_rank_)
-
- def testRandomCropImageWithCache(self):
- preprocess_options = [(preprocessor.random_rgb_to_gray,
- {'probability': 0.5}),
- (preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1,
- }),
- (preprocessor.random_crop_image, {})]
- self._testPreprocessorCache(preprocess_options,
- test_boxes=True,
- test_masks=False,
- test_keypoints=False)
-
- def testRandomCropImageGrayscale(self):
- preprocessing_options = [(preprocessor.rgb_to_gray, {}),
- (preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1,
- }),
- (preprocessor.random_crop_image, {})]
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options)
- distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- boxes_rank = tf.rank(boxes)
- distorted_boxes_rank = tf.rank(distorted_boxes)
- images_rank = tf.rank(images)
- distorted_images_rank = tf.rank(distorted_images)
- self.assertEqual(1, distorted_images.get_shape()[3])
-
- with self.test_session() as sess:
- session_results = sess.run([
- boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
- ])
- (boxes_rank_, distorted_boxes_rank_, images_rank_,
- distorted_images_rank_) = session_results
- self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
- self.assertAllEqual(images_rank_, distorted_images_rank_)
-
- def testRandomCropImageWithBoxOutOfImage(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_crop_image, {}))
- images = self.createTestImages()
- boxes = self.createTestBoxesOutOfImage()
- labels = self.createTestLabels()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
- distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- boxes_rank = tf.rank(boxes)
- distorted_boxes_rank = tf.rank(distorted_boxes)
- images_rank = tf.rank(images)
- distorted_images_rank = tf.rank(distorted_images)
-
- with self.test_session() as sess:
- (boxes_rank_, distorted_boxes_rank_, images_rank_,
- distorted_images_rank_) = sess.run(
- [boxes_rank, distorted_boxes_rank, images_rank,
- distorted_images_rank])
- self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
- self.assertAllEqual(images_rank_, distorted_images_rank_)
-
- def testRandomCropImageWithRandomCoefOne(self):
- preprocessing_options = [(preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- })]
-
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScores()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_label_scores: label_scores
- }
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- images = tensor_dict[fields.InputDataFields.image]
-
- preprocessing_options = [(preprocessor.random_crop_image, {
- 'random_coef': 1.0
- })]
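-    # random_coef is the probability of keeping the input unchanged; with a
-    # value of 1.0 the crop is always skipped, so every distorted output
-    # should exactly match its input, as the assertions below verify.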
- distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
-
- distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- distorted_label_scores = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_label_scores]
- boxes_shape = tf.shape(boxes)
- distorted_boxes_shape = tf.shape(distorted_boxes)
- images_shape = tf.shape(images)
- distorted_images_shape = tf.shape(distorted_images)
-
- with self.test_session() as sess:
- (boxes_shape_, distorted_boxes_shape_, images_shape_,
- distorted_images_shape_, images_, distorted_images_,
- boxes_, distorted_boxes_, labels_, distorted_labels_,
- label_scores_, distorted_label_scores_) = sess.run(
- [boxes_shape, distorted_boxes_shape, images_shape,
- distorted_images_shape, images, distorted_images,
- boxes, distorted_boxes, labels, distorted_labels,
- label_scores, distorted_label_scores])
- self.assertAllEqual(boxes_shape_, distorted_boxes_shape_)
- self.assertAllEqual(images_shape_, distorted_images_shape_)
- self.assertAllClose(images_, distorted_images_)
- self.assertAllClose(boxes_, distorted_boxes_)
- self.assertAllEqual(labels_, distorted_labels_)
- self.assertAllEqual(label_scores_, distorted_label_scores_)
-
- def testRandomCropWithMockSampleDistortedBoundingBox(self):
- preprocessing_options = [(preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- })]
-
- images = self.createColorfulTestImage()
- boxes = tf.constant([[0.1, 0.1, 0.8, 0.3],
- [0.2, 0.4, 0.75, 0.75],
- [0.3, 0.1, 0.4, 0.7]], dtype=tf.float32)
- labels = tf.constant([1, 7, 11], dtype=tf.int32)
-
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- images = tensor_dict[fields.InputDataFields.image]
-
- preprocessing_options = [(preprocessor.random_crop_image, {})]
- with mock.patch.object(
- tf.image,
- 'sample_distorted_bounding_box') as mock_sample_distorted_bounding_box:
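-      # tf.image.sample_distorted_bounding_box returns (begin, size, bboxes).
-      # Pinning it to a crop at offset (6, 143) of size 190x237 (-1 keeps all
-      # channels) makes the surviving boxes and labels deterministic.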
- mock_sample_distorted_bounding_box.return_value = (tf.constant(
- [6, 143, 0], dtype=tf.int32), tf.constant(
- [190, 237, -1], dtype=tf.int32), tf.constant(
- [[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
-
- distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
-
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- expected_boxes = tf.constant([[0.178947, 0.07173, 0.75789469, 0.66244733],
- [0.28421, 0.0, 0.38947365, 0.57805908]],
- dtype=tf.float32)
- expected_labels = tf.constant([7, 11], dtype=tf.int32)
-
- with self.test_session() as sess:
- (distorted_boxes_, distorted_labels_,
- expected_boxes_, expected_labels_) = sess.run(
- [distorted_boxes, distorted_labels,
- expected_boxes, expected_labels])
- self.assertAllClose(distorted_boxes_, expected_boxes_)
- self.assertAllEqual(distorted_labels_, expected_labels_)
-
- def testRandomCropImageWithMultiClassScores(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_crop_image, {}))
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- multiclass_scores = self.createTestMultiClassScores()
-
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.multiclass_scores: multiclass_scores
- }
- distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
- distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_multiclass_scores = distorted_tensor_dict[
- fields.InputDataFields.multiclass_scores]
- boxes_rank = tf.rank(boxes)
- distorted_boxes_rank = tf.rank(distorted_boxes)
- images_rank = tf.rank(images)
- distorted_images_rank = tf.rank(distorted_images)
- multiclass_scores_rank = tf.rank(multiclass_scores)
- distorted_multiclass_scores_rank = tf.rank(distorted_multiclass_scores)
-
- with self.test_session() as sess:
- (boxes_rank_, distorted_boxes_, distorted_boxes_rank_, images_rank_,
- distorted_images_rank_, multiclass_scores_rank_,
- distorted_multiclass_scores_rank_,
- distorted_multiclass_scores_) = sess.run([
- boxes_rank, distorted_boxes, distorted_boxes_rank, images_rank,
- distorted_images_rank, multiclass_scores_rank,
- distorted_multiclass_scores_rank, distorted_multiclass_scores
- ])
- self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
- self.assertAllEqual(images_rank_, distorted_images_rank_)
- self.assertAllEqual(multiclass_scores_rank_,
- distorted_multiclass_scores_rank_)
- self.assertAllEqual(distorted_boxes_.shape[0],
- distorted_multiclass_scores_.shape[0])
-
- def testStrictRandomCropImageWithLabelScores(self):
- image = self.createColorfulTestImage()[0]
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScores()
- with mock.patch.object(
- tf.image,
- 'sample_distorted_bounding_box'
- ) as mock_sample_distorted_bounding_box:
- mock_sample_distorted_bounding_box.return_value = (
- tf.constant([6, 143, 0], dtype=tf.int32),
- tf.constant([190, 237, -1], dtype=tf.int32),
- tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
- new_image, new_boxes, new_labels, new_label_scores = (
- preprocessor._strict_random_crop_image(
- image, boxes, labels, label_scores))
- with self.test_session() as sess:
- new_image, new_boxes, new_labels, new_label_scores = (
- sess.run(
- [new_image, new_boxes, new_labels, new_label_scores])
- )
-
- expected_boxes = np.array(
- [[0.0, 0.0, 0.75789469, 1.0],
- [0.23157893, 0.24050637, 0.75789469, 1.0]], dtype=np.float32)
- self.assertAllEqual(new_image.shape, [190, 237, 3])
- self.assertAllEqual(new_label_scores, [1.0, 0.5])
- self.assertAllClose(
- new_boxes.flatten(), expected_boxes.flatten())
-
- def testStrictRandomCropImageWithMasks(self):
- image = self.createColorfulTestImage()[0]
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
- with mock.patch.object(
- tf.image,
- 'sample_distorted_bounding_box'
- ) as mock_sample_distorted_bounding_box:
- mock_sample_distorted_bounding_box.return_value = (
- tf.constant([6, 143, 0], dtype=tf.int32),
- tf.constant([190, 237, -1], dtype=tf.int32),
- tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
- new_image, new_boxes, new_labels, new_masks = (
- preprocessor._strict_random_crop_image(
- image, boxes, labels, masks=masks))
- with self.test_session() as sess:
- new_image, new_boxes, new_labels, new_masks = sess.run(
- [new_image, new_boxes, new_labels, new_masks])
- expected_boxes = np.array(
- [[0.0, 0.0, 0.75789469, 1.0],
- [0.23157893, 0.24050637, 0.75789469, 1.0]], dtype=np.float32)
- self.assertAllEqual(new_image.shape, [190, 237, 3])
- self.assertAllEqual(new_masks.shape, [2, 190, 237])
- self.assertAllClose(
- new_boxes.flatten(), expected_boxes.flatten())
-
- def testStrictRandomCropImageWithKeypoints(self):
- image = self.createColorfulTestImage()[0]
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- keypoints = self.createTestKeypoints()
- with mock.patch.object(
- tf.image,
- 'sample_distorted_bounding_box'
- ) as mock_sample_distorted_bounding_box:
- mock_sample_distorted_bounding_box.return_value = (
- tf.constant([6, 143, 0], dtype=tf.int32),
- tf.constant([190, 237, -1], dtype=tf.int32),
- tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
- new_image, new_boxes, new_labels, new_keypoints = (
- preprocessor._strict_random_crop_image(
- image, boxes, labels, keypoints=keypoints))
- with self.test_session() as sess:
- new_image, new_boxes, new_labels, new_keypoints = sess.run(
- [new_image, new_boxes, new_labels, new_keypoints])
-
- expected_boxes = np.array([
- [0.0, 0.0, 0.75789469, 1.0],
- [0.23157893, 0.24050637, 0.75789469, 1.0],], dtype=np.float32)
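-    # Keypoints that fall outside the crop window come back as NaN.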
- expected_keypoints = np.array([
- [[np.nan, np.nan],
- [np.nan, np.nan],
- [np.nan, np.nan]],
- [[0.38947368, 0.07173],
- [0.49473682, 0.24050637],
- [0.60000002, 0.40928277]]
- ], dtype=np.float32)
- self.assertAllEqual(new_image.shape, [190, 237, 3])
- self.assertAllClose(
- new_boxes.flatten(), expected_boxes.flatten())
- self.assertAllClose(
- new_keypoints.flatten(), expected_keypoints.flatten())
-
- def testRunRandomCropImageWithMasks(self):
- image = self.createColorfulTestImage()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
-
- tensor_dict = {
- fields.InputDataFields.image: image,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_instance_masks: masks,
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_instance_masks=True)
-
- preprocessing_options = [(preprocessor.random_crop_image, {})]
-
- with mock.patch.object(
- tf.image,
- 'sample_distorted_bounding_box'
- ) as mock_sample_distorted_bounding_box:
- mock_sample_distorted_bounding_box.return_value = (
- tf.constant([6, 143, 0], dtype=tf.int32),
- tf.constant([190, 237, -1], dtype=tf.int32),
- tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- distorted_masks = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_instance_masks]
- with self.test_session() as sess:
- (distorted_image_, distorted_boxes_, distorted_labels_,
- distorted_masks_) = sess.run(
- [distorted_image, distorted_boxes, distorted_labels,
- distorted_masks])
-
- expected_boxes = np.array([
- [0.0, 0.0, 0.75789469, 1.0],
- [0.23157893, 0.24050637, 0.75789469, 1.0],
- ], dtype=np.float32)
- self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
- self.assertAllEqual(distorted_masks_.shape, [2, 190, 237])
- self.assertAllEqual(distorted_labels_, [1, 2])
- self.assertAllClose(
- distorted_boxes_.flatten(), expected_boxes.flatten())
-
- def testRunRandomCropImageWithKeypointsInsideCrop(self):
- image = self.createColorfulTestImage()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- keypoints = self.createTestKeypointsInsideCrop()
-
- tensor_dict = {
- fields.InputDataFields.image: image,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_keypoints: keypoints
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_keypoints=True)
-
- preprocessing_options = [(preprocessor.random_crop_image, {})]
-
- with mock.patch.object(
- tf.image,
- 'sample_distorted_bounding_box'
- ) as mock_sample_distorted_bounding_box:
- mock_sample_distorted_bounding_box.return_value = (
- tf.constant([6, 143, 0], dtype=tf.int32),
- tf.constant([190, 237, -1], dtype=tf.int32),
- tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- distorted_keypoints = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_keypoints]
- with self.test_session() as sess:
- (distorted_image_, distorted_boxes_, distorted_labels_,
- distorted_keypoints_) = sess.run(
- [distorted_image, distorted_boxes, distorted_labels,
- distorted_keypoints])
-
- expected_boxes = np.array([
- [0.0, 0.0, 0.75789469, 1.0],
- [0.23157893, 0.24050637, 0.75789469, 1.0],
- ], dtype=np.float32)
- expected_keypoints = np.array([
- [[0.38947368, 0.07173],
- [0.49473682, 0.24050637],
- [0.60000002, 0.40928277]],
- [[0.38947368, 0.07173],
- [0.49473682, 0.24050637],
- [0.60000002, 0.40928277]]
- ])
- self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
- self.assertAllEqual(distorted_labels_, [1, 2])
- self.assertAllClose(
- distorted_boxes_.flatten(), expected_boxes.flatten())
- self.assertAllClose(
- distorted_keypoints_.flatten(), expected_keypoints.flatten())
-
- def testRunRandomCropImageWithKeypointsOutsideCrop(self):
- image = self.createColorfulTestImage()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- keypoints = self.createTestKeypointsOutsideCrop()
-
- tensor_dict = {
- fields.InputDataFields.image: image,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_keypoints: keypoints
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_keypoints=True)
-
- preprocessing_options = [(preprocessor.random_crop_image, {})]
-
- with mock.patch.object(
- tf.image,
- 'sample_distorted_bounding_box'
- ) as mock_sample_distorted_bounding_box:
- mock_sample_distorted_bounding_box.return_value = (
- tf.constant([6, 143, 0], dtype=tf.int32),
- tf.constant([190, 237, -1], dtype=tf.int32),
- tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- distorted_keypoints = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_keypoints]
- with self.test_session() as sess:
- (distorted_image_, distorted_boxes_, distorted_labels_,
- distorted_keypoints_) = sess.run(
- [distorted_image, distorted_boxes, distorted_labels,
- distorted_keypoints])
-
- expected_boxes = np.array([
- [0.0, 0.0, 0.75789469, 1.0],
- [0.23157893, 0.24050637, 0.75789469, 1.0],
- ], dtype=np.float32)
- expected_keypoints = np.array([
- [[np.nan, np.nan],
- [np.nan, np.nan],
- [np.nan, np.nan]],
- [[np.nan, np.nan],
- [np.nan, np.nan],
- [np.nan, np.nan]],
- ])
- self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
- self.assertAllEqual(distorted_labels_, [1, 2])
- self.assertAllClose(
- distorted_boxes_.flatten(), expected_boxes.flatten())
- self.assertAllClose(
- distorted_keypoints_.flatten(), expected_keypoints.flatten())
-
- def testRunRetainBoxesAboveThreshold(self):
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScores()
-
- tensor_dict = {
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_label_scores: label_scores
- }
-
- preprocessing_options = [
- (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
- ]
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_label_scores=True)
- retained_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- retained_boxes = retained_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- retained_labels = retained_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- retained_label_scores = retained_tensor_dict[
- fields.InputDataFields.groundtruth_label_scores]
-
- with self.test_session() as sess:
- (retained_boxes_, retained_labels_,
- retained_label_scores_, expected_retained_boxes_,
- expected_retained_labels_, expected_retained_label_scores_) = sess.run(
- [retained_boxes, retained_labels, retained_label_scores,
- self.expectedBoxesAfterThresholding(),
- self.expectedLabelsAfterThresholding(),
- self.expectedLabelScoresAfterThresholding()])
-
- self.assertAllClose(retained_boxes_, expected_retained_boxes_)
- self.assertAllClose(retained_labels_, expected_retained_labels_)
- self.assertAllClose(
- retained_label_scores_, expected_retained_label_scores_)
-
- def testRunRetainBoxesAboveThresholdWithMasks(self):
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScores()
- masks = self.createTestMasks()
-
- tensor_dict = {
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_label_scores: label_scores,
- fields.InputDataFields.groundtruth_instance_masks: masks
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_label_scores=True,
- include_instance_masks=True)
-
- preprocessing_options = [
- (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
- ]
-
- retained_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- retained_masks = retained_tensor_dict[
- fields.InputDataFields.groundtruth_instance_masks]
-
- with self.test_session() as sess:
- (retained_masks_, expected_masks_) = sess.run(
- [retained_masks,
- self.expectedMasksAfterThresholding()])
- self.assertAllClose(retained_masks_, expected_masks_)
-
- def testRunRetainBoxesAboveThresholdWithKeypoints(self):
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScores()
- keypoints = self.createTestKeypoints()
-
- tensor_dict = {
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_label_scores: label_scores,
- fields.InputDataFields.groundtruth_keypoints: keypoints
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_label_scores=True,
- include_keypoints=True)
-
- preprocessing_options = [
- (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
- ]
-
- retained_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- retained_keypoints = retained_tensor_dict[
- fields.InputDataFields.groundtruth_keypoints]
-
- with self.test_session() as sess:
- (retained_keypoints_, expected_keypoints_) = sess.run(
- [retained_keypoints,
- self.expectedKeypointsAfterThresholding()])
- self.assertAllClose(retained_keypoints_, expected_keypoints_)
-
- def testRandomCropToAspectRatioWithCache(self):
- preprocess_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
- self._testPreprocessorCache(preprocess_options,
- test_boxes=True,
- test_masks=False,
- test_keypoints=False)
-
- def testRunRandomCropToAspectRatioWithMasks(self):
- image = self.createColorfulTestImage()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
-
- tensor_dict = {
- fields.InputDataFields.image: image,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_instance_masks: masks
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_instance_masks=True)
-
- preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
-
- with mock.patch.object(preprocessor,
- '_random_integer') as mock_random_integer:
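-      # Forcing the random offset to zero makes the aspect-ratio crop
-      # deterministic, so exact boxes and mask shapes can be asserted below.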
- mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- distorted_masks = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_instance_masks]
- with self.test_session() as sess:
- (distorted_image_, distorted_boxes_, distorted_labels_,
- distorted_masks_) = sess.run([
- distorted_image, distorted_boxes, distorted_labels, distorted_masks
- ])
-
- expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32)
- self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
- self.assertAllEqual(distorted_labels_, [1])
- self.assertAllClose(distorted_boxes_.flatten(),
- expected_boxes.flatten())
- self.assertAllEqual(distorted_masks_.shape, [1, 200, 200])
-
- def testRunRandomCropToAspectRatioWithKeypoints(self):
- image = self.createColorfulTestImage()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- keypoints = self.createTestKeypoints()
-
- tensor_dict = {
- fields.InputDataFields.image: image,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_keypoints: keypoints
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_keypoints=True)
-
- preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
-
- with mock.patch.object(preprocessor,
- '_random_integer') as mock_random_integer:
- mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- distorted_keypoints = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_keypoints]
- with self.test_session() as sess:
- (distorted_image_, distorted_boxes_, distorted_labels_,
- distorted_keypoints_) = sess.run([
- distorted_image, distorted_boxes, distorted_labels,
- distorted_keypoints
- ])
-
- expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32)
- expected_keypoints = np.array(
- [[0.1, 0.2], [0.2, 0.4], [0.3, 0.6]], dtype=np.float32)
- self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
- self.assertAllEqual(distorted_labels_, [1])
- self.assertAllClose(distorted_boxes_.flatten(),
- expected_boxes.flatten())
- self.assertAllClose(distorted_keypoints_.flatten(),
- expected_keypoints.flatten())
-
- def testRandomPadToAspectRatioWithCache(self):
- preprocess_options = [(preprocessor.random_pad_to_aspect_ratio, {})]
- self._testPreprocessorCache(preprocess_options,
- test_boxes=True,
- test_masks=True,
- test_keypoints=True)
-
- def testRunRandomPadToAspectRatioWithMinMaxPaddedSizeRatios(self):
- image = self.createColorfulTestImage()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
-
- tensor_dict = {
- fields.InputDataFields.image: image,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map()
- preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio,
- {'min_padded_size_ratio': (4.0, 4.0),
- 'max_padded_size_ratio': (4.0, 4.0)})]
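-    # Pinning min_padded_size_ratio and max_padded_size_ratio to the same
-    # value removes the randomness in the padded size, so the 800x800 output
-    # shape asserted below is deterministic.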
-
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- with self.test_session() as sess:
- distorted_image_, distorted_boxes_, distorted_labels_ = sess.run([
- distorted_image, distorted_boxes, distorted_labels])
-
- expected_boxes = np.array(
- [[0.0, 0.125, 0.1875, 0.5], [0.0625, 0.25, 0.1875, 0.5]],
- dtype=np.float32)
- self.assertAllEqual(distorted_image_.shape, [1, 800, 800, 3])
- self.assertAllEqual(distorted_labels_, [1, 2])
- self.assertAllClose(distorted_boxes_.flatten(),
- expected_boxes.flatten())
-
- def testRunRandomPadToAspectRatioWithMasks(self):
- image = self.createColorfulTestImage()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
-
- tensor_dict = {
- fields.InputDataFields.image: image,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_instance_masks: masks
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_instance_masks=True)
-
- preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, {})]
-
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- distorted_masks = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_instance_masks]
- with self.test_session() as sess:
- (distorted_image_, distorted_boxes_, distorted_labels_,
- distorted_masks_) = sess.run([
- distorted_image, distorted_boxes, distorted_labels, distorted_masks
- ])
-
- expected_boxes = np.array(
- [[0.0, 0.25, 0.375, 1.0], [0.125, 0.5, 0.375, 1.0]], dtype=np.float32)
- self.assertAllEqual(distorted_image_.shape, [1, 400, 400, 3])
- self.assertAllEqual(distorted_labels_, [1, 2])
- self.assertAllClose(distorted_boxes_.flatten(),
- expected_boxes.flatten())
- self.assertAllEqual(distorted_masks_.shape, [2, 400, 400])
-
- def testRunRandomPadToAspectRatioWithKeypoints(self):
- image = self.createColorfulTestImage()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- keypoints = self.createTestKeypoints()
-
- tensor_dict = {
- fields.InputDataFields.image: image,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_keypoints: keypoints
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_keypoints=True)
-
- preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, {})]
-
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- distorted_keypoints = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_keypoints]
- with self.test_session() as sess:
- (distorted_image_, distorted_boxes_, distorted_labels_,
- distorted_keypoints_) = sess.run([
- distorted_image, distorted_boxes, distorted_labels,
- distorted_keypoints
- ])
-
- expected_boxes = np.array(
- [[0.0, 0.25, 0.375, 1.0], [0.125, 0.5, 0.375, 1.0]], dtype=np.float32)
- expected_keypoints = np.array([
- [[0.05, 0.1], [0.1, 0.2], [0.15, 0.3]],
- [[0.2, 0.4], [0.25, 0.5], [0.3, 0.6]],
- ], dtype=np.float32)
- self.assertAllEqual(distorted_image_.shape, [1, 400, 400, 3])
- self.assertAllEqual(distorted_labels_, [1, 2])
- self.assertAllClose(distorted_boxes_.flatten(),
- expected_boxes.flatten())
- self.assertAllClose(distorted_keypoints_.flatten(),
- expected_keypoints.flatten())
-
- def testRandomPadImageWithCache(self):
- preprocess_options = [(preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1,}), (preprocessor.random_pad_image, {})]
- self._testPreprocessorCache(preprocess_options,
- test_boxes=True,
- test_masks=True,
- test_keypoints=True)
-
- def testRandomPadImage(self):
- preprocessing_options = [(preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- })]
-
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- images = tensor_dict[fields.InputDataFields.image]
-
- preprocessing_options = [(preprocessor.random_pad_image, {})]
- padded_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
-
- padded_images = padded_tensor_dict[fields.InputDataFields.image]
- padded_boxes = padded_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- boxes_shape = tf.shape(boxes)
- padded_boxes_shape = tf.shape(padded_boxes)
- images_shape = tf.shape(images)
- padded_images_shape = tf.shape(padded_images)
-
- with self.test_session() as sess:
- (boxes_shape_, padded_boxes_shape_, images_shape_,
- padded_images_shape_, boxes_, padded_boxes_) = sess.run(
- [boxes_shape, padded_boxes_shape, images_shape,
- padded_images_shape, boxes, padded_boxes])
- self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
-      self.assertTrue(images_shape_[1] >= padded_images_shape_[1] * 0.5)
-      self.assertTrue(images_shape_[2] >= padded_images_shape_[2] * 0.5)
-      self.assertTrue(images_shape_[1] <= padded_images_shape_[1])
-      self.assertTrue(images_shape_[2] <= padded_images_shape_[2])
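-      # Padding only enlarges the canvas, so each box's normalized height and
-      # width can only shrink relative to the original.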
- self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
- padded_boxes_[:, 2] - padded_boxes_[:, 0])))
- self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
- padded_boxes_[:, 3] - padded_boxes_[:, 1])))
-
- def testRandomCropPadImageWithCache(self):
- preprocess_options = [(preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1,}), (preprocessor.random_crop_pad_image, {})]
- self._testPreprocessorCache(preprocess_options,
- test_boxes=True,
- test_masks=True,
- test_keypoints=True)
-
- def testRandomCropPadImageWithRandomCoefOne(self):
- preprocessing_options = [(preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- })]
-
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- images = tensor_dict[fields.InputDataFields.image]
-
- preprocessing_options = [(preprocessor.random_crop_pad_image, {
- 'random_coef': 1.0
- })]
- padded_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
-
- padded_images = padded_tensor_dict[fields.InputDataFields.image]
- padded_boxes = padded_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- boxes_shape = tf.shape(boxes)
- padded_boxes_shape = tf.shape(padded_boxes)
- images_shape = tf.shape(images)
- padded_images_shape = tf.shape(padded_images)
-
- with self.test_session() as sess:
- (boxes_shape_, padded_boxes_shape_, images_shape_,
- padded_images_shape_, boxes_, padded_boxes_) = sess.run(
- [boxes_shape, padded_boxes_shape, images_shape,
- padded_images_shape, boxes, padded_boxes])
- self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
-      self.assertTrue(images_shape_[1] >= padded_images_shape_[1] * 0.5)
-      self.assertTrue(images_shape_[2] >= padded_images_shape_[2] * 0.5)
-      self.assertTrue(images_shape_[1] <= padded_images_shape_[1])
-      self.assertTrue(images_shape_[2] <= padded_images_shape_[2])
- self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
- padded_boxes_[:, 2] - padded_boxes_[:, 0])))
- self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
- padded_boxes_[:, 3] - padded_boxes_[:, 1])))
-
- def testRandomCropToAspectRatio(self):
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- tensor_dict = preprocessor.preprocess(tensor_dict, [])
- images = tensor_dict[fields.InputDataFields.image]
-
- preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {
- 'aspect_ratio': 2.0
- })]
- cropped_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
-
- cropped_images = cropped_tensor_dict[fields.InputDataFields.image]
- cropped_boxes = cropped_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- boxes_shape = tf.shape(boxes)
- cropped_boxes_shape = tf.shape(cropped_boxes)
- images_shape = tf.shape(images)
- cropped_images_shape = tf.shape(cropped_images)
-
- with self.test_session() as sess:
- (boxes_shape_, cropped_boxes_shape_, images_shape_,
- cropped_images_shape_) = sess.run([
- boxes_shape, cropped_boxes_shape, images_shape, cropped_images_shape
- ])
- self.assertAllEqual(boxes_shape_, cropped_boxes_shape_)
- self.assertEqual(images_shape_[1], cropped_images_shape_[1] * 2)
- self.assertEqual(images_shape_[2], cropped_images_shape_[2])
-
- def testRandomPadToAspectRatio(self):
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- tensor_dict = preprocessor.preprocess(tensor_dict, [])
- images = tensor_dict[fields.InputDataFields.image]
-
- preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, {
- 'aspect_ratio': 2.0
- })]
- padded_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
-
- padded_images = padded_tensor_dict[fields.InputDataFields.image]
- padded_boxes = padded_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- boxes_shape = tf.shape(boxes)
- padded_boxes_shape = tf.shape(padded_boxes)
- images_shape = tf.shape(images)
- padded_images_shape = tf.shape(padded_images)
-
- with self.test_session() as sess:
- (boxes_shape_, padded_boxes_shape_, images_shape_,
- padded_images_shape_) = sess.run([
- boxes_shape, padded_boxes_shape, images_shape, padded_images_shape
- ])
- self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
- self.assertEqual(images_shape_[1], padded_images_shape_[1])
- self.assertEqual(2 * images_shape_[2], padded_images_shape_[2])
-
- def testRandomBlackPatchesWithCache(self):
- preprocess_options = []
- preprocess_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocess_options.append((preprocessor.random_black_patches, {
- 'size_to_image_ratio': 0.5
- }))
- self._testPreprocessorCache(preprocess_options,
- test_boxes=True,
- test_masks=True,
- test_keypoints=True)
-
- def testRandomBlackPatches(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_black_patches, {
- 'size_to_image_ratio': 0.5
- }))
- images = self.createTestImages()
- tensor_dict = {fields.InputDataFields.image: images}
- blacked_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
- blacked_images = blacked_tensor_dict[fields.InputDataFields.image]
- images_shape = tf.shape(images)
- blacked_images_shape = tf.shape(blacked_images)
-
- with self.test_session() as sess:
- (images_shape_, blacked_images_shape_) = sess.run(
- [images_shape, blacked_images_shape])
- self.assertAllEqual(images_shape_, blacked_images_shape_)
-
- def testRandomResizeMethodWithCache(self):
- preprocess_options = []
- preprocess_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocess_options.append((preprocessor.random_resize_method, {
- 'target_size': (75, 150)
- }))
- self._testPreprocessorCache(preprocess_options,
- test_boxes=True,
- test_masks=True,
- test_keypoints=True)
-
- def testRandomResizeMethod(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_resize_method, {
- 'target_size': (75, 150)
- }))
- images = self.createTestImages()
- tensor_dict = {fields.InputDataFields.image: images}
- resized_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
- resized_images = resized_tensor_dict[fields.InputDataFields.image]
- resized_images_shape = tf.shape(resized_images)
- expected_images_shape = tf.constant([1, 75, 150, 3], dtype=tf.int32)
-
- with self.test_session() as sess:
- (expected_images_shape_, resized_images_shape_) = sess.run(
- [expected_images_shape, resized_images_shape])
- self.assertAllEqual(expected_images_shape_,
- resized_images_shape_)
-
- def testResizeImageWithMasks(self):
- """Tests image resizing, checking output sizes."""
- in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
- in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
- height = 50
- width = 100
- expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
- expected_masks_shape_list = [[15, 50, 100], [10, 50, 100]]
-
- for (in_image_shape, expected_image_shape, in_masks_shape,
- expected_mask_shape) in zip(in_image_shape_list,
- expected_image_shape_list,
- in_masks_shape_list,
- expected_masks_shape_list):
- in_image = tf.random_uniform(in_image_shape)
- in_masks = tf.random_uniform(in_masks_shape)
- out_image, out_masks, _ = preprocessor.resize_image(
- in_image, in_masks, new_height=height, new_width=width)
- out_image_shape = tf.shape(out_image)
- out_masks_shape = tf.shape(out_masks)
-
- with self.test_session() as sess:
- out_image_shape, out_masks_shape = sess.run(
- [out_image_shape, out_masks_shape])
- self.assertAllEqual(out_image_shape, expected_image_shape)
- self.assertAllEqual(out_masks_shape, expected_mask_shape)
-
- def testResizeImageWithMasksTensorInputHeightAndWidth(self):
- """Tests image resizing, checking output sizes."""
- in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
- in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
- height = tf.constant(50, dtype=tf.int32)
- width = tf.constant(100, dtype=tf.int32)
- expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
- expected_masks_shape_list = [[15, 50, 100], [10, 50, 100]]
-
- for (in_image_shape, expected_image_shape, in_masks_shape,
- expected_mask_shape) in zip(in_image_shape_list,
- expected_image_shape_list,
- in_masks_shape_list,
- expected_masks_shape_list):
- in_image = tf.random_uniform(in_image_shape)
- in_masks = tf.random_uniform(in_masks_shape)
- out_image, out_masks, _ = preprocessor.resize_image(
- in_image, in_masks, new_height=height, new_width=width)
- out_image_shape = tf.shape(out_image)
- out_masks_shape = tf.shape(out_masks)
-
- with self.test_session() as sess:
- out_image_shape, out_masks_shape = sess.run(
- [out_image_shape, out_masks_shape])
- self.assertAllEqual(out_image_shape, expected_image_shape)
- self.assertAllEqual(out_masks_shape, expected_mask_shape)
-
- def testResizeImageWithNoInstanceMask(self):
- """Tests image resizing, checking output sizes."""
- in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
- in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
- height = 50
- width = 100
- expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
- expected_masks_shape_list = [[0, 50, 100], [0, 50, 100]]
-
- for (in_image_shape, expected_image_shape, in_masks_shape,
- expected_mask_shape) in zip(in_image_shape_list,
- expected_image_shape_list,
- in_masks_shape_list,
- expected_masks_shape_list):
- in_image = tf.random_uniform(in_image_shape)
- in_masks = tf.random_uniform(in_masks_shape)
- out_image, out_masks, _ = preprocessor.resize_image(
- in_image, in_masks, new_height=height, new_width=width)
- out_image_shape = tf.shape(out_image)
- out_masks_shape = tf.shape(out_masks)
-
- with self.test_session() as sess:
- out_image_shape, out_masks_shape = sess.run(
- [out_image_shape, out_masks_shape])
- self.assertAllEqual(out_image_shape, expected_image_shape)
- self.assertAllEqual(out_masks_shape, expected_mask_shape)
-
- def testResizeToRangePreservesStaticSpatialShape(self):
- """Tests image resizing, checking output sizes."""
- in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
- min_dim = 50
- max_dim = 100
- expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]]
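-    # resize_to_range scales the short side up to min_dim while preserving
-    # aspect ratio, unless that would push the long side past max_dim, in
-    # which case the long side is capped at max_dim instead (third case).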
-
- for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
- in_image = tf.random_uniform(in_shape)
- out_image, _ = preprocessor.resize_to_range(
- in_image, min_dimension=min_dim, max_dimension=max_dim)
- self.assertAllEqual(out_image.get_shape().as_list(), expected_shape)
-
- def testResizeToRangeWithDynamicSpatialShape(self):
- """Tests image resizing, checking output sizes."""
- in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
- min_dim = 50
- max_dim = 100
- expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]]
-
- for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
- in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
- out_image, _ = preprocessor.resize_to_range(
- in_image, min_dimension=min_dim, max_dimension=max_dim)
- out_image_shape = tf.shape(out_image)
- with self.test_session() as sess:
- out_image_shape = sess.run(out_image_shape,
- feed_dict={in_image:
- np.random.randn(*in_shape)})
- self.assertAllEqual(out_image_shape, expected_shape)
-
- def testResizeToRangeWithPadToMaxDimensionReturnsCorrectShapes(self):
- in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
- min_dim = 50
- max_dim = 100
- expected_shape_list = [[100, 100, 3], [100, 100, 3], [100, 100, 3]]
-
- for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
- in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
- out_image, _ = preprocessor.resize_to_range(
- in_image,
- min_dimension=min_dim,
- max_dimension=max_dim,
- pad_to_max_dimension=True)
- self.assertAllEqual(out_image.shape.as_list(), expected_shape)
- out_image_shape = tf.shape(out_image)
- with self.test_session() as sess:
- out_image_shape = sess.run(
- out_image_shape, feed_dict={in_image: np.random.randn(*in_shape)})
- self.assertAllEqual(out_image_shape, expected_shape)
-
- def testResizeToRangeWithPadToMaxDimensionReturnsCorrectTensor(self):
- in_image_np = np.array([[[0, 1, 2]]], np.float32)
- ex_image_np = np.array(
- [[[0, 1, 2], [123.68, 116.779, 103.939]],
- [[123.68, 116.779, 103.939], [123.68, 116.779, 103.939]]], np.float32)
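-    # The expected tensor assumes padding uses the per-channel pad values
-    # passed below (the widely used ImageNet channel means).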
- min_dim = 1
- max_dim = 2
-
- in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
- out_image, _ = preprocessor.resize_to_range(
- in_image,
- min_dimension=min_dim,
- max_dimension=max_dim,
- pad_to_max_dimension=True,
- per_channel_pad_value=(123.68, 116.779, 103.939))
-
- with self.test_session() as sess:
- out_image_np = sess.run(out_image, feed_dict={in_image: in_image_np})
- self.assertAllClose(ex_image_np, out_image_np)
-
- def testResizeToRangeWithMasksPreservesStaticSpatialShape(self):
- """Tests image resizing, checking output sizes."""
- in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
- in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
- min_dim = 50
- max_dim = 100
- expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
- expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]]
-
- for (in_image_shape, expected_image_shape, in_masks_shape,
- expected_mask_shape) in zip(in_image_shape_list,
- expected_image_shape_list,
- in_masks_shape_list,
- expected_masks_shape_list):
- in_image = tf.random_uniform(in_image_shape)
- in_masks = tf.random_uniform(in_masks_shape)
- out_image, out_masks, _ = preprocessor.resize_to_range(
- in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
- self.assertAllEqual(out_masks.get_shape().as_list(), expected_mask_shape)
- self.assertAllEqual(out_image.get_shape().as_list(), expected_image_shape)
-
- def testResizeToRangeWithMasksAndDynamicSpatialShape(self):
- """Tests image resizing, checking output sizes."""
- in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
- in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
- min_dim = 50
- max_dim = 100
- expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
- expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]]
-
- for (in_image_shape, expected_image_shape, in_masks_shape,
- expected_mask_shape) in zip(in_image_shape_list,
- expected_image_shape_list,
- in_masks_shape_list,
- expected_masks_shape_list):
- in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
-      in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
- out_image, out_masks, _ = preprocessor.resize_to_range(
- in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
- out_image_shape = tf.shape(out_image)
- out_masks_shape = tf.shape(out_masks)
-
- with self.test_session() as sess:
- out_image_shape, out_masks_shape = sess.run(
- [out_image_shape, out_masks_shape],
- feed_dict={
- in_image: np.random.randn(*in_image_shape),
- in_masks: np.random.randn(*in_masks_shape)
- })
- self.assertAllEqual(out_image_shape, expected_image_shape)
- self.assertAllEqual(out_masks_shape, expected_mask_shape)
-
- def testResizeToRangeWithInstanceMasksTensorOfSizeZero(self):
- """Tests image resizing, checking output sizes."""
- in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
- in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
- min_dim = 50
- max_dim = 100
- expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
- expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]]
-
- for (in_image_shape, expected_image_shape, in_masks_shape,
- expected_mask_shape) in zip(in_image_shape_list,
- expected_image_shape_list,
- in_masks_shape_list,
- expected_masks_shape_list):
- in_image = tf.random_uniform(in_image_shape)
- in_masks = tf.random_uniform(in_masks_shape)
- out_image, out_masks, _ = preprocessor.resize_to_range(
- in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
- out_image_shape = tf.shape(out_image)
- out_masks_shape = tf.shape(out_masks)
-
- with self.test_session() as sess:
- out_image_shape, out_masks_shape = sess.run(
- [out_image_shape, out_masks_shape])
- self.assertAllEqual(out_image_shape, expected_image_shape)
- self.assertAllEqual(out_masks_shape, expected_mask_shape)
-
- def testResizeToRange4DImageTensor(self):
- image = tf.random_uniform([1, 200, 300, 3])
- with self.assertRaises(ValueError):
- preprocessor.resize_to_range(image, 500, 600)
-
- def testResizeToRangeSameMinMax(self):
- """Tests image resizing, checking output sizes."""
- in_shape_list = [[312, 312, 3], [299, 299, 3]]
- min_dim = 320
- max_dim = 320
- expected_shape_list = [[320, 320, 3], [320, 320, 3]]
-
- for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
- in_image = tf.random_uniform(in_shape)
- out_image, _ = preprocessor.resize_to_range(
- in_image, min_dimension=min_dim, max_dimension=max_dim)
- out_image_shape = tf.shape(out_image)
-
- with self.test_session() as sess:
- out_image_shape = sess.run(out_image_shape)
- self.assertAllEqual(out_image_shape, expected_shape)
-
- def testResizeToMinDimensionTensorShapes(self):
- in_image_shape_list = [[60, 55, 3], [15, 30, 3]]
- in_masks_shape_list = [[15, 60, 55], [10, 15, 30]]
- min_dim = 50
- expected_image_shape_list = [[60, 55, 3], [50, 100, 3]]
- expected_masks_shape_list = [[15, 60, 55], [10, 50, 100]]
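- # resize_to_min_dimension only upscales: [60, 55] already has its smaller
- # side >= 50 and is returned unchanged, while [15, 30] is scaled by 50/15.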
-
- for (in_image_shape, expected_image_shape, in_masks_shape,
- expected_mask_shape) in zip(in_image_shape_list,
- expected_image_shape_list,
- in_masks_shape_list,
- expected_masks_shape_list):
- in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
- in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
- out_image, out_masks, _ = preprocessor.resize_to_min_dimension(
- in_image, in_masks, min_dimension=min_dim)
- out_image_shape = tf.shape(out_image)
- out_masks_shape = tf.shape(out_masks)
-
- with self.test_session() as sess:
- out_image_shape, out_masks_shape = sess.run(
- [out_image_shape, out_masks_shape],
- feed_dict={
- in_image: np.random.randn(*in_image_shape),
- in_masks: np.random.randn(*in_masks_shape)
- })
- self.assertAllEqual(out_image_shape, expected_image_shape)
- self.assertAllEqual(out_masks_shape, expected_mask_shape)
-
- def testResizeToMinDimensionWithInstanceMasksTensorOfSizeZero(self):
- """Tests image resizing, checking output sizes."""
- in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
- in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
- min_dim = 50
- expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
- expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]]
-
- for (in_image_shape, expected_image_shape, in_masks_shape,
- expected_mask_shape) in zip(in_image_shape_list,
- expected_image_shape_list,
- in_masks_shape_list,
- expected_masks_shape_list):
- in_image = tf.random_uniform(in_image_shape)
- in_masks = tf.random_uniform(in_masks_shape)
- out_image, out_masks, _ = preprocessor.resize_to_min_dimension(
- in_image, in_masks, min_dimension=min_dim)
- out_image_shape = tf.shape(out_image)
- out_masks_shape = tf.shape(out_masks)
-
- with self.test_session() as sess:
- out_image_shape, out_masks_shape = sess.run(
- [out_image_shape, out_masks_shape])
- self.assertAllEqual(out_image_shape, expected_image_shape)
- self.assertAllEqual(out_masks_shape, expected_mask_shape)
-
- def testResizeToMinDimensionRaisesErrorOn4DImage(self):
- image = tf.random_uniform([1, 200, 300, 3])
- with self.assertRaises(ValueError):
- preprocessor.resize_to_min_dimension(image, 500)
-
- def testScaleBoxesToPixelCoordinates(self):
- """Tests box scaling, checking scaled values."""
- in_shape = [60, 40, 3]
- in_boxes = [[0.1, 0.2, 0.4, 0.6],
- [0.5, 0.3, 0.9, 0.7]]
-
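- # Boxes are normalized [ymin, xmin, ymax, xmax]; y values scale by the
- # image height (60) and x values by the width (40), e.g.
- # [0.1, 0.2, 0.4, 0.6] -> [6., 8., 24., 24.].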
- expected_boxes = [[6., 8., 24., 24.],
- [30., 12., 54., 28.]]
-
- in_image = tf.random_uniform(in_shape)
- in_boxes = tf.constant(in_boxes)
- _, out_boxes = preprocessor.scale_boxes_to_pixel_coordinates(
- in_image, boxes=in_boxes)
- with self.test_session() as sess:
- out_boxes = sess.run(out_boxes)
- self.assertAllClose(out_boxes, expected_boxes)
-
- def testScaleBoxesToPixelCoordinatesWithKeypoints(self):
- """Tests box and keypoint scaling, checking scaled values."""
- in_shape = [60, 40, 3]
- in_boxes = self.createTestBoxes()
- in_keypoints = self.createTestKeypoints()
-
- expected_boxes = [[0., 10., 45., 40.],
- [15., 20., 45., 40.]]
- expected_keypoints = [
- [[6., 4.], [12., 8.], [18., 12.]],
- [[24., 16.], [30., 20.], [36., 24.]],
- ]
-
- in_image = tf.random_uniform(in_shape)
- _, out_boxes, out_keypoints = preprocessor.scale_boxes_to_pixel_coordinates(
- in_image, boxes=in_boxes, keypoints=in_keypoints)
- with self.test_session() as sess:
- out_boxes_, out_keypoints_ = sess.run([out_boxes, out_keypoints])
- self.assertAllClose(out_boxes_, expected_boxes)
- self.assertAllClose(out_keypoints_, expected_keypoints)
-
- def testSubtractChannelMean(self):
- """Tests whether channel means have been subtracted."""
- with self.test_session():
- image = tf.zeros((240, 320, 3))
- means = [1, 2, 3]
- actual = preprocessor.subtract_channel_mean(image, means=means)
- actual = actual.eval()
-
- self.assertTrue((actual[:, :, 0] == -1).all())
- self.assertTrue((actual[:, :, 1] == -2).all())
- self.assertTrue((actual[:, :, 2] == -3).all())
-
- def testOneHotEncoding(self):
- """Tests one hot encoding of multiclass labels."""
- with self.test_session():
- labels = tf.constant([1, 4, 2], dtype=tf.int32)
- one_hot = preprocessor.one_hot_encoding(labels, num_classes=5)
- one_hot = one_hot.eval()
-
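- # one_hot_encoding folds the labels into a single multi-hot vector:
- # labels 1, 4 and 2 set indices 1, 2 and 4 of a length-5 vector.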
- self.assertAllEqual([0, 1, 1, 0, 1], one_hot)
-
- def testSSDRandomCropWithCache(self):
- preprocess_options = [
- (preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }),
- (preprocessor.ssd_random_crop, {})]
- self._testPreprocessorCache(preprocess_options,
- test_boxes=True,
- test_masks=False,
- test_keypoints=False)
-
- def testSSDRandomCrop(self):
- preprocessing_options = [
- (preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }),
- (preprocessor.ssd_random_crop, {})]
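- # ssd_random_crop changes the image size and may drop boxes, so the
- # checks below compare only tensor ranks, not shapes.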
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
- distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
-
- images_rank = tf.rank(images)
- distorted_images_rank = tf.rank(distorted_images)
- boxes_rank = tf.rank(boxes)
- distorted_boxes_rank = tf.rank(distorted_boxes)
-
- with self.test_session() as sess:
- (boxes_rank_, distorted_boxes_rank_, images_rank_,
- distorted_images_rank_) = sess.run(
- [boxes_rank, distorted_boxes_rank, images_rank,
- distorted_images_rank])
- self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
- self.assertAllEqual(images_rank_, distorted_images_rank_)
-
- def testSSDRandomCropWithMultiClassScores(self):
- preprocessing_options = [(preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }), (preprocessor.ssd_random_crop, {})]
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- multiclass_scores = self.createTestMultiClassScores()
-
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.multiclass_scores: multiclass_scores,
- }
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_multiclass_scores=True)
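- # With include_multiclass_scores, preprocess crops and filters the per-box
- # class scores in lockstep with the groundtruth boxes; the last assert
- # below checks that their first dimensions still agree.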
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_multiclass_scores = distorted_tensor_dict[
- fields.InputDataFields.multiclass_scores]
-
- images_rank = tf.rank(images)
- distorted_images_rank = tf.rank(distorted_images)
- boxes_rank = tf.rank(boxes)
- distorted_boxes_rank = tf.rank(distorted_boxes)
- multiclass_scores_rank = tf.rank(multiclass_scores)
- distorted_multiclass_scores_rank = tf.rank(distorted_multiclass_scores)
-
- with self.test_session() as sess:
- (boxes_rank_, distorted_boxes_, distorted_boxes_rank_, images_rank_,
- distorted_images_rank_, multiclass_scores_rank_,
- distorted_multiclass_scores_,
- distorted_multiclass_scores_rank_) = sess.run([
- boxes_rank, distorted_boxes, distorted_boxes_rank, images_rank,
- distorted_images_rank, multiclass_scores_rank,
- distorted_multiclass_scores, distorted_multiclass_scores_rank
- ])
- self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
- self.assertAllEqual(images_rank_, distorted_images_rank_)
- self.assertAllEqual(multiclass_scores_rank_,
- distorted_multiclass_scores_rank_)
- self.assertAllEqual(distorted_boxes_.shape[0],
- distorted_multiclass_scores_.shape[0])
-
- def testSSDRandomCropPad(self):
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- preprocessing_options = [
- (preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }),
- (preprocessor.ssd_random_crop_pad, {})]
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
- distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
-
- images_rank = tf.rank(images)
- distorted_images_rank = tf.rank(distorted_images)
- boxes_rank = tf.rank(boxes)
- distorted_boxes_rank = tf.rank(distorted_boxes)
-
- with self.test_session() as sess:
- (boxes_rank_, distorted_boxes_rank_, images_rank_,
- distorted_images_rank_) = sess.run([
- boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
- ])
- self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
- self.assertAllEqual(images_rank_, distorted_images_rank_)
-
- def testSSDRandomCropFixedAspectRatioWithCache(self):
- preprocess_options = [
- (preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }),
- (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
- self._testPreprocessorCache(preprocess_options,
- test_boxes=True,
- test_masks=False,
- test_keypoints=False)
-
- def _testSSDRandomCropFixedAspectRatio(self,
- include_label_scores,
- include_multiclass_scores,
- include_instance_masks,
- include_keypoints):
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- preprocessing_options = [(preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }), (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- if include_label_scores:
- label_scores = self.createTestLabelScores()
- tensor_dict[fields.InputDataFields.groundtruth_label_scores] = (
- label_scores)
- if include_multiclass_scores:
- multiclass_scores = self.createTestMultiClassScores()
- tensor_dict[fields.InputDataFields.multiclass_scores] = (
- multiclass_scores)
- if include_instance_masks:
- masks = self.createTestMasks()
- tensor_dict[fields.InputDataFields.groundtruth_instance_masks] = masks
- if include_keypoints:
- keypoints = self.createTestKeypoints()
- tensor_dict[fields.InputDataFields.groundtruth_keypoints] = keypoints
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_label_scores=include_label_scores,
- include_multiclass_scores=include_multiclass_scores,
- include_instance_masks=include_instance_masks,
- include_keypoints=include_keypoints)
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- images_rank = tf.rank(images)
- distorted_images_rank = tf.rank(distorted_images)
- boxes_rank = tf.rank(boxes)
- distorted_boxes_rank = tf.rank(distorted_boxes)
-
- with self.test_session() as sess:
- (boxes_rank_, distorted_boxes_rank_, images_rank_,
- distorted_images_rank_) = sess.run(
- [boxes_rank, distorted_boxes_rank, images_rank,
- distorted_images_rank])
- self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
- self.assertAllEqual(images_rank_, distorted_images_rank_)
-
- def testSSDRandomCropFixedAspectRatio(self):
- self._testSSDRandomCropFixedAspectRatio(include_label_scores=False,
- include_multiclass_scores=False,
- include_instance_masks=False,
- include_keypoints=False)
-
- def testSSDRandomCropFixedAspectRatioWithMultiClassScores(self):
- self._testSSDRandomCropFixedAspectRatio(include_label_scores=False,
- include_multiclass_scores=True,
- include_instance_masks=False,
- include_keypoints=False)
-
- def testSSDRandomCropFixedAspectRatioWithMasksAndKeypoints(self):
- self._testSSDRandomCropFixedAspectRatio(include_label_scores=False,
- include_multiclass_scores=False,
- include_instance_masks=True,
- include_keypoints=True)
-
- def testSSDRandomCropFixedAspectRatioWithLabelScoresMasksAndKeypoints(self):
- self._testSSDRandomCropFixedAspectRatio(include_label_scores=True,
- include_multiclass_scores=False,
- include_instance_masks=True,
- include_keypoints=True)
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/region_similarity_calculator.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/region_similarity_calculator.py
deleted file mode 100644
index f344006a3c56c95021dae47fcf5195a1b9743d85..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/region_similarity_calculator.py
+++ /dev/null
@@ -1,114 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Region Similarity Calculators for BoxLists.
-
-Region Similarity Calculators compute a pairwise measure of similarity
-between the boxes in two BoxLists.
-"""
-from abc import ABCMeta
-from abc import abstractmethod
-
-import tensorflow as tf
-
-from object_detection.core import box_list_ops
-
-
-class RegionSimilarityCalculator(object):
- """Abstract base class for region similarity calculator."""
- __metaclass__ = ABCMeta
-
- def compare(self, boxlist1, boxlist2, scope=None):
- """Computes matrix of pairwise similarity between BoxLists.
-
- This op (to be overridden) computes a measure of pairwise similarity between
- the boxes in the given BoxLists. Higher values indicate more similarity.
-
- Note that this method simply measures similarity and does not explicitly
- perform a matching.
-
- Args:
- boxlist1: BoxList holding N boxes.
- boxlist2: BoxList holding M boxes.
- scope: Op scope name. Defaults to 'Compare' if None.
-
- Returns:
- a (float32) tensor of shape [N, M] with pairwise similarity score.
- """
- with tf.name_scope(scope, 'Compare', [boxlist1, boxlist2]) as scope:
- return self._compare(boxlist1, boxlist2)
-
- @abstractmethod
- def _compare(self, boxlist1, boxlist2):
- pass
-
-
-class IouSimilarity(RegionSimilarityCalculator):
- """Class to compute similarity based on Intersection over Union (IOU) metric.
-
- This class computes pairwise similarity between two BoxLists based on IOU.
- """
-
- def _compare(self, boxlist1, boxlist2):
- """Compute pairwise IOU similarity between the two BoxLists.
-
- Args:
- boxlist1: BoxList holding N boxes.
- boxlist2: BoxList holding M boxes.
-
- Returns:
- A tensor with shape [N, M] representing pairwise iou scores.
- """
- return box_list_ops.iou(boxlist1, boxlist2)
-
-
-class NegSqDistSimilarity(RegionSimilarityCalculator):
- """Class to compute similarity based on the squared distance metric.
-
- This class computes pairwise similarity between two BoxLists based on the
- negative squared distance metric.
- """
-
- def _compare(self, boxlist1, boxlist2):
- """Compute matrix of (negated) sq distances.
-
- Args:
- boxlist1: BoxList holding N boxes.
- boxlist2: BoxList holding M boxes.
-
- Returns:
- A tensor with shape [N, M] representing negated pairwise squared distance.
- """
- return -1 * box_list_ops.sq_dist(boxlist1, boxlist2)
-
-
-class IoaSimilarity(RegionSimilarityCalculator):
- """Class to compute similarity based on Intersection over Area (IOA) metric.
-
- This class computes pairwise similarity between two BoxLists based on their
- pairwise intersections divided by the areas of the second BoxList's boxes.
- """
-
- def _compare(self, boxlist1, boxlist2):
- """Compute pairwise IOA similarity between the two BoxLists.
-
- Args:
- boxlist1: BoxList holding N boxes.
- boxlist2: BoxList holding M boxes.
-
- Returns:
- A tensor with shape [N, M] representing pairwise IOA scores.
- """
- return box_list_ops.ioa(boxlist1, boxlist2)
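-
-
-# Editorial usage sketch (not part of the original file): comparing two
-# BoxLists with IouSimilarity. The corner values are invented; boxes are
-# [ymin, xmin, ymax, xmax].
-#
-#   from object_detection.core import box_list
-#   boxes_a = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0]]))
-#   boxes_b = box_list.BoxList(tf.constant([[0.5, 0.5, 1.5, 1.5]]))
-#   iou = IouSimilarity().compare(boxes_a, boxes_b)  # float32, shape [1, 1]
-#   with tf.Session() as sess:
-#     print(sess.run(iou))  # intersection 0.25 / union 1.75 ~= 0.143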
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/region_similarity_calculator_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/region_similarity_calculator_test.py
deleted file mode 100644
index 162151a3b53468a7724133ca681efc0df5293563..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/region_similarity_calculator_test.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for region_similarity_calculator."""
-import tensorflow as tf
-
-from object_detection.core import box_list
-from object_detection.core import region_similarity_calculator
-
-
-class RegionSimilarityCalculatorTest(tf.test.TestCase):
-
- def test_get_correct_pairwise_similarity_based_on_iou(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]])
- exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]]
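- # e.g. boxes [4, 3, 7, 5] and [3, 4, 6, 8]: intersection area 2,
- # union 6 + 12 - 2 = 16, so IOU = 2/16.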
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- iou_similarity_calculator = region_similarity_calculator.IouSimilarity()
- iou_similarity = iou_similarity_calculator.compare(boxes1, boxes2)
- with self.test_session() as sess:
- iou_output = sess.run(iou_similarity)
- self.assertAllClose(iou_output, exp_output)
-
- def test_get_correct_pairwise_similarity_based_on_squared_distances(self):
- corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0],
- [1.0, 1.0, 0.0, 2.0]])
- corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0],
- [-4.0, 0.0, 0.0, 3.0],
- [0.0, 0.0, 0.0, 0.0]])
- exp_output = [[-26, -25, 0], [-18, -27, -6]]
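- # sq_dist treats each box as a 4-d corner vector, e.g.
- # ||[0, 0, 0, 0] - [3, 4, 1, 0]||^2 = 9 + 16 + 1 + 0 = 26, negated to -26.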
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- dist_similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
- dist_similarity = dist_similarity_calc.compare(boxes1, boxes2)
- with self.test_session() as sess:
- dist_output = sess.run(dist_similarity)
- self.assertAllClose(dist_output, exp_output)
-
- def test_get_correct_pairwise_similarity_based_on_ioa(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]])
- exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0],
- [1.0 / 12.0, 0.0, 5.0 / 400.0]]
- exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0],
- [0, 0],
- [6.0 / 6.0, 5.0 / 5.0]]
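- # IOA divides each intersection by the area of the second BoxList's box,
- # so it is not symmetric: exp_output_1 uses areas from boxes2 (e.g. 2/12),
- # exp_output_2 uses areas from boxes1 (e.g. 2/6).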
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- ioa_similarity_calculator = region_similarity_calculator.IoaSimilarity()
- ioa_similarity_1 = ioa_similarity_calculator.compare(boxes1, boxes2)
- ioa_similarity_2 = ioa_similarity_calculator.compare(boxes2, boxes1)
- with self.test_session() as sess:
- iou_output_1, iou_output_2 = sess.run(
- [ioa_similarity_1, ioa_similarity_2])
- self.assertAllClose(iou_output_1, exp_output_1)
- self.assertAllClose(iou_output_2, exp_output_2)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/standard_fields.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/standard_fields.py
deleted file mode 100644
index 11282da6deca075935d25e3558bfe1a25588fb20..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/standard_fields.py
+++ /dev/null
@@ -1,227 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Contains classes specifying naming conventions used for object detection.
-
-
-Specifies:
- InputDataFields: standard fields used by reader/preprocessor/batcher.
- DetectionResultFields: standard fields returned by object detector.
- BoxListFields: standard fields used by BoxLists.
- TfExampleFields: standard fields for tf-example data format (go/tf-example).
-"""
-
-
-class InputDataFields(object):
- """Names for the input tensors.
-
- Holds the standard data field names to use for identifying input tensors. This
- should be used by the decoder to identify keys for the returned tensor_dict
- containing input tensors, and by the model to identify the tensors it needs.
-
- Attributes:
- image: image.
- image_additional_channels: additional channels.
- original_image: image in the original input size.
- key: unique key corresponding to image.
- source_id: source of the original image.
- filename: original filename of the dataset (without common path).
- groundtruth_image_classes: image-level class labels.
- groundtruth_boxes: coordinates of the ground truth boxes in the image.
- groundtruth_classes: box-level class labels.
- groundtruth_label_types: box-level label types (e.g. explicit negative).
- groundtruth_is_crowd: [DEPRECATED, use groundtruth_group_of instead]
- is the groundtruth a single object or a crowd.
- groundtruth_area: area of a groundtruth segment.
- groundtruth_difficult: is a `difficult` object
- groundtruth_group_of: is a `group_of` objects, e.g. multiple objects of the
- same class, forming a connected group, where instances are heavily
- occluding each other.
- proposal_boxes: coordinates of object proposal boxes.
- proposal_objectness: objectness score of each proposal.
- groundtruth_instance_masks: ground truth instance masks.
- groundtruth_instance_boundaries: ground truth instance boundaries.
- groundtruth_instance_classes: instance mask-level class labels.
- groundtruth_keypoints: ground truth keypoints.
- groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
- groundtruth_label_scores: groundtruth label scores.
- groundtruth_weights: groundtruth weight factor for bounding boxes.
- num_groundtruth_boxes: number of groundtruth boxes.
- true_image_shape: true shape of the image within the resized (and possibly
- zero-padded) image.
- verified_labels: list of human-verified image-level labels (note that a
- label can be verified both as positive and negative).
- multiclass_scores: the label score per class for each box.
- """
- image = 'image'
- image_additional_channels = 'image_additional_channels'
- original_image = 'original_image'
- key = 'key'
- source_id = 'source_id'
- filename = 'filename'
- groundtruth_image_classes = 'groundtruth_image_classes'
- groundtruth_boxes = 'groundtruth_boxes'
- groundtruth_classes = 'groundtruth_classes'
- groundtruth_label_types = 'groundtruth_label_types'
- groundtruth_is_crowd = 'groundtruth_is_crowd'
- groundtruth_area = 'groundtruth_area'
- groundtruth_difficult = 'groundtruth_difficult'
- groundtruth_group_of = 'groundtruth_group_of'
- proposal_boxes = 'proposal_boxes'
- proposal_objectness = 'proposal_objectness'
- groundtruth_instance_masks = 'groundtruth_instance_masks'
- groundtruth_instance_boundaries = 'groundtruth_instance_boundaries'
- groundtruth_instance_classes = 'groundtruth_instance_classes'
- groundtruth_keypoints = 'groundtruth_keypoints'
- groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
- groundtruth_label_scores = 'groundtruth_label_scores'
- groundtruth_weights = 'groundtruth_weights'
- num_groundtruth_boxes = 'num_groundtruth_boxes'
- true_image_shape = 'true_image_shape'
- verified_labels = 'verified_labels'
- multiclass_scores = 'multiclass_scores'
-
-
-class DetectionResultFields(object):
- """Naming conventions for storing the output of the detector.
-
- Attributes:
- source_id: source of the original image.
- key: unique key corresponding to image.
- detection_boxes: coordinates of the detection boxes in the image.
- detection_scores: detection scores for the detection boxes in the image.
- detection_classes: detection-level class labels.
- detection_masks: contains a segmentation mask for each detection box.
- detection_boundaries: contains an object boundary for each detection box.
- detection_keypoints: contains detection keypoints for each detection box.
- num_detections: number of detections in the batch.
- """
-
- source_id = 'source_id'
- key = 'key'
- detection_boxes = 'detection_boxes'
- detection_scores = 'detection_scores'
- detection_classes = 'detection_classes'
- detection_masks = 'detection_masks'
- detection_boundaries = 'detection_boundaries'
- detection_keypoints = 'detection_keypoints'
- num_detections = 'num_detections'
-
-
-class BoxListFields(object):
- """Naming conventions for BoxLists.
-
- Attributes:
- boxes: bounding box coordinates.
- classes: classes per bounding box.
- scores: scores per bounding box.
- weights: sample weights per bounding box.
- objectness: objectness score per bounding box.
- masks: masks per bounding box.
- boundaries: boundaries per bounding box.
- keypoints: keypoints per bounding box.
- keypoint_heatmaps: keypoint heatmaps per bounding box.
- is_crowd: is_crowd annotation per bounding box.
- """
- boxes = 'boxes'
- classes = 'classes'
- scores = 'scores'
- weights = 'weights'
- objectness = 'objectness'
- masks = 'masks'
- boundaries = 'boundaries'
- keypoints = 'keypoints'
- keypoint_heatmaps = 'keypoint_heatmaps'
- is_crowd = 'is_crowd'
-
-
-class TfExampleFields(object):
- """TF-example proto feature names for object detection.
-
- Holds the standard feature names to load from an Example proto for object
- detection.
-
- Attributes:
- image_encoded: JPEG encoded string
- image_format: image format, e.g. "JPEG"
- filename: filename
- channels: number of channels of image
- colorspace: colorspace, e.g. "RGB"
- height: height of image in pixels, e.g. 462
- width: width of image in pixels, e.g. 581
- source_id: original source of the image
- image_class_text: image-level label in text format
- image_class_label: image-level label in numerical format
- object_class_text: labels in text format, e.g. ["person", "cat"]
- object_class_label: labels in numbers, e.g. [16, 8]
- object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
- object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40
- object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50
- object_bbox_ymax: ymax coordinates of groundtruth box, e.g. 80, 70
- object_view: viewpoint of object, e.g. ["frontal", "left"]
- object_truncated: is object truncated, e.g. [true, false]
- object_occluded: is object occluded, e.g. [true, false]
- object_difficult: is object difficult, e.g. [true, false]
- object_group_of: is object a single object or a group of objects
- object_depiction: is object a depiction
- object_is_crowd: [DEPRECATED, use object_group_of instead]
- is the object a single object or a crowd
- object_segment_area: the area of the segment.
- object_weight: a weight factor for the object's bounding box.
- instance_masks: instance segmentation masks.
- instance_boundaries: instance boundaries.
- instance_classes: Classes for each instance segmentation mask.
- detection_class_label: class label in numbers.
- detection_bbox_ymin: ymin coordinates of a detection box.
- detection_bbox_xmin: xmin coordinates of a detection box.
- detection_bbox_ymax: ymax coordinates of a detection box.
- detection_bbox_xmax: xmax coordinates of a detection box.
- detection_score: detection score for the class label and box.
- """
- image_encoded = 'image/encoded'
- image_format = 'image/format' # format is reserved keyword
- filename = 'image/filename'
- channels = 'image/channels'
- colorspace = 'image/colorspace'
- height = 'image/height'
- width = 'image/width'
- source_id = 'image/source_id'
- image_class_text = 'image/class/text'
- image_class_label = 'image/class/label'
- object_class_text = 'image/object/class/text'
- object_class_label = 'image/object/class/label'
- object_bbox_ymin = 'image/object/bbox/ymin'
- object_bbox_xmin = 'image/object/bbox/xmin'
- object_bbox_ymax = 'image/object/bbox/ymax'
- object_bbox_xmax = 'image/object/bbox/xmax'
- object_view = 'image/object/view'
- object_truncated = 'image/object/truncated'
- object_occluded = 'image/object/occluded'
- object_difficult = 'image/object/difficult'
- object_group_of = 'image/object/group_of'
- object_depiction = 'image/object/depiction'
- object_is_crowd = 'image/object/is_crowd'
- object_segment_area = 'image/object/segment/area'
- object_weight = 'image/object/weight'
- instance_masks = 'image/segmentation/object'
- instance_boundaries = 'image/boundaries/object'
- instance_classes = 'image/segmentation/object/class'
- detection_class_label = 'image/detection/label'
- detection_bbox_ymin = 'image/detection/bbox/ymin'
- detection_bbox_xmin = 'image/detection/bbox/xmin'
- detection_bbox_ymax = 'image/detection/bbox/ymax'
- detection_bbox_xmax = 'image/detection/bbox/xmax'
- detection_score = 'image/detection/score'
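-
-
-# Editorial note (not part of the original file): these classes are plain
-# namespaces of string constants, so typical usage is just dictionary
-# construction, e.g. a decoder building
-#   tensor_dict = {InputDataFields.image: image_tensor,
-#                  InputDataFields.groundtruth_boxes: boxes_tensor}
-# or a TF-Example writer keying features with
-#   TfExampleFields.image_encoded  # 'image/encoded'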
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/target_assigner.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/target_assigner.py
deleted file mode 100644
index 14e66def1fe0a873c96900288290491718d3d5ab..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/target_assigner.py
+++ /dev/null
@@ -1,458 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Base target assigner module.
-
-The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and
-groundtruth detections (bounding boxes), to assign classification and regression
-targets to each anchor as well as weights to each anchor (specifying, e.g.,
-which anchors should not contribute to training loss).
-
-It assigns classification/regression targets by performing the following steps:
-1) Computing pairwise similarity between anchors and groundtruth boxes using a
- provided RegionSimilarityCalculator
-2) Computing a matching based on the similarity matrix using a provided Matcher
-3) Assigning regression targets based on the matching and a provided BoxCoder
-4) Assigning classification targets based on the matching and groundtruth labels
-
-Note that TargetAssigners only operate on detections from a single
-image at a time, so any logic for applying a TargetAssigner to multiple
-images must be handled externally.
-"""
-import tensorflow as tf
-
-from object_detection.box_coders import faster_rcnn_box_coder
-from object_detection.box_coders import mean_stddev_box_coder
-from object_detection.core import box_coder as bcoder
-from object_detection.core import box_list
-from object_detection.core import matcher as mat
-from object_detection.core import region_similarity_calculator as sim_calc
-from object_detection.core import standard_fields as fields
-from object_detection.matchers import argmax_matcher
-from object_detection.matchers import bipartite_matcher
-from object_detection.utils import shape_utils
-
-
-class TargetAssigner(object):
- """Target assigner to compute classification and regression targets."""
-
- def __init__(self, similarity_calc, matcher, box_coder,
- negative_class_weight=1.0, unmatched_cls_target=None):
- """Construct Object Detection Target Assigner.
-
- Args:
- similarity_calc: a RegionSimilarityCalculator
- matcher: an object_detection.core.Matcher used to match groundtruth to
- anchors.
- box_coder: an object_detection.core.BoxCoder used to encode matching
- groundtruth boxes with respect to anchors.
- negative_class_weight: classification weight to be associated with negative
- anchors (default: 1.0). The weight must be in [0., 1.].
- unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
- which is consistent with the classification target for each
- anchor (and can be empty for scalar targets). This shape must thus be
- compatible with the groundtruth labels that are passed to the "assign"
- function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
- If set to None, unmatched_cls_target is set to be [0] for each anchor.
-
- Raises:
- ValueError: if similarity_calc is not a RegionSimilarityCalculator or
- if matcher is not a Matcher or if box_coder is not a BoxCoder
- """
- if not isinstance(similarity_calc, sim_calc.RegionSimilarityCalculator):
- raise ValueError('similarity_calc must be a RegionSimilarityCalculator')
- if not isinstance(matcher, mat.Matcher):
- raise ValueError('matcher must be a Matcher')
- if not isinstance(box_coder, bcoder.BoxCoder):
- raise ValueError('box_coder must be a BoxCoder')
- self._similarity_calc = similarity_calc
- self._matcher = matcher
- self._box_coder = box_coder
- self._negative_class_weight = negative_class_weight
- if unmatched_cls_target is None:
- self._unmatched_cls_target = tf.constant([0], tf.float32)
- else:
- self._unmatched_cls_target = unmatched_cls_target
-
- @property
- def box_coder(self):
- return self._box_coder
-
- def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None,
- groundtruth_weights=None, **params):
- """Assign classification and regression targets to each anchor.
-
- For a given set of anchors and groundtruth detections, match anchors
- to groundtruth_boxes and assign classification and regression targets to
- each anchor as well as weights based on the resulting match (specifying,
- e.g., which anchors should not contribute to training loss).
-
- Anchors that are not matched to anything are given a classification target
- of self._unmatched_cls_target which can be specified via the constructor.
-
- Args:
- anchors: a BoxList representing N anchors
- groundtruth_boxes: a BoxList representing M groundtruth boxes
- groundtruth_labels: a tensor of shape [M, d_1, ... d_k]
- with labels for each of the ground_truth boxes. The subshape
- [d_1, ... d_k] can be empty (corresponding to scalar inputs). When set
- to None, groundtruth_labels assumes a binary problem where all
- ground_truth boxes get a positive label (of 1).
- groundtruth_weights: a float tensor of shape [M] indicating the weight to
- assign to all anchors matched to a particular groundtruth box. The weights
- must be in [0., 1.]. If None, all weights are set to 1.
- **params: Additional keyword arguments for specific implementations of
- the Matcher.
-
- Returns:
- cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
- where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
- which has shape [num_gt_boxes, d_1, d_2, ... d_k].
- cls_weights: a float32 tensor with shape [num_anchors]
- reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
- reg_weights: a float32 tensor with shape [num_anchors]
- match: a matcher.Match object encoding the match between anchors and
- groundtruth boxes, with rows corresponding to groundtruth boxes
- and columns corresponding to anchors.
-
- Raises:
- ValueError: if anchors or groundtruth_boxes are not of type
- box_list.BoxList
- """
- if not isinstance(anchors, box_list.BoxList):
- raise ValueError('anchors must be a BoxList')
- if not isinstance(groundtruth_boxes, box_list.BoxList):
- raise ValueError('groundtruth_boxes must be a BoxList')
-
- if groundtruth_labels is None:
- groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(),
- 0))
- groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
- unmatched_shape_assert = shape_utils.assert_shape_equal(
- shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[1:],
- shape_utils.combined_static_and_dynamic_shape(
- self._unmatched_cls_target))
- labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
- shape_utils.combined_static_and_dynamic_shape(
- groundtruth_labels)[:1],
- shape_utils.combined_static_and_dynamic_shape(
- groundtruth_boxes.get())[:1])
-
- if groundtruth_weights is None:
- num_gt_boxes = groundtruth_boxes.num_boxes_static()
- if not num_gt_boxes:
- num_gt_boxes = groundtruth_boxes.num_boxes()
- groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)
- with tf.control_dependencies(
- [unmatched_shape_assert, labels_and_box_shapes_assert]):
- match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
- anchors)
- match = self._matcher.match(match_quality_matrix, **params)
- reg_targets = self._create_regression_targets(anchors,
- groundtruth_boxes,
- match)
- cls_targets = self._create_classification_targets(groundtruth_labels,
- match)
- reg_weights = self._create_regression_weights(match, groundtruth_weights)
- cls_weights = self._create_classification_weights(match,
- groundtruth_weights)
-
- num_anchors = anchors.num_boxes_static()
- if num_anchors is not None:
- reg_targets = self._reset_target_shape(reg_targets, num_anchors)
- cls_targets = self._reset_target_shape(cls_targets, num_anchors)
- reg_weights = self._reset_target_shape(reg_weights, num_anchors)
- cls_weights = self._reset_target_shape(cls_weights, num_anchors)
-
- return cls_targets, cls_weights, reg_targets, reg_weights, match
-
- def _reset_target_shape(self, target, num_anchors):
- """Sets the static shape of the target.
-
- Args:
- target: the target tensor. Its first dimension will be overwritten.
- num_anchors: the number of anchors, which is used to override the target's
- first dimension.
-
- Returns:
- A tensor with the shape info filled in.
- """
- target_shape = target.get_shape().as_list()
- target_shape[0] = num_anchors
- target.set_shape(target_shape)
- return target
-
- def _create_regression_targets(self, anchors, groundtruth_boxes, match):
- """Returns a regression target for each anchor.
-
- Args:
- anchors: a BoxList representing N anchors
- groundtruth_boxes: a BoxList representing M groundtruth_boxes
- match: a matcher.Match object
-
- Returns:
- reg_targets: a float32 tensor with shape [N, box_code_dimension]
- """
- matched_gt_boxes = match.gather_based_on_match(
- groundtruth_boxes.get(),
- unmatched_value=tf.zeros(4),
- ignored_value=tf.zeros(4))
- matched_gt_boxlist = box_list.BoxList(matched_gt_boxes)
- if groundtruth_boxes.has_field(fields.BoxListFields.keypoints):
- groundtruth_keypoints = groundtruth_boxes.get_field(
- fields.BoxListFields.keypoints)
- matched_keypoints = match.gather_based_on_match(
- groundtruth_keypoints,
- unmatched_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]),
- ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]))
- matched_gt_boxlist.add_field(fields.BoxListFields.keypoints,
- matched_keypoints)
- matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors)
- match_results_shape = shape_utils.combined_static_and_dynamic_shape(
- match.match_results)
-
- # Zero out the unmatched and ignored regression targets.
- unmatched_ignored_reg_targets = tf.tile(
- self._default_regression_target(), [match_results_shape[0], 1])
- matched_anchors_mask = match.matched_column_indicator()
- reg_targets = tf.where(matched_anchors_mask,
- matched_reg_targets,
- unmatched_ignored_reg_targets)
- return reg_targets
-
- def _default_regression_target(self):
- """Returns the default target for anchors to regress to.
-
- Default regression targets are set to zero (though in
- this implementation what these targets are set to should
- not matter as the regression weight of any box set to
- regress to the default target is zero).
-
- Returns:
- default_target: a float32 tensor with shape [1, box_code_dimension]
- """
- return tf.constant([self._box_coder.code_size*[0]], tf.float32)
-
- def _create_classification_targets(self, groundtruth_labels, match):
- """Create classification targets for each anchor.
-
- Assigns each anchor the classification target of the groundtruth label it
- is matched to, as provided by match. Anchors that are not matched to
- anything are given the target self._unmatched_cls_target.
-
- Args:
- groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k]
- with labels for each of the ground_truth boxes. The subshape
- [d_1, ... d_k] can be empty (corresponding to scalar labels).
- match: a matcher.Match object that provides a matching between anchors
- and groundtruth boxes.
-
- Returns:
- a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], where the
- subshape [d_1, ..., d_k] is compatible with groundtruth_labels which has
- shape [num_gt_boxes, d_1, d_2, ... d_k].
- """
- return match.gather_based_on_match(
- groundtruth_labels,
- unmatched_value=self._unmatched_cls_target,
- ignored_value=self._unmatched_cls_target)
-
- def _create_regression_weights(self, match, groundtruth_weights):
- """Set regression weight for each anchor.
-
- Only positive anchors are set to contribute to the regression loss, so this
- method returns a weight of 1 for every positive anchor and 0 for every
- negative anchor.
-
- Args:
- match: a matcher.Match object that provides a matching between anchors
- and groundtruth boxes.
- groundtruth_weights: a float tensor of shape [M] indicating the weight to
- assign to all anchors matched to a particular groundtruth box.
-
- Returns:
- a float32 tensor with shape [num_anchors] representing regression weights.
- """
- return match.gather_based_on_match(
- groundtruth_weights, ignored_value=0., unmatched_value=0.)
-
- def _create_classification_weights(self,
- match,
- groundtruth_weights):
- """Create classification weights for each anchor.
-
- Positive (matched) anchors take the weight of the groundtruth box they are
- matched to, negative (unmatched) anchors are associated with a weight of
- negative_class_weight, and ignored anchors get a weight of zero. By default
- negative_class_weight is 1.0, but it can be adjusted to handle class
- imbalance (which is almost always the case in object detection).
-
- Args:
- match: a matcher.Match object that provides a matching between anchors
- and groundtruth boxes.
- groundtruth_weights: a float tensor of shape [M] indicating the weight to
- assign to all anchors matched to a particular groundtruth box.
-
- Returns:
- a float32 tensor with shape [num_anchors] representing classification
- weights.
- """
- return match.gather_based_on_match(
- groundtruth_weights,
- ignored_value=0.,
- unmatched_value=self._negative_class_weight)
-
- def get_box_coder(self):
- """Get BoxCoder of this TargetAssigner.
-
- Returns:
- BoxCoder object.
- """
- return self._box_coder
-
-
-# TODO(rathodv): This method pulls in all the implementation dependencies into
-# core. Therefore it's best to have this factory method outside of core.
-def create_target_assigner(reference, stage=None,
- negative_class_weight=1.0,
- unmatched_cls_target=None):
- """Factory function for creating standard target assigners.
-
- Args:
- reference: string referencing the type of TargetAssigner.
- stage: string denoting stage: {proposal, detection}.
- negative_class_weight: classification weight to be associated with negative
- anchors (default: 1.0)
- unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
- which is consistent with the classification target for each
- anchor (and can be empty for scalar targets). This shape must thus be
- compatible with the groundtruth labels that are passed to the Assign
- function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
- If set to None, unmatched_cls_target is set to be 0 for each anchor.
-
- Returns:
- TargetAssigner: desired target assigner.
-
- Raises:
- ValueError: if combination reference+stage is invalid.
- """
- if reference == 'Multibox' and stage == 'proposal':
- similarity_calc = sim_calc.NegSqDistSimilarity()
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
-
- elif reference == 'FasterRCNN' and stage == 'proposal':
- similarity_calc = sim_calc.IouSimilarity()
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.7,
- unmatched_threshold=0.3,
- force_match_for_each_row=True)
- box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
- scale_factors=[10.0, 10.0, 5.0, 5.0])
-
- elif reference == 'FasterRCNN' and stage == 'detection':
- similarity_calc = sim_calc.IouSimilarity()
- # Uses all proposals with IOU < 0.5 as candidate negatives.
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
- negatives_lower_than_unmatched=True)
- box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
- scale_factors=[10.0, 10.0, 5.0, 5.0])
-
- elif reference == 'FastRCNN':
- similarity_calc = sim_calc.IouSimilarity()
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
- unmatched_threshold=0.1,
- force_match_for_each_row=False,
- negatives_lower_than_unmatched=False)
- box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
-
- else:
- raise ValueError('No valid combination of reference and stage.')
-
- return TargetAssigner(similarity_calc, matcher, box_coder,
- negative_class_weight=negative_class_weight,
- unmatched_cls_target=unmatched_cls_target)
-
-
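-# Editorial usage sketch (not part of the original file): the factory above,
-# applied to invented boxes. 'FasterRCNN' + 'proposal' selects IOU
-# similarity, an argmax matcher with 0.7/0.3 thresholds and the scaled
-# Faster R-CNN box coder.
-#
-#   assigner = create_target_assigner('FasterRCNN', stage='proposal')
-#   anchors = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0]]))
-#   gt = box_list.BoxList(tf.constant([[0.1, 0.1, 0.9, 0.9]]))
-#   cls_t, cls_w, reg_t, reg_w, match = assigner.assign(anchors, gt)
-
-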
-def batch_assign_targets(target_assigner,
- anchors_batch,
- gt_box_batch,
- gt_class_targets_batch,
- gt_weights_batch=None):
- """Batched assignment of classification and regression targets.
-
- Args:
- target_assigner: a target assigner.
- anchors_batch: BoxList representing N box anchors or list of BoxList objects
- with length batch_size representing anchor sets.
- gt_box_batch: a list of BoxList objects with length batch_size
- representing groundtruth boxes for each image in the batch
- gt_class_targets_batch: a list of tensors with length batch_size, where
- each tensor has shape [num_gt_boxes_i, classification_target_size] and
- num_gt_boxes_i is the number of boxes in the ith boxlist of
- gt_box_batch.
- gt_weights_batch: A list of 1-D tf.float32 tensors of shape
- [num_boxes] containing weights for groundtruth boxes.
-
- Returns:
- batch_cls_targets: a tensor with shape [batch_size, num_anchors,
- num_classes],
- batch_cls_weights: a tensor with shape [batch_size, num_anchors],
- batch_reg_targets: a tensor with shape [batch_size, num_anchors,
- box_code_dimension]
- batch_reg_weights: a tensor with shape [batch_size, num_anchors],
- match_list: a list of matcher.Match objects encoding the match between
- anchors and groundtruth boxes for each image of the batch,
- with rows of the Match objects corresponding to groundtruth boxes
- and columns corresponding to anchors.
- Raises:
- ValueError: if input list lengths are inconsistent, i.e. unless
- batch_size == len(gt_box_batch) == len(gt_class_targets_batch)
- and batch_size == len(anchors_batch) (or anchors_batch is a single
- BoxList).
- """
- if not isinstance(anchors_batch, list):
- anchors_batch = len(gt_box_batch) * [anchors_batch]
- if not all(
- isinstance(anchors, box_list.BoxList) for anchors in anchors_batch):
- raise ValueError('anchors_batch must be a BoxList or list of BoxLists.')
- if not (len(anchors_batch)
- == len(gt_box_batch)
- == len(gt_class_targets_batch)):
- raise ValueError('batch size incompatible with lengths of anchors_batch, '
- 'gt_box_batch and gt_class_targets_batch.')
- cls_targets_list = []
- cls_weights_list = []
- reg_targets_list = []
- reg_weights_list = []
- match_list = []
- if gt_weights_batch is None:
- gt_weights_batch = [None] * len(gt_class_targets_batch)
- for anchors, gt_boxes, gt_class_targets, gt_weights in zip(
- anchors_batch, gt_box_batch, gt_class_targets_batch, gt_weights_batch):
- (cls_targets, cls_weights, reg_targets,
- reg_weights, match) = target_assigner.assign(
- anchors, gt_boxes, gt_class_targets, gt_weights)
- cls_targets_list.append(cls_targets)
- cls_weights_list.append(cls_weights)
- reg_targets_list.append(reg_targets)
- reg_weights_list.append(reg_weights)
- match_list.append(match)
- batch_cls_targets = tf.stack(cls_targets_list)
- batch_cls_weights = tf.stack(cls_weights_list)
- batch_reg_targets = tf.stack(reg_targets_list)
- batch_reg_weights = tf.stack(reg_weights_list)
- return (batch_cls_targets, batch_cls_weights, batch_reg_targets,
- batch_reg_weights, match_list)
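-
-
-# Editorial usage sketch (not part of the original file): batched assignment
-# with one shared anchor set and a batch of one image; all values are
-# invented. The class targets are [background, class] rows, so the assigner
-# is given an unmatched_cls_target of the same shape.
-#
-#   assigner = create_target_assigner(
-#       'FasterRCNN', stage='detection',
-#       unmatched_cls_target=tf.constant([1., 0.], tf.float32))
-#   anchors = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0]]))
-#   gt_boxes = [box_list.BoxList(tf.constant([[0.1, 0.1, 0.9, 0.9]]))]
-#   gt_classes = [tf.constant([[0.0, 1.0]])]
-#   (cls_targets, cls_weights, reg_targets, reg_weights,
-#    match_list) = batch_assign_targets(assigner, anchors, gt_boxes,
-#                                       gt_classes)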
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/target_assigner_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/target_assigner_test.py
deleted file mode 100644
index 34a35b6435bcd364faf36ba4f130f1310f6d8b22..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/target_assigner_test.py
+++ /dev/null
@@ -1,827 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.target_assigner."""
-import numpy as np
-import tensorflow as tf
-
-from object_detection.box_coders import keypoint_box_coder
-from object_detection.box_coders import mean_stddev_box_coder
-from object_detection.core import box_list
-from object_detection.core import region_similarity_calculator
-from object_detection.core import standard_fields as fields
-from object_detection.core import target_assigner as targetassigner
-from object_detection.matchers import argmax_matcher
-from object_detection.matchers import bipartite_matcher
-from object_detection.utils import test_case
-
-
-class TargetAssignerTest(test_case.TestCase):
-
- def test_assign_agnostic(self):
- def graph_fn(anchor_means, groundtruth_box_corners):
- similarity_calc = region_similarity_calculator.IouSimilarity()
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
- unmatched_threshold=0.5)
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
- target_assigner = targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder, unmatched_cls_target=None)
- anchors_boxlist = box_list.BoxList(anchor_means)
- groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
- result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist)
- (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
- return (cls_targets, cls_weights, reg_targets, reg_weights)
-
- anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 1.0, 0.8],
- [0, 0.5, .5, 1.0]], dtype=np.float32)
- groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 0.9, 0.9]],
- dtype=np.float32)
- exp_cls_targets = [[1], [1], [0]]
- exp_cls_weights = [1, 1, 1]
- exp_reg_targets = [[0, 0, 0, 0],
- [0, 0, -1, 1],
- [0, 0, 0, 0]]
- exp_reg_weights = [1, 1, 0]
-
- (cls_targets_out,
- cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
- graph_fn, [anchor_means, groundtruth_box_corners])
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
- self.assertEqual(cls_targets_out.dtype, np.float32)
- self.assertEqual(cls_weights_out.dtype, np.float32)
- self.assertEqual(reg_targets_out.dtype, np.float32)
- self.assertEqual(reg_weights_out.dtype, np.float32)
-
- def test_assign_class_agnostic_with_ignored_matches(self):
- # Note: this test is very similar to the one above. The third box matches
- # with an IOU of 0.35, which lies between the matched and unmatched
- # thresholds, so, as above, the expected classification targets are
- # [1, 1, 0]. Unlike above, the third anchor is ignored, so the expected
- # classification weights are [1, 1, 0].
- def graph_fn(anchor_means, groundtruth_box_corners):
- similarity_calc = region_similarity_calculator.IouSimilarity()
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
- unmatched_threshold=0.3)
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
- target_assigner = targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder, unmatched_cls_target=None)
- anchors_boxlist = box_list.BoxList(anchor_means)
- groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
- result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist)
- (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
- return (cls_targets, cls_weights, reg_targets, reg_weights)
-
- anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 1.0, 0.8],
- [0.0, 0.5, .9, 1.0]], dtype=np.float32)
- groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 0.9, 0.9]], dtype=np.float32)
- exp_cls_targets = [[1], [1], [0]]
- exp_cls_weights = [1, 1, 0]
- exp_reg_targets = [[0, 0, 0, 0],
- [0, 0, -1, 1],
- [0, 0, 0, 0]]
- exp_reg_weights = [1, 1, 0]
- (cls_targets_out,
- cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
- graph_fn, [anchor_means, groundtruth_box_corners])
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
-    self.assertEqual(cls_targets_out.dtype, np.float32)
-    self.assertEqual(cls_weights_out.dtype, np.float32)
-    self.assertEqual(reg_targets_out.dtype, np.float32)
-    self.assertEqual(reg_weights_out.dtype, np.float32)
-
- def test_assign_agnostic_with_keypoints(self):
- def graph_fn(anchor_means, groundtruth_box_corners,
- groundtruth_keypoints):
- similarity_calc = region_similarity_calculator.IouSimilarity()
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
- unmatched_threshold=0.5)
- box_coder = keypoint_box_coder.KeypointBoxCoder(
- num_keypoints=6, scale_factors=[10.0, 10.0, 5.0, 5.0])
- target_assigner = targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder, unmatched_cls_target=None)
- anchors_boxlist = box_list.BoxList(anchor_means)
- groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
- groundtruth_boxlist.add_field(fields.BoxListFields.keypoints,
- groundtruth_keypoints)
- result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist)
- (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
- return (cls_targets, cls_weights, reg_targets, reg_weights)
-
- anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 1.0, 1.0],
- [0.0, 0.5, .9, 1.0]], dtype=np.float32)
- groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
- [0.45, 0.45, 0.95, 0.95]],
- dtype=np.float32)
- groundtruth_keypoints = np.array(
- [[[0.1, 0.2], [0.1, 0.3], [0.2, 0.2], [0.2, 0.2], [0.1, 0.1], [0.9, 0]],
- [[0, 0.3], [0.2, 0.4], [0.5, 0.6], [0, 0.6], [0.8, 0.2], [0.2, 0.4]]],
- dtype=np.float32)
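-    # KeypointBoxCoder targets are the 4 box offsets followed by 6 keypoints
-    # x 2 coordinates, each encoded as
-    # (keypoint - anchor center) / anchor size * scale_factor; e.g. the first
-    # keypoint of the first box gives (0.1 - 0.25) / 0.5 * 10 = -3.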
- exp_cls_targets = [[1], [1], [0]]
- exp_cls_weights = [1, 1, 1]
- exp_reg_targets = [[0, 0, 0, 0, -3, -1, -3, 1, -1, -1, -1, -1, -3, -3, 13,
- -5],
- [-1, -1, 0, 0, -15, -9, -11, -7, -5, -3, -15, -3, 1, -11,
- -11, -7],
- [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
- exp_reg_weights = [1, 1, 0]
- (cls_targets_out, cls_weights_out, reg_targets_out,
- reg_weights_out) = self.execute(graph_fn, [anchor_means,
- groundtruth_box_corners,
- groundtruth_keypoints])
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
-    self.assertEqual(cls_targets_out.dtype, np.float32)
-    self.assertEqual(cls_weights_out.dtype, np.float32)
-    self.assertEqual(reg_targets_out.dtype, np.float32)
-    self.assertEqual(reg_weights_out.dtype, np.float32)
-
- def test_assign_class_agnostic_with_keypoints_and_ignored_matches(self):
-    # Note: this test reuses the keypoint setup above but, despite its name,
-    # exercises no ignore band: the matched and unmatched thresholds are both
-    # 0.5, so the third anchor (IOU of roughly 0.41) is simply unmatched. The
-    # expected classification targets are therefore [1, 1, 0] and the
-    # expected classification weights are [1, 1, 1].
- def graph_fn(anchor_means, groundtruth_box_corners,
- groundtruth_keypoints):
- similarity_calc = region_similarity_calculator.IouSimilarity()
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
- unmatched_threshold=0.5)
- box_coder = keypoint_box_coder.KeypointBoxCoder(
- num_keypoints=6, scale_factors=[10.0, 10.0, 5.0, 5.0])
- target_assigner = targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder, unmatched_cls_target=None)
- anchors_boxlist = box_list.BoxList(anchor_means)
- groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
- groundtruth_boxlist.add_field(fields.BoxListFields.keypoints,
- groundtruth_keypoints)
- result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist)
- (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
- return (cls_targets, cls_weights, reg_targets, reg_weights)
-
- anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 1.0, 1.0],
- [0.0, 0.5, .9, 1.0]], dtype=np.float32)
- groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
- [0.45, 0.45, 0.95, 0.95]],
- dtype=np.float32)
- groundtruth_keypoints = np.array(
- [[[0.1, 0.2], [0.1, 0.3], [0.2, 0.2], [0.2, 0.2], [0.1, 0.1], [0.9, 0]],
- [[0, 0.3], [0.2, 0.4], [0.5, 0.6], [0, 0.6], [0.8, 0.2], [0.2, 0.4]]],
- dtype=np.float32)
- exp_cls_targets = [[1], [1], [0]]
- exp_cls_weights = [1, 1, 1]
- exp_reg_targets = [[0, 0, 0, 0, -3, -1, -3, 1, -1, -1, -1, -1, -3, -3, 13,
- -5],
- [-1, -1, 0, 0, -15, -9, -11, -7, -5, -3, -15, -3, 1, -11,
- -11, -7],
- [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
- exp_reg_weights = [1, 1, 0]
- (cls_targets_out, cls_weights_out, reg_targets_out,
- reg_weights_out) = self.execute(graph_fn, [anchor_means,
- groundtruth_box_corners,
- groundtruth_keypoints])
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
-    self.assertEqual(cls_targets_out.dtype, np.float32)
-    self.assertEqual(cls_weights_out.dtype, np.float32)
-    self.assertEqual(reg_targets_out.dtype, np.float32)
-    self.assertEqual(reg_weights_out.dtype, np.float32)
-
- def test_assign_multiclass(self):
-
- def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_labels):
- similarity_calc = region_similarity_calculator.IouSimilarity()
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
- unmatched_threshold=0.5)
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
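-      # Index 0 is the background class: anchors without a match receive this
-      # one-hot target with full classification weight.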
- unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
- target_assigner = targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder,
- unmatched_cls_target=unmatched_cls_target)
-
- anchors_boxlist = box_list.BoxList(anchor_means)
- groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
- result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist,
- groundtruth_labels)
- (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
- return (cls_targets, cls_weights, reg_targets, reg_weights)
-
- anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 1.0, 0.8],
- [0, 0.5, .5, 1.0],
- [.75, 0, 1.0, .25]], dtype=np.float32)
- groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 0.9, 0.9],
- [.75, 0, .95, .27]], dtype=np.float32)
- groundtruth_labels = np.array([[0, 1, 0, 0, 0, 0, 0],
- [0, 0, 0, 0, 0, 1, 0],
- [0, 0, 0, 1, 0, 0, 0]], dtype=np.float32)
-
- exp_cls_targets = [[0, 1, 0, 0, 0, 0, 0],
- [0, 0, 0, 0, 0, 1, 0],
- [1, 0, 0, 0, 0, 0, 0],
- [0, 0, 0, 1, 0, 0, 0]]
- exp_cls_weights = [1, 1, 1, 1]
- exp_reg_targets = [[0, 0, 0, 0],
- [0, 0, -1, 1],
- [0, 0, 0, 0],
- [0, 0, -.5, .2]]
- exp_reg_weights = [1, 1, 0, 1]
-
- (cls_targets_out,
- cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
- graph_fn, [anchor_means, groundtruth_box_corners, groundtruth_labels])
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
-    self.assertEqual(cls_targets_out.dtype, np.float32)
-    self.assertEqual(cls_weights_out.dtype, np.float32)
-    self.assertEqual(reg_targets_out.dtype, np.float32)
-    self.assertEqual(reg_weights_out.dtype, np.float32)
-
- def test_assign_multiclass_with_groundtruth_weights(self):
-
- def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_labels,
- groundtruth_weights):
- similarity_calc = region_similarity_calculator.IouSimilarity()
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
- unmatched_threshold=0.5)
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
- unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
- target_assigner = targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder,
- unmatched_cls_target=unmatched_cls_target)
-
- anchors_boxlist = box_list.BoxList(anchor_means)
- groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
- result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist,
- groundtruth_labels,
- groundtruth_weights)
- (_, cls_weights, _, reg_weights, _) = result
- return (cls_weights, reg_weights)
-
- anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 1.0, 0.8],
- [0, 0.5, .5, 1.0],
- [.75, 0, 1.0, .25]], dtype=np.float32)
- groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 0.9, 0.9],
- [.75, 0, .95, .27]], dtype=np.float32)
- groundtruth_labels = np.array([[0, 1, 0, 0, 0, 0, 0],
- [0, 0, 0, 0, 0, 1, 0],
- [0, 0, 0, 1, 0, 0, 0]], dtype=np.float32)
- groundtruth_weights = np.array([0.3, 0., 0.5], dtype=np.float32)
-
- exp_cls_weights = [0.3, 0., 1, 0.5] # background class gets weight of 1.
- exp_reg_weights = [0.3, 0., 0., 0.5] # background class gets weight of 0.
-
- (cls_weights_out, reg_weights_out) = self.execute(graph_fn, [
- anchor_means, groundtruth_box_corners, groundtruth_labels,
- groundtruth_weights
- ])
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
-
- def test_assign_multidimensional_class_targets(self):
-
- def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_labels):
- similarity_calc = region_similarity_calculator.IouSimilarity()
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
- unmatched_threshold=0.5)
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
-
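-      # Class targets here are 2x2 tensors instead of one-hot vectors; the
-      # unmatched target is the all-zeros tensor of the same shape.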
- unmatched_cls_target = tf.constant([[0, 0], [0, 0]], tf.float32)
- target_assigner = targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder,
- unmatched_cls_target=unmatched_cls_target)
-
- anchors_boxlist = box_list.BoxList(anchor_means)
- groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
- result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist,
- groundtruth_labels)
- (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
- return (cls_targets, cls_weights, reg_targets, reg_weights)
-
- anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 1.0, 0.8],
- [0, 0.5, .5, 1.0],
- [.75, 0, 1.0, .25]], dtype=np.float32)
- groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 0.9, 0.9],
- [.75, 0, .95, .27]], dtype=np.float32)
-
- groundtruth_labels = np.array([[[0, 1], [1, 0]],
- [[1, 0], [0, 1]],
- [[0, 1], [1, .5]]], np.float32)
-
- exp_cls_targets = [[[0, 1], [1, 0]],
- [[1, 0], [0, 1]],
- [[0, 0], [0, 0]],
- [[0, 1], [1, .5]]]
- exp_cls_weights = [1, 1, 1, 1]
- exp_reg_targets = [[0, 0, 0, 0],
- [0, 0, -1, 1],
- [0, 0, 0, 0],
- [0, 0, -.5, .2]]
- exp_reg_weights = [1, 1, 0, 1]
- (cls_targets_out,
- cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
- graph_fn, [anchor_means, groundtruth_box_corners, groundtruth_labels])
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
-    self.assertEqual(cls_targets_out.dtype, np.float32)
-    self.assertEqual(cls_weights_out.dtype, np.float32)
-    self.assertEqual(reg_targets_out.dtype, np.float32)
-    self.assertEqual(reg_weights_out.dtype, np.float32)
-
- def test_assign_empty_groundtruth(self):
-
- def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_labels):
- similarity_calc = region_similarity_calculator.IouSimilarity()
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
- unmatched_threshold=0.5)
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
- unmatched_cls_target = tf.constant([0, 0, 0], tf.float32)
- anchors_boxlist = box_list.BoxList(anchor_means)
- groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
- target_assigner = targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder,
- unmatched_cls_target=unmatched_cls_target)
- result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist,
- groundtruth_labels)
- (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
- return (cls_targets, cls_weights, reg_targets, reg_weights)
-
- groundtruth_box_corners = np.zeros((0, 4), dtype=np.float32)
- groundtruth_labels = np.zeros((0, 3), dtype=np.float32)
- anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 1.0, 0.8],
- [0, 0.5, .5, 1.0],
- [.75, 0, 1.0, .25]],
- dtype=np.float32)
- exp_cls_targets = [[0, 0, 0],
- [0, 0, 0],
- [0, 0, 0],
- [0, 0, 0]]
- exp_cls_weights = [1, 1, 1, 1]
- exp_reg_targets = [[0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0]]
- exp_reg_weights = [0, 0, 0, 0]
- (cls_targets_out,
- cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
- graph_fn, [anchor_means, groundtruth_box_corners, groundtruth_labels])
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
-    self.assertEqual(cls_targets_out.dtype, np.float32)
-    self.assertEqual(cls_weights_out.dtype, np.float32)
-    self.assertEqual(reg_targets_out.dtype, np.float32)
-    self.assertEqual(reg_weights_out.dtype, np.float32)
-
- def test_raises_error_on_incompatible_groundtruth_boxes_and_labels(self):
- similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
- unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
- target_assigner = targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder,
- unmatched_cls_target=unmatched_cls_target)
-
- prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 1.0, 0.8],
- [0, 0.5, .5, 1.0],
- [.75, 0, 1.0, .25]])
- priors = box_list.BoxList(prior_means)
-
- box_corners = [[0.0, 0.0, 0.5, 0.5],
- [0.0, 0.0, 0.5, 0.8],
- [0.5, 0.5, 0.9, 0.9],
- [.75, 0, .95, .27]]
- boxes = box_list.BoxList(tf.constant(box_corners))
-
- groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
- [0, 0, 0, 0, 0, 1, 0],
- [0, 0, 0, 1, 0, 0, 0]], tf.float32)
-    with self.assertRaisesRegex(ValueError, 'Unequal shapes'):
- target_assigner.assign(priors, boxes, groundtruth_labels,
- num_valid_rows=3)
-
- def test_raises_error_on_invalid_groundtruth_labels(self):
- similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=1.0)
- unmatched_cls_target = tf.constant([[0, 0], [0, 0], [0, 0]], tf.float32)
- target_assigner = targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder,
- unmatched_cls_target=unmatched_cls_target)
-
- prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5]])
- priors = box_list.BoxList(prior_means)
-
- box_corners = [[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 0.9, 0.9],
- [.75, 0, .95, .27]]
- boxes = box_list.BoxList(tf.constant(box_corners))
- groundtruth_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32)
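-    # The per-box label shape (2, 2) is incompatible with the (3, 2)
-    # unmatched_cls_target above, so assign() must raise a ValueError.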
-
- with self.assertRaises(ValueError):
- target_assigner.assign(priors, boxes, groundtruth_labels,
- num_valid_rows=3)
-
-
-class BatchTargetAssignerTest(test_case.TestCase):
-
- def _get_agnostic_target_assigner(self):
- similarity_calc = region_similarity_calculator.IouSimilarity()
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
- unmatched_threshold=0.5)
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
- return targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder,
- unmatched_cls_target=None)
-
- def _get_multi_class_target_assigner(self, num_classes):
- similarity_calc = region_similarity_calculator.IouSimilarity()
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
- unmatched_threshold=0.5)
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
- unmatched_cls_target = tf.constant([1] + num_classes * [0], tf.float32)
- return targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder,
- unmatched_cls_target=unmatched_cls_target)
-
- def _get_multi_dimensional_target_assigner(self, target_dimensions):
- similarity_calc = region_similarity_calculator.IouSimilarity()
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
- unmatched_threshold=0.5)
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
- unmatched_cls_target = tf.constant(np.zeros(target_dimensions),
- tf.float32)
- return targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder,
- unmatched_cls_target=unmatched_cls_target)
-
- def test_batch_assign_targets(self):
-
- def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2):
- box_list1 = box_list.BoxList(groundtruth_boxlist1)
- box_list2 = box_list.BoxList(groundtruth_boxlist2)
- gt_box_batch = [box_list1, box_list2]
- gt_class_targets = [None, None]
- anchors_boxlist = box_list.BoxList(anchor_means)
- agnostic_target_assigner = self._get_agnostic_target_assigner()
- (cls_targets, cls_weights, reg_targets, reg_weights,
- _) = targetassigner.batch_assign_targets(
- agnostic_target_assigner, anchors_boxlist, gt_box_batch,
- gt_class_targets)
- return (cls_targets, cls_weights, reg_targets, reg_weights)
-
- groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32)
- groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
- [0.015789, 0.0985, 0.55789, 0.3842]],
- dtype=np.float32)
- anchor_means = np.array([[0, 0, .25, .25],
- [0, .25, 1, 1],
- [0, .1, .5, .5],
- [.75, .75, 1, 1]], dtype=np.float32)
-
- exp_reg_targets = [[[0, 0, -0.5, -0.5],
- [0, 0, 0, 0],
- [0, 0, 0, 0,],
- [0, 0, 0, 0,],],
- [[0, 0, 0, 0,],
- [0, 0.01231521, 0, 0],
- [0.15789001, -0.01500003, 0.57889998, -1.15799987],
- [0, 0, 0, 0]]]
- exp_cls_weights = [[1, 1, 1, 1],
- [1, 1, 1, 1]]
- exp_cls_targets = [[[1], [0], [0], [0]],
- [[0], [1], [1], [0]]]
- exp_reg_weights = [[1, 0, 0, 0],
- [0, 1, 1, 0]]
-
- (cls_targets_out,
- cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
- graph_fn, [anchor_means, groundtruth_boxlist1, groundtruth_boxlist2])
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
-
- def test_batch_assign_multiclass_targets(self):
-
- def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2,
- class_targets1, class_targets2):
- box_list1 = box_list.BoxList(groundtruth_boxlist1)
- box_list2 = box_list.BoxList(groundtruth_boxlist2)
- gt_box_batch = [box_list1, box_list2]
- gt_class_targets = [class_targets1, class_targets2]
- anchors_boxlist = box_list.BoxList(anchor_means)
- multiclass_target_assigner = self._get_multi_class_target_assigner(
- num_classes=3)
- (cls_targets, cls_weights, reg_targets, reg_weights,
- _) = targetassigner.batch_assign_targets(
- multiclass_target_assigner, anchors_boxlist, gt_box_batch,
- gt_class_targets)
- return (cls_targets, cls_weights, reg_targets, reg_weights)
-
- groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32)
- groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
- [0.015789, 0.0985, 0.55789, 0.3842]],
- dtype=np.float32)
- class_targets1 = np.array([[0, 1, 0, 0]], dtype=np.float32)
- class_targets2 = np.array([[0, 0, 0, 1],
- [0, 0, 1, 0]], dtype=np.float32)
-
- anchor_means = np.array([[0, 0, .25, .25],
- [0, .25, 1, 1],
- [0, .1, .5, .5],
- [.75, .75, 1, 1]], dtype=np.float32)
-
- exp_reg_targets = [[[0, 0, -0.5, -0.5],
- [0, 0, 0, 0],
- [0, 0, 0, 0,],
- [0, 0, 0, 0,],],
- [[0, 0, 0, 0,],
- [0, 0.01231521, 0, 0],
- [0.15789001, -0.01500003, 0.57889998, -1.15799987],
- [0, 0, 0, 0]]]
- exp_cls_weights = [[1, 1, 1, 1],
- [1, 1, 1, 1]]
- exp_cls_targets = [[[0, 1, 0, 0],
- [1, 0, 0, 0],
- [1, 0, 0, 0],
- [1, 0, 0, 0]],
- [[1, 0, 0, 0],
- [0, 0, 0, 1],
- [0, 0, 1, 0],
- [1, 0, 0, 0]]]
- exp_reg_weights = [[1, 0, 0, 0],
- [0, 1, 1, 0]]
-
- (cls_targets_out, cls_weights_out, reg_targets_out,
- reg_weights_out) = self.execute(graph_fn, [
- anchor_means, groundtruth_boxlist1, groundtruth_boxlist2,
- class_targets1, class_targets2
- ])
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
-
- def test_batch_assign_multiclass_targets_with_padded_groundtruth(self):
-
- def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2,
- class_targets1, class_targets2, groundtruth_weights1,
- groundtruth_weights2):
- box_list1 = box_list.BoxList(groundtruth_boxlist1)
- box_list2 = box_list.BoxList(groundtruth_boxlist2)
- gt_box_batch = [box_list1, box_list2]
- gt_class_targets = [class_targets1, class_targets2]
- gt_weights = [groundtruth_weights1, groundtruth_weights2]
- anchors_boxlist = box_list.BoxList(anchor_means)
- multiclass_target_assigner = self._get_multi_class_target_assigner(
- num_classes=3)
- (cls_targets, cls_weights, reg_targets, reg_weights,
- _) = targetassigner.batch_assign_targets(
- multiclass_target_assigner, anchors_boxlist, gt_box_batch,
- gt_class_targets, gt_weights)
- return (cls_targets, cls_weights, reg_targets, reg_weights)
-
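-    # Groundtruth here is padded with all-zero boxes; a weight of 0 on each
-    # padding row keeps it from contributing targets, so the expected outputs
-    # match the unpadded test above.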
- groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2],
- [0., 0., 0., 0.]], dtype=np.float32)
- groundtruth_weights1 = np.array([1, 0], dtype=np.float32)
- groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
- [0.015789, 0.0985, 0.55789, 0.3842],
- [0, 0, 0, 0]],
- dtype=np.float32)
- groundtruth_weights2 = np.array([1, 1, 0], dtype=np.float32)
- class_targets1 = np.array([[0, 1, 0, 0], [0, 0, 0, 0]], dtype=np.float32)
- class_targets2 = np.array([[0, 0, 0, 1],
- [0, 0, 1, 0],
- [0, 0, 0, 0]], dtype=np.float32)
-
- anchor_means = np.array([[0, 0, .25, .25],
- [0, .25, 1, 1],
- [0, .1, .5, .5],
- [.75, .75, 1, 1]], dtype=np.float32)
-
- exp_reg_targets = [[[0, 0, -0.5, -0.5],
- [0, 0, 0, 0],
- [0, 0, 0, 0,],
- [0, 0, 0, 0,],],
- [[0, 0, 0, 0,],
- [0, 0.01231521, 0, 0],
- [0.15789001, -0.01500003, 0.57889998, -1.15799987],
- [0, 0, 0, 0]]]
- exp_cls_weights = [[1, 1, 1, 1],
- [1, 1, 1, 1]]
- exp_cls_targets = [[[0, 1, 0, 0],
- [1, 0, 0, 0],
- [1, 0, 0, 0],
- [1, 0, 0, 0]],
- [[1, 0, 0, 0],
- [0, 0, 0, 1],
- [0, 0, 1, 0],
- [1, 0, 0, 0]]]
- exp_reg_weights = [[1, 0, 0, 0],
- [0, 1, 1, 0]]
-
- (cls_targets_out, cls_weights_out, reg_targets_out,
- reg_weights_out) = self.execute(graph_fn, [
- anchor_means, groundtruth_boxlist1, groundtruth_boxlist2,
- class_targets1, class_targets2, groundtruth_weights1,
- groundtruth_weights2
- ])
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
-
- def test_batch_assign_multidimensional_targets(self):
-
- def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2,
- class_targets1, class_targets2):
- box_list1 = box_list.BoxList(groundtruth_boxlist1)
- box_list2 = box_list.BoxList(groundtruth_boxlist2)
- gt_box_batch = [box_list1, box_list2]
- gt_class_targets = [class_targets1, class_targets2]
- anchors_boxlist = box_list.BoxList(anchor_means)
- multiclass_target_assigner = self._get_multi_dimensional_target_assigner(
- target_dimensions=(2, 3))
- (cls_targets, cls_weights, reg_targets, reg_weights,
- _) = targetassigner.batch_assign_targets(
- multiclass_target_assigner, anchors_boxlist, gt_box_batch,
- gt_class_targets)
- return (cls_targets, cls_weights, reg_targets, reg_weights)
-
- groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32)
- groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
- [0.015789, 0.0985, 0.55789, 0.3842]],
- dtype=np.float32)
- class_targets1 = np.array([[[0, 1, 1],
- [1, 1, 0]]], dtype=np.float32)
- class_targets2 = np.array([[[0, 1, 1],
- [1, 1, 0]],
- [[0, 0, 1],
- [0, 0, 1]]], dtype=np.float32)
-
- anchor_means = np.array([[0, 0, .25, .25],
- [0, .25, 1, 1],
- [0, .1, .5, .5],
- [.75, .75, 1, 1]], dtype=np.float32)
-
- exp_reg_targets = [[[0, 0, -0.5, -0.5],
- [0, 0, 0, 0],
- [0, 0, 0, 0,],
- [0, 0, 0, 0,],],
- [[0, 0, 0, 0,],
- [0, 0.01231521, 0, 0],
- [0.15789001, -0.01500003, 0.57889998, -1.15799987],
- [0, 0, 0, 0]]]
- exp_cls_weights = [[1, 1, 1, 1],
- [1, 1, 1, 1]]
- exp_cls_targets = [[[[0., 1., 1.],
- [1., 1., 0.]],
- [[0., 0., 0.],
- [0., 0., 0.]],
- [[0., 0., 0.],
- [0., 0., 0.]],
- [[0., 0., 0.],
- [0., 0., 0.]]],
- [[[0., 0., 0.],
- [0., 0., 0.]],
- [[0., 1., 1.],
- [1., 1., 0.]],
- [[0., 0., 1.],
- [0., 0., 1.]],
- [[0., 0., 0.],
- [0., 0., 0.]]]]
- exp_reg_weights = [[1, 0, 0, 0],
- [0, 1, 1, 0]]
-
- (cls_targets_out, cls_weights_out, reg_targets_out,
- reg_weights_out) = self.execute(graph_fn, [
- anchor_means, groundtruth_boxlist1, groundtruth_boxlist2,
- class_targets1, class_targets2
- ])
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
-
- def test_batch_assign_empty_groundtruth(self):
-
- def graph_fn(anchor_means, groundtruth_box_corners, gt_class_targets):
- groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
- gt_box_batch = [groundtruth_boxlist]
- gt_class_targets_batch = [gt_class_targets]
- anchors_boxlist = box_list.BoxList(anchor_means)
-
- multiclass_target_assigner = self._get_multi_class_target_assigner(
- num_classes=3)
-
- (cls_targets, cls_weights, reg_targets, reg_weights,
- _) = targetassigner.batch_assign_targets(
- multiclass_target_assigner, anchors_boxlist,
- gt_box_batch, gt_class_targets_batch)
- return (cls_targets, cls_weights, reg_targets, reg_weights)
-
- groundtruth_box_corners = np.zeros((0, 4), dtype=np.float32)
- anchor_means = np.array([[0, 0, .25, .25],
- [0, .25, 1, 1]], dtype=np.float32)
- exp_reg_targets = [[[0, 0, 0, 0],
- [0, 0, 0, 0]]]
- exp_cls_weights = [[1, 1]]
- exp_cls_targets = [[[1, 0, 0, 0],
- [1, 0, 0, 0]]]
- exp_reg_weights = [[0, 0]]
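-    # gt_class_targets has num_classes + 1 columns because column 0 is the
-    # padded background class.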
- num_classes = 3
- pad = 1
- gt_class_targets = np.zeros((0, num_classes + pad), dtype=np.float32)
-
- (cls_targets_out,
- cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
- graph_fn, [anchor_means, groundtruth_box_corners, gt_class_targets])
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
-
-
-class CreateTargetAssignerTest(tf.test.TestCase):
-
- def test_create_target_assigner(self):
- """Tests that named constructor gives working target assigners.
-
- TODO(rathodv): Make this test more general.
- """
- corners = [[0.0, 0.0, 1.0, 1.0]]
- groundtruth = box_list.BoxList(tf.constant(corners))
-
- priors = box_list.BoxList(tf.constant(corners))
- multibox_ta = (targetassigner
- .create_target_assigner('Multibox', stage='proposal'))
- multibox_ta.assign(priors, groundtruth)
- # No tests on output, as that may vary arbitrarily as new target assigners
- # are added. As long as it is constructed correctly and runs without errors,
- # tests on the individual assigners cover correctness of the assignments.
-
- anchors = box_list.BoxList(tf.constant(corners))
- faster_rcnn_proposals_ta = (targetassigner
- .create_target_assigner('FasterRCNN',
- stage='proposal'))
- faster_rcnn_proposals_ta.assign(anchors, groundtruth)
-
- fast_rcnn_ta = (targetassigner
- .create_target_assigner('FastRCNN'))
- fast_rcnn_ta.assign(anchors, groundtruth)
-
- faster_rcnn_detection_ta = (targetassigner
- .create_target_assigner('FasterRCNN',
- stage='detection'))
- faster_rcnn_detection_ta.assign(anchors, groundtruth)
-
- with self.assertRaises(ValueError):
- targetassigner.create_target_assigner('InvalidDetector',
- stage='invalid_stage')
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/ava_label_map_v2.1.pbtxt b/research/mlperf_object_detection/Mask_RCNN/object_detection/data/ava_label_map_v2.1.pbtxt
deleted file mode 100644
index 5e2c485682830919a09300ac851e6b0e4bdf3efb..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/ava_label_map_v2.1.pbtxt
+++ /dev/null
@@ -1,240 +0,0 @@
-item {
- name: "bend/bow (at the waist)"
- id: 1
-}
-item {
- name: "crouch/kneel"
- id: 3
-}
-item {
- name: "dance"
- id: 4
-}
-item {
- name: "fall down"
- id: 5
-}
-item {
- name: "get up"
- id: 6
-}
-item {
- name: "jump/leap"
- id: 7
-}
-item {
- name: "lie/sleep"
- id: 8
-}
-item {
- name: "martial art"
- id: 9
-}
-item {
- name: "run/jog"
- id: 10
-}
-item {
- name: "sit"
- id: 11
-}
-item {
- name: "stand"
- id: 12
-}
-item {
- name: "swim"
- id: 13
-}
-item {
- name: "walk"
- id: 14
-}
-item {
- name: "answer phone"
- id: 15
-}
-item {
- name: "carry/hold (an object)"
- id: 17
-}
-item {
- name: "climb (e.g., a mountain)"
- id: 20
-}
-item {
- name: "close (e.g., a door, a box)"
- id: 22
-}
-item {
- name: "cut"
- id: 24
-}
-item {
- name: "dress/put on clothing"
- id: 26
-}
-item {
- name: "drink"
- id: 27
-}
-item {
- name: "drive (e.g., a car, a truck)"
- id: 28
-}
-item {
- name: "eat"
- id: 29
-}
-item {
- name: "enter"
- id: 30
-}
-item {
- name: "hit (an object)"
- id: 34
-}
-item {
- name: "lift/pick up"
- id: 36
-}
-item {
- name: "listen (e.g., to music)"
- id: 37
-}
-item {
- name: "open (e.g., a window, a car door)"
- id: 38
-}
-item {
- name: "play musical instrument"
- id: 41
-}
-item {
- name: "point to (an object)"
- id: 43
-}
-item {
- name: "pull (an object)"
- id: 45
-}
-item {
- name: "push (an object)"
- id: 46
-}
-item {
- name: "put down"
- id: 47
-}
-item {
- name: "read"
- id: 48
-}
-item {
- name: "ride (e.g., a bike, a car, a horse)"
- id: 49
-}
-item {
- name: "sail boat"
- id: 51
-}
-item {
- name: "shoot"
- id: 52
-}
-item {
- name: "smoke"
- id: 54
-}
-item {
- name: "take a photo"
- id: 56
-}
-item {
- name: "text on/look at a cellphone"
- id: 57
-}
-item {
- name: "throw"
- id: 58
-}
-item {
- name: "touch (an object)"
- id: 59
-}
-item {
- name: "turn (e.g., a screwdriver)"
- id: 60
-}
-item {
- name: "watch (e.g., TV)"
- id: 61
-}
-item {
- name: "work on a computer"
- id: 62
-}
-item {
- name: "write"
- id: 63
-}
-item {
- name: "fight/hit (a person)"
- id: 64
-}
-item {
- name: "give/serve (an object) to (a person)"
- id: 65
-}
-item {
- name: "grab (a person)"
- id: 66
-}
-item {
- name: "hand clap"
- id: 67
-}
-item {
- name: "hand shake"
- id: 68
-}
-item {
- name: "hand wave"
- id: 69
-}
-item {
- name: "hug (a person)"
- id: 70
-}
-item {
- name: "kiss (a person)"
- id: 72
-}
-item {
- name: "lift (a person)"
- id: 73
-}
-item {
- name: "listen to (a person)"
- id: 74
-}
-item {
- name: "push (another person)"
- id: 76
-}
-item {
- name: "sing to (e.g., self, a person, a group)"
- id: 77
-}
-item {
- name: "take (an object) from (a person)"
- id: 78
-}
-item {
- name: "talk to (e.g., self, a person, a group)"
- id: 79
-}
-item {
- name: "watch (a person)"
- id: 80
-}
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/kitti_label_map.pbtxt b/research/mlperf_object_detection/Mask_RCNN/object_detection/data/kitti_label_map.pbtxt
deleted file mode 100644
index 0afcc6936ebdb37ecbc7c3245929fcf178a02c0b..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/kitti_label_map.pbtxt
+++ /dev/null
@@ -1,9 +0,0 @@
-item {
- id: 1
- name: 'car'
-}
-
-item {
- id: 2
- name: 'pedestrian'
-}
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/mscoco_label_map.pbtxt b/research/mlperf_object_detection/Mask_RCNN/object_detection/data/mscoco_label_map.pbtxt
deleted file mode 100644
index 1f4872bd0c7f53e70beecf88af005c07a5df9e08..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/mscoco_label_map.pbtxt
+++ /dev/null
@@ -1,400 +0,0 @@
-item {
- name: "/m/01g317"
- id: 1
- display_name: "person"
-}
-item {
- name: "/m/0199g"
- id: 2
- display_name: "bicycle"
-}
-item {
- name: "/m/0k4j"
- id: 3
- display_name: "car"
-}
-item {
- name: "/m/04_sv"
- id: 4
- display_name: "motorcycle"
-}
-item {
- name: "/m/05czz6l"
- id: 5
- display_name: "airplane"
-}
-item {
- name: "/m/01bjv"
- id: 6
- display_name: "bus"
-}
-item {
- name: "/m/07jdr"
- id: 7
- display_name: "train"
-}
-item {
- name: "/m/07r04"
- id: 8
- display_name: "truck"
-}
-item {
- name: "/m/019jd"
- id: 9
- display_name: "boat"
-}
-item {
- name: "/m/015qff"
- id: 10
- display_name: "traffic light"
-}
-item {
- name: "/m/01pns0"
- id: 11
- display_name: "fire hydrant"
-}
-item {
- name: "/m/02pv19"
- id: 13
- display_name: "stop sign"
-}
-item {
- name: "/m/015qbp"
- id: 14
- display_name: "parking meter"
-}
-item {
- name: "/m/0cvnqh"
- id: 15
- display_name: "bench"
-}
-item {
- name: "/m/015p6"
- id: 16
- display_name: "bird"
-}
-item {
- name: "/m/01yrx"
- id: 17
- display_name: "cat"
-}
-item {
- name: "/m/0bt9lr"
- id: 18
- display_name: "dog"
-}
-item {
- name: "/m/03k3r"
- id: 19
- display_name: "horse"
-}
-item {
- name: "/m/07bgp"
- id: 20
- display_name: "sheep"
-}
-item {
- name: "/m/01xq0k1"
- id: 21
- display_name: "cow"
-}
-item {
- name: "/m/0bwd_0j"
- id: 22
- display_name: "elephant"
-}
-item {
- name: "/m/01dws"
- id: 23
- display_name: "bear"
-}
-item {
- name: "/m/0898b"
- id: 24
- display_name: "zebra"
-}
-item {
- name: "/m/03bk1"
- id: 25
- display_name: "giraffe"
-}
-item {
- name: "/m/01940j"
- id: 27
- display_name: "backpack"
-}
-item {
- name: "/m/0hnnb"
- id: 28
- display_name: "umbrella"
-}
-item {
- name: "/m/080hkjn"
- id: 31
- display_name: "handbag"
-}
-item {
- name: "/m/01rkbr"
- id: 32
- display_name: "tie"
-}
-item {
- name: "/m/01s55n"
- id: 33
- display_name: "suitcase"
-}
-item {
- name: "/m/02wmf"
- id: 34
- display_name: "frisbee"
-}
-item {
- name: "/m/071p9"
- id: 35
- display_name: "skis"
-}
-item {
- name: "/m/06__v"
- id: 36
- display_name: "snowboard"
-}
-item {
- name: "/m/018xm"
- id: 37
- display_name: "sports ball"
-}
-item {
- name: "/m/02zt3"
- id: 38
- display_name: "kite"
-}
-item {
- name: "/m/03g8mr"
- id: 39
- display_name: "baseball bat"
-}
-item {
- name: "/m/03grzl"
- id: 40
- display_name: "baseball glove"
-}
-item {
- name: "/m/06_fw"
- id: 41
- display_name: "skateboard"
-}
-item {
- name: "/m/019w40"
- id: 42
- display_name: "surfboard"
-}
-item {
- name: "/m/0dv9c"
- id: 43
- display_name: "tennis racket"
-}
-item {
- name: "/m/04dr76w"
- id: 44
- display_name: "bottle"
-}
-item {
- name: "/m/09tvcd"
- id: 46
- display_name: "wine glass"
-}
-item {
- name: "/m/08gqpm"
- id: 47
- display_name: "cup"
-}
-item {
- name: "/m/0dt3t"
- id: 48
- display_name: "fork"
-}
-item {
- name: "/m/04ctx"
- id: 49
- display_name: "knife"
-}
-item {
- name: "/m/0cmx8"
- id: 50
- display_name: "spoon"
-}
-item {
- name: "/m/04kkgm"
- id: 51
- display_name: "bowl"
-}
-item {
- name: "/m/09qck"
- id: 52
- display_name: "banana"
-}
-item {
- name: "/m/014j1m"
- id: 53
- display_name: "apple"
-}
-item {
- name: "/m/0l515"
- id: 54
- display_name: "sandwich"
-}
-item {
- name: "/m/0cyhj_"
- id: 55
- display_name: "orange"
-}
-item {
- name: "/m/0hkxq"
- id: 56
- display_name: "broccoli"
-}
-item {
- name: "/m/0fj52s"
- id: 57
- display_name: "carrot"
-}
-item {
- name: "/m/01b9xk"
- id: 58
- display_name: "hot dog"
-}
-item {
- name: "/m/0663v"
- id: 59
- display_name: "pizza"
-}
-item {
- name: "/m/0jy4k"
- id: 60
- display_name: "donut"
-}
-item {
- name: "/m/0fszt"
- id: 61
- display_name: "cake"
-}
-item {
- name: "/m/01mzpv"
- id: 62
- display_name: "chair"
-}
-item {
- name: "/m/02crq1"
- id: 63
- display_name: "couch"
-}
-item {
- name: "/m/03fp41"
- id: 64
- display_name: "potted plant"
-}
-item {
- name: "/m/03ssj5"
- id: 65
- display_name: "bed"
-}
-item {
- name: "/m/04bcr3"
- id: 67
- display_name: "dining table"
-}
-item {
- name: "/m/09g1w"
- id: 70
- display_name: "toilet"
-}
-item {
- name: "/m/07c52"
- id: 72
- display_name: "tv"
-}
-item {
- name: "/m/01c648"
- id: 73
- display_name: "laptop"
-}
-item {
- name: "/m/020lf"
- id: 74
- display_name: "mouse"
-}
-item {
- name: "/m/0qjjc"
- id: 75
- display_name: "remote"
-}
-item {
- name: "/m/01m2v"
- id: 76
- display_name: "keyboard"
-}
-item {
- name: "/m/050k8"
- id: 77
- display_name: "cell phone"
-}
-item {
- name: "/m/0fx9l"
- id: 78
- display_name: "microwave"
-}
-item {
- name: "/m/029bxz"
- id: 79
- display_name: "oven"
-}
-item {
- name: "/m/01k6s3"
- id: 80
- display_name: "toaster"
-}
-item {
- name: "/m/0130jx"
- id: 81
- display_name: "sink"
-}
-item {
- name: "/m/040b_t"
- id: 82
- display_name: "refrigerator"
-}
-item {
- name: "/m/0bt_c3"
- id: 84
- display_name: "book"
-}
-item {
- name: "/m/01x3z"
- id: 85
- display_name: "clock"
-}
-item {
- name: "/m/02s195"
- id: 86
- display_name: "vase"
-}
-item {
- name: "/m/01lsmm"
- id: 87
- display_name: "scissors"
-}
-item {
- name: "/m/0kmg4"
- id: 88
- display_name: "teddy bear"
-}
-item {
- name: "/m/03wvsk"
- id: 89
- display_name: "hair drier"
-}
-item {
- name: "/m/012xff"
- id: 90
- display_name: "toothbrush"
-}
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/oid_bbox_trainable_label_map.pbtxt b/research/mlperf_object_detection/Mask_RCNN/object_detection/data/oid_bbox_trainable_label_map.pbtxt
deleted file mode 100644
index 863e4f31d719cd148fd56c981e219257334f9c7e..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/oid_bbox_trainable_label_map.pbtxt
+++ /dev/null
@@ -1,2725 +0,0 @@
-item {
- name: "/m/01g317"
- id: 1
- display_name: "Person"
-}
-item {
- name: "/m/09j2d"
- id: 2
- display_name: "Clothing"
-}
-item {
- name: "/m/04yx4"
- id: 3
- display_name: "Man"
-}
-item {
- name: "/m/0dzct"
- id: 4
- display_name: "Face"
-}
-item {
- name: "/m/07j7r"
- id: 5
- display_name: "Tree"
-}
-item {
- name: "/m/05s2s"
- id: 6
- display_name: "Plant"
-}
-item {
- name: "/m/03bt1vf"
- id: 7
- display_name: "Woman"
-}
-item {
- name: "/m/07yv9"
- id: 8
- display_name: "Vehicle"
-}
-item {
- name: "/m/0cgh4"
- id: 9
- display_name: "Building"
-}
-item {
- name: "/m/01prls"
- id: 10
- display_name: "Land vehicle"
-}
-item {
- name: "/m/09j5n"
- id: 11
- display_name: "Footwear"
-}
-item {
- name: "/m/05r655"
- id: 12
- display_name: "Girl"
-}
-item {
- name: "/m/0jbk"
- id: 13
- display_name: "Animal"
-}
-item {
- name: "/m/0k4j"
- id: 14
- display_name: "Car"
-}
-item {
- name: "/m/02wbm"
- id: 15
- display_name: "Food"
-}
-item {
- name: "/m/083wq"
- id: 16
- display_name: "Wheel"
-}
-item {
- name: "/m/0c9ph5"
- id: 17
- display_name: "Flower"
-}
-item {
- name: "/m/0c_jw"
- id: 18
- display_name: "Furniture"
-}
-item {
- name: "/m/0d4v4"
- id: 19
- display_name: "Window"
-}
-item {
- name: "/m/03jm5"
- id: 20
- display_name: "House"
-}
-item {
- name: "/m/01bl7v"
- id: 21
- display_name: "Boy"
-}
-item {
- name: "/m/0463sg"
- id: 22
- display_name: "Fashion accessory"
-}
-item {
- name: "/m/04bcr3"
- id: 23
- display_name: "Table"
-}
-item {
- name: "/m/0jyfg"
- id: 24
- display_name: "Glasses"
-}
-item {
- name: "/m/01xyhv"
- id: 25
- display_name: "Suit"
-}
-item {
- name: "/m/08dz3q"
- id: 26
- display_name: "Auto part"
-}
-item {
- name: "/m/015p6"
- id: 27
- display_name: "Bird"
-}
-item {
- name: "/m/05y5lj"
- id: 28
- display_name: "Sports equipment"
-}
-item {
- name: "/m/01d40f"
- id: 29
- display_name: "Dress"
-}
-item {
- name: "/m/0bt9lr"
- id: 30
- display_name: "Dog"
-}
-item {
- name: "/m/01lrl"
- id: 31
- display_name: "Carnivore"
-}
-item {
- name: "/m/02p0tk3"
- id: 32
- display_name: "Human body"
-}
-item {
- name: "/m/0fly7"
- id: 33
- display_name: "Jeans"
-}
-item {
- name: "/m/04szw"
- id: 34
- display_name: "Musical instrument"
-}
-item {
- name: "/m/0271t"
- id: 35
- display_name: "Drink"
-}
-item {
- name: "/m/019jd"
- id: 36
- display_name: "Boat"
-}
-item {
- name: "/m/03q69"
- id: 37
- display_name: "Hair"
-}
-item {
- name: "/m/0h9mv"
- id: 38
- display_name: "Tire"
-}
-item {
- name: "/m/04hgtk"
- id: 39
- display_name: "Head"
-}
-item {
- name: "/m/01yrx"
- id: 40
- display_name: "Cat"
-}
-item {
- name: "/m/01rzcn"
- id: 41
- display_name: "Watercraft"
-}
-item {
- name: "/m/01mzpv"
- id: 42
- display_name: "Chair"
-}
-item {
- name: "/m/0199g"
- id: 43
- display_name: "Bike"
-}
-item {
- name: "/m/01fdzj"
- id: 44
- display_name: "Tower"
-}
-item {
- name: "/m/04rky"
- id: 45
- display_name: "Mammal"
-}
-item {
- name: "/m/079cl"
- id: 46
- display_name: "Skyscraper"
-}
-item {
- name: "/m/0dzf4"
- id: 47
- display_name: "Arm"
-}
-item {
- name: "/m/0138tl"
- id: 48
- display_name: "Toy"
-}
-item {
- name: "/m/06msq"
- id: 49
- display_name: "Sculpture"
-}
-item {
- name: "/m/03xxp"
- id: 50
- display_name: "Invertebrate"
-}
-item {
- name: "/m/0hg7b"
- id: 51
- display_name: "Microphone"
-}
-item {
- name: "/m/01n5jq"
- id: 52
- display_name: "Poster"
-}
-item {
- name: "/m/03vt0"
- id: 53
- display_name: "Insect"
-}
-item {
- name: "/m/0342h"
- id: 54
- display_name: "Guitar"
-}
-item {
- name: "/m/0k0pj"
- id: 55
- display_name: "Nose"
-}
-item {
- name: "/m/02dl1y"
- id: 56
- display_name: "Hat"
-}
-item {
- name: "/m/04brg2"
- id: 57
- display_name: "Tableware"
-}
-item {
- name: "/m/02dgv"
- id: 58
- display_name: "Door"
-}
-item {
- name: "/m/01bqk0"
- id: 59
- display_name: "Bicycle wheel"
-}
-item {
- name: "/m/017ftj"
- id: 60
- display_name: "Sunglasses"
-}
-item {
- name: "/m/052lwg6"
- id: 61
- display_name: "Baked goods"
-}
-item {
- name: "/m/014sv8"
- id: 62
- display_name: "Eye"
-}
-item {
- name: "/m/0270h"
- id: 63
- display_name: "Dessert"
-}
-item {
- name: "/m/0283dt1"
- id: 64
- display_name: "Mouth"
-}
-item {
- name: "/m/0k5j"
- id: 65
- display_name: "Aircraft"
-}
-item {
- name: "/m/0cmf2"
- id: 66
- display_name: "Airplane"
-}
-item {
- name: "/m/07jdr"
- id: 67
- display_name: "Train"
-}
-item {
- name: "/m/032b3c"
- id: 68
- display_name: "Jacket"
-}
-item {
- name: "/m/033rq4"
- id: 69
- display_name: "Street light"
-}
-item {
- name: "/m/0k65p"
- id: 70
- display_name: "Hand"
-}
-item {
- name: "/m/01ww8y"
- id: 71
- display_name: "Snack"
-}
-item {
- name: "/m/0zvk5"
- id: 72
- display_name: "Helmet"
-}
-item {
- name: "/m/07mhn"
- id: 73
- display_name: "Trousers"
-}
-item {
- name: "/m/04dr76w"
- id: 74
- display_name: "Bottle"
-}
-item {
- name: "/m/03fp41"
- id: 75
- display_name: "Houseplant"
-}
-item {
- name: "/m/03k3r"
- id: 76
- display_name: "Horse"
-}
-item {
- name: "/m/01y9k5"
- id: 77
- display_name: "Desk"
-}
-item {
- name: "/m/0cdl1"
- id: 78
- display_name: "Palm tree"
-}
-item {
- name: "/m/0f4s2w"
- id: 79
- display_name: "Vegetable"
-}
-item {
- name: "/m/02xwb"
- id: 80
- display_name: "Fruit"
-}
-item {
- name: "/m/035r7c"
- id: 81
- display_name: "Leg"
-}
-item {
- name: "/m/0bt_c3"
- id: 82
- display_name: "Book"
-}
-item {
- name: "/m/01_bhs"
- id: 83
- display_name: "Fast food"
-}
-item {
- name: "/m/01599"
- id: 84
- display_name: "Beer"
-}
-item {
- name: "/m/03120"
- id: 85
- display_name: "Flag"
-}
-item {
- name: "/m/026t6"
- id: 86
- display_name: "Drum"
-}
-item {
- name: "/m/01bjv"
- id: 87
- display_name: "Bus"
-}
-item {
- name: "/m/07r04"
- id: 88
- display_name: "Truck"
-}
-item {
- name: "/m/018xm"
- id: 89
- display_name: "Ball"
-}
-item {
- name: "/m/01rkbr"
- id: 90
- display_name: "Tie"
-}
-item {
- name: "/m/0fm3zh"
- id: 91
- display_name: "Flowerpot"
-}
-item {
- name: "/m/02_n6y"
- id: 92
- display_name: "Goggles"
-}
-item {
- name: "/m/04_sv"
- id: 93
- display_name: "Motorcycle"
-}
-item {
- name: "/m/06z37_"
- id: 94
- display_name: "Picture frame"
-}
-item {
- name: "/m/01bfm9"
- id: 95
- display_name: "Shorts"
-}
-item {
- name: "/m/0h8mhzd"
- id: 96
- display_name: "Sports uniform"
-}
-item {
- name: "/m/0d_2m"
- id: 97
- display_name: "Moths and butterflies"
-}
-item {
- name: "/m/0gjbg72"
- id: 98
- display_name: "Shelf"
-}
-item {
- name: "/m/01n4qj"
- id: 99
- display_name: "Shirt"
-}
-item {
- name: "/m/0ch_cf"
- id: 100
- display_name: "Fish"
-}
-item {
- name: "/m/06m11"
- id: 101
- display_name: "Rose"
-}
-item {
- name: "/m/01jfm_"
- id: 102
- display_name: "Licence plate"
-}
-item {
- name: "/m/02crq1"
- id: 103
- display_name: "Couch"
-}
-item {
- name: "/m/083kb"
- id: 104
- display_name: "Weapon"
-}
-item {
- name: "/m/01c648"
- id: 105
- display_name: "Laptop"
-}
-item {
- name: "/m/09tvcd"
- id: 106
- display_name: "Wine glass"
-}
-item {
- name: "/m/0h2r6"
- id: 107
- display_name: "Van"
-}
-item {
- name: "/m/081qc"
- id: 108
- display_name: "Wine"
-}
-item {
- name: "/m/09ddx"
- id: 109
- display_name: "Duck"
-}
-item {
- name: "/m/03p3bw"
- id: 110
- display_name: "Bicycle helmet"
-}
-item {
- name: "/m/0cyf8"
- id: 111
- display_name: "Butterfly"
-}
-item {
- name: "/m/0b_rs"
- id: 112
- display_name: "Swimming pool"
-}
-item {
- name: "/m/039xj_"
- id: 113
- display_name: "Ear"
-}
-item {
- name: "/m/021sj1"
- id: 114
- display_name: "Office"
-}
-item {
- name: "/m/0dv5r"
- id: 115
- display_name: "Camera"
-}
-item {
- name: "/m/01lynh"
- id: 116
- display_name: "Stairs"
-}
-item {
- name: "/m/06bt6"
- id: 117
- display_name: "Reptile"
-}
-item {
- name: "/m/01226z"
- id: 118
- display_name: "Football"
-}
-item {
- name: "/m/0fszt"
- id: 119
- display_name: "Cake"
-}
-item {
- name: "/m/050k8"
- id: 120
- display_name: "Mobile phone"
-}
-item {
- name: "/m/02wbtzl"
- id: 121
- display_name: "Sun hat"
-}
-item {
- name: "/m/02p5f1q"
- id: 122
- display_name: "Coffee cup"
-}
-item {
- name: "/m/025nd"
- id: 123
- display_name: "Christmas tree"
-}
-item {
- name: "/m/02522"
- id: 124
- display_name: "Computer monitor"
-}
-item {
- name: "/m/09ct_"
- id: 125
- display_name: "Helicopter"
-}
-item {
- name: "/m/0cvnqh"
- id: 126
- display_name: "Bench"
-}
-item {
- name: "/m/0d5gx"
- id: 127
- display_name: "Castle"
-}
-item {
- name: "/m/01xygc"
- id: 128
- display_name: "Coat"
-}
-item {
- name: "/m/04m6gz"
- id: 129
- display_name: "Porch"
-}
-item {
- name: "/m/01gkx_"
- id: 130
- display_name: "Swimwear"
-}
-item {
- name: "/m/01s105"
- id: 131
- display_name: "Cabinetry"
-}
-item {
- name: "/m/01j61q"
- id: 132
- display_name: "Tent"
-}
-item {
- name: "/m/0hnnb"
- id: 133
- display_name: "Umbrella"
-}
-item {
- name: "/m/01j51"
- id: 134
- display_name: "Balloon"
-}
-item {
- name: "/m/01knjb"
- id: 135
- display_name: "Billboard"
-}
-item {
- name: "/m/03__z0"
- id: 136
- display_name: "Bookcase"
-}
-item {
- name: "/m/01m2v"
- id: 137
- display_name: "Computer keyboard"
-}
-item {
- name: "/m/0167gd"
- id: 138
- display_name: "Doll"
-}
-item {
- name: "/m/0284d"
- id: 139
- display_name: "Dairy"
-}
-item {
- name: "/m/03ssj5"
- id: 140
- display_name: "Bed"
-}
-item {
- name: "/m/02fq_6"
- id: 141
- display_name: "Fedora"
-}
-item {
- name: "/m/06nwz"
- id: 142
- display_name: "Seafood"
-}
-item {
- name: "/m/0220r2"
- id: 143
- display_name: "Fountain"
-}
-item {
- name: "/m/01mqdt"
- id: 144
- display_name: "Traffic sign"
-}
-item {
- name: "/m/0268lbt"
- id: 145
- display_name: "Hiking equipment"
-}
-item {
- name: "/m/07c52"
- id: 146
- display_name: "Television"
-}
-item {
- name: "/m/0grw1"
- id: 147
- display_name: "Salad"
-}
-item {
- name: "/m/01h3n"
- id: 148
- display_name: "Bee"
-}
-item {
- name: "/m/078n6m"
- id: 149
- display_name: "Coffee table"
-}
-item {
- name: "/m/01xq0k1"
- id: 150
- display_name: "Cattle"
-}
-item {
- name: "/m/0gd2v"
- id: 151
- display_name: "Marine mammal"
-}
-item {
- name: "/m/0dbvp"
- id: 152
- display_name: "Goose"
-}
-item {
- name: "/m/03rszm"
- id: 153
- display_name: "Curtain"
-}
-item {
- name: "/m/0h8n5zk"
- id: 154
- display_name: "Kitchen & dining room table"
-}
-item {
- name: "/m/019dx1"
- id: 155
- display_name: "Home appliance"
-}
-item {
- name: "/m/03hl4l9"
- id: 156
- display_name: "Marine invertebrates"
-}
-item {
- name: "/m/0b3fp9"
- id: 157
- display_name: "Countertop"
-}
-item {
- name: "/m/02rdsp"
- id: 158
- display_name: "Office supplies"
-}
-item {
- name: "/m/0hf58v5"
- id: 159
- display_name: "Luggage and bags"
-}
-item {
- name: "/m/04h7h"
- id: 160
- display_name: "Lighthouse"
-}
-item {
- name: "/m/024g6"
- id: 161
- display_name: "Cocktail"
-}
-item {
- name: "/m/0cffdh"
- id: 162
- display_name: "Maple"
-}
-item {
- name: "/m/03q5c7"
- id: 163
- display_name: "Saucer"
-}
-item {
- name: "/m/014y4n"
- id: 164
- display_name: "Paddle"
-}
-item {
- name: "/m/01yx86"
- id: 165
- display_name: "Bronze sculpture"
-}
-item {
- name: "/m/020jm"
- id: 166
- display_name: "Beetle"
-}
-item {
- name: "/m/025dyy"
- id: 167
- display_name: "Box"
-}
-item {
- name: "/m/01llwg"
- id: 168
- display_name: "Necklace"
-}
-item {
- name: "/m/08pbxl"
- id: 169
- display_name: "Monkey"
-}
-item {
- name: "/m/02d9qx"
- id: 170
- display_name: "Whiteboard"
-}
-item {
- name: "/m/02pkr5"
- id: 171
- display_name: "Plumbing fixture"
-}
-item {
- name: "/m/0h99cwc"
- id: 172
- display_name: "Kitchen appliance"
-}
-item {
- name: "/m/050gv4"
- id: 173
- display_name: "Plate"
-}
-item {
- name: "/m/02vqfm"
- id: 174
- display_name: "Coffee"
-}
-item {
- name: "/m/09kx5"
- id: 175
- display_name: "Deer"
-}
-item {
- name: "/m/019w40"
- id: 176
- display_name: "Surfboard"
-}
-item {
- name: "/m/09dzg"
- id: 177
- display_name: "Turtle"
-}
-item {
- name: "/m/07k1x"
- id: 178
- display_name: "Tool"
-}
-item {
- name: "/m/080hkjn"
- id: 179
- display_name: "Handbag"
-}
-item {
- name: "/m/07qxg_"
- id: 180
- display_name: "Football helmet"
-}
-item {
- name: "/m/0ph39"
- id: 181
- display_name: "Canoe"
-}
-item {
- name: "/m/018p4k"
- id: 182
- display_name: "Cart"
-}
-item {
- name: "/m/02h19r"
- id: 183
- display_name: "Scarf"
-}
-item {
- name: "/m/015h_t"
- id: 184
- display_name: "Beard"
-}
-item {
- name: "/m/0fqfqc"
- id: 185
- display_name: "Drawer"
-}
-item {
- name: "/m/025rp__"
- id: 186
- display_name: "Cowboy hat"
-}
-item {
- name: "/m/01x3z"
- id: 187
- display_name: "Clock"
-}
-item {
- name: "/m/0crjs"
- id: 188
- display_name: "Convenience store"
-}
-item {
- name: "/m/0l515"
- id: 189
- display_name: "Sandwich"
-}
-item {
- name: "/m/015qff"
- id: 190
- display_name: "Traffic light"
-}
-item {
- name: "/m/09kmb"
- id: 191
- display_name: "Spider"
-}
-item {
- name: "/m/09728"
- id: 192
- display_name: "Bread"
-}
-item {
- name: "/m/071qp"
- id: 193
- display_name: "Squirrel"
-}
-item {
- name: "/m/02s195"
- id: 194
- display_name: "Vase"
-}
-item {
- name: "/m/06c54"
- id: 195
- display_name: "Rifle"
-}
-item {
- name: "/m/01xqw"
- id: 196
- display_name: "Cello"
-}
-item {
- name: "/m/05zsy"
- id: 197
- display_name: "Pumpkin"
-}
-item {
- name: "/m/0bwd_0j"
- id: 198
- display_name: "Elephant"
-}
-item {
- name: "/m/04m9y"
- id: 199
- display_name: "Lizard"
-}
-item {
- name: "/m/052sf"
- id: 200
- display_name: "Mushroom"
-}
-item {
- name: "/m/03grzl"
- id: 201
- display_name: "Baseball glove"
-}
-item {
- name: "/m/01z1kdw"
- id: 202
- display_name: "Juice"
-}
-item {
- name: "/m/02wv6h6"
- id: 203
- display_name: "Skirt"
-}
-item {
- name: "/m/016m2d"
- id: 204
- display_name: "Skull"
-}
-item {
- name: "/m/0dtln"
- id: 205
- display_name: "Lamp"
-}
-item {
- name: "/m/057cc"
- id: 206
- display_name: "Musical keyboard"
-}
-item {
- name: "/m/06k2mb"
- id: 207
- display_name: "High heels"
-}
-item {
- name: "/m/0f6wt"
- id: 208
- display_name: "Falcon"
-}
-item {
- name: "/m/0cxn2"
- id: 209
- display_name: "Ice cream"
-}
-item {
- name: "/m/02jvh9"
- id: 210
- display_name: "Mug"
-}
-item {
- name: "/m/0gjkl"
- id: 211
- display_name: "Watch"
-}
-item {
- name: "/m/01b638"
- id: 212
- display_name: "Boot"
-}
-item {
- name: "/m/071p9"
- id: 213
- display_name: "Ski"
-}
-item {
- name: "/m/0pg52"
- id: 214
- display_name: "Taxi"
-}
-item {
- name: "/m/0ftb8"
- id: 215
- display_name: "Sunflower"
-}
-item {
- name: "/m/0hnyx"
- id: 216
- display_name: "Pastry"
-}
-item {
- name: "/m/02jz0l"
- id: 217
- display_name: "Tap"
-}
-item {
- name: "/m/04kkgm"
- id: 218
- display_name: "Bowl"
-}
-item {
- name: "/m/0174n1"
- id: 219
- display_name: "Glove"
-}
-item {
- name: "/m/0gv1x"
- id: 220
- display_name: "Parrot"
-}
-item {
- name: "/m/09csl"
- id: 221
- display_name: "Eagle"
-}
-item {
- name: "/m/02jnhm"
- id: 222
- display_name: "Tin can"
-}
-item {
- name: "/m/099ssp"
- id: 223
- display_name: "Platter"
-}
-item {
- name: "/m/03nfch"
- id: 224
- display_name: "Sandal"
-}
-item {
- name: "/m/07y_7"
- id: 225
- display_name: "Violin"
-}
-item {
- name: "/m/05z6w"
- id: 226
- display_name: "Penguin"
-}
-item {
- name: "/m/03m3pdh"
- id: 227
- display_name: "Sofa bed"
-}
-item {
- name: "/m/09ld4"
- id: 228
- display_name: "Frog"
-}
-item {
- name: "/m/09b5t"
- id: 229
- display_name: "Chicken"
-}
-item {
- name: "/m/054xkw"
- id: 230
- display_name: "Lifejacket"
-}
-item {
- name: "/m/0130jx"
- id: 231
- display_name: "Sink"
-}
-item {
- name: "/m/07fbm7"
- id: 232
- display_name: "Strawberry"
-}
-item {
- name: "/m/01dws"
- id: 233
- display_name: "Bear"
-}
-item {
- name: "/m/01tcjp"
- id: 234
- display_name: "Muffin"
-}
-item {
- name: "/m/0dftk"
- id: 235
- display_name: "Swan"
-}
-item {
- name: "/m/0c06p"
- id: 236
- display_name: "Candle"
-}
-item {
- name: "/m/034c16"
- id: 237
- display_name: "Pillow"
-}
-item {
- name: "/m/09d5_"
- id: 238
- display_name: "Owl"
-}
-item {
- name: "/m/03hlz0c"
- id: 239
- display_name: "Kitchen utensil"
-}
-item {
- name: "/m/0ft9s"
- id: 240
- display_name: "Dragonfly"
-}
-item {
- name: "/m/011k07"
- id: 241
- display_name: "Tortoise"
-}
-item {
- name: "/m/054_l"
- id: 242
- display_name: "Mirror"
-}
-item {
- name: "/m/0jqgx"
- id: 243
- display_name: "Lily"
-}
-item {
- name: "/m/0663v"
- id: 244
- display_name: "Pizza"
-}
-item {
- name: "/m/0242l"
- id: 245
- display_name: "Coin"
-}
-item {
- name: "/m/014trl"
- id: 246
- display_name: "Cosmetics"
-}
-item {
- name: "/m/05r5c"
- id: 247
- display_name: "Piano"
-}
-item {
- name: "/m/07j87"
- id: 248
- display_name: "Tomato"
-}
-item {
- name: "/m/05kyg_"
- id: 249
- display_name: "Chest of drawers"
-}
-item {
- name: "/m/0kmg4"
- id: 250
- display_name: "Teddy bear"
-}
-item {
- name: "/m/07cmd"
- id: 251
- display_name: "Tank"
-}
-item {
- name: "/m/0dv77"
- id: 252
- display_name: "Squash"
-}
-item {
- name: "/m/096mb"
- id: 253
- display_name: "Lion"
-}
-item {
- name: "/m/01gmv2"
- id: 254
- display_name: "Brassiere"
-}
-item {
- name: "/m/07bgp"
- id: 255
- display_name: "Sheep"
-}
-item {
- name: "/m/0cmx8"
- id: 256
- display_name: "Spoon"
-}
-item {
- name: "/m/029tx"
- id: 257
- display_name: "Dinosaur"
-}
-item {
- name: "/m/073bxn"
- id: 258
- display_name: "Tripod"
-}
-item {
- name: "/m/0bh9flk"
- id: 259
- display_name: "Tablet computer"
-}
-item {
- name: "/m/06mf6"
- id: 260
- display_name: "Rabbit"
-}
-item {
- name: "/m/06_fw"
- id: 261
- display_name: "Skateboard"
-}
-item {
- name: "/m/078jl"
- id: 262
- display_name: "Snake"
-}
-item {
- name: "/m/0fbdv"
- id: 263
- display_name: "Shellfish"
-}
-item {
- name: "/m/0h23m"
- id: 264
- display_name: "Sparrow"
-}
-item {
- name: "/m/014j1m"
- id: 265
- display_name: "Apple"
-}
-item {
- name: "/m/03fwl"
- id: 266
- display_name: "Goat"
-}
-item {
- name: "/m/02y6n"
- id: 267
- display_name: "French fries"
-}
-item {
- name: "/m/06c7f7"
- id: 268
- display_name: "Lipstick"
-}
-item {
- name: "/m/026qbn5"
- id: 269
- display_name: "studio couch"
-}
-item {
- name: "/m/0cdn1"
- id: 270
- display_name: "Hamburger"
-}
-item {
- name: "/m/07clx"
- id: 271
- display_name: "Tea"
-}
-item {
- name: "/m/07cx4"
- id: 272
- display_name: "Telephone"
-}
-item {
- name: "/m/03g8mr"
- id: 273
- display_name: "Baseball bat"
-}
-item {
- name: "/m/0cnyhnx"
- id: 274
- display_name: "Bull"
-}
-item {
- name: "/m/01b7fy"
- id: 275
- display_name: "Headphones"
-}
-item {
- name: "/m/04gth"
- id: 276
- display_name: "Lavender"
-}
-item {
- name: "/m/0cyfs"
- id: 277
- display_name: "Parachute"
-}
-item {
- name: "/m/021mn"
- id: 278
- display_name: "Cookie"
-}
-item {
- name: "/m/07dm6"
- id: 279
- display_name: "Tiger"
-}
-item {
- name: "/m/0k1tl"
- id: 280
- display_name: "Pen"
-}
-item {
- name: "/m/0dv9c"
- id: 281
- display_name: "Racket"
-}
-item {
- name: "/m/0dt3t"
- id: 282
- display_name: "Fork"
-}
-item {
- name: "/m/04yqq2"
- id: 283
- display_name: "Bust"
-}
-item {
- name: "/m/01cmb2"
- id: 284
- display_name: "Miniskirt"
-}
-item {
- name: "/m/0gd36"
- id: 285
- display_name: "Sea lion"
-}
-item {
- name: "/m/033cnk"
- id: 286
- display_name: "Egg"
-}
-item {
- name: "/m/06ncr"
- id: 287
- display_name: "Saxophone"
-}
-item {
- name: "/m/03bk1"
- id: 288
- display_name: "Giraffe"
-}
-item {
- name: "/m/0bjyj5"
- id: 289
- display_name: "Waste container"
-}
-item {
- name: "/m/06__v"
- id: 290
- display_name: "Snowboard"
-}
-item {
- name: "/m/0qmmr"
- id: 291
- display_name: "Wheelchair"
-}
-item {
- name: "/m/01xgg_"
- id: 292
- display_name: "Medical equipment"
-}
-item {
- name: "/m/0czz2"
- id: 293
- display_name: "Antelope"
-}
-item {
- name: "/m/02l8p9"
- id: 294
- display_name: "Harbor seal"
-}
-item {
- name: "/m/09g1w"
- id: 295
- display_name: "Toilet"
-}
-item {
- name: "/m/0ll1f78"
- id: 296
- display_name: "Shrimp"
-}
-item {
- name: "/m/0cyhj_"
- id: 297
- display_name: "Orange"
-}
-item {
- name: "/m/0642b4"
- id: 298
- display_name: "Cupboard"
-}
-item {
- name: "/m/0h8mzrc"
- id: 299
- display_name: "Wall clock"
-}
-item {
- name: "/m/068zj"
- id: 300
- display_name: "Pig"
-}
-item {
- name: "/m/02z51p"
- id: 301
- display_name: "Nightstand"
-}
-item {
- name: "/m/0h8nr_l"
- id: 302
- display_name: "Bathroom accessory"
-}
-item {
- name: "/m/0388q"
- id: 303
- display_name: "Grape"
-}
-item {
- name: "/m/02hj4"
- id: 304
- display_name: "Dolphin"
-}
-item {
- name: "/m/01jfsr"
- id: 305
- display_name: "Lantern"
-}
-item {
- name: "/m/07gql"
- id: 306
- display_name: "Trumpet"
-}
-item {
- name: "/m/0h8my_4"
- id: 307
- display_name: "Tennis racket"
-}
-item {
- name: "/m/0n28_"
- id: 308
- display_name: "Crab"
-}
-item {
- name: "/m/0120dh"
- id: 309
- display_name: "Sea turtle"
-}
-item {
- name: "/m/020kz"
- id: 310
- display_name: "Cannon"
-}
-item {
- name: "/m/0mkg"
- id: 311
- display_name: "Accordion"
-}
-item {
- name: "/m/03c7gz"
- id: 312
- display_name: "Door handle"
-}
-item {
- name: "/m/09k_b"
- id: 313
- display_name: "Lemon"
-}
-item {
- name: "/m/031n1"
- id: 314
- display_name: "Foot"
-}
-item {
- name: "/m/04rmv"
- id: 315
- display_name: "Mouse"
-}
-item {
- name: "/m/084rd"
- id: 316
- display_name: "Wok"
-}
-item {
- name: "/m/02rgn06"
- id: 317
- display_name: "Volleyball"
-}
-item {
- name: "/m/05z55"
- id: 318
- display_name: "Pasta"
-}
-item {
- name: "/m/01r546"
- id: 319
- display_name: "Earrings"
-}
-item {
- name: "/m/09qck"
- id: 320
- display_name: "Banana"
-}
-item {
- name: "/m/012w5l"
- id: 321
- display_name: "Ladder"
-}
-item {
- name: "/m/01940j"
- id: 322
- display_name: "Backpack"
-}
-item {
- name: "/m/09f_2"
- id: 323
- display_name: "Crocodile"
-}
-item {
- name: "/m/02p3w7d"
- id: 324
- display_name: "Roller skates"
-}
-item {
- name: "/m/057p5t"
- id: 325
- display_name: "Scoreboard"
-}
-item {
- name: "/m/0d8zb"
- id: 326
- display_name: "Jellyfish"
-}
-item {
- name: "/m/01nq26"
- id: 327
- display_name: "Sock"
-}
-item {
- name: "/m/01x_v"
- id: 328
- display_name: "Camel"
-}
-item {
- name: "/m/05gqfk"
- id: 329
- display_name: "Plastic bag"
-}
-item {
- name: "/m/0cydv"
- id: 330
- display_name: "Caterpillar"
-}
-item {
- name: "/m/07030"
- id: 331
- display_name: "Sushi"
-}
-item {
- name: "/m/084zz"
- id: 332
- display_name: "Whale"
-}
-item {
- name: "/m/0c29q"
- id: 333
- display_name: "Leopard"
-}
-item {
- name: "/m/02zn6n"
- id: 334
- display_name: "Barrel"
-}
-item {
- name: "/m/03tw93"
- id: 335
- display_name: "Fireplace"
-}
-item {
- name: "/m/0fqt361"
- id: 336
- display_name: "Stool"
-}
-item {
- name: "/m/0f9_l"
- id: 337
- display_name: "Snail"
-}
-item {
- name: "/m/0gm28"
- id: 338
- display_name: "Candy"
-}
-item {
- name: "/m/09rvcxw"
- id: 339
- display_name: "Rocket"
-}
-item {
- name: "/m/01nkt"
- id: 340
- display_name: "Cheese"
-}
-item {
- name: "/m/04p0qw"
- id: 341
- display_name: "Billiard table"
-}
-item {
- name: "/m/03hj559"
- id: 342
- display_name: "Mixing bowl"
-}
-item {
- name: "/m/07pj7bq"
- id: 343
- display_name: "Bowling equipment"
-}
-item {
- name: "/m/04ctx"
- id: 344
- display_name: "Knife"
-}
-item {
- name: "/m/0703r8"
- id: 345
- display_name: "Loveseat"
-}
-item {
- name: "/m/03qrc"
- id: 346
- display_name: "Hamster"
-}
-item {
- name: "/m/020lf"
- id: 347
- display_name: "Mouse"
-}
-item {
- name: "/m/0by6g"
- id: 348
- display_name: "Shark"
-}
-item {
- name: "/m/01fh4r"
- id: 349
- display_name: "Teapot"
-}
-item {
- name: "/m/07c6l"
- id: 350
- display_name: "Trombone"
-}
-item {
- name: "/m/03bj1"
- id: 351
- display_name: "Panda"
-}
-item {
- name: "/m/0898b"
- id: 352
- display_name: "Zebra"
-}
-item {
- name: "/m/02x984l"
- id: 353
- display_name: "Mechanical fan"
-}
-item {
- name: "/m/0fj52s"
- id: 354
- display_name: "Carrot"
-}
-item {
- name: "/m/0cd4d"
- id: 355
- display_name: "Cheetah"
-}
-item {
- name: "/m/02068x"
- id: 356
- display_name: "Gondola"
-}
-item {
- name: "/m/01vbnl"
- id: 357
- display_name: "Bidet"
-}
-item {
- name: "/m/0449p"
- id: 358
- display_name: "Jaguar"
-}
-item {
- name: "/m/0gj37"
- id: 359
- display_name: "Ladybug"
-}
-item {
- name: "/m/0nl46"
- id: 360
- display_name: "Crown"
-}
-item {
- name: "/m/0152hh"
- id: 361
- display_name: "Snowman"
-}
-item {
- name: "/m/03dnzn"
- id: 362
- display_name: "Bathtub"
-}
-item {
- name: "/m/05_5p_0"
- id: 363
- display_name: "Table tennis racket"
-}
-item {
- name: "/m/02jfl0"
- id: 364
- display_name: "Sombrero"
-}
-item {
- name: "/m/01dxs"
- id: 365
- display_name: "Brown bear"
-}
-item {
- name: "/m/0cjq5"
- id: 366
- display_name: "Lobster"
-}
-item {
- name: "/m/040b_t"
- id: 367
- display_name: "Refrigerator"
-}
-item {
- name: "/m/0_cp5"
- id: 368
- display_name: "Oyster"
-}
-item {
- name: "/m/0gxl3"
- id: 369
- display_name: "Handgun"
-}
-item {
- name: "/m/029bxz"
- id: 370
- display_name: "Oven"
-}
-item {
- name: "/m/02zt3"
- id: 371
- display_name: "Kite"
-}
-item {
- name: "/m/03d443"
- id: 372
- display_name: "Rhinoceros"
-}
-item {
- name: "/m/0306r"
- id: 373
- display_name: "Fox"
-}
-item {
- name: "/m/0h8l4fh"
- id: 374
- display_name: "Light bulb"
-}
-item {
- name: "/m/0633h"
- id: 375
- display_name: "Polar bear"
-}
-item {
- name: "/m/01s55n"
- id: 376
- display_name: "Suitcase"
-}
-item {
- name: "/m/0hkxq"
- id: 377
- display_name: "Broccoli"
-}
-item {
- name: "/m/0cn6p"
- id: 378
- display_name: "Otter"
-}
-item {
- name: "/m/0dbzx"
- id: 379
- display_name: "Mule"
-}
-item {
- name: "/m/01dy8n"
- id: 380
- display_name: "Woodpecker"
-}
-item {
- name: "/m/01h8tj"
- id: 381
- display_name: "Starfish"
-}
-item {
- name: "/m/03s_tn"
- id: 382
- display_name: "Kettle"
-}
-item {
- name: "/m/01xs3r"
- id: 383
- display_name: "Jet ski"
-}
-item {
- name: "/m/031b6r"
- id: 384
- display_name: "Window blind"
-}
-item {
- name: "/m/06j2d"
- id: 385
- display_name: "Raven"
-}
-item {
- name: "/m/0hqkz"
- id: 386
- display_name: "Grapefruit"
-}
-item {
- name: "/m/01_5g"
- id: 387
- display_name: "Chopsticks"
-}
-item {
- name: "/m/02zvsm"
- id: 388
- display_name: "Tart"
-}
-item {
- name: "/m/0kpqd"
- id: 389
- display_name: "Watermelon"
-}
-item {
- name: "/m/015x4r"
- id: 390
- display_name: "Cucumber"
-}
-item {
- name: "/m/061hd_"
- id: 391
- display_name: "Infant bed"
-}
-item {
- name: "/m/04ylt"
- id: 392
- display_name: "Missile"
-}
-item {
- name: "/m/02wv84t"
- id: 393
- display_name: "Gas stove"
-}
-item {
- name: "/m/04y4h8h"
- id: 394
- display_name: "Bathroom cabinet"
-}
-item {
- name: "/m/01gllr"
- id: 395
- display_name: "Beehive"
-}
-item {
- name: "/m/0pcr"
- id: 396
- display_name: "Alpaca"
-}
-item {
- name: "/m/0jy4k"
- id: 397
- display_name: "Doughnut"
-}
-item {
- name: "/m/09f20"
- id: 398
- display_name: "Hippopotamus"
-}
-item {
- name: "/m/0mcx2"
- id: 399
- display_name: "Ipod"
-}
-item {
- name: "/m/04c0y"
- id: 400
- display_name: "Kangaroo"
-}
-item {
- name: "/m/0_k2"
- id: 401
- display_name: "Ant"
-}
-item {
- name: "/m/0jg57"
- id: 402
- display_name: "Bell pepper"
-}
-item {
- name: "/m/03fj2"
- id: 403
- display_name: "Goldfish"
-}
-item {
- name: "/m/03ldnb"
- id: 404
- display_name: "Ceiling fan"
-}
-item {
- name: "/m/06nrc"
- id: 405
- display_name: "Shotgun"
-}
-item {
- name: "/m/01btn"
- id: 406
- display_name: "Barge"
-}
-item {
- name: "/m/05vtc"
- id: 407
- display_name: "Potato"
-}
-item {
- name: "/m/08hvt4"
- id: 408
- display_name: "Jug"
-}
-item {
- name: "/m/0fx9l"
- id: 409
- display_name: "Microwave oven"
-}
-item {
- name: "/m/01h44"
- id: 410
- display_name: "Bat"
-}
-item {
- name: "/m/05n4y"
- id: 411
- display_name: "Ostrich"
-}
-item {
- name: "/m/0jly1"
- id: 412
- display_name: "Turkey"
-}
-item {
- name: "/m/06y5r"
- id: 413
- display_name: "Sword"
-}
-item {
- name: "/m/05ctyq"
- id: 414
- display_name: "Tennis ball"
-}
-item {
- name: "/m/0fp6w"
- id: 415
- display_name: "Pineapple"
-}
-item {
- name: "/m/0d4w1"
- id: 416
- display_name: "Closet"
-}
-item {
- name: "/m/02pv19"
- id: 417
- display_name: "Stop sign"
-}
-item {
- name: "/m/07crc"
- id: 418
- display_name: "Taco"
-}
-item {
- name: "/m/01dwwc"
- id: 419
- display_name: "Pancake"
-}
-item {
- name: "/m/01b9xk"
- id: 420
- display_name: "Hot dog"
-}
-item {
- name: "/m/013y1f"
- id: 421
- display_name: "Organ"
-}
-item {
- name: "/m/0m53l"
- id: 422
- display_name: "Rays and skates"
-}
-item {
- name: "/m/0174k2"
- id: 423
- display_name: "Washing machine"
-}
-item {
- name: "/m/01dwsz"
- id: 424
- display_name: "Waffle"
-}
-item {
- name: "/m/04vv5k"
- id: 425
- display_name: "Snowplow"
-}
-item {
- name: "/m/04cp_"
- id: 426
- display_name: "Koala"
-}
-item {
- name: "/m/0fz0h"
- id: 427
- display_name: "Honeycomb"
-}
-item {
- name: "/m/0llzx"
- id: 428
- display_name: "Sewing machine"
-}
-item {
- name: "/m/0319l"
- id: 429
- display_name: "Horn"
-}
-item {
- name: "/m/04v6l4"
- id: 430
- display_name: "Frying pan"
-}
-item {
- name: "/m/0dkzw"
- id: 431
- display_name: "Seat belt"
-}
-item {
- name: "/m/027pcv"
- id: 432
- display_name: "Zucchini"
-}
-item {
- name: "/m/0323sq"
- id: 433
- display_name: "Golf cart"
-}
-item {
- name: "/m/054fyh"
- id: 434
- display_name: "Pitcher"
-}
-item {
- name: "/m/01pns0"
- id: 435
- display_name: "Fire hydrant"
-}
-item {
- name: "/m/012n7d"
- id: 436
- display_name: "Ambulance"
-}
-item {
- name: "/m/044r5d"
- id: 437
- display_name: "Golf ball"
-}
-item {
- name: "/m/01krhy"
- id: 438
- display_name: "Tiara"
-}
-item {
- name: "/m/0dq75"
- id: 439
- display_name: "Raccoon"
-}
-item {
- name: "/m/0176mf"
- id: 440
- display_name: "Belt"
-}
-item {
- name: "/m/0h8lkj8"
- id: 441
- display_name: "Corded phone"
-}
-item {
- name: "/m/04tn4x"
- id: 442
- display_name: "Swim cap"
-}
-item {
- name: "/m/06l9r"
- id: 443
- display_name: "Red panda"
-}
-item {
- name: "/m/0cjs7"
- id: 444
- display_name: "Asparagus"
-}
-item {
- name: "/m/01lsmm"
- id: 445
- display_name: "Scissors"
-}
-item {
- name: "/m/01lcw4"
- id: 446
- display_name: "Limousine"
-}
-item {
- name: "/m/047j0r"
- id: 447
- display_name: "Filing cabinet"
-}
-item {
- name: "/m/01fb_0"
- id: 448
- display_name: "Bagel"
-}
-item {
- name: "/m/04169hn"
- id: 449
- display_name: "Wood-burning stove"
-}
-item {
- name: "/m/076bq"
- id: 450
- display_name: "Segway"
-}
-item {
- name: "/m/0hdln"
- id: 451
- display_name: "Ruler"
-}
-item {
- name: "/m/01g3x7"
- id: 452
- display_name: "Bow and arrow"
-}
-item {
- name: "/m/0l3ms"
- id: 453
- display_name: "Balance beam"
-}
-item {
- name: "/m/058qzx"
- id: 454
- display_name: "Kitchen knife"
-}
-item {
- name: "/m/0h8n6ft"
- id: 455
- display_name: "Cake stand"
-}
-item {
- name: "/m/018j2"
- id: 456
- display_name: "Banjo"
-}
-item {
- name: "/m/0l14j_"
- id: 457
- display_name: "Flute"
-}
-item {
- name: "/m/0wdt60w"
- id: 458
- display_name: "Rugby ball"
-}
-item {
- name: "/m/02gzp"
- id: 459
- display_name: "Dagger"
-}
-item {
- name: "/m/0h8n6f9"
- id: 460
- display_name: "Dog bed"
-}
-item {
- name: "/m/0fbw6"
- id: 461
- display_name: "Cabbage"
-}
-item {
- name: "/m/07kng9"
- id: 462
- display_name: "Picnic basket"
-}
-item {
- name: "/m/0dj6p"
- id: 463
- display_name: "Peach"
-}
-item {
- name: "/m/06pcq"
- id: 464
- display_name: "Submarine sandwich"
-}
-item {
- name: "/m/061_f"
- id: 465
- display_name: "Pear"
-}
-item {
- name: "/m/04g2r"
- id: 466
- display_name: "Lynx"
-}
-item {
- name: "/m/0jwn_"
- id: 467
- display_name: "Pomegranate"
-}
-item {
- name: "/m/02f9f_"
- id: 468
- display_name: "Shower"
-}
-item {
- name: "/m/01f8m5"
- id: 469
- display_name: "Blue jay"
-}
-item {
- name: "/m/01m4t"
- id: 470
- display_name: "Printer"
-}
-item {
- name: "/m/0cl4p"
- id: 471
- display_name: "Hedgehog"
-}
-item {
- name: "/m/07xyvk"
- id: 472
- display_name: "Coffeemaker"
-}
-item {
- name: "/m/084hf"
- id: 473
- display_name: "Worm"
-}
-item {
- name: "/m/03v5tg"
- id: 474
- display_name: "Drinking straw"
-}
-item {
- name: "/m/0qjjc"
- id: 475
- display_name: "Remote control"
-}
-item {
- name: "/m/015x5n"
- id: 476
- display_name: "Radish"
-}
-item {
- name: "/m/0ccs93"
- id: 477
- display_name: "Canary"
-}
-item {
- name: "/m/0nybt"
- id: 478
- display_name: "Seahorse"
-}
-item {
- name: "/m/02vkqh8"
- id: 479
- display_name: "Wardrobe"
-}
-item {
- name: "/m/09gtd"
- id: 480
- display_name: "Toilet paper"
-}
-item {
- name: "/m/019h78"
- id: 481
- display_name: "Centipede"
-}
-item {
- name: "/m/015wgc"
- id: 482
- display_name: "Croissant"
-}
-item {
- name: "/m/01x3jk"
- id: 483
- display_name: "Snowmobile"
-}
-item {
- name: "/m/01j3zr"
- id: 484
- display_name: "Burrito"
-}
-item {
- name: "/m/0c568"
- id: 485
- display_name: "Porcupine"
-}
-item {
- name: "/m/02pdsw"
- id: 486
- display_name: "Cutting board"
-}
-item {
- name: "/m/029b3"
- id: 487
- display_name: "Dice"
-}
-item {
- name: "/m/03q5t"
- id: 488
- display_name: "Harpsichord"
-}
-item {
- name: "/m/0p833"
- id: 489
- display_name: "Perfume"
-}
-item {
- name: "/m/01d380"
- id: 490
- display_name: "Drill"
-}
-item {
- name: "/m/024d2"
- id: 491
- display_name: "Calculator"
-}
-item {
- name: "/m/0mw_6"
- id: 492
- display_name: "Willow"
-}
-item {
- name: "/m/01f91_"
- id: 493
- display_name: "Pretzel"
-}
-item {
- name: "/m/02g30s"
- id: 494
- display_name: "Guacamole"
-}
-item {
- name: "/m/01hrv5"
- id: 495
- display_name: "Popcorn"
-}
-item {
- name: "/m/03m5k"
- id: 496
- display_name: "Harp"
-}
-item {
- name: "/m/0162_1"
- id: 497
- display_name: "Towel"
-}
-item {
- name: "/m/063rgb"
- id: 498
- display_name: "Mixer"
-}
-item {
- name: "/m/06_72j"
- id: 499
- display_name: "Digital clock"
-}
-item {
- name: "/m/046dlr"
- id: 500
- display_name: "Alarm clock"
-}
-item {
- name: "/m/047v4b"
- id: 501
- display_name: "Artichoke"
-}
-item {
- name: "/m/04zpv"
- id: 502
- display_name: "Milk"
-}
-item {
- name: "/m/043nyj"
- id: 503
- display_name: "Common fig"
-}
-item {
- name: "/m/03bbps"
- id: 504
- display_name: "Power plugs and sockets"
-}
-item {
- name: "/m/02w3r3"
- id: 505
- display_name: "Paper towel"
-}
-item {
- name: "/m/02pjr4"
- id: 506
- display_name: "Blender"
-}
-item {
- name: "/m/0755b"
- id: 507
- display_name: "Scorpion"
-}
-item {
- name: "/m/02lbcq"
- id: 508
- display_name: "Stretcher"
-}
-item {
- name: "/m/0fldg"
- id: 509
- display_name: "Mango"
-}
-item {
- name: "/m/012074"
- id: 510
- display_name: "Magpie"
-}
-item {
- name: "/m/035vxb"
- id: 511
- display_name: "Isopod"
-}
-item {
- name: "/m/02w3_ws"
- id: 512
- display_name: "Personal care"
-}
-item {
- name: "/m/0f6nr"
- id: 513
- display_name: "Unicycle"
-}
-item {
- name: "/m/0420v5"
- id: 514
- display_name: "Punching bag"
-}
-item {
- name: "/m/0frqm"
- id: 515
- display_name: "Envelope"
-}
-item {
- name: "/m/03txqz"
- id: 516
- display_name: "Scale"
-}
-item {
- name: "/m/0271qf7"
- id: 517
- display_name: "Wine rack"
-}
-item {
- name: "/m/074d1"
- id: 518
- display_name: "Submarine"
-}
-item {
- name: "/m/08p92x"
- id: 519
- display_name: "Cream"
-}
-item {
- name: "/m/01j4z9"
- id: 520
- display_name: "Chainsaw"
-}
-item {
- name: "/m/0kpt_"
- id: 521
- display_name: "Cantaloupe"
-}
-item {
- name: "/m/0h8n27j"
- id: 522
- display_name: "Serving tray"
-}
-item {
- name: "/m/03y6mg"
- id: 523
- display_name: "Food processor"
-}
-item {
- name: "/m/04h8sr"
- id: 524
- display_name: "Dumbbell"
-}
-item {
- name: "/m/065h6l"
- id: 525
- display_name: "Jacuzzi"
-}
-item {
- name: "/m/02tsc9"
- id: 526
- display_name: "Slow cooker"
-}
-item {
- name: "/m/012ysf"
- id: 527
- display_name: "Syringe"
-}
-item {
- name: "/m/0ky7b"
- id: 528
- display_name: "Dishwasher"
-}
-item {
- name: "/m/02wg_p"
- id: 529
- display_name: "Tree house"
-}
-item {
- name: "/m/0584n8"
- id: 530
- display_name: "Briefcase"
-}
-item {
- name: "/m/03kt2w"
- id: 531
- display_name: "Stationary bicycle"
-}
-item {
- name: "/m/05kms"
- id: 532
- display_name: "Oboe"
-}
-item {
- name: "/m/030610"
- id: 533
- display_name: "Treadmill"
-}
-item {
- name: "/m/0lt4_"
- id: 534
- display_name: "Binoculars"
-}
-item {
- name: "/m/076lb9"
- id: 535
- display_name: "Bench"
-}
-item {
- name: "/m/02ctlc"
- id: 536
- display_name: "Cricket ball"
-}
-item {
- name: "/m/02x8cch"
- id: 537
- display_name: "Salt and pepper shakers"
-}
-item {
- name: "/m/09gys"
- id: 538
- display_name: "Squid"
-}
-item {
- name: "/m/03jbxj"
- id: 539
- display_name: "Light switch"
-}
-item {
- name: "/m/012xff"
- id: 540
- display_name: "Toothbrush"
-}
-item {
- name: "/m/0h8kx63"
- id: 541
- display_name: "Spice rack"
-}
-item {
- name: "/m/073g6"
- id: 542
- display_name: "Stethoscope"
-}
-item {
- name: "/m/02cvgx"
- id: 543
- display_name: "Winter melon"
-}
-item {
- name: "/m/027rl48"
- id: 544
- display_name: "Ladle"
-}
-item {
- name: "/m/01kb5b"
- id: 545
- display_name: "Flashlight"
-}
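
The 545-class label map deleted above pairs each OpenImages MID (e.g. "/m/01kb5b") with a file-local numeric id and a human-readable display_name. As a hedged sketch of how such a pbtxt is normally consumed — the path and class count below are illustrative — the repository's label_map_util turns it into a category index keyed by id:

    from object_detection.utils import label_map_util

    # use_display_name=True keys categories on the readable names, not the MIDs.
    label_map = label_map_util.load_labelmap('oid_label_map.pbtxt')
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=545, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    # category_index[545] -> {'id': 545, 'name': 'Flashlight'}
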
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/oid_object_detection_challenge_500_label_map.pbtxt b/research/mlperf_object_detection/Mask_RCNN/object_detection/data/oid_object_detection_challenge_500_label_map.pbtxt
deleted file mode 100644
index 044f6d4c813729a693cac761f43a2246e07f7b6a..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/oid_object_detection_challenge_500_label_map.pbtxt
+++ /dev/null
@@ -1,2500 +0,0 @@
-item {
- name: "/m/061hd_"
- id: 1
- display_name: "Infant bed"
-}
-item {
- name: "/m/06m11"
- id: 2
- display_name: "Rose"
-}
-item {
- name: "/m/03120"
- id: 3
- display_name: "Flag"
-}
-item {
- name: "/m/01kb5b"
- id: 4
- display_name: "Flashlight"
-}
-item {
- name: "/m/0120dh"
- id: 5
- display_name: "Sea turtle"
-}
-item {
- name: "/m/0dv5r"
- id: 6
- display_name: "Camera"
-}
-item {
- name: "/m/0jbk"
- id: 7
- display_name: "Animal"
-}
-item {
- name: "/m/0174n1"
- id: 8
- display_name: "Glove"
-}
-item {
- name: "/m/09f_2"
- id: 9
- display_name: "Crocodile"
-}
-item {
- name: "/m/01xq0k1"
- id: 10
- display_name: "Cattle"
-}
-item {
- name: "/m/03jm5"
- id: 11
- display_name: "House"
-}
-item {
- name: "/m/02g30s"
- id: 12
- display_name: "Guacamole"
-}
-item {
- name: "/m/05z6w"
- id: 13
- display_name: "Penguin"
-}
-item {
- name: "/m/01jfm_"
- id: 14
- display_name: "Vehicle registration plate"
-}
-item {
- name: "/m/076lb9"
- id: 15
- display_name: "Training bench"
-}
-item {
- name: "/m/0gj37"
- id: 16
- display_name: "Ladybug"
-}
-item {
- name: "/m/0k0pj"
- id: 17
- display_name: "Human nose"
-}
-item {
- name: "/m/0kpqd"
- id: 18
- display_name: "Watermelon"
-}
-item {
- name: "/m/0l14j_"
- id: 19
- display_name: "Flute"
-}
-item {
- name: "/m/0cyf8"
- id: 20
- display_name: "Butterfly"
-}
-item {
- name: "/m/0174k2"
- id: 21
- display_name: "Washing machine"
-}
-item {
- name: "/m/0dq75"
- id: 22
- display_name: "Raccoon"
-}
-item {
- name: "/m/076bq"
- id: 23
- display_name: "Segway"
-}
-item {
- name: "/m/07crc"
- id: 24
- display_name: "Taco"
-}
-item {
- name: "/m/0d8zb"
- id: 25
- display_name: "Jellyfish"
-}
-item {
- name: "/m/0fszt"
- id: 26
- display_name: "Cake"
-}
-item {
- name: "/m/0k1tl"
- id: 27
- display_name: "Pen"
-}
-item {
- name: "/m/020kz"
- id: 28
- display_name: "Cannon"
-}
-item {
- name: "/m/09728"
- id: 29
- display_name: "Bread"
-}
-item {
- name: "/m/07j7r"
- id: 30
- display_name: "Tree"
-}
-item {
- name: "/m/0fbdv"
- id: 31
- display_name: "Shellfish"
-}
-item {
- name: "/m/03ssj5"
- id: 32
- display_name: "Bed"
-}
-item {
- name: "/m/03qrc"
- id: 33
- display_name: "Hamster"
-}
-item {
- name: "/m/02dl1y"
- id: 34
- display_name: "Hat"
-}
-item {
- name: "/m/01k6s3"
- id: 35
- display_name: "Toaster"
-}
-item {
- name: "/m/02jfl0"
- id: 36
- display_name: "Sombrero"
-}
-item {
- name: "/m/01krhy"
- id: 37
- display_name: "Tiara"
-}
-item {
- name: "/m/04kkgm"
- id: 38
- display_name: "Bowl"
-}
-item {
- name: "/m/0ft9s"
- id: 39
- display_name: "Dragonfly"
-}
-item {
- name: "/m/0d_2m"
- id: 40
- display_name: "Moths and butterflies"
-}
-item {
- name: "/m/0czz2"
- id: 41
- display_name: "Antelope"
-}
-item {
- name: "/m/0f4s2w"
- id: 42
- display_name: "Vegetable"
-}
-item {
- name: "/m/07dd4"
- id: 43
- display_name: "Torch"
-}
-item {
- name: "/m/0cgh4"
- id: 44
- display_name: "Building"
-}
-item {
- name: "/m/03bbps"
- id: 45
- display_name: "Power plugs and sockets"
-}
-item {
- name: "/m/02pjr4"
- id: 46
- display_name: "Blender"
-}
-item {
- name: "/m/04p0qw"
- id: 47
- display_name: "Billiard table"
-}
-item {
- name: "/m/02pdsw"
- id: 48
- display_name: "Cutting board"
-}
-item {
- name: "/m/01yx86"
- id: 49
- display_name: "Bronze sculpture"
-}
-item {
- name: "/m/09dzg"
- id: 50
- display_name: "Turtle"
-}
-item {
- name: "/m/0hkxq"
- id: 51
- display_name: "Broccoli"
-}
-item {
- name: "/m/07dm6"
- id: 52
- display_name: "Tiger"
-}
-item {
- name: "/m/054_l"
- id: 53
- display_name: "Mirror"
-}
-item {
- name: "/m/01dws"
- id: 54
- display_name: "Bear"
-}
-item {
- name: "/m/027pcv"
- id: 55
- display_name: "Zucchini"
-}
-item {
- name: "/m/01d40f"
- id: 56
- display_name: "Dress"
-}
-item {
- name: "/m/02rgn06"
- id: 57
- display_name: "Volleyball"
-}
-item {
- name: "/m/0342h"
- id: 58
- display_name: "Guitar"
-}
-item {
- name: "/m/06bt6"
- id: 59
- display_name: "Reptile"
-}
-item {
- name: "/m/0323sq"
- id: 60
- display_name: "Golf cart"
-}
-item {
- name: "/m/02zvsm"
- id: 61
- display_name: "Tart"
-}
-item {
- name: "/m/02fq_6"
- id: 62
- display_name: "Fedora"
-}
-item {
- name: "/m/01lrl"
- id: 63
- display_name: "Carnivore"
-}
-item {
- name: "/m/0k4j"
- id: 64
- display_name: "Car"
-}
-item {
- name: "/m/04h7h"
- id: 65
- display_name: "Lighthouse"
-}
-item {
- name: "/m/07xyvk"
- id: 66
- display_name: "Coffeemaker"
-}
-item {
- name: "/m/03y6mg"
- id: 67
- display_name: "Food processor"
-}
-item {
- name: "/m/07r04"
- id: 68
- display_name: "Truck"
-}
-item {
- name: "/m/03__z0"
- id: 69
- display_name: "Bookcase"
-}
-item {
- name: "/m/019w40"
- id: 70
- display_name: "Surfboard"
-}
-item {
- name: "/m/09j5n"
- id: 71
- display_name: "Footwear"
-}
-item {
- name: "/m/0cvnqh"
- id: 72
- display_name: "Bench"
-}
-item {
- name: "/m/01llwg"
- id: 73
- display_name: "Necklace"
-}
-item {
- name: "/m/0c9ph5"
- id: 74
- display_name: "Flower"
-}
-item {
- name: "/m/015x5n"
- id: 75
- display_name: "Radish"
-}
-item {
- name: "/m/0gd2v"
- id: 76
- display_name: "Marine mammal"
-}
-item {
- name: "/m/04v6l4"
- id: 77
- display_name: "Frying pan"
-}
-item {
- name: "/m/02jz0l"
- id: 78
- display_name: "Tap"
-}
-item {
- name: "/m/0dj6p"
- id: 79
- display_name: "Peach"
-}
-item {
- name: "/m/04ctx"
- id: 80
- display_name: "Knife"
-}
-item {
- name: "/m/080hkjn"
- id: 81
- display_name: "Handbag"
-}
-item {
- name: "/m/01c648"
- id: 82
- display_name: "Laptop"
-}
-item {
- name: "/m/01j61q"
- id: 83
- display_name: "Tent"
-}
-item {
- name: "/m/012n7d"
- id: 84
- display_name: "Ambulance"
-}
-item {
- name: "/m/025nd"
- id: 85
- display_name: "Christmas tree"
-}
-item {
- name: "/m/09csl"
- id: 86
- display_name: "Eagle"
-}
-item {
- name: "/m/01lcw4"
- id: 87
- display_name: "Limousine"
-}
-item {
- name: "/m/0h8n5zk"
- id: 88
- display_name: "Kitchen & dining room table"
-}
-item {
- name: "/m/0633h"
- id: 89
- display_name: "Polar bear"
-}
-item {
- name: "/m/01fdzj"
- id: 90
- display_name: "Tower"
-}
-item {
- name: "/m/01226z"
- id: 91
- display_name: "Football"
-}
-item {
- name: "/m/0mw_6"
- id: 92
- display_name: "Willow"
-}
-item {
- name: "/m/04hgtk"
- id: 93
- display_name: "Human head"
-}
-item {
- name: "/m/02pv19"
- id: 94
- display_name: "Stop sign"
-}
-item {
- name: "/m/09qck"
- id: 95
- display_name: "Banana"
-}
-item {
- name: "/m/063rgb"
- id: 96
- display_name: "Mixer"
-}
-item {
- name: "/m/0lt4_"
- id: 97
- display_name: "Binoculars"
-}
-item {
- name: "/m/0270h"
- id: 98
- display_name: "Dessert"
-}
-item {
- name: "/m/01h3n"
- id: 99
- display_name: "Bee"
-}
-item {
- name: "/m/01mzpv"
- id: 100
- display_name: "Chair"
-}
-item {
- name: "/m/04169hn"
- id: 101
- display_name: "Wood-burning stove"
-}
-item {
- name: "/m/0fm3zh"
- id: 102
- display_name: "Flowerpot"
-}
-item {
- name: "/m/0d20w4"
- id: 103
- display_name: "Beaker"
-}
-item {
- name: "/m/0_cp5"
- id: 104
- display_name: "Oyster"
-}
-item {
- name: "/m/01dy8n"
- id: 105
- display_name: "Woodpecker"
-}
-item {
- name: "/m/03m5k"
- id: 106
- display_name: "Harp"
-}
-item {
- name: "/m/03dnzn"
- id: 107
- display_name: "Bathtub"
-}
-item {
- name: "/m/0h8mzrc"
- id: 108
- display_name: "Wall clock"
-}
-item {
- name: "/m/0h8mhzd"
- id: 109
- display_name: "Sports uniform"
-}
-item {
- name: "/m/03d443"
- id: 110
- display_name: "Rhinoceros"
-}
-item {
- name: "/m/01gllr"
- id: 111
- display_name: "Beehive"
-}
-item {
- name: "/m/0642b4"
- id: 112
- display_name: "Cupboard"
-}
-item {
- name: "/m/09b5t"
- id: 113
- display_name: "Chicken"
-}
-item {
- name: "/m/04yx4"
- id: 114
- display_name: "Man"
-}
-item {
- name: "/m/01f8m5"
- id: 115
- display_name: "Blue jay"
-}
-item {
- name: "/m/015x4r"
- id: 116
- display_name: "Cucumber"
-}
-item {
- name: "/m/01j51"
- id: 117
- display_name: "Balloon"
-}
-item {
- name: "/m/02zt3"
- id: 118
- display_name: "Kite"
-}
-item {
- name: "/m/03tw93"
- id: 119
- display_name: "Fireplace"
-}
-item {
- name: "/m/01jfsr"
- id: 120
- display_name: "Lantern"
-}
-item {
- name: "/m/04ylt"
- id: 121
- display_name: "Missile"
-}
-item {
- name: "/m/0bt_c3"
- id: 122
- display_name: "Book"
-}
-item {
- name: "/m/0cmx8"
- id: 123
- display_name: "Spoon"
-}
-item {
- name: "/m/0hqkz"
- id: 124
- display_name: "Grapefruit"
-}
-item {
- name: "/m/071qp"
- id: 125
- display_name: "Squirrel"
-}
-item {
- name: "/m/0cyhj_"
- id: 126
- display_name: "Orange"
-}
-item {
- name: "/m/01xygc"
- id: 127
- display_name: "Coat"
-}
-item {
- name: "/m/0420v5"
- id: 128
- display_name: "Punching bag"
-}
-item {
- name: "/m/0898b"
- id: 129
- display_name: "Zebra"
-}
-item {
- name: "/m/01knjb"
- id: 130
- display_name: "Billboard"
-}
-item {
- name: "/m/0199g"
- id: 131
- display_name: "Bicycle"
-}
-item {
- name: "/m/03c7gz"
- id: 132
- display_name: "Door handle"
-}
-item {
- name: "/m/02x984l"
- id: 133
- display_name: "Mechanical fan"
-}
-item {
- name: "/m/04zwwv"
- id: 134
- display_name: "Ring binder"
-}
-item {
- name: "/m/04bcr3"
- id: 135
- display_name: "Table"
-}
-item {
- name: "/m/0gv1x"
- id: 136
- display_name: "Parrot"
-}
-item {
- name: "/m/01nq26"
- id: 137
- display_name: "Sock"
-}
-item {
- name: "/m/02s195"
- id: 138
- display_name: "Vase"
-}
-item {
- name: "/m/083kb"
- id: 139
- display_name: "Weapon"
-}
-item {
- name: "/m/06nrc"
- id: 140
- display_name: "Shotgun"
-}
-item {
- name: "/m/0jyfg"
- id: 141
- display_name: "Glasses"
-}
-item {
- name: "/m/0nybt"
- id: 142
- display_name: "Seahorse"
-}
-item {
- name: "/m/0176mf"
- id: 143
- display_name: "Belt"
-}
-item {
- name: "/m/01rzcn"
- id: 144
- display_name: "Watercraft"
-}
-item {
- name: "/m/0d4v4"
- id: 145
- display_name: "Window"
-}
-item {
- name: "/m/03bk1"
- id: 146
- display_name: "Giraffe"
-}
-item {
- name: "/m/096mb"
- id: 147
- display_name: "Lion"
-}
-item {
- name: "/m/0h9mv"
- id: 148
- display_name: "Tire"
-}
-item {
- name: "/m/07yv9"
- id: 149
- display_name: "Vehicle"
-}
-item {
- name: "/m/0ph39"
- id: 150
- display_name: "Canoe"
-}
-item {
- name: "/m/01rkbr"
- id: 151
- display_name: "Tie"
-}
-item {
- name: "/m/0gjbg72"
- id: 152
- display_name: "Shelf"
-}
-item {
- name: "/m/06z37_"
- id: 153
- display_name: "Picture frame"
-}
-item {
- name: "/m/01m4t"
- id: 154
- display_name: "Printer"
-}
-item {
- name: "/m/035r7c"
- id: 155
- display_name: "Human leg"
-}
-item {
- name: "/m/019jd"
- id: 156
- display_name: "Boat"
-}
-item {
- name: "/m/02tsc9"
- id: 157
- display_name: "Slow cooker"
-}
-item {
- name: "/m/015wgc"
- id: 158
- display_name: "Croissant"
-}
-item {
- name: "/m/0c06p"
- id: 159
- display_name: "Candle"
-}
-item {
- name: "/m/01dwwc"
- id: 160
- display_name: "Pancake"
-}
-item {
- name: "/m/034c16"
- id: 161
- display_name: "Pillow"
-}
-item {
- name: "/m/0242l"
- id: 162
- display_name: "Coin"
-}
-item {
- name: "/m/02lbcq"
- id: 163
- display_name: "Stretcher"
-}
-item {
- name: "/m/03nfch"
- id: 164
- display_name: "Sandal"
-}
-item {
- name: "/m/03bt1vf"
- id: 165
- display_name: "Woman"
-}
-item {
- name: "/m/01lynh"
- id: 166
- display_name: "Stairs"
-}
-item {
- name: "/m/03q5t"
- id: 167
- display_name: "Harpsichord"
-}
-item {
- name: "/m/0fqt361"
- id: 168
- display_name: "Stool"
-}
-item {
- name: "/m/01bjv"
- id: 169
- display_name: "Bus"
-}
-item {
- name: "/m/01s55n"
- id: 170
- display_name: "Suitcase"
-}
-item {
- name: "/m/0283dt1"
- id: 171
- display_name: "Human mouth"
-}
-item {
- name: "/m/01z1kdw"
- id: 172
- display_name: "Juice"
-}
-item {
- name: "/m/016m2d"
- id: 173
- display_name: "Skull"
-}
-item {
- name: "/m/02dgv"
- id: 174
- display_name: "Door"
-}
-item {
- name: "/m/07y_7"
- id: 175
- display_name: "Violin"
-}
-item {
- name: "/m/01_5g"
- id: 176
- display_name: "Chopsticks"
-}
-item {
- name: "/m/06_72j"
- id: 177
- display_name: "Digital clock"
-}
-item {
- name: "/m/0ftb8"
- id: 178
- display_name: "Sunflower"
-}
-item {
- name: "/m/0c29q"
- id: 179
- display_name: "Leopard"
-}
-item {
- name: "/m/0jg57"
- id: 180
- display_name: "Bell pepper"
-}
-item {
- name: "/m/02l8p9"
- id: 181
- display_name: "Harbor seal"
-}
-item {
- name: "/m/078jl"
- id: 182
- display_name: "Snake"
-}
-item {
- name: "/m/0llzx"
- id: 183
- display_name: "Sewing machine"
-}
-item {
- name: "/m/0dbvp"
- id: 184
- display_name: "Goose"
-}
-item {
- name: "/m/09ct_"
- id: 185
- display_name: "Helicopter"
-}
-item {
- name: "/m/0dkzw"
- id: 186
- display_name: "Seat belt"
-}
-item {
- name: "/m/02p5f1q"
- id: 187
- display_name: "Coffee cup"
-}
-item {
- name: "/m/0fx9l"
- id: 188
- display_name: "Microwave oven"
-}
-item {
- name: "/m/01b9xk"
- id: 189
- display_name: "Hot dog"
-}
-item {
- name: "/m/0b3fp9"
- id: 190
- display_name: "Countertop"
-}
-item {
- name: "/m/0h8n27j"
- id: 191
- display_name: "Serving tray"
-}
-item {
- name: "/m/0h8n6f9"
- id: 192
- display_name: "Dog bed"
-}
-item {
- name: "/m/01599"
- id: 193
- display_name: "Beer"
-}
-item {
- name: "/m/017ftj"
- id: 194
- display_name: "Sunglasses"
-}
-item {
- name: "/m/044r5d"
- id: 195
- display_name: "Golf ball"
-}
-item {
- name: "/m/01dwsz"
- id: 196
- display_name: "Waffle"
-}
-item {
- name: "/m/0cdl1"
- id: 197
- display_name: "Palm tree"
-}
-item {
- name: "/m/07gql"
- id: 198
- display_name: "Trumpet"
-}
-item {
- name: "/m/0hdln"
- id: 199
- display_name: "Ruler"
-}
-item {
- name: "/m/0zvk5"
- id: 200
- display_name: "Helmet"
-}
-item {
- name: "/m/012w5l"
- id: 201
- display_name: "Ladder"
-}
-item {
- name: "/m/021sj1"
- id: 202
- display_name: "Office building"
-}
-item {
- name: "/m/0bh9flk"
- id: 203
- display_name: "Tablet computer"
-}
-item {
- name: "/m/09gtd"
- id: 204
- display_name: "Toilet paper"
-}
-item {
- name: "/m/0jwn_"
- id: 205
- display_name: "Pomegranate"
-}
-item {
- name: "/m/02wv6h6"
- id: 206
- display_name: "Skirt"
-}
-item {
- name: "/m/02wv84t"
- id: 207
- display_name: "Gas stove"
-}
-item {
- name: "/m/021mn"
- id: 208
- display_name: "Cookie"
-}
-item {
- name: "/m/018p4k"
- id: 209
- display_name: "Cart"
-}
-item {
- name: "/m/06j2d"
- id: 210
- display_name: "Raven"
-}
-item {
- name: "/m/033cnk"
- id: 211
- display_name: "Egg"
-}
-item {
- name: "/m/01j3zr"
- id: 212
- display_name: "Burrito"
-}
-item {
- name: "/m/03fwl"
- id: 213
- display_name: "Goat"
-}
-item {
- name: "/m/058qzx"
- id: 214
- display_name: "Kitchen knife"
-}
-item {
- name: "/m/06_fw"
- id: 215
- display_name: "Skateboard"
-}
-item {
- name: "/m/02x8cch"
- id: 216
- display_name: "Salt and pepper shakers"
-}
-item {
- name: "/m/04g2r"
- id: 217
- display_name: "Lynx"
-}
-item {
- name: "/m/01b638"
- id: 218
- display_name: "Boot"
-}
-item {
- name: "/m/099ssp"
- id: 219
- display_name: "Platter"
-}
-item {
- name: "/m/071p9"
- id: 220
- display_name: "Ski"
-}
-item {
- name: "/m/01gkx_"
- id: 221
- display_name: "Swimwear"
-}
-item {
- name: "/m/0b_rs"
- id: 222
- display_name: "Swimming pool"
-}
-item {
- name: "/m/03v5tg"
- id: 223
- display_name: "Drinking straw"
-}
-item {
- name: "/m/01j5ks"
- id: 224
- display_name: "Wrench"
-}
-item {
- name: "/m/026t6"
- id: 225
- display_name: "Drum"
-}
-item {
- name: "/m/0_k2"
- id: 226
- display_name: "Ant"
-}
-item {
- name: "/m/039xj_"
- id: 227
- display_name: "Human ear"
-}
-item {
- name: "/m/01b7fy"
- id: 228
- display_name: "Headphones"
-}
-item {
- name: "/m/0220r2"
- id: 229
- display_name: "Fountain"
-}
-item {
- name: "/m/015p6"
- id: 230
- display_name: "Bird"
-}
-item {
- name: "/m/0fly7"
- id: 231
- display_name: "Jeans"
-}
-item {
- name: "/m/07c52"
- id: 232
- display_name: "Television"
-}
-item {
- name: "/m/0n28_"
- id: 233
- display_name: "Crab"
-}
-item {
- name: "/m/0hg7b"
- id: 234
- display_name: "Microphone"
-}
-item {
- name: "/m/019dx1"
- id: 235
- display_name: "Home appliance"
-}
-item {
- name: "/m/04vv5k"
- id: 236
- display_name: "Snowplow"
-}
-item {
- name: "/m/020jm"
- id: 237
- display_name: "Beetle"
-}
-item {
- name: "/m/047v4b"
- id: 238
- display_name: "Artichoke"
-}
-item {
- name: "/m/01xs3r"
- id: 239
- display_name: "Jet ski"
-}
-item {
- name: "/m/03kt2w"
- id: 240
- display_name: "Stationary bicycle"
-}
-item {
- name: "/m/03q69"
- id: 241
- display_name: "Human hair"
-}
-item {
- name: "/m/01dxs"
- id: 242
- display_name: "Brown bear"
-}
-item {
- name: "/m/01h8tj"
- id: 243
- display_name: "Starfish"
-}
-item {
- name: "/m/0dt3t"
- id: 244
- display_name: "Fork"
-}
-item {
- name: "/m/0cjq5"
- id: 245
- display_name: "Lobster"
-}
-item {
- name: "/m/0h8lkj8"
- id: 246
- display_name: "Corded phone"
-}
-item {
- name: "/m/0271t"
- id: 247
- display_name: "Drink"
-}
-item {
- name: "/m/03q5c7"
- id: 248
- display_name: "Saucer"
-}
-item {
- name: "/m/0fj52s"
- id: 249
- display_name: "Carrot"
-}
-item {
- name: "/m/03vt0"
- id: 250
- display_name: "Insect"
-}
-item {
- name: "/m/01x3z"
- id: 251
- display_name: "Clock"
-}
-item {
- name: "/m/0d5gx"
- id: 252
- display_name: "Castle"
-}
-item {
- name: "/m/0h8my_4"
- id: 253
- display_name: "Tennis racket"
-}
-item {
- name: "/m/03ldnb"
- id: 254
- display_name: "Ceiling fan"
-}
-item {
- name: "/m/0cjs7"
- id: 255
- display_name: "Asparagus"
-}
-item {
- name: "/m/0449p"
- id: 256
- display_name: "Jaguar"
-}
-item {
- name: "/m/04szw"
- id: 257
- display_name: "Musical instrument"
-}
-item {
- name: "/m/07jdr"
- id: 258
- display_name: "Train"
-}
-item {
- name: "/m/01yrx"
- id: 259
- display_name: "Cat"
-}
-item {
- name: "/m/06c54"
- id: 260
- display_name: "Rifle"
-}
-item {
- name: "/m/04h8sr"
- id: 261
- display_name: "Dumbbell"
-}
-item {
- name: "/m/050k8"
- id: 262
- display_name: "Mobile phone"
-}
-item {
- name: "/m/0pg52"
- id: 263
- display_name: "Taxi"
-}
-item {
- name: "/m/02f9f_"
- id: 264
- display_name: "Shower"
-}
-item {
- name: "/m/054fyh"
- id: 265
- display_name: "Pitcher"
-}
-item {
- name: "/m/09k_b"
- id: 266
- display_name: "Lemon"
-}
-item {
- name: "/m/03xxp"
- id: 267
- display_name: "Invertebrate"
-}
-item {
- name: "/m/0jly1"
- id: 268
- display_name: "Turkey"
-}
-item {
- name: "/m/06k2mb"
- id: 269
- display_name: "High heels"
-}
-item {
- name: "/m/04yqq2"
- id: 270
- display_name: "Bust"
-}
-item {
- name: "/m/0bwd_0j"
- id: 271
- display_name: "Elephant"
-}
-item {
- name: "/m/02h19r"
- id: 272
- display_name: "Scarf"
-}
-item {
- name: "/m/02zn6n"
- id: 273
- display_name: "Barrel"
-}
-item {
- name: "/m/07c6l"
- id: 274
- display_name: "Trombone"
-}
-item {
- name: "/m/05zsy"
- id: 275
- display_name: "Pumpkin"
-}
-item {
- name: "/m/025dyy"
- id: 276
- display_name: "Box"
-}
-item {
- name: "/m/07j87"
- id: 277
- display_name: "Tomato"
-}
-item {
- name: "/m/09ld4"
- id: 278
- display_name: "Frog"
-}
-item {
- name: "/m/01vbnl"
- id: 279
- display_name: "Bidet"
-}
-item {
- name: "/m/0dzct"
- id: 280
- display_name: "Human face"
-}
-item {
- name: "/m/03fp41"
- id: 281
- display_name: "Houseplant"
-}
-item {
- name: "/m/0h2r6"
- id: 282
- display_name: "Van"
-}
-item {
- name: "/m/0by6g"
- id: 283
- display_name: "Shark"
-}
-item {
- name: "/m/0cxn2"
- id: 284
- display_name: "Ice cream"
-}
-item {
- name: "/m/04tn4x"
- id: 285
- display_name: "Swim cap"
-}
-item {
- name: "/m/0f6wt"
- id: 286
- display_name: "Falcon"
-}
-item {
- name: "/m/05n4y"
- id: 287
- display_name: "Ostrich"
-}
-item {
- name: "/m/0gxl3"
- id: 288
- display_name: "Handgun"
-}
-item {
- name: "/m/02d9qx"
- id: 289
- display_name: "Whiteboard"
-}
-item {
- name: "/m/04m9y"
- id: 290
- display_name: "Lizard"
-}
-item {
- name: "/m/05z55"
- id: 291
- display_name: "Pasta"
-}
-item {
- name: "/m/01x3jk"
- id: 292
- display_name: "Snowmobile"
-}
-item {
- name: "/m/0h8l4fh"
- id: 293
- display_name: "Light bulb"
-}
-item {
- name: "/m/031b6r"
- id: 294
- display_name: "Window blind"
-}
-item {
- name: "/m/01tcjp"
- id: 295
- display_name: "Muffin"
-}
-item {
- name: "/m/01f91_"
- id: 296
- display_name: "Pretzel"
-}
-item {
- name: "/m/02522"
- id: 297
- display_name: "Computer monitor"
-}
-item {
- name: "/m/0319l"
- id: 298
- display_name: "Horn"
-}
-item {
- name: "/m/0c_jw"
- id: 299
- display_name: "Furniture"
-}
-item {
- name: "/m/0l515"
- id: 300
- display_name: "Sandwich"
-}
-item {
- name: "/m/0306r"
- id: 301
- display_name: "Fox"
-}
-item {
- name: "/m/0crjs"
- id: 302
- display_name: "Convenience store"
-}
-item {
- name: "/m/0ch_cf"
- id: 303
- display_name: "Fish"
-}
-item {
- name: "/m/02xwb"
- id: 304
- display_name: "Fruit"
-}
-item {
- name: "/m/01r546"
- id: 305
- display_name: "Earrings"
-}
-item {
- name: "/m/03rszm"
- id: 306
- display_name: "Curtain"
-}
-item {
- name: "/m/0388q"
- id: 307
- display_name: "Grape"
-}
-item {
- name: "/m/03m3pdh"
- id: 308
- display_name: "Sofa bed"
-}
-item {
- name: "/m/03k3r"
- id: 309
- display_name: "Horse"
-}
-item {
- name: "/m/0hf58v5"
- id: 310
- display_name: "Luggage and bags"
-}
-item {
- name: "/m/01y9k5"
- id: 311
- display_name: "Desk"
-}
-item {
- name: "/m/05441v"
- id: 312
- display_name: "Crutch"
-}
-item {
- name: "/m/03p3bw"
- id: 313
- display_name: "Bicycle helmet"
-}
-item {
- name: "/m/0175cv"
- id: 314
- display_name: "Tick"
-}
-item {
- name: "/m/0cmf2"
- id: 315
- display_name: "Airplane"
-}
-item {
- name: "/m/0ccs93"
- id: 316
- display_name: "Canary"
-}
-item {
- name: "/m/02d1br"
- id: 317
- display_name: "Spatula"
-}
-item {
- name: "/m/0gjkl"
- id: 318
- display_name: "Watch"
-}
-item {
- name: "/m/0jqgx"
- id: 319
- display_name: "Lily"
-}
-item {
- name: "/m/0h99cwc"
- id: 320
- display_name: "Kitchen appliance"
-}
-item {
- name: "/m/047j0r"
- id: 321
- display_name: "Filing cabinet"
-}
-item {
- name: "/m/0k5j"
- id: 322
- display_name: "Aircraft"
-}
-item {
- name: "/m/0h8n6ft"
- id: 323
- display_name: "Cake stand"
-}
-item {
- name: "/m/0gm28"
- id: 324
- display_name: "Candy"
-}
-item {
- name: "/m/0130jx"
- id: 325
- display_name: "Sink"
-}
-item {
- name: "/m/04rmv"
- id: 326
- display_name: "Mouse"
-}
-item {
- name: "/m/081qc"
- id: 327
- display_name: "Wine"
-}
-item {
- name: "/m/0qmmr"
- id: 328
- display_name: "Wheelchair"
-}
-item {
- name: "/m/03fj2"
- id: 329
- display_name: "Goldfish"
-}
-item {
- name: "/m/040b_t"
- id: 330
- display_name: "Refrigerator"
-}
-item {
- name: "/m/02y6n"
- id: 331
- display_name: "French fries"
-}
-item {
- name: "/m/0fqfqc"
- id: 332
- display_name: "Drawer"
-}
-item {
- name: "/m/030610"
- id: 333
- display_name: "Treadmill"
-}
-item {
- name: "/m/07kng9"
- id: 334
- display_name: "Picnic basket"
-}
-item {
- name: "/m/029b3"
- id: 335
- display_name: "Dice"
-}
-item {
- name: "/m/0fbw6"
- id: 336
- display_name: "Cabbage"
-}
-item {
- name: "/m/07qxg_"
- id: 337
- display_name: "Football helmet"
-}
-item {
- name: "/m/068zj"
- id: 338
- display_name: "Pig"
-}
-item {
- name: "/m/01g317"
- id: 339
- display_name: "Person"
-}
-item {
- name: "/m/01bfm9"
- id: 340
- display_name: "Shorts"
-}
-item {
- name: "/m/02068x"
- id: 341
- display_name: "Gondola"
-}
-item {
- name: "/m/0fz0h"
- id: 342
- display_name: "Honeycomb"
-}
-item {
- name: "/m/0jy4k"
- id: 343
- display_name: "Doughnut"
-}
-item {
- name: "/m/05kyg_"
- id: 344
- display_name: "Chest of drawers"
-}
-item {
- name: "/m/01prls"
- id: 345
- display_name: "Land vehicle"
-}
-item {
- name: "/m/01h44"
- id: 346
- display_name: "Bat"
-}
-item {
- name: "/m/08pbxl"
- id: 347
- display_name: "Monkey"
-}
-item {
- name: "/m/02gzp"
- id: 348
- display_name: "Dagger"
-}
-item {
- name: "/m/04brg2"
- id: 349
- display_name: "Tableware"
-}
-item {
- name: "/m/031n1"
- id: 350
- display_name: "Human foot"
-}
-item {
- name: "/m/02jvh9"
- id: 351
- display_name: "Mug"
-}
-item {
- name: "/m/046dlr"
- id: 352
- display_name: "Alarm clock"
-}
-item {
- name: "/m/0h8ntjv"
- id: 353
- display_name: "Pressure cooker"
-}
-item {
- name: "/m/0k65p"
- id: 354
- display_name: "Human hand"
-}
-item {
- name: "/m/011k07"
- id: 355
- display_name: "Tortoise"
-}
-item {
- name: "/m/03grzl"
- id: 356
- display_name: "Baseball glove"
-}
-item {
- name: "/m/06y5r"
- id: 357
- display_name: "Sword"
-}
-item {
- name: "/m/061_f"
- id: 358
- display_name: "Pear"
-}
-item {
- name: "/m/01cmb2"
- id: 359
- display_name: "Miniskirt"
-}
-item {
- name: "/m/01mqdt"
- id: 360
- display_name: "Traffic sign"
-}
-item {
- name: "/m/05r655"
- id: 361
- display_name: "Girl"
-}
-item {
- name: "/m/02p3w7d"
- id: 362
- display_name: "Roller skates"
-}
-item {
- name: "/m/029tx"
- id: 363
- display_name: "Dinosaur"
-}
-item {
- name: "/m/04m6gz"
- id: 364
- display_name: "Porch"
-}
-item {
- name: "/m/015h_t"
- id: 365
- display_name: "Human beard"
-}
-item {
- name: "/m/06pcq"
- id: 366
- display_name: "Submarine sandwich"
-}
-item {
- name: "/m/01bms0"
- id: 367
- display_name: "Screwdriver"
-}
-item {
- name: "/m/07fbm7"
- id: 368
- display_name: "Strawberry"
-}
-item {
- name: "/m/09tvcd"
- id: 369
- display_name: "Wine glass"
-}
-item {
- name: "/m/06nwz"
- id: 370
- display_name: "Seafood"
-}
-item {
- name: "/m/0dv9c"
- id: 371
- display_name: "Racket"
-}
-item {
- name: "/m/083wq"
- id: 372
- display_name: "Wheel"
-}
-item {
- name: "/m/0gd36"
- id: 373
- display_name: "Sea lion"
-}
-item {
- name: "/m/0138tl"
- id: 374
- display_name: "Toy"
-}
-item {
- name: "/m/07clx"
- id: 375
- display_name: "Tea"
-}
-item {
- name: "/m/05ctyq"
- id: 376
- display_name: "Tennis ball"
-}
-item {
- name: "/m/0bjyj5"
- id: 377
- display_name: "Waste container"
-}
-item {
- name: "/m/0dbzx"
- id: 378
- display_name: "Mule"
-}
-item {
- name: "/m/02ctlc"
- id: 379
- display_name: "Cricket ball"
-}
-item {
- name: "/m/0fp6w"
- id: 380
- display_name: "Pineapple"
-}
-item {
- name: "/m/0djtd"
- id: 381
- display_name: "Coconut"
-}
-item {
- name: "/m/0167gd"
- id: 382
- display_name: "Doll"
-}
-item {
- name: "/m/078n6m"
- id: 383
- display_name: "Coffee table"
-}
-item {
- name: "/m/0152hh"
- id: 384
- display_name: "Snowman"
-}
-item {
- name: "/m/04gth"
- id: 385
- display_name: "Lavender"
-}
-item {
- name: "/m/0ll1f78"
- id: 386
- display_name: "Shrimp"
-}
-item {
- name: "/m/0cffdh"
- id: 387
- display_name: "Maple"
-}
-item {
- name: "/m/025rp__"
- id: 388
- display_name: "Cowboy hat"
-}
-item {
- name: "/m/02_n6y"
- id: 389
- display_name: "Goggles"
-}
-item {
- name: "/m/0wdt60w"
- id: 390
- display_name: "Rugby ball"
-}
-item {
- name: "/m/0cydv"
- id: 391
- display_name: "Caterpillar"
-}
-item {
- name: "/m/01n5jq"
- id: 392
- display_name: "Poster"
-}
-item {
- name: "/m/09rvcxw"
- id: 393
- display_name: "Rocket"
-}
-item {
- name: "/m/013y1f"
- id: 394
- display_name: "Organ"
-}
-item {
- name: "/m/06ncr"
- id: 395
- display_name: "Saxophone"
-}
-item {
- name: "/m/015qff"
- id: 396
- display_name: "Traffic light"
-}
-item {
- name: "/m/024g6"
- id: 397
- display_name: "Cocktail"
-}
-item {
- name: "/m/05gqfk"
- id: 398
- display_name: "Plastic bag"
-}
-item {
- name: "/m/0dv77"
- id: 399
- display_name: "Squash"
-}
-item {
- name: "/m/052sf"
- id: 400
- display_name: "Mushroom"
-}
-item {
- name: "/m/0cdn1"
- id: 401
- display_name: "Hamburger"
-}
-item {
- name: "/m/03jbxj"
- id: 402
- display_name: "Light switch"
-}
-item {
- name: "/m/0cyfs"
- id: 403
- display_name: "Parachute"
-}
-item {
- name: "/m/0kmg4"
- id: 404
- display_name: "Teddy bear"
-}
-item {
- name: "/m/02cvgx"
- id: 405
- display_name: "Winter melon"
-}
-item {
- name: "/m/09kx5"
- id: 406
- display_name: "Deer"
-}
-item {
- name: "/m/057cc"
- id: 407
- display_name: "Musical keyboard"
-}
-item {
- name: "/m/02pkr5"
- id: 408
- display_name: "Plumbing fixture"
-}
-item {
- name: "/m/057p5t"
- id: 409
- display_name: "Scoreboard"
-}
-item {
- name: "/m/03g8mr"
- id: 410
- display_name: "Baseball bat"
-}
-item {
- name: "/m/0frqm"
- id: 411
- display_name: "Envelope"
-}
-item {
- name: "/m/03m3vtv"
- id: 412
- display_name: "Adhesive tape"
-}
-item {
- name: "/m/0584n8"
- id: 413
- display_name: "Briefcase"
-}
-item {
- name: "/m/014y4n"
- id: 414
- display_name: "Paddle"
-}
-item {
- name: "/m/01g3x7"
- id: 415
- display_name: "Bow and arrow"
-}
-item {
- name: "/m/07cx4"
- id: 416
- display_name: "Telephone"
-}
-item {
- name: "/m/07bgp"
- id: 417
- display_name: "Sheep"
-}
-item {
- name: "/m/032b3c"
- id: 418
- display_name: "Jacket"
-}
-item {
- name: "/m/01bl7v"
- id: 419
- display_name: "Boy"
-}
-item {
- name: "/m/0663v"
- id: 420
- display_name: "Pizza"
-}
-item {
- name: "/m/0cn6p"
- id: 421
- display_name: "Otter"
-}
-item {
- name: "/m/02rdsp"
- id: 422
- display_name: "Office supplies"
-}
-item {
- name: "/m/02crq1"
- id: 423
- display_name: "Couch"
-}
-item {
- name: "/m/01xqw"
- id: 424
- display_name: "Cello"
-}
-item {
- name: "/m/0cnyhnx"
- id: 425
- display_name: "Bull"
-}
-item {
- name: "/m/01x_v"
- id: 426
- display_name: "Camel"
-}
-item {
- name: "/m/018xm"
- id: 427
- display_name: "Ball"
-}
-item {
- name: "/m/09ddx"
- id: 428
- display_name: "Duck"
-}
-item {
- name: "/m/084zz"
- id: 429
- display_name: "Whale"
-}
-item {
- name: "/m/01n4qj"
- id: 430
- display_name: "Shirt"
-}
-item {
- name: "/m/07cmd"
- id: 431
- display_name: "Tank"
-}
-item {
- name: "/m/04_sv"
- id: 432
- display_name: "Motorcycle"
-}
-item {
- name: "/m/0mkg"
- id: 433
- display_name: "Accordion"
-}
-item {
- name: "/m/09d5_"
- id: 434
- display_name: "Owl"
-}
-item {
- name: "/m/0c568"
- id: 435
- display_name: "Porcupine"
-}
-item {
- name: "/m/02wbtzl"
- id: 436
- display_name: "Sun hat"
-}
-item {
- name: "/m/05bm6"
- id: 437
- display_name: "Nail"
-}
-item {
- name: "/m/01lsmm"
- id: 438
- display_name: "Scissors"
-}
-item {
- name: "/m/0dftk"
- id: 439
- display_name: "Swan"
-}
-item {
- name: "/m/0dtln"
- id: 440
- display_name: "Lamp"
-}
-item {
- name: "/m/0nl46"
- id: 441
- display_name: "Crown"
-}
-item {
- name: "/m/05r5c"
- id: 442
- display_name: "Piano"
-}
-item {
- name: "/m/06msq"
- id: 443
- display_name: "Sculpture"
-}
-item {
- name: "/m/0cd4d"
- id: 444
- display_name: "Cheetah"
-}
-item {
- name: "/m/05kms"
- id: 445
- display_name: "Oboe"
-}
-item {
- name: "/m/02jnhm"
- id: 446
- display_name: "Tin can"
-}
-item {
- name: "/m/0fldg"
- id: 447
- display_name: "Mango"
-}
-item {
- name: "/m/073bxn"
- id: 448
- display_name: "Tripod"
-}
-item {
- name: "/m/029bxz"
- id: 449
- display_name: "Oven"
-}
-item {
- name: "/m/020lf"
- id: 450
- display_name: "Computer mouse"
-}
-item {
- name: "/m/01btn"
- id: 451
- display_name: "Barge"
-}
-item {
- name: "/m/02vqfm"
- id: 452
- display_name: "Coffee"
-}
-item {
- name: "/m/06__v"
- id: 453
- display_name: "Snowboard"
-}
-item {
- name: "/m/043nyj"
- id: 454
- display_name: "Common fig"
-}
-item {
- name: "/m/0grw1"
- id: 455
- display_name: "Salad"
-}
-item {
- name: "/m/03hl4l9"
- id: 456
- display_name: "Marine invertebrates"
-}
-item {
- name: "/m/0hnnb"
- id: 457
- display_name: "Umbrella"
-}
-item {
- name: "/m/04c0y"
- id: 458
- display_name: "Kangaroo"
-}
-item {
- name: "/m/0dzf4"
- id: 459
- display_name: "Human arm"
-}
-item {
- name: "/m/07v9_z"
- id: 460
- display_name: "Measuring cup"
-}
-item {
- name: "/m/0f9_l"
- id: 461
- display_name: "Snail"
-}
-item {
- name: "/m/0703r8"
- id: 462
- display_name: "Loveseat"
-}
-item {
- name: "/m/01xyhv"
- id: 463
- display_name: "Suit"
-}
-item {
- name: "/m/01fh4r"
- id: 464
- display_name: "Teapot"
-}
-item {
- name: "/m/04dr76w"
- id: 465
- display_name: "Bottle"
-}
-item {
- name: "/m/0pcr"
- id: 466
- display_name: "Alpaca"
-}
-item {
- name: "/m/03s_tn"
- id: 467
- display_name: "Kettle"
-}
-item {
- name: "/m/07mhn"
- id: 468
- display_name: "Trousers"
-}
-item {
- name: "/m/01hrv5"
- id: 469
- display_name: "Popcorn"
-}
-item {
- name: "/m/019h78"
- id: 470
- display_name: "Centipede"
-}
-item {
- name: "/m/09kmb"
- id: 471
- display_name: "Spider"
-}
-item {
- name: "/m/0h23m"
- id: 472
- display_name: "Sparrow"
-}
-item {
- name: "/m/050gv4"
- id: 473
- display_name: "Plate"
-}
-item {
- name: "/m/01fb_0"
- id: 474
- display_name: "Bagel"
-}
-item {
- name: "/m/02w3_ws"
- id: 475
- display_name: "Personal care"
-}
-item {
- name: "/m/014j1m"
- id: 476
- display_name: "Apple"
-}
-item {
- name: "/m/01gmv2"
- id: 477
- display_name: "Brassiere"
-}
-item {
- name: "/m/04y4h8h"
- id: 478
- display_name: "Bathroom cabinet"
-}
-item {
- name: "/m/026qbn5"
- id: 479
- display_name: "Studio couch"
-}
-item {
- name: "/m/01m2v"
- id: 480
- display_name: "Computer keyboard"
-}
-item {
- name: "/m/05_5p_0"
- id: 481
- display_name: "Table tennis racket"
-}
-item {
- name: "/m/07030"
- id: 482
- display_name: "Sushi"
-}
-item {
- name: "/m/01s105"
- id: 483
- display_name: "Cabinetry"
-}
-item {
- name: "/m/033rq4"
- id: 484
- display_name: "Street light"
-}
-item {
- name: "/m/0162_1"
- id: 485
- display_name: "Towel"
-}
-item {
- name: "/m/02z51p"
- id: 486
- display_name: "Nightstand"
-}
-item {
- name: "/m/06mf6"
- id: 487
- display_name: "Rabbit"
-}
-item {
- name: "/m/02hj4"
- id: 488
- display_name: "Dolphin"
-}
-item {
- name: "/m/0bt9lr"
- id: 489
- display_name: "Dog"
-}
-item {
- name: "/m/08hvt4"
- id: 490
- display_name: "Jug"
-}
-item {
- name: "/m/084rd"
- id: 491
- display_name: "Wok"
-}
-item {
- name: "/m/01pns0"
- id: 492
- display_name: "Fire hydrant"
-}
-item {
- name: "/m/014sv8"
- id: 493
- display_name: "Human eye"
-}
-item {
- name: "/m/079cl"
- id: 494
- display_name: "Skyscraper"
-}
-item {
- name: "/m/01940j"
- id: 495
- display_name: "Backpack"
-}
-item {
- name: "/m/05vtc"
- id: 496
- display_name: "Potato"
-}
-item {
- name: "/m/02w3r3"
- id: 497
- display_name: "Paper towel"
-}
-item {
- name: "/m/054xkw"
- id: 498
- display_name: "Lifejacket"
-}
-item {
- name: "/m/01bqk0"
- id: 499
- display_name: "Bicycle wheel"
-}
-item {
- name: "/m/09g1w"
- id: 500
- display_name: "Toilet"
-}
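
Note that numeric ids are local to each label map: "/m/061hd_" ("Infant bed") is id 391 in the 545-class map above but id 1 in this 500-class challenge map, so any conversion between maps should key on the MID string rather than the id. A minimal sketch of building such a MID -> id lookup by parsing the pbtxt directly (the file name is illustrative, and it assumes the API's StringIntLabelMap proto):

    from google.protobuf import text_format
    from object_detection.protos import string_int_label_map_pb2

    with open('oid_object_detection_challenge_500_label_map.pbtxt') as f:
      label_map = text_format.Parse(
          f.read(), string_int_label_map_pb2.StringIntLabelMap())
    mid_to_id = {item.name: item.id for item in label_map.item}
    # mid_to_id['/m/061hd_'] == 1 here, while the same MID maps to 391 above.
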
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/pascal_label_map.pbtxt b/research/mlperf_object_detection/Mask_RCNN/object_detection/data/pascal_label_map.pbtxt
deleted file mode 100644
index c9e9e2affcd73ae5cb272a51b44306a74cf22eea..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/pascal_label_map.pbtxt
+++ /dev/null
@@ -1,99 +0,0 @@
-item {
- id: 1
- name: 'aeroplane'
-}
-
-item {
- id: 2
- name: 'bicycle'
-}
-
-item {
- id: 3
- name: 'bird'
-}
-
-item {
- id: 4
- name: 'boat'
-}
-
-item {
- id: 5
- name: 'bottle'
-}
-
-item {
- id: 6
- name: 'bus'
-}
-
-item {
- id: 7
- name: 'car'
-}
-
-item {
- id: 8
- name: 'cat'
-}
-
-item {
- id: 9
- name: 'chair'
-}
-
-item {
- id: 10
- name: 'cow'
-}
-
-item {
- id: 11
- name: 'diningtable'
-}
-
-item {
- id: 12
- name: 'dog'
-}
-
-item {
- id: 13
- name: 'horse'
-}
-
-item {
- id: 14
- name: 'motorbike'
-}
-
-item {
- id: 15
- name: 'person'
-}
-
-item {
- id: 16
- name: 'pottedplant'
-}
-
-item {
- id: 17
- name: 'sheep'
-}
-
-item {
- id: 18
- name: 'sofa'
-}
-
-item {
- id: 19
- name: 'train'
-}
-
-item {
- id: 20
- name: 'tvmonitor'
-}
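
The PASCAL VOC map, by contrast, carries only the twenty lowercase dataset class strings and no display_name field. A hedged sketch of the name -> id dict as the VOC record-generation scripts typically obtain it (the path is illustrative):

    from object_detection.utils import label_map_util

    label_map_dict = label_map_util.get_label_map_dict('pascal_label_map.pbtxt')
    # label_map_dict['aeroplane'] == 1, ..., label_map_dict['tvmonitor'] == 20
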
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/pet_label_map.pbtxt b/research/mlperf_object_detection/Mask_RCNN/object_detection/data/pet_label_map.pbtxt
deleted file mode 100644
index 54d7d3518941ceb0d2dc3465bdf702d4eaac3f07..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/pet_label_map.pbtxt
+++ /dev/null
@@ -1,184 +0,0 @@
-item {
- id: 1
- name: 'Abyssinian'
-}
-
-item {
- id: 2
- name: 'american_bulldog'
-}
-
-item {
- id: 3
- name: 'american_pit_bull_terrier'
-}
-
-item {
- id: 4
- name: 'basset_hound'
-}
-
-item {
- id: 5
- name: 'beagle'
-}
-
-item {
- id: 6
- name: 'Bengal'
-}
-
-item {
- id: 7
- name: 'Birman'
-}
-
-item {
- id: 8
- name: 'Bombay'
-}
-
-item {
- id: 9
- name: 'boxer'
-}
-
-item {
- id: 10
- name: 'British_Shorthair'
-}
-
-item {
- id: 11
- name: 'chihuahua'
-}
-
-item {
- id: 12
- name: 'Egyptian_Mau'
-}
-
-item {
- id: 13
- name: 'english_cocker_spaniel'
-}
-
-item {
- id: 14
- name: 'english_setter'
-}
-
-item {
- id: 15
- name: 'german_shorthaired'
-}
-
-item {
- id: 16
- name: 'great_pyrenees'
-}
-
-item {
- id: 17
- name: 'havanese'
-}
-
-item {
- id: 18
- name: 'japanese_chin'
-}
-
-item {
- id: 19
- name: 'keeshond'
-}
-
-item {
- id: 20
- name: 'leonberger'
-}
-
-item {
- id: 21
- name: 'Maine_Coon'
-}
-
-item {
- id: 22
- name: 'miniature_pinscher'
-}
-
-item {
- id: 23
- name: 'newfoundland'
-}
-
-item {
- id: 24
- name: 'Persian'
-}
-
-item {
- id: 25
- name: 'pomeranian'
-}
-
-item {
- id: 26
- name: 'pug'
-}
-
-item {
- id: 27
- name: 'Ragdoll'
-}
-
-item {
- id: 28
- name: 'Russian_Blue'
-}
-
-item {
- id: 29
- name: 'saint_bernard'
-}
-
-item {
- id: 30
- name: 'samoyed'
-}
-
-item {
- id: 31
- name: 'scottish_terrier'
-}
-
-item {
- id: 32
- name: 'shiba_inu'
-}
-
-item {
- id: 33
- name: 'Siamese'
-}
-
-item {
- id: 34
- name: 'Sphynx'
-}
-
-item {
- id: 35
- name: 'staffordshire_bull_terrier'
-}
-
-item {
- id: 36
- name: 'wheaten_terrier'
-}
-
-item {
- id: 37
- name: 'yorkshire_terrier'
-}
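
The Oxford-IIIT Pet map keeps the dataset's own capitalization (cat breeds capitalized, dog breeds lowercase) because class names are conventionally recovered from image file names before the id lookup. A minimal sketch of that parsing step, mirroring the approach of the pet TFRecord script that usually accompanies this map (the helper name is illustrative):

    import re

    def class_name_from_filename(file_name):
      # 'Abyssinian_100.jpg' -> 'Abyssinian'; 'english_setter_12.jpg' -> 'english_setter'
      match = re.match(r'([A-Za-z_]+)_[0-9]+\.jpg', file_name)
      return match.groups()[0]
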
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data_decoders/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/data_decoders/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data_decoders/tf_example_decoder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/data_decoders/tf_example_decoder.py
deleted file mode 100644
index 8480a14b4ade6497e57db505875ae0795b191063..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/data_decoders/tf_example_decoder.py
+++ /dev/null
@@ -1,439 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tensorflow Example proto decoder for object detection.
-
-A decoder to decode string tensors containing serialized tensorflow.Example
-protos for object detection.
-"""
-import tensorflow as tf
-
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from object_detection.core import data_decoder
-from object_detection.core import standard_fields as fields
-from object_detection.protos import input_reader_pb2
-from object_detection.utils import label_map_util
-
-slim_example_decoder = tf.contrib.slim.tfexample_decoder
-
-
-# TODO(lzc): keep LookupTensor and BackupHandler in sync with
-# tf.contrib.slim.tfexample_decoder version.
-class LookupTensor(slim_example_decoder.Tensor):
- """An ItemHandler that returns a parsed Tensor, the result of a lookup."""
-
- def __init__(self,
- tensor_key,
- table,
- shape_keys=None,
- shape=None,
- default_value=''):
- """Initializes the LookupTensor handler.
-
- Simply calls a vocabulary (most often, a label mapping) lookup.
-
- Args:
- tensor_key: the name of the `TFExample` feature to read the tensor from.
- table: A tf.lookup table.
- shape_keys: Optional name or list of names of the TF-Example feature in
- which the tensor shape is stored. If a list, then each corresponds to
- one dimension of the shape.
- shape: Optional output shape of the `Tensor`. If provided, the `Tensor` is
- reshaped accordingly.
- default_value: The value used when the `tensor_key` is not found in a
- particular `TFExample`.
-
- Raises:
- ValueError: if both `shape_keys` and `shape` are specified.
- """
- self._table = table
- super(LookupTensor, self).__init__(tensor_key, shape_keys, shape,
- default_value)
-
- def tensors_to_item(self, keys_to_tensors):
- unmapped_tensor = super(LookupTensor, self).tensors_to_item(keys_to_tensors)
- return self._table.lookup(unmapped_tensor)
-
-
-class BackupHandler(slim_example_decoder.ItemHandler):
- """An ItemHandler that tries two ItemHandlers in order."""
-
- def __init__(self, handler, backup):
- """Initializes the BackupHandler handler.
-
- If the first Handler's tensors_to_item returns a Tensor with no elements,
- the second Handler is used.
-
- Args:
- handler: The primary ItemHandler.
- backup: The backup ItemHandler.
-
- Raises:
- ValueError: if either is not an ItemHandler.
- """
- if not isinstance(handler, slim_example_decoder.ItemHandler):
- raise ValueError('Primary handler is of type %s instead of ItemHandler' %
- type(handler))
- if not isinstance(backup, slim_example_decoder.ItemHandler):
- raise ValueError(
- 'Backup handler is of type %s instead of ItemHandler' % type(backup))
- self._handler = handler
- self._backup = backup
- super(BackupHandler, self).__init__(handler.keys + backup.keys)
-
- def tensors_to_item(self, keys_to_tensors):
- item = self._handler.tensors_to_item(keys_to_tensors)
- return control_flow_ops.cond(
- pred=math_ops.equal(math_ops.reduce_prod(array_ops.shape(item)), 0),
- true_fn=lambda: self._backup.tensors_to_item(keys_to_tensors),
- false_fn=lambda: item)
-
-
-class TfExampleDecoder(data_decoder.DataDecoder):
- """Tensorflow Example proto decoder."""
-
- def __init__(self,
- load_instance_masks=False,
- instance_mask_type=input_reader_pb2.NUMERICAL_MASKS,
- label_map_proto_file=None,
- use_display_name=False,
- dct_method='',
- num_keypoints=0,
- num_additional_channels=0):
- """Constructor sets keys_to_features and items_to_handlers.
-
- Args:
- load_instance_masks: whether or not to load and handle instance masks.
- instance_mask_type: type of instance masks. Options are provided in
- input_reader.proto. This is only used if `load_instance_masks` is True.
-      label_map_proto_file: a file path to an
- object_detection.protos.StringIntLabelMap proto. If provided, then the
- mapped IDs of 'image/object/class/text' will take precedence over the
- existing 'image/object/class/label' ID. Also, if provided, it is
- assumed that 'image/object/class/text' will be in the data.
- use_display_name: whether or not to use the `display_name` for label
- mapping (instead of `name`). Only used if label_map_proto_file is
- provided.
-      dct_method: An optional string. Defaults to ''. It only takes
-        effect when the image format is jpeg, and is used to specify a hint
-        about the algorithm used for jpeg decompression. Currently valid
-        values are ['INTEGER_FAST', 'INTEGER_ACCURATE']. The hint may be
-        ignored if, for example, the jpeg library does not have that option.
- num_keypoints: the number of keypoints per object.
- num_additional_channels: how many additional channels to use.
-
- Raises:
- ValueError: If `instance_mask_type` option is not one of
-        input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL_MASKS, or
- input_reader_pb2.PNG_MASKS.
- """
- self.keys_to_features = {
- 'image/encoded':
- tf.FixedLenFeature((), tf.string, default_value=''),
- 'image/format':
- tf.FixedLenFeature((), tf.string, default_value='jpeg'),
- 'image/filename':
- tf.FixedLenFeature((), tf.string, default_value=''),
- 'image/key/sha256':
- tf.FixedLenFeature((), tf.string, default_value=''),
- 'image/source_id':
- tf.FixedLenFeature((), tf.string, default_value=''),
- 'image/height':
- tf.FixedLenFeature((), tf.int64, default_value=1),
- 'image/width':
- tf.FixedLenFeature((), tf.int64, default_value=1),
- # Object boxes and classes.
- 'image/object/bbox/xmin':
- tf.VarLenFeature(tf.float32),
- 'image/object/bbox/xmax':
- tf.VarLenFeature(tf.float32),
- 'image/object/bbox/ymin':
- tf.VarLenFeature(tf.float32),
- 'image/object/bbox/ymax':
- tf.VarLenFeature(tf.float32),
- 'image/object/class/label':
- tf.VarLenFeature(tf.int64),
- 'image/object/class/text':
- tf.VarLenFeature(tf.string),
- 'image/object/area':
- tf.VarLenFeature(tf.float32),
- 'image/object/is_crowd':
- tf.VarLenFeature(tf.int64),
- 'image/object/difficult':
- tf.VarLenFeature(tf.int64),
- 'image/object/group_of':
- tf.VarLenFeature(tf.int64),
- 'image/object/weight':
- tf.VarLenFeature(tf.float32),
- }
- # We are checking `dct_method` instead of passing it directly in order to
- # ensure TF version 1.6 compatibility.
- if dct_method:
- image = slim_example_decoder.Image(
- image_key='image/encoded',
- format_key='image/format',
- channels=3,
- dct_method=dct_method)
- additional_channel_image = slim_example_decoder.Image(
- image_key='image/additional_channels/encoded',
- format_key='image/format',
- channels=1,
- repeated=True,
- dct_method=dct_method)
- else:
- image = slim_example_decoder.Image(
- image_key='image/encoded', format_key='image/format', channels=3)
- additional_channel_image = slim_example_decoder.Image(
- image_key='image/additional_channels/encoded',
- format_key='image/format',
- channels=1,
- repeated=True)
- self.items_to_handlers = {
- fields.InputDataFields.image:
- image,
- fields.InputDataFields.source_id: (
- slim_example_decoder.Tensor('image/source_id')),
- fields.InputDataFields.key: (
- slim_example_decoder.Tensor('image/key/sha256')),
- fields.InputDataFields.filename: (
- slim_example_decoder.Tensor('image/filename')),
- # Object boxes and classes.
- fields.InputDataFields.groundtruth_boxes: (
- slim_example_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
- 'image/object/bbox/')),
- fields.InputDataFields.groundtruth_area:
- slim_example_decoder.Tensor('image/object/area'),
- fields.InputDataFields.groundtruth_is_crowd: (
- slim_example_decoder.Tensor('image/object/is_crowd')),
- fields.InputDataFields.groundtruth_difficult: (
- slim_example_decoder.Tensor('image/object/difficult')),
- fields.InputDataFields.groundtruth_group_of: (
- slim_example_decoder.Tensor('image/object/group_of')),
- fields.InputDataFields.groundtruth_weights: (
- slim_example_decoder.Tensor('image/object/weight')),
- }
- if num_additional_channels > 0:
- self.keys_to_features[
- 'image/additional_channels/encoded'] = tf.FixedLenFeature(
- (num_additional_channels,), tf.string)
- self.items_to_handlers[
- fields.InputDataFields.
- image_additional_channels] = additional_channel_image
- self._num_keypoints = num_keypoints
- if num_keypoints > 0:
- self.keys_to_features['image/object/keypoint/x'] = (
- tf.VarLenFeature(tf.float32))
- self.keys_to_features['image/object/keypoint/y'] = (
- tf.VarLenFeature(tf.float32))
- self.items_to_handlers[fields.InputDataFields.groundtruth_keypoints] = (
- slim_example_decoder.ItemHandlerCallback(
- ['image/object/keypoint/y', 'image/object/keypoint/x'],
- self._reshape_keypoints))
- if load_instance_masks:
- if instance_mask_type in (input_reader_pb2.DEFAULT,
- input_reader_pb2.NUMERICAL_MASKS):
- self.keys_to_features['image/object/mask'] = (
- tf.VarLenFeature(tf.float32))
- self.items_to_handlers[
- fields.InputDataFields.groundtruth_instance_masks] = (
- slim_example_decoder.ItemHandlerCallback(
- ['image/object/mask', 'image/height', 'image/width'],
- self._reshape_instance_masks))
- elif instance_mask_type == input_reader_pb2.PNG_MASKS:
- self.keys_to_features['image/object/mask'] = tf.VarLenFeature(tf.string)
- self.items_to_handlers[
- fields.InputDataFields.groundtruth_instance_masks] = (
- slim_example_decoder.ItemHandlerCallback(
- ['image/object/mask', 'image/height', 'image/width'],
- self._decode_png_instance_masks))
- else:
- raise ValueError('Did not recognize the `instance_mask_type` option.')
- if label_map_proto_file:
- label_map = label_map_util.get_label_map_dict(label_map_proto_file,
- use_display_name)
- # We use a default_value of -1, but we expect all labels to be contained
- # in the label map.
- table = tf.contrib.lookup.HashTable(
- initializer=tf.contrib.lookup.KeyValueTensorInitializer(
- keys=tf.constant(list(label_map.keys())),
- values=tf.constant(list(label_map.values()), dtype=tf.int64)),
- default_value=-1)
- # If the label_map_proto is provided, try to use it in conjunction with
- # the class text, and fall back to a materialized ID.
-      # TODO(lzc): note that here we are using the BackupHandler defined in
-      # this file (which is a fork of slim_example_decoder.BackupHandler).
-      # Switch back to slim_example_decoder.BackupHandler once tf 1.5 becomes
-      # more widespread.
- label_handler = BackupHandler(
- LookupTensor('image/object/class/text', table, default_value=''),
- slim_example_decoder.Tensor('image/object/class/label'))
- else:
- label_handler = slim_example_decoder.Tensor('image/object/class/label')
- self.items_to_handlers[
- fields.InputDataFields.groundtruth_classes] = label_handler
-
- def decode(self, tf_example_string_tensor):
- """Decodes serialized tensorflow example and returns a tensor dictionary.
-
- Args:
- tf_example_string_tensor: a string tensor holding a serialized tensorflow
- example proto.
-
- Returns:
- A dictionary of the following tensors.
- fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
- containing image.
- fields.InputDataFields.source_id - string tensor containing original
- image id.
- fields.InputDataFields.key - string tensor with unique sha256 hash key.
- fields.InputDataFields.filename - string tensor with original dataset
- filename.
- fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape
- [None, 4] containing box corners.
- fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
- [None] containing classes for the boxes.
- fields.InputDataFields.groundtruth_weights - 1D float32 tensor of
- shape [None] indicating the weights of groundtruth boxes.
- fields.InputDataFields.num_groundtruth_boxes - int32 scalar indicating
- the number of groundtruth_boxes.
- fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
-        [None] containing object mask area in pixels squared.
- fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
- [None] indicating if the boxes enclose a crowd.
-
- Optional:
- fields.InputDataFields.image_additional_channels - 3D uint8 tensor of
- shape [None, None, num_additional_channels]. 1st dim is height; 2nd dim
- is width; 3rd dim is the number of additional channels.
- fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
- [None] indicating if the boxes represent `difficult` instances.
- fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
- [None] indicating if the boxes represent `group_of` instances.
- fields.InputDataFields.groundtruth_keypoints - 3D float32 tensor of
- shape [None, None, 2] containing keypoints, where the coordinates of
- the keypoints are ordered (y, x).
- fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
- shape [None, None, None] containing instance masks.
- """
- serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
- decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features,
- self.items_to_handlers)
- keys = decoder.list_items()
- tensors = decoder.decode(serialized_example, items=keys)
- tensor_dict = dict(zip(keys, tensors))
- is_crowd = fields.InputDataFields.groundtruth_is_crowd
- tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
- tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
- tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
- tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
-
- if fields.InputDataFields.image_additional_channels in tensor_dict:
- channels = tensor_dict[fields.InputDataFields.image_additional_channels]
- channels = tf.squeeze(channels, axis=3)
- channels = tf.transpose(channels, perm=[1, 2, 0])
- tensor_dict[fields.InputDataFields.image_additional_channels] = channels
-
- def default_groundtruth_weights():
- return tf.ones(
- [tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]],
- dtype=tf.float32)
-
- tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
- tf.greater(
- tf.shape(
- tensor_dict[fields.InputDataFields.groundtruth_weights])[0],
- 0), lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
- default_groundtruth_weights)
- return tensor_dict
-
- def _reshape_keypoints(self, keys_to_tensors):
- """Reshape keypoints.
-
-    The keypoint coordinates are reshaped to [num_instances,
-    num_keypoints, 2].
-
- Args:
- keys_to_tensors: a dictionary from keys to tensors.
-
- Returns:
-      A 3-D float tensor of shape [num_instances, num_keypoints, 2] holding
-      (y, x) keypoint coordinates.
- """
- y = keys_to_tensors['image/object/keypoint/y']
- if isinstance(y, tf.SparseTensor):
- y = tf.sparse_tensor_to_dense(y)
- y = tf.expand_dims(y, 1)
- x = keys_to_tensors['image/object/keypoint/x']
- if isinstance(x, tf.SparseTensor):
- x = tf.sparse_tensor_to_dense(x)
- x = tf.expand_dims(x, 1)
- keypoints = tf.concat([y, x], 1)
- keypoints = tf.reshape(keypoints, [-1, self._num_keypoints, 2])
- return keypoints
-
- def _reshape_instance_masks(self, keys_to_tensors):
- """Reshape instance segmentation masks.
-
- The instance segmentation masks are reshaped to [num_instances, height,
- width].
-
- Args:
- keys_to_tensors: a dictionary from keys to tensors.
-
- Returns:
- A 3-D float tensor of shape [num_instances, height, width] with values
- in {0, 1}.
- """
- height = keys_to_tensors['image/height']
- width = keys_to_tensors['image/width']
- to_shape = tf.cast(tf.stack([-1, height, width]), tf.int32)
- masks = keys_to_tensors['image/object/mask']
- if isinstance(masks, tf.SparseTensor):
- masks = tf.sparse_tensor_to_dense(masks)
- masks = tf.reshape(tf.to_float(tf.greater(masks, 0.0)), to_shape)
- return tf.cast(masks, tf.float32)
-
- def _decode_png_instance_masks(self, keys_to_tensors):
- """Decode PNG instance segmentation masks and stack into dense tensor.
-
- The instance segmentation masks are reshaped to [num_instances, height,
- width].
-
- Args:
- keys_to_tensors: a dictionary from keys to tensors.
-
- Returns:
- A 3-D float tensor of shape [num_instances, height, width] with values
- in {0, 1}.
- """
-
- def decode_png_mask(image_buffer):
- image = tf.squeeze(
- tf.image.decode_image(image_buffer, channels=1), axis=2)
- image.set_shape([None, None])
- image = tf.to_float(tf.greater(image, 0))
- return image
-
- png_masks = keys_to_tensors['image/object/mask']
- height = keys_to_tensors['image/height']
- width = keys_to_tensors['image/width']
- if isinstance(png_masks, tf.SparseTensor):
- png_masks = tf.sparse_tensor_to_dense(png_masks, default_value='')
- return tf.cond(
- tf.greater(tf.size(png_masks), 0),
- lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32),
- lambda: tf.zeros(tf.to_int32(tf.stack([0, height, width]))))
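
To make the decoder's contract concrete, here is a minimal end-to-end sketch: build one serialized `tf.train.Example` carrying the feature keys the decoder expects, then run `decode` on it (assuming TF 1.x graph mode, mirroring the tests that follow):

```python
# Minimal sketch (TF 1.x): encode a tiny image, wrap it in a tf.train.Example,
# and decode it back with TfExampleDecoder.
import numpy as np
import tensorflow as tf
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder

def _bytes(value):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _floats(value):
  return tf.train.Feature(float_list=tf.train.FloatList(value=value))

image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
with tf.Session() as sess:
  encoded_jpeg = sess.run(tf.image.encode_jpeg(tf.constant(image)))

example = tf.train.Example(features=tf.train.Features(feature={
    'image/encoded': _bytes(encoded_jpeg),
    'image/format': _bytes(b'jpeg'),
    'image/object/bbox/ymin': _floats([0.1]),
    'image/object/bbox/xmin': _floats([0.2]),
    'image/object/bbox/ymax': _floats([0.8]),
    'image/object/bbox/xmax': _floats([0.9]),
})).SerializeToString()

tensor_dict = tf_example_decoder.TfExampleDecoder().decode(
    tf.convert_to_tensor(example))
with tf.Session() as sess:
  out = sess.run(tensor_dict)
print(out[fields.InputDataFields.image].shape)        # (4, 5, 3)
print(out[fields.InputDataFields.groundtruth_boxes])  # [[0.1 0.2 0.8 0.9]]
```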
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data_decoders/tf_example_decoder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/data_decoders/tf_example_decoder_test.py
deleted file mode 100644
index b567b8c20f442f135653b49ace7e85088fd67ad1..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/data_decoders/tf_example_decoder_test.py
+++ /dev/null
@@ -1,767 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.data_decoders.tf_example_decoder."""
-
-import os
-import numpy as np
-import tensorflow as tf
-
-from tensorflow.core.example import example_pb2
-from tensorflow.core.example import feature_pb2
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import test_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import lookup_ops
-from tensorflow.python.ops import parsing_ops
-from object_detection.core import standard_fields as fields
-from object_detection.data_decoders import tf_example_decoder
-from object_detection.protos import input_reader_pb2
-
-slim_example_decoder = tf.contrib.slim.tfexample_decoder
-
-
-class TfExampleDecoderTest(tf.test.TestCase):
-
- def _EncodeImage(self, image_tensor, encoding_type='jpeg'):
- with self.test_session():
- if encoding_type == 'jpeg':
- image_encoded = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
- elif encoding_type == 'png':
- image_encoded = tf.image.encode_png(tf.constant(image_tensor)).eval()
- else:
- raise ValueError('Invalid encoding type.')
- return image_encoded
-
- def _DecodeImage(self, image_encoded, encoding_type='jpeg'):
- with self.test_session():
- if encoding_type == 'jpeg':
- image_decoded = tf.image.decode_jpeg(tf.constant(image_encoded)).eval()
- elif encoding_type == 'png':
- image_decoded = tf.image.decode_png(tf.constant(image_encoded)).eval()
- else:
- raise ValueError('Invalid encoding type.')
- return image_decoded
-
- def _Int64Feature(self, value):
- return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
-
- def _FloatFeature(self, value):
- return tf.train.Feature(float_list=tf.train.FloatList(value=value))
-
- def _BytesFeature(self, value):
- if isinstance(value, list):
- return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
- return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
-
- def _Int64FeatureFromList(self, ndarray):
- return feature_pb2.Feature(
- int64_list=feature_pb2.Int64List(value=ndarray.flatten().tolist()))
-
- def _BytesFeatureFromList(self, ndarray):
- values = ndarray.flatten().tolist()
- return feature_pb2.Feature(bytes_list=feature_pb2.BytesList(value=values))
-
- def testDecodeAdditionalChannels(self):
- image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
-
- additional_channel_tensor = np.random.randint(
- 256, size=(4, 5, 1)).astype(np.uint8)
- encoded_additional_channel = self._EncodeImage(additional_channel_tensor)
- decoded_additional_channel = self._DecodeImage(encoded_additional_channel)
-
- example = tf.train.Example(
- features=tf.train.Features(
- feature={
- 'image/encoded':
- self._BytesFeature(encoded_jpeg),
- 'image/additional_channels/encoded':
- self._BytesFeatureFromList(
- np.array([encoded_additional_channel] * 2)),
- 'image/format':
- self._BytesFeature('jpeg'),
- 'image/source_id':
- self._BytesFeature('image_id'),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder(
- num_additional_channels=2)
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
- self.assertAllEqual(
- np.concatenate([decoded_additional_channel] * 2, axis=2),
- tensor_dict[fields.InputDataFields.image_additional_channels])
-
- def testDecodeExampleWithBranchedBackupHandler(self):
- example1 = example_pb2.Example(
- features=feature_pb2.Features(
- feature={
- 'image/object/class/text':
- self._BytesFeatureFromList(
- np.array(['cat', 'dog', 'guinea pig'])),
- 'image/object/class/label':
- self._Int64FeatureFromList(np.array([42, 10, 900]))
- }))
- example2 = example_pb2.Example(
- features=feature_pb2.Features(
- feature={
- 'image/object/class/text':
- self._BytesFeatureFromList(
- np.array(['cat', 'dog', 'guinea pig'])),
- }))
- example3 = example_pb2.Example(
- features=feature_pb2.Features(
- feature={
- 'image/object/class/label':
- self._Int64FeatureFromList(np.array([42, 10, 901]))
- }))
- # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
- table = lookup_ops.index_table_from_tensor(
- constant_op.constant(['dog', 'guinea pig', 'cat']))
- keys_to_features = {
- 'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
- 'image/object/class/label': parsing_ops.VarLenFeature(dtypes.int64),
- }
- backup_handler = tf_example_decoder.BackupHandler(
- handler=slim_example_decoder.Tensor('image/object/class/label'),
- backup=tf_example_decoder.LookupTensor('image/object/class/text',
- table))
- items_to_handlers = {
- 'labels': backup_handler,
- }
- decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
- items_to_handlers)
- obtained_class_ids_each_example = []
- with self.test_session() as sess:
- sess.run(lookup_ops.tables_initializer())
- for example in [example1, example2, example3]:
- serialized_example = array_ops.reshape(
- example.SerializeToString(), shape=[])
- obtained_class_ids_each_example.append(
- decoder.decode(serialized_example)[0].eval())
-
- self.assertAllClose([42, 10, 900], obtained_class_ids_each_example[0])
- self.assertAllClose([2, 0, 1], obtained_class_ids_each_example[1])
- self.assertAllClose([42, 10, 901], obtained_class_ids_each_example[2])
-
- def testDecodeExampleWithBranchedLookup(self):
-
- example = example_pb2.Example(features=feature_pb2.Features(feature={
- 'image/object/class/text': self._BytesFeatureFromList(
- np.array(['cat', 'dog', 'guinea pig'])),
- }))
- serialized_example = example.SerializeToString()
- # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
- table = lookup_ops.index_table_from_tensor(
- constant_op.constant(['dog', 'guinea pig', 'cat']))
-
- with self.test_session() as sess:
- sess.run(lookup_ops.tables_initializer())
-
- serialized_example = array_ops.reshape(serialized_example, shape=[])
-
- keys_to_features = {
- 'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
- }
-
- items_to_handlers = {
- 'labels':
- tf_example_decoder.LookupTensor('image/object/class/text', table),
- }
-
- decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
- items_to_handlers)
- obtained_class_ids = decoder.decode(serialized_example)[0].eval()
-
- self.assertAllClose([2, 0, 1], obtained_class_ids)
-
- def testDecodeJpegImage(self):
- image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- decoded_jpeg = self._DecodeImage(encoded_jpeg)
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/source_id': self._BytesFeature('image_id'),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
- get_shape().as_list()), [None, None, 3])
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
- self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
-
- def testDecodeImageKeyAndFilename(self):
- image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/key/sha256': self._BytesFeature('abc'),
- 'image/filename': self._BytesFeature('filename')
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertEqual('abc', tensor_dict[fields.InputDataFields.key])
- self.assertEqual('filename', tensor_dict[fields.InputDataFields.filename])
-
- def testDecodePngImage(self):
- image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
- encoded_png = self._EncodeImage(image_tensor, encoding_type='png')
- decoded_png = self._DecodeImage(encoded_png, encoding_type='png')
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_png),
- 'image/format': self._BytesFeature('png'),
- 'image/source_id': self._BytesFeature('image_id')
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
- get_shape().as_list()), [None, None, 3])
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
- self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
-
- def testDecodePngInstanceMasks(self):
- image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- mask_1 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
- mask_2 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
- encoded_png_1 = self._EncodeImage(mask_1, encoding_type='png')
- decoded_png_1 = np.squeeze(mask_1.astype(np.float32))
- encoded_png_2 = self._EncodeImage(mask_2, encoding_type='png')
- decoded_png_2 = np.squeeze(mask_2.astype(np.float32))
- encoded_masks = [encoded_png_1, encoded_png_2]
- decoded_masks = np.stack([decoded_png_1, decoded_png_2])
- example = tf.train.Example(
- features=tf.train.Features(
- feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/object/mask': self._BytesFeature(encoded_masks)
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder(
- load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS)
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual(
- decoded_masks,
- tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
-
- def testDecodeEmptyPngInstanceMasks(self):
- image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- encoded_masks = []
- example = tf.train.Example(
- features=tf.train.Features(
- feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/object/mask': self._BytesFeature(encoded_masks),
- 'image/height': self._Int64Feature([10]),
- 'image/width': self._Int64Feature([10]),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder(
- load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS)
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
- self.assertAllEqual(
- tensor_dict[fields.InputDataFields.groundtruth_instance_masks].shape,
- [0, 10, 10])
-
- def testDecodeBoundingBox(self):
- image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- bbox_ymins = [0.0, 4.0]
- bbox_xmins = [1.0, 5.0]
- bbox_ymaxs = [2.0, 6.0]
- bbox_xmaxs = [3.0, 7.0]
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
- 'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
- 'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
- 'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
- get_shape().as_list()), [None, 4])
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
- bbox_ymaxs, bbox_xmaxs]).transpose()
- self.assertAllEqual(expected_boxes,
- tensor_dict[fields.InputDataFields.groundtruth_boxes])
- self.assertAllEqual(
- 2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes])
-
- @test_util.enable_c_shapes
- def testDecodeKeypoint(self):
- image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- bbox_ymins = [0.0, 4.0]
- bbox_xmins = [1.0, 5.0]
- bbox_ymaxs = [2.0, 6.0]
- bbox_xmaxs = [3.0, 7.0]
- keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
- keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
- 'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
- 'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
- 'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
- 'image/object/keypoint/y': self._FloatFeature(keypoint_ys),
- 'image/object/keypoint/x': self._FloatFeature(keypoint_xs),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3)
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
- get_shape().as_list()), [None, 4])
- self.assertAllEqual((tensor_dict[fields.InputDataFields.
- groundtruth_keypoints].
- get_shape().as_list()), [2, 3, 2])
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
- bbox_ymaxs, bbox_xmaxs]).transpose()
- self.assertAllEqual(expected_boxes,
- tensor_dict[fields.InputDataFields.groundtruth_boxes])
- self.assertAllEqual(
- 2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes])
-
- expected_keypoints = (
- np.vstack([keypoint_ys, keypoint_xs]).transpose().reshape((2, 3, 2)))
- self.assertAllEqual(expected_keypoints,
- tensor_dict[
- fields.InputDataFields.groundtruth_keypoints])
-
- def testDecodeDefaultGroundtruthWeights(self):
- image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- bbox_ymins = [0.0, 4.0]
- bbox_xmins = [1.0, 5.0]
- bbox_ymaxs = [2.0, 6.0]
- bbox_xmaxs = [3.0, 7.0]
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
- 'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
- 'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
- 'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
- get_shape().as_list()), [None, 4])
-
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllClose(tensor_dict[fields.InputDataFields.groundtruth_weights],
- np.ones(2, dtype=np.float32))
-
- @test_util.enable_c_shapes
- def testDecodeObjectLabel(self):
- image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- bbox_classes = [0, 1]
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/object/class/label': self._Int64Feature(bbox_classes),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[
- fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
- [2])
-
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual(bbox_classes,
- tensor_dict[fields.InputDataFields.groundtruth_classes])
-
- def testDecodeObjectLabelNoText(self):
- image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- bbox_classes = [1, 2]
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/object/class/label': self._Int64Feature(bbox_classes),
- })).SerializeToString()
- label_map_string = """
- item {
- id:1
- name:'cat'
- }
- item {
- id:2
- name:'dog'
- }
- """
- label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
- with tf.gfile.Open(label_map_path, 'wb') as f:
- f.write(label_map_string)
-
- example_decoder = tf_example_decoder.TfExampleDecoder(
- label_map_proto_file=label_map_path)
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[
- fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
- [None])
-
- init = tf.tables_initializer()
- with self.test_session() as sess:
- sess.run(init)
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual(bbox_classes,
- tensor_dict[fields.InputDataFields.groundtruth_classes])
-
- def testDecodeObjectLabelUnrecognizedName(self):
- image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- bbox_classes_text = ['cat', 'cheetah']
- example = tf.train.Example(
- features=tf.train.Features(
- feature={
- 'image/encoded':
- self._BytesFeature(encoded_jpeg),
- 'image/format':
- self._BytesFeature('jpeg'),
- 'image/object/class/text':
- self._BytesFeature(bbox_classes_text),
- })).SerializeToString()
-
- label_map_string = """
- item {
- id:2
- name:'cat'
- }
- item {
- id:1
- name:'dog'
- }
- """
- label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
- with tf.gfile.Open(label_map_path, 'wb') as f:
- f.write(label_map_string)
- example_decoder = tf_example_decoder.TfExampleDecoder(
- label_map_proto_file=label_map_path)
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
- .get_shape().as_list()), [None])
-
- with self.test_session() as sess:
- sess.run(tf.tables_initializer())
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual([2, -1],
- tensor_dict[fields.InputDataFields.groundtruth_classes])
-
- def testDecodeObjectLabelWithMapping(self):
- image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- bbox_classes_text = ['cat', 'dog']
- example = tf.train.Example(
- features=tf.train.Features(
- feature={
- 'image/encoded':
- self._BytesFeature(encoded_jpeg),
- 'image/format':
- self._BytesFeature('jpeg'),
- 'image/object/class/text':
- self._BytesFeature(bbox_classes_text),
- })).SerializeToString()
-
- label_map_string = """
- item {
- id:3
- name:'cat'
- }
- item {
- id:1
- name:'dog'
- }
- """
- label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
- with tf.gfile.Open(label_map_path, 'wb') as f:
- f.write(label_map_string)
- example_decoder = tf_example_decoder.TfExampleDecoder(
- label_map_proto_file=label_map_path)
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
- .get_shape().as_list()), [None])
-
- with self.test_session() as sess:
- sess.run(tf.tables_initializer())
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual([3, 1],
- tensor_dict[fields.InputDataFields.groundtruth_classes])
-
- @test_util.enable_c_shapes
- def testDecodeObjectArea(self):
- image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- object_area = [100., 174.]
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/object/area': self._FloatFeature(object_area),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area].
- get_shape().as_list()), [2])
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual(object_area,
- tensor_dict[fields.InputDataFields.groundtruth_area])
-
- @test_util.enable_c_shapes
- def testDecodeObjectIsCrowd(self):
- image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- object_is_crowd = [0, 1]
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/object/is_crowd': self._Int64Feature(object_is_crowd),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[
- fields.InputDataFields.groundtruth_is_crowd].get_shape().as_list()),
- [2])
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual([bool(item) for item in object_is_crowd],
- tensor_dict[
- fields.InputDataFields.groundtruth_is_crowd])
-
- @test_util.enable_c_shapes
- def testDecodeObjectDifficult(self):
- image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- object_difficult = [0, 1]
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/object/difficult': self._Int64Feature(object_difficult),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[
- fields.InputDataFields.groundtruth_difficult].get_shape().as_list()),
- [2])
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual([bool(item) for item in object_difficult],
- tensor_dict[
- fields.InputDataFields.groundtruth_difficult])
-
- @test_util.enable_c_shapes
- def testDecodeObjectGroupOf(self):
- image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- object_group_of = [0, 1]
- example = tf.train.Example(features=tf.train.Features(
- feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/object/group_of': self._Int64Feature(object_group_of),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[
- fields.InputDataFields.groundtruth_group_of].get_shape().as_list()),
- [2])
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual(
- [bool(item) for item in object_group_of],
- tensor_dict[fields.InputDataFields.groundtruth_group_of])
-
- def testDecodeObjectWeight(self):
- image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- object_weights = [0.75, 1.0]
- example = tf.train.Example(features=tf.train.Features(
- feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/object/weight': self._FloatFeature(object_weights),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[
- fields.InputDataFields.groundtruth_weights].get_shape().as_list()),
- [None])
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual(
- object_weights,
- tensor_dict[fields.InputDataFields.groundtruth_weights])
-
- @test_util.enable_c_shapes
- def testDecodeInstanceSegmentation(self):
- num_instances = 4
- image_height = 5
- image_width = 3
-
- # Randomly generate image.
- image_tensor = np.random.randint(256, size=(image_height,
- image_width,
- 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
-
- # Randomly generate instance segmentation masks.
- instance_masks = (
- np.random.randint(2, size=(num_instances,
- image_height,
- image_width)).astype(np.float32))
- instance_masks_flattened = np.reshape(instance_masks, [-1])
-
- # Randomly generate class labels for each instance.
- object_classes = np.random.randint(
- 100, size=(num_instances)).astype(np.int64)
-
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/height': self._Int64Feature([image_height]),
- 'image/width': self._Int64Feature([image_width]),
- 'image/object/mask': self._FloatFeature(instance_masks_flattened),
- 'image/object/class/label': self._Int64Feature(
- object_classes)})).SerializeToString()
- example_decoder = tf_example_decoder.TfExampleDecoder(
- load_instance_masks=True)
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((
- tensor_dict[fields.InputDataFields.groundtruth_instance_masks].
- get_shape().as_list()), [4, 5, 3])
-
- self.assertAllEqual((
- tensor_dict[fields.InputDataFields.groundtruth_classes].
- get_shape().as_list()), [4])
-
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual(
- instance_masks.astype(np.float32),
- tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
- self.assertAllEqual(
- object_classes,
- tensor_dict[fields.InputDataFields.groundtruth_classes])
-
- def testInstancesNotAvailableByDefault(self):
- num_instances = 4
- image_height = 5
- image_width = 3
- # Randomly generate image.
- image_tensor = np.random.randint(256, size=(image_height,
- image_width,
- 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
-
- # Randomly generate instance segmentation masks.
- instance_masks = (
- np.random.randint(2, size=(num_instances,
- image_height,
- image_width)).astype(np.float32))
- instance_masks_flattened = np.reshape(instance_masks, [-1])
-
- # Randomly generate class labels for each instance.
- object_classes = np.random.randint(
- 100, size=(num_instances)).astype(np.int64)
-
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/height': self._Int64Feature([image_height]),
- 'image/width': self._Int64Feature([image_width]),
- 'image/object/mask': self._FloatFeature(instance_masks_flattened),
- 'image/object/class/label': self._Int64Feature(
- object_classes)})).SerializeToString()
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
- self.assertTrue(fields.InputDataFields.groundtruth_instance_masks
- not in tensor_dict)
-
-
-if __name__ == '__main__':
- tf.test.main()
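
One detail worth calling out from these tests: `BackupHandler` only falls back when the primary tensor is empty, and it detects emptiness as `reduce_prod(shape(item)) == 0`. A tiny standalone sketch of that predicate (TF 1.x assumed):

```python
# Sketch: the emptiness test that drives BackupHandler's fallback branch.
import tensorflow as tf

def is_empty(t):
  # A tensor has no elements iff the product of its dimensions is zero.
  return tf.equal(tf.reduce_prod(tf.shape(t)), 0)

with tf.Session() as sess:
  print(sess.run(is_empty(tf.constant([], dtype=tf.int64))))  # True
  print(sess.run(is_empty(tf.constant([42, 10, 900]))))       # False
```

This is why `example2` above (class text only, no label feature) takes the lookup path while `example1` and `example3` keep their materialized ids.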
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_coco_tf_record.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_coco_tf_record.py
deleted file mode 100644
index 9928443d805effb24b46f599929c4b7db73fb2c8..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_coco_tf_record.py
+++ /dev/null
@@ -1,273 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-r"""Convert raw COCO dataset to TFRecord for object_detection.
-
-Example usage:
- python create_coco_tf_record.py --logtostderr \
- --train_image_dir="${TRAIN_IMAGE_DIR}" \
- --val_image_dir="${VAL_IMAGE_DIR}" \
- --test_image_dir="${TEST_IMAGE_DIR}" \
- --train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
- --val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
- --testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
- --output_dir="${OUTPUT_DIR}"
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import hashlib
-import io
-import json
-import os
-import numpy as np
-import PIL.Image
-
-from pycocotools import mask
-import tensorflow as tf
-
-from object_detection.utils import dataset_util
-from object_detection.utils import label_map_util
-
-
-flags = tf.app.flags
-tf.flags.DEFINE_boolean('include_masks', False,
-                        'Whether to include instance segmentation masks '
- '(PNG encoded) in the result. default: False.')
-tf.flags.DEFINE_string('train_image_dir', '',
- 'Training image directory.')
-tf.flags.DEFINE_string('val_image_dir', '',
- 'Validation image directory.')
-tf.flags.DEFINE_string('test_image_dir', '',
- 'Test image directory.')
-tf.flags.DEFINE_string('train_annotations_file', '',
- 'Training annotations JSON file.')
-tf.flags.DEFINE_string('val_annotations_file', '',
- 'Validation annotations JSON file.')
-tf.flags.DEFINE_string('testdev_annotations_file', '',
- 'Test-dev annotations JSON file.')
-tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
-
-FLAGS = flags.FLAGS
-
-tf.logging.set_verbosity(tf.logging.INFO)
-
-
-def create_tf_example(image,
- annotations_list,
- image_dir,
- category_index,
- include_masks=False):
- """Converts image and annotations to a tf.Example proto.
-
- Args:
- image: dict with keys:
- [u'license', u'file_name', u'coco_url', u'height', u'width',
- u'date_captured', u'flickr_url', u'id']
- annotations_list:
- list of dicts with keys:
- [u'segmentation', u'area', u'iscrowd', u'image_id',
- u'bbox', u'category_id', u'id']
- Notice that bounding box coordinates in the official COCO dataset are
- given as [x, y, width, height] tuples using absolute coordinates where
- x, y represent the top-left (0-indexed) corner. This function converts
-      to the format expected by the Tensorflow Object Detection API (which
-      is [ymin, xmin, ymax, xmax] with coordinates normalized relative
-      to image size).
- image_dir: directory containing the image files.
- category_index: a dict containing COCO category information keyed
- by the 'id' field of each category. See the
- label_map_util.create_category_index function.
-    include_masks: Whether to include instance segmentation masks
- (PNG encoded) in the result. default: False.
- Returns:
- example: The converted tf.Example
- num_annotations_skipped: Number of (invalid) annotations that were ignored.
-
- Raises:
-    ValueError: if the image pointed to by image['file_name'] is not a valid JPEG
- """
- image_height = image['height']
- image_width = image['width']
- filename = image['file_name']
- image_id = image['id']
-
- full_path = os.path.join(image_dir, filename)
- with tf.gfile.GFile(full_path, 'rb') as fid:
- encoded_jpg = fid.read()
- encoded_jpg_io = io.BytesIO(encoded_jpg)
- image = PIL.Image.open(encoded_jpg_io)
- key = hashlib.sha256(encoded_jpg).hexdigest()
-
- xmin = []
- xmax = []
- ymin = []
- ymax = []
- is_crowd = []
- category_names = []
- category_ids = []
- area = []
- encoded_mask_png = []
- num_annotations_skipped = 0
- for object_annotations in annotations_list:
- (x, y, width, height) = tuple(object_annotations['bbox'])
- if width <= 0 or height <= 0:
- num_annotations_skipped += 1
- continue
- if x + width > image_width or y + height > image_height:
- num_annotations_skipped += 1
- continue
- xmin.append(float(x) / image_width)
- xmax.append(float(x + width) / image_width)
- ymin.append(float(y) / image_height)
- ymax.append(float(y + height) / image_height)
- is_crowd.append(object_annotations['iscrowd'])
- category_id = int(object_annotations['category_id'])
- category_ids.append(category_id)
- category_names.append(category_index[category_id]['name'].encode('utf8'))
- area.append(object_annotations['area'])
-
- if include_masks:
- run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
- image_height, image_width)
- binary_mask = mask.decode(run_len_encoding)
- if not object_annotations['iscrowd']:
- binary_mask = np.amax(binary_mask, axis=2)
- pil_image = PIL.Image.fromarray(binary_mask)
- output_io = io.BytesIO()
- pil_image.save(output_io, format='PNG')
- encoded_mask_png.append(output_io.getvalue())
- feature_dict = {
- 'image/height':
- dataset_util.int64_feature(image_height),
- 'image/width':
- dataset_util.int64_feature(image_width),
- 'image/filename':
- dataset_util.bytes_feature(filename.encode('utf8')),
- 'image/source_id':
- dataset_util.bytes_feature(str(image_id).encode('utf8')),
- 'image/key/sha256':
- dataset_util.bytes_feature(key.encode('utf8')),
- 'image/encoded':
- dataset_util.bytes_feature(encoded_jpg),
- 'image/format':
- dataset_util.bytes_feature('jpeg'.encode('utf8')),
- 'image/object/bbox/xmin':
- dataset_util.float_list_feature(xmin),
- 'image/object/bbox/xmax':
- dataset_util.float_list_feature(xmax),
- 'image/object/bbox/ymin':
- dataset_util.float_list_feature(ymin),
- 'image/object/bbox/ymax':
- dataset_util.float_list_feature(ymax),
- 'image/object/class/label':
- dataset_util.int64_list_feature(category_ids),
- 'image/object/is_crowd':
- dataset_util.int64_list_feature(is_crowd),
- 'image/object/area':
- dataset_util.float_list_feature(area),
- }
- if include_masks:
- feature_dict['image/object/mask'] = (
- dataset_util.bytes_list_feature(encoded_mask_png))
- example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
- return key, example, num_annotations_skipped
-
-
-def _create_tf_record_from_coco_annotations(
- annotations_file, image_dir, output_path, include_masks):
- """Loads COCO annotation json files and converts to tf.Record format.
-
- Args:
- annotations_file: JSON file containing bounding box annotations.
- image_dir: Directory containing the image files.
- output_path: Path to output tf.Record file.
-    include_masks: Whether to include instance segmentation masks
- (PNG encoded) in the result. default: False.
- """
- with tf.gfile.GFile(annotations_file, 'r') as fid:
- groundtruth_data = json.load(fid)
- images = groundtruth_data['images']
- category_index = label_map_util.create_category_index(
- groundtruth_data['categories'])
-
- annotations_index = {}
- if 'annotations' in groundtruth_data:
- tf.logging.info(
- 'Found groundtruth annotations. Building annotations index.')
- for annotation in groundtruth_data['annotations']:
- image_id = annotation['image_id']
- if image_id not in annotations_index:
- annotations_index[image_id] = []
- annotations_index[image_id].append(annotation)
- missing_annotation_count = 0
- for image in images:
- image_id = image['id']
- if image_id not in annotations_index:
- missing_annotation_count += 1
- annotations_index[image_id] = []
- tf.logging.info('%d images are missing annotations.',
- missing_annotation_count)
-
- tf.logging.info('writing to output path: %s', output_path)
- writer = tf.python_io.TFRecordWriter(output_path)
- total_num_annotations_skipped = 0
- for idx, image in enumerate(images):
- if idx % 100 == 0:
- tf.logging.info('On image %d of %d', idx, len(images))
- annotations_list = annotations_index[image['id']]
- _, tf_example, num_annotations_skipped = create_tf_example(
- image, annotations_list, image_dir, category_index, include_masks)
- total_num_annotations_skipped += num_annotations_skipped
- writer.write(tf_example.SerializeToString())
- writer.close()
- tf.logging.info('Finished writing, skipped %d annotations.',
- total_num_annotations_skipped)
-
-
-def main(_):
- assert FLAGS.train_image_dir, '`train_image_dir` missing.'
- assert FLAGS.val_image_dir, '`val_image_dir` missing.'
- assert FLAGS.test_image_dir, '`test_image_dir` missing.'
- assert FLAGS.train_annotations_file, '`train_annotations_file` missing.'
- assert FLAGS.val_annotations_file, '`val_annotations_file` missing.'
- assert FLAGS.testdev_annotations_file, '`testdev_annotations_file` missing.'
-
- if not tf.gfile.IsDirectory(FLAGS.output_dir):
- tf.gfile.MakeDirs(FLAGS.output_dir)
- train_output_path = os.path.join(FLAGS.output_dir, 'coco_train.record')
- val_output_path = os.path.join(FLAGS.output_dir, 'coco_val.record')
- testdev_output_path = os.path.join(FLAGS.output_dir, 'coco_testdev.record')
-
- _create_tf_record_from_coco_annotations(
- FLAGS.train_annotations_file,
- FLAGS.train_image_dir,
- train_output_path,
- FLAGS.include_masks)
- _create_tf_record_from_coco_annotations(
- FLAGS.val_annotations_file,
- FLAGS.val_image_dir,
- val_output_path,
- FLAGS.include_masks)
- _create_tf_record_from_coco_annotations(
- FLAGS.testdev_annotations_file,
- FLAGS.test_image_dir,
- testdev_output_path,
- FLAGS.include_masks)
-
-
-if __name__ == '__main__':
- tf.app.run()
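Once the records are written, a quick readback is a cheap sanity check. A minimal sketch, assuming TF 1.x and a hypothetical output directory of /tmp/coco (substitute the actual --output_dir):

import tensorflow as tf

# Iterate the serialized tf.Example protos and inspect the first one.
for serialized in tf.python_io.tf_record_iterator('/tmp/coco/coco_val.record'):
  example = tf.train.Example()
  example.ParseFromString(serialized)
  feature = example.features.feature
  print(feature['image/source_id'].bytes_list.value,
        feature['image/height'].int64_list.value,
        feature['image/width'].int64_list.value)
  break  # look at only the first record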
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_coco_tf_record_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_coco_tf_record_test.py
deleted file mode 100644
index 45697eeff5bc9f103621fda2cb729ee71ef7c4d6..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_coco_tf_record_test.py
+++ /dev/null
@@ -1,188 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Test for create_coco_tf_record.py."""
-
-import io
-import os
-
-import numpy as np
-import PIL.Image
-import tensorflow as tf
-
-from object_detection.dataset_tools import create_coco_tf_record
-
-
-class CreateCocoTFRecordTest(tf.test.TestCase):
-
- def _assertProtoEqual(self, proto_field, expectation):
- """Helper function to assert if a proto field equals some value.
-
- Args:
- proto_field: The protobuf field to compare.
- expectation: The expected value of the protobuf field.
- """
- proto_list = [p for p in proto_field]
- self.assertListEqual(proto_list, expectation)
-
- def test_create_tf_example(self):
- image_file_name = 'tmp_image.jpg'
-    # Use uint8 pixel data so PIL.Image.fromarray accepts mode 'RGB'.
-    image_data = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)
- tmp_dir = self.get_temp_dir()
- save_path = os.path.join(tmp_dir, image_file_name)
- image = PIL.Image.fromarray(image_data, 'RGB')
- image.save(save_path)
-
- image = {
- 'file_name': image_file_name,
- 'height': 256,
- 'width': 256,
- 'id': 11,
- }
-
- annotations_list = [{
- 'area': .5,
- 'iscrowd': False,
- 'image_id': 11,
- 'bbox': [64, 64, 128, 128],
- 'category_id': 2,
- 'id': 1000,
- }]
-
- image_dir = tmp_dir
- category_index = {
- 1: {
- 'name': 'dog',
- 'id': 1
- },
- 2: {
- 'name': 'cat',
- 'id': 2
- },
- 3: {
- 'name': 'human',
- 'id': 3
- }
- }
-
- (_, example,
- num_annotations_skipped) = create_coco_tf_record.create_tf_example(
- image, annotations_list, image_dir, category_index)
-
- self.assertEqual(num_annotations_skipped, 0)
- self._assertProtoEqual(
- example.features.feature['image/height'].int64_list.value, [256])
- self._assertProtoEqual(
- example.features.feature['image/width'].int64_list.value, [256])
- self._assertProtoEqual(
- example.features.feature['image/filename'].bytes_list.value,
- [image_file_name])
- self._assertProtoEqual(
- example.features.feature['image/source_id'].bytes_list.value,
- [str(image['id'])])
- self._assertProtoEqual(
- example.features.feature['image/format'].bytes_list.value, ['jpeg'])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/xmin'].float_list.value,
- [0.25])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/ymin'].float_list.value,
- [0.25])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/xmax'].float_list.value,
- [0.75])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/ymax'].float_list.value,
- [0.75])
-
- def test_create_tf_example_with_instance_masks(self):
- image_file_name = 'tmp_image.jpg'
-    # Use uint8 pixel data so PIL.Image.fromarray accepts mode 'RGB'.
-    image_data = np.random.randint(0, 256, (8, 8, 3), dtype=np.uint8)
- tmp_dir = self.get_temp_dir()
- save_path = os.path.join(tmp_dir, image_file_name)
- image = PIL.Image.fromarray(image_data, 'RGB')
- image.save(save_path)
-
- image = {
- 'file_name': image_file_name,
- 'height': 8,
- 'width': 8,
- 'id': 11,
- }
-
- annotations_list = [{
- 'area': .5,
- 'iscrowd': False,
- 'image_id': 11,
- 'bbox': [0, 0, 8, 8],
- 'segmentation': [[4, 0, 0, 0, 0, 4], [8, 4, 4, 8, 8, 8]],
- 'category_id': 1,
- 'id': 1000,
- }]
-
- image_dir = tmp_dir
- category_index = {
- 1: {
- 'name': 'dog',
- 'id': 1
- },
- }
-
- (_, example,
- num_annotations_skipped) = create_coco_tf_record.create_tf_example(
- image, annotations_list, image_dir, category_index, include_masks=True)
-
- self.assertEqual(num_annotations_skipped, 0)
- self._assertProtoEqual(
- example.features.feature['image/height'].int64_list.value, [8])
- self._assertProtoEqual(
- example.features.feature['image/width'].int64_list.value, [8])
- self._assertProtoEqual(
- example.features.feature['image/filename'].bytes_list.value,
- [image_file_name])
- self._assertProtoEqual(
- example.features.feature['image/source_id'].bytes_list.value,
- [str(image['id'])])
- self._assertProtoEqual(
- example.features.feature['image/format'].bytes_list.value, ['jpeg'])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/xmin'].float_list.value,
- [0])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/ymin'].float_list.value,
- [0])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/xmax'].float_list.value,
- [1])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/ymax'].float_list.value,
- [1])
- encoded_mask_pngs = [
- io.BytesIO(encoded_masks) for encoded_masks in example.features.feature[
- 'image/object/mask'].bytes_list.value
- ]
- pil_masks = [
- np.array(PIL.Image.open(encoded_mask_png))
- for encoded_mask_png in encoded_mask_pngs
- ]
-    self.assertEqual(len(pil_masks), 1)
- self.assertAllEqual(pil_masks[0],
- [[1, 1, 1, 0, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0, 0, 0],
- [1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
- [0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 1, 1],
- [0, 0, 0, 0, 0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 1, 1]])
-
-
-if __name__ == '__main__':
- tf.test.main()
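The expected 8x8 mask above is the union of the two COCO polygons rasterized by pycocotools. A minimal sketch of that decoding step, mirroring how the converter handles non-crowd segmentations (assuming pycocotools is installed):

import numpy as np
from pycocotools import mask as mask_util

polygons = [[4, 0, 0, 0, 0, 4], [8, 4, 4, 8, 8, 8]]
rles = mask_util.frPyObjects(polygons, 8, 8)  # rasterize at height=8, width=8
# decode() returns one h x w channel per polygon; take the union.
binary_mask = np.amax(mask_util.decode(rles), axis=2)
print(binary_mask)  # the 8x8 array of 0s and 1s asserted in the test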
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_kitti_tf_record.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_kitti_tf_record.py
deleted file mode 100644
index c612db99166114689b8c40112bc03be53db44eef..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_kitti_tf_record.py
+++ /dev/null
@@ -1,310 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-r"""Convert raw KITTI detection dataset to TFRecord for object_detection.
-
-Converts the KITTI detection dataset to TFRecords in a standard format that
-  allows it to be used for training object detectors. The raw dataset can be
-  downloaded from:
-  http://kitti.is.tue.mpg.de/kitti/data_object_image_2.zip
-  http://kitti.is.tue.mpg.de/kitti/data_object_label_2.zip
-  Permission can be requested at the main website.
-
-  The KITTI detection dataset contains 7481 training images. Using this code
-  with the default settings will set aside the first 500 images as a
-  validation set. This can be altered using the flags; see details below.
-
-Example usage:
- python object_detection/dataset_tools/create_kitti_tf_record.py \
- --data_dir=/home/user/kitti \
- --output_path=/home/user/kitti.record
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-
-import hashlib
-import io
-import os
-
-import numpy as np
-import PIL.Image as pil
-import tensorflow as tf
-
-from object_detection.utils import dataset_util
-from object_detection.utils import label_map_util
-from object_detection.utils.np_box_ops import iou
-
-tf.app.flags.DEFINE_string('data_dir', '', 'Location of root directory for the '
-                           'data. Folder structure is assumed to be: '
-                           '/training/label_2 (annotations) and '
-                           '/data_object_image_2/training/image_2 '
-                           '(images).')
-tf.app.flags.DEFINE_string('output_path', '', 'Path to which TFRecord files '
-                           'will be written. The TFRecord with the training set '
-                           'will be located at: _train.tfrecord. '
-                           'The TFRecord with the validation set will be '
-                           'located at: _val.tfrecord')
-tf.app.flags.DEFINE_string('classes_to_use', 'car,pedestrian,dontcare',
-                           'Comma separated list of class names that will be '
-                           'used. Adding the dontcare class will remove all '
-                           'bboxes in the dontcare regions.')
-tf.app.flags.DEFINE_string('label_map_path', 'data/kitti_label_map.pbtxt',
-                           'Path to label map proto.')
-tf.app.flags.DEFINE_integer('validation_set_size', 500, 'Number of images to '
-                            'be used as a validation set.')
-FLAGS = tf.app.flags.FLAGS
-
-
-def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use,
- label_map_path, validation_set_size):
- """Convert the KITTI detection dataset to TFRecords.
-
- Args:
-    data_dir: The full path to the folder containing the unzipped data from
-      data_object_image_2.zip and data_object_label_2.zip.
- Folder structure is assumed to be: data_dir/training/label_2 (annotations)
- and data_dir/data_object_image_2/training/image_2 (images).
- output_path: The path to which TFRecord files will be written. The TFRecord
- with the training set will be located at: _train.tfrecord
- And the TFRecord with the validation set will be located at:
- _val.tfrecord
-    classes_to_use: List of strings naming the classes for which data should be
-      converted. Use the same names as presented in the KITTI README file.
-      Adding the dontcare class will remove all other bounding boxes that
-      overlap with areas marked as dontcare regions.
-    label_map_path: Path to label map proto.
-    validation_set_size: How many images should be left as the validation set.
-      (The first `validation_set_size` examples are selected for the
-      validation set.)
- """
- label_map_dict = label_map_util.get_label_map_dict(label_map_path)
- train_count = 0
- val_count = 0
-
- annotation_dir = os.path.join(data_dir,
- 'training',
- 'label_2')
-
- image_dir = os.path.join(data_dir,
- 'data_object_image_2',
- 'training',
- 'image_2')
-
-  train_writer = tf.python_io.TFRecordWriter('%s_train.tfrecord' %
-                                             output_path)
-  val_writer = tf.python_io.TFRecordWriter('%s_val.tfrecord' %
-                                           output_path)
-
- images = sorted(tf.gfile.ListDirectory(image_dir))
- for img_name in images:
- img_num = int(img_name.split('.')[0])
- is_validation_img = img_num < validation_set_size
- img_anno = read_annotation_file(os.path.join(annotation_dir,
- str(img_num).zfill(6)+'.txt'))
-
- image_path = os.path.join(image_dir, img_name)
-
- # Filter all bounding boxes of this frame that are of a legal class, and
- # don't overlap with a dontcare region.
- # TODO(talremez) filter out targets that are truncated or heavily occluded.
- annotation_for_image = filter_annotations(img_anno, classes_to_use)
-
- example = prepare_example(image_path, annotation_for_image, label_map_dict)
- if is_validation_img:
- val_writer.write(example.SerializeToString())
- val_count += 1
- else:
- train_writer.write(example.SerializeToString())
- train_count += 1
-
- train_writer.close()
- val_writer.close()
-
-
-def prepare_example(image_path, annotations, label_map_dict):
- """Converts a dictionary with annotations for an image to tf.Example proto.
-
- Args:
- image_path: The complete path to image.
-    annotations: A dictionary of annotation arrays covering all objects that
-      appear in the image (one entry per object in each array).
- label_map_dict: A map from string label names to integer ids.
-
- Returns:
- example: The converted tf.Example.
- """
- with tf.gfile.GFile(image_path, 'rb') as fid:
- encoded_png = fid.read()
- encoded_png_io = io.BytesIO(encoded_png)
- image = pil.open(encoded_png_io)
- image = np.asarray(image)
-
- key = hashlib.sha256(encoded_png).hexdigest()
-
- width = int(image.shape[1])
- height = int(image.shape[0])
-
- xmin_norm = annotations['2d_bbox_left'] / float(width)
- ymin_norm = annotations['2d_bbox_top'] / float(height)
- xmax_norm = annotations['2d_bbox_right'] / float(width)
- ymax_norm = annotations['2d_bbox_bottom'] / float(height)
-
- difficult_obj = [0]*len(xmin_norm)
-
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/height': dataset_util.int64_feature(height),
- 'image/width': dataset_util.int64_feature(width),
- 'image/filename': dataset_util.bytes_feature(image_path.encode('utf8')),
- 'image/source_id': dataset_util.bytes_feature(image_path.encode('utf8')),
- 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
- 'image/encoded': dataset_util.bytes_feature(encoded_png),
- 'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
- 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin_norm),
- 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax_norm),
- 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin_norm),
- 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax_norm),
- 'image/object/class/text': dataset_util.bytes_list_feature(
- [x.encode('utf8') for x in annotations['type']]),
- 'image/object/class/label': dataset_util.int64_list_feature(
- [label_map_dict[x] for x in annotations['type']]),
- 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
- 'image/object/truncated': dataset_util.float_list_feature(
- annotations['truncated']),
- 'image/object/alpha': dataset_util.float_list_feature(
- annotations['alpha']),
- 'image/object/3d_bbox/height': dataset_util.float_list_feature(
- annotations['3d_bbox_height']),
- 'image/object/3d_bbox/width': dataset_util.float_list_feature(
- annotations['3d_bbox_width']),
- 'image/object/3d_bbox/length': dataset_util.float_list_feature(
- annotations['3d_bbox_length']),
- 'image/object/3d_bbox/x': dataset_util.float_list_feature(
- annotations['3d_bbox_x']),
- 'image/object/3d_bbox/y': dataset_util.float_list_feature(
- annotations['3d_bbox_y']),
- 'image/object/3d_bbox/z': dataset_util.float_list_feature(
- annotations['3d_bbox_z']),
- 'image/object/3d_bbox/rot_y': dataset_util.float_list_feature(
- annotations['3d_bbox_rot_y']),
- }))
-
- return example
-
-
-def filter_annotations(img_all_annotations, used_classes):
- """Filters out annotations from the unused classes and dontcare regions.
-
-  Filters out the annotations that belong to classes we do not wish to use and
-  (optionally) also removes all boxes that overlap with dontcare regions.
-
-  Args:
-    img_all_annotations: A dictionary of annotation arrays for one image. See
-      the documentation of read_annotation_file for details about the format.
-    used_classes: A list of strings listing the classes we want to keep; if the
-      list contains "dontcare", all bounding boxes overlapping with dontcare
-      regions will also be filtered out.
-
- Returns:
-    img_filtered_annotations: A dictionary of annotation arrays that have
-      passed the filtering.
- """
-
- img_filtered_annotations = {}
-
- # Filter the type of the objects.
- relevant_annotation_indices = [
- i for i, x in enumerate(img_all_annotations['type']) if x in used_classes
- ]
-
- for key in img_all_annotations.keys():
- img_filtered_annotations[key] = (
- img_all_annotations[key][relevant_annotation_indices])
-
- if 'dontcare' in used_classes:
- dont_care_indices = [i for i,
- x in enumerate(img_filtered_annotations['type'])
- if x == 'dontcare']
-
- # bounding box format [y_min, x_min, y_max, x_max]
- all_boxes = np.stack([img_filtered_annotations['2d_bbox_top'],
- img_filtered_annotations['2d_bbox_left'],
- img_filtered_annotations['2d_bbox_bottom'],
- img_filtered_annotations['2d_bbox_right']],
- axis=1)
-
- ious = iou(boxes1=all_boxes,
- boxes2=all_boxes[dont_care_indices])
-
- # Remove all bounding boxes that overlap with a dontcare region.
- if ious.size > 0:
- boxes_to_remove = np.amax(ious, axis=1) > 0.0
- for key in img_all_annotations.keys():
- img_filtered_annotations[key] = (
- img_filtered_annotations[key][np.logical_not(boxes_to_remove)])
-
- return img_filtered_annotations
-
-
-def read_annotation_file(filename):
- """Reads a KITTI annotation file.
-
- Converts a KITTI annotation file into a dictionary containing all the
- relevant information.
-
- Args:
-    filename: the path to the annotation text file.
-
- Returns:
- anno: A dictionary with the converted annotation information. See annotation
- README file for details on the different fields.
- """
- with open(filename) as f:
- content = f.readlines()
- content = [x.strip().split(' ') for x in content]
-
- anno = {}
- anno['type'] = np.array([x[0].lower() for x in content])
- anno['truncated'] = np.array([float(x[1]) for x in content])
- anno['occluded'] = np.array([int(x[2]) for x in content])
- anno['alpha'] = np.array([float(x[3]) for x in content])
-
- anno['2d_bbox_left'] = np.array([float(x[4]) for x in content])
- anno['2d_bbox_top'] = np.array([float(x[5]) for x in content])
- anno['2d_bbox_right'] = np.array([float(x[6]) for x in content])
- anno['2d_bbox_bottom'] = np.array([float(x[7]) for x in content])
-
- anno['3d_bbox_height'] = np.array([float(x[8]) for x in content])
- anno['3d_bbox_width'] = np.array([float(x[9]) for x in content])
- anno['3d_bbox_length'] = np.array([float(x[10]) for x in content])
- anno['3d_bbox_x'] = np.array([float(x[11]) for x in content])
- anno['3d_bbox_y'] = np.array([float(x[12]) for x in content])
- anno['3d_bbox_z'] = np.array([float(x[13]) for x in content])
- anno['3d_bbox_rot_y'] = np.array([float(x[14]) for x in content])
-
- return anno
-
-
-def main(_):
- convert_kitti_to_tfrecords(
- data_dir=FLAGS.data_dir,
- output_path=FLAGS.output_path,
- classes_to_use=FLAGS.classes_to_use.split(','),
- label_map_path=FLAGS.label_map_path,
- validation_set_size=FLAGS.validation_set_size)
-
-if __name__ == '__main__':
- tf.app.run()
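For reference, each line in a KITTI label file carries 15 space-separated fields, which is exactly the layout read_annotation_file indexes into. A small illustration with a made-up label line:

# Hypothetical KITTI label line: type, truncated, occluded, alpha,
# 2D bbox (left, top, right, bottom), 3D dims (height, width, length),
# 3D location (x, y, z) and rotation_y.
sample_line = ('car 0.00 0 1.85 387.63 181.54 423.81 203.12 '
               '1.67 1.87 3.69 -16.53 2.39 58.49 1.57')
fields = sample_line.strip().split(' ')
print(fields[0])     # object type: 'car'
print(fields[4:8])   # 2D bbox in pixels: left, top, right, bottom
print(fields[8:11])  # 3D bbox dimensions in meters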
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_kitti_tf_record_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_kitti_tf_record_test.py
deleted file mode 100644
index 37ac4b8b19d65f8533ecefec318b409df12bce5f..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_kitti_tf_record_test.py
+++ /dev/null
@@ -1,130 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Test for create_kitti_tf_record.py."""
-
-import os
-
-import numpy as np
-import PIL.Image
-import tensorflow as tf
-
-from object_detection.dataset_tools import create_kitti_tf_record
-
-
-class CreateKittiTFRecordTest(tf.test.TestCase):
-
- def _assertProtoEqual(self, proto_field, expectation):
- """Helper function to assert if a proto field equals some value.
-
- Args:
- proto_field: The protobuf field to compare.
- expectation: The expected value of the protobuf field.
- """
- proto_list = [p for p in proto_field]
- self.assertListEqual(proto_list, expectation)
-
- def test_dict_to_tf_example(self):
- image_file_name = 'tmp_image.jpg'
- image_data = np.random.rand(256, 256, 3)
- save_path = os.path.join(self.get_temp_dir(), image_file_name)
- image = PIL.Image.fromarray(image_data, 'RGB')
- image.save(save_path)
-
- annotations = {}
- annotations['2d_bbox_left'] = np.array([64])
- annotations['2d_bbox_top'] = np.array([64])
- annotations['2d_bbox_right'] = np.array([192])
- annotations['2d_bbox_bottom'] = np.array([192])
- annotations['type'] = ['car']
- annotations['truncated'] = np.array([1])
- annotations['alpha'] = np.array([2])
- annotations['3d_bbox_height'] = np.array([10])
- annotations['3d_bbox_width'] = np.array([11])
- annotations['3d_bbox_length'] = np.array([12])
- annotations['3d_bbox_x'] = np.array([13])
- annotations['3d_bbox_y'] = np.array([14])
- annotations['3d_bbox_z'] = np.array([15])
- annotations['3d_bbox_rot_y'] = np.array([4])
-
- label_map_dict = {
- 'background': 0,
- 'car': 1,
- }
-
- example = create_kitti_tf_record.prepare_example(
- save_path,
- annotations,
- label_map_dict)
-
- self._assertProtoEqual(
- example.features.feature['image/height'].int64_list.value, [256])
- self._assertProtoEqual(
- example.features.feature['image/width'].int64_list.value, [256])
- self._assertProtoEqual(
- example.features.feature['image/filename'].bytes_list.value,
- [save_path])
- self._assertProtoEqual(
- example.features.feature['image/source_id'].bytes_list.value,
- [save_path])
- self._assertProtoEqual(
- example.features.feature['image/format'].bytes_list.value, ['png'])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/xmin'].float_list.value,
- [0.25])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/ymin'].float_list.value,
- [0.25])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/xmax'].float_list.value,
- [0.75])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/ymax'].float_list.value,
- [0.75])
- self._assertProtoEqual(
- example.features.feature['image/object/class/text'].bytes_list.value,
- ['car'])
- self._assertProtoEqual(
- example.features.feature['image/object/class/label'].int64_list.value,
- [1])
- self._assertProtoEqual(
- example.features.feature['image/object/truncated'].float_list.value,
- [1])
- self._assertProtoEqual(
- example.features.feature['image/object/alpha'].float_list.value,
- [2])
- self._assertProtoEqual(example.features.feature[
- 'image/object/3d_bbox/height'].float_list.value, [10])
- self._assertProtoEqual(
- example.features.feature['image/object/3d_bbox/width'].float_list.value,
- [11])
- self._assertProtoEqual(example.features.feature[
- 'image/object/3d_bbox/length'].float_list.value, [12])
- self._assertProtoEqual(
- example.features.feature['image/object/3d_bbox/x'].float_list.value,
- [13])
- self._assertProtoEqual(
- example.features.feature['image/object/3d_bbox/y'].float_list.value,
- [14])
- self._assertProtoEqual(
- example.features.feature['image/object/3d_bbox/z'].float_list.value,
- [15])
- self._assertProtoEqual(
- example.features.feature['image/object/3d_bbox/rot_y'].float_list.value,
- [4])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_oid_tf_record.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_oid_tf_record.py
deleted file mode 100644
index 26d9699c8ee4ec17ef329f91e0df31ca79d50c99..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_oid_tf_record.py
+++ /dev/null
@@ -1,117 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-r"""Creates TFRecords of Open Images dataset for object detection.
-
-Example usage:
- python object_detection/dataset_tools/create_oid_tf_record.py \
- --input_box_annotations_csv=/path/to/input/annotations-human-bbox.csv \
- --input_image_label_annotations_csv=/path/to/input/annotations-label.csv \
- --input_images_directory=/path/to/input/image_pixels_directory \
- --input_label_map=/path/to/input/labels_bbox_545.labelmap \
- --output_tf_record_path_prefix=/path/to/output/prefix.tfrecord
-
-CSVs with bounding box annotations and image metadata (including the image URLs)
-can be downloaded from the Open Images GitHub repository:
-https://github.com/openimages/dataset
-
-This script will include every image found in the input_images_directory in the
-output TFRecord, even if the image has no corresponding bounding box annotations
-in the input_box_annotations_csv. If input_image_label_annotations_csv is
-specified, it will add image-level labels as well. Note that the information of
-whether a label is positively or negatively verified is NOT added to the
-TFRecords.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-import contextlib2
-import pandas as pd
-import tensorflow as tf
-
-from object_detection.dataset_tools import oid_tfrecord_creation
-from object_detection.dataset_tools import tf_record_creation_util
-from object_detection.utils import label_map_util
-
-tf.flags.DEFINE_string('input_box_annotations_csv', None,
- 'Path to CSV containing image bounding box annotations')
-tf.flags.DEFINE_string('input_images_directory', None,
- 'Directory containing the image pixels '
- 'downloaded from the OpenImages GitHub repository.')
-tf.flags.DEFINE_string('input_image_label_annotations_csv', None,
- 'Path to CSV containing image-level labels annotations')
-tf.flags.DEFINE_string('input_label_map', None, 'Path to the label map proto')
-tf.flags.DEFINE_string(
- 'output_tf_record_path_prefix', None,
- 'Path to the output TFRecord. The shard index and the number of shards '
- 'will be appended for each output shard.')
-tf.flags.DEFINE_integer('num_shards', 100, 'Number of TFRecord shards')
-
-FLAGS = tf.flags.FLAGS
-
-
-def main(_):
- tf.logging.set_verbosity(tf.logging.INFO)
-
- required_flags = [
- 'input_box_annotations_csv', 'input_images_directory', 'input_label_map',
- 'output_tf_record_path_prefix'
- ]
- for flag_name in required_flags:
- if not getattr(FLAGS, flag_name):
- raise ValueError('Flag --{} is required'.format(flag_name))
-
- label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map)
- all_box_annotations = pd.read_csv(FLAGS.input_box_annotations_csv)
- if FLAGS.input_image_label_annotations_csv:
- all_label_annotations = pd.read_csv(FLAGS.input_image_label_annotations_csv)
- all_label_annotations.rename(
- columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True)
- else:
- all_label_annotations = None
- all_images = tf.gfile.Glob(
- os.path.join(FLAGS.input_images_directory, '*.jpg'))
- all_image_ids = [os.path.splitext(os.path.basename(v))[0] for v in all_images]
- all_image_ids = pd.DataFrame({'ImageID': all_image_ids})
- all_annotations = pd.concat(
- [all_box_annotations, all_image_ids, all_label_annotations])
-
- tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))
-
- with contextlib2.ExitStack() as tf_record_close_stack:
- output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
- tf_record_close_stack, FLAGS.output_tf_record_path_prefix,
- FLAGS.num_shards)
-
- for counter, image_data in enumerate(all_annotations.groupby('ImageID')):
- tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 1000,
- counter)
-
- image_id, image_annotations = image_data
- # In OID image file names are formed by appending ".jpg" to the image ID.
- image_path = os.path.join(FLAGS.input_images_directory, image_id + '.jpg')
-      # Open in binary mode so the raw JPEG bytes round-trip unchanged.
-      with tf.gfile.Open(image_path, 'rb') as image_file:
- encoded_image = image_file.read()
-
- tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
- image_annotations, label_map, encoded_image)
- if tf_example:
- shard_idx = int(image_id, 16) % FLAGS.num_shards
- output_tfrecords[shard_idx].write(tf_example.SerializeToString())
-
-
-if __name__ == '__main__':
- tf.app.run()
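Because OID image IDs are hexadecimal strings, the shard index above is computed by interpreting the ID as a hex integer, giving a deterministic image-to-shard mapping. A tiny sketch with a made-up ID:

num_shards = 100
image_id = '000595fe6fee6369'  # hypothetical OID-style image ID
shard_idx = int(image_id, 16) % num_shards
print(shard_idx)  # the same ID always lands in the same shard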
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_pascal_tf_record.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_pascal_tf_record.py
deleted file mode 100644
index 813071c924ae457453190710181be2d702b439ce..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_pascal_tf_record.py
+++ /dev/null
@@ -1,185 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-r"""Convert raw PASCAL dataset to TFRecord for object_detection.
-
-Example usage:
- python object_detection/dataset_tools/create_pascal_tf_record.py \
- --data_dir=/home/user/VOCdevkit \
- --year=VOC2012 \
- --output_path=/home/user/pascal.record
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import hashlib
-import io
-import logging
-import os
-
-from lxml import etree
-import PIL.Image
-import tensorflow as tf
-
-from object_detection.utils import dataset_util
-from object_detection.utils import label_map_util
-
-
-flags = tf.app.flags
-flags.DEFINE_string('data_dir', '', 'Root directory to raw PASCAL VOC dataset.')
-flags.DEFINE_string('set', 'train', 'Convert training set, validation set or '
- 'merged set.')
-flags.DEFINE_string('annotations_dir', 'Annotations',
- '(Relative) path to annotations directory.')
-flags.DEFINE_string('year', 'VOC2007', 'Desired challenge year.')
-flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
-flags.DEFINE_string('label_map_path', 'data/pascal_label_map.pbtxt',
- 'Path to label map proto')
-flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to ignore '
- 'difficult instances')
-FLAGS = flags.FLAGS
-
-SETS = ['train', 'val', 'trainval', 'test']
-YEARS = ['VOC2007', 'VOC2012', 'merged']
-
-
-def dict_to_tf_example(data,
- dataset_directory,
- label_map_dict,
- ignore_difficult_instances=False,
- image_subdirectory='JPEGImages'):
- """Convert XML derived dict to tf.Example proto.
-
- Notice that this function normalizes the bounding box coordinates provided
- by the raw data.
-
- Args:
- data: dict holding PASCAL XML fields for a single image (obtained by
- running dataset_util.recursive_parse_xml_to_dict)
- dataset_directory: Path to root directory holding PASCAL dataset
-    label_map_dict: A map from string label names to integer ids.
- ignore_difficult_instances: Whether to skip difficult instances in the
- dataset (default: False).
- image_subdirectory: String specifying subdirectory within the
- PASCAL dataset directory holding the actual image data.
-
- Returns:
- example: The converted tf.Example.
-
- Raises:
- ValueError: if the image pointed to by data['filename'] is not a valid JPEG
- """
- img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])
- full_path = os.path.join(dataset_directory, img_path)
- with tf.gfile.GFile(full_path, 'rb') as fid:
- encoded_jpg = fid.read()
- encoded_jpg_io = io.BytesIO(encoded_jpg)
- image = PIL.Image.open(encoded_jpg_io)
- if image.format != 'JPEG':
- raise ValueError('Image format not JPEG')
- key = hashlib.sha256(encoded_jpg).hexdigest()
-
- width = int(data['size']['width'])
- height = int(data['size']['height'])
-
- xmin = []
- ymin = []
- xmax = []
- ymax = []
- classes = []
- classes_text = []
- truncated = []
- poses = []
- difficult_obj = []
- if 'object' in data:
- for obj in data['object']:
- difficult = bool(int(obj['difficult']))
- if ignore_difficult_instances and difficult:
- continue
-
- difficult_obj.append(int(difficult))
-
- xmin.append(float(obj['bndbox']['xmin']) / width)
- ymin.append(float(obj['bndbox']['ymin']) / height)
- xmax.append(float(obj['bndbox']['xmax']) / width)
- ymax.append(float(obj['bndbox']['ymax']) / height)
- classes_text.append(obj['name'].encode('utf8'))
- classes.append(label_map_dict[obj['name']])
- truncated.append(int(obj['truncated']))
- poses.append(obj['pose'].encode('utf8'))
-
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/height': dataset_util.int64_feature(height),
- 'image/width': dataset_util.int64_feature(width),
- 'image/filename': dataset_util.bytes_feature(
- data['filename'].encode('utf8')),
- 'image/source_id': dataset_util.bytes_feature(
- data['filename'].encode('utf8')),
- 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
- 'image/encoded': dataset_util.bytes_feature(encoded_jpg),
- 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
- 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
- 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
- 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
- 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
- 'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
- 'image/object/class/label': dataset_util.int64_list_feature(classes),
- 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
- 'image/object/truncated': dataset_util.int64_list_feature(truncated),
- 'image/object/view': dataset_util.bytes_list_feature(poses),
- }))
- return example
-
-
-def main(_):
-  if FLAGS.set not in SETS:
-    raise ValueError('set must be in: {}'.format(SETS))
-  if FLAGS.year not in YEARS:
-    raise ValueError('year must be in: {}'.format(YEARS))
-
- data_dir = FLAGS.data_dir
- years = ['VOC2007', 'VOC2012']
- if FLAGS.year != 'merged':
- years = [FLAGS.year]
-
- writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
-
- label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
-
- for year in years:
- logging.info('Reading from PASCAL %s dataset.', year)
- examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
- 'aeroplane_' + FLAGS.set + '.txt')
- annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
- examples_list = dataset_util.read_examples_list(examples_path)
- for idx, example in enumerate(examples_list):
- if idx % 100 == 0:
- logging.info('On image %d of %d', idx, len(examples_list))
- path = os.path.join(annotations_dir, example + '.xml')
- with tf.gfile.GFile(path, 'r') as fid:
- xml_str = fid.read()
- xml = etree.fromstring(xml_str)
- data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
-
- tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
- FLAGS.ignore_difficult_instances)
- writer.write(tf_example.SerializeToString())
-
- writer.close()
-
-
-if __name__ == '__main__':
- tf.app.run()
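dict_to_tf_example normalizes the raw VOC pixel coordinates to the [0, 1] range before writing them. A quick worked example of that arithmetic, using the same 256x256 image and 128x128 box that the unit test that follows asserts against:

width = height = 256
bndbox = {'xmin': 64, 'ymin': 64, 'xmax': 192, 'ymax': 192}
xmin = bndbox['xmin'] / float(width)   # 0.25
ymin = bndbox['ymin'] / float(height)  # 0.25
xmax = bndbox['xmax'] / float(width)   # 0.75
ymax = bndbox['ymax'] / float(height)  # 0.75
print(xmin, ymin, xmax, ymax)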
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_pascal_tf_record_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_pascal_tf_record_test.py
deleted file mode 100644
index 66929bd466a3db5acc9b79460993486c1cd10f34..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_pascal_tf_record_test.py
+++ /dev/null
@@ -1,118 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Test for create_pascal_tf_record.py."""
-
-import os
-
-import numpy as np
-import PIL.Image
-import tensorflow as tf
-
-from object_detection.dataset_tools import create_pascal_tf_record
-
-
-class CreatePascalTFRecordTest(tf.test.TestCase):
-
- def _assertProtoEqual(self, proto_field, expectation):
- """Helper function to assert if a proto field equals some value.
-
- Args:
- proto_field: The protobuf field to compare.
- expectation: The expected value of the protobuf field.
- """
- proto_list = [p for p in proto_field]
- self.assertListEqual(proto_list, expectation)
-
- def test_dict_to_tf_example(self):
- image_file_name = 'tmp_image.jpg'
-    # Use uint8 pixel data so PIL.Image.fromarray accepts mode 'RGB'.
-    image_data = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)
- save_path = os.path.join(self.get_temp_dir(), image_file_name)
- image = PIL.Image.fromarray(image_data, 'RGB')
- image.save(save_path)
-
- data = {
- 'folder': '',
- 'filename': image_file_name,
- 'size': {
- 'height': 256,
- 'width': 256,
- },
- 'object': [
- {
- 'difficult': 1,
- 'bndbox': {
- 'xmin': 64,
- 'ymin': 64,
- 'xmax': 192,
- 'ymax': 192,
- },
- 'name': 'person',
- 'truncated': 0,
- 'pose': '',
- },
- ],
- }
-
- label_map_dict = {
- 'background': 0,
- 'person': 1,
- 'notperson': 2,
- }
-
- example = create_pascal_tf_record.dict_to_tf_example(
- data, self.get_temp_dir(), label_map_dict, image_subdirectory='')
- self._assertProtoEqual(
- example.features.feature['image/height'].int64_list.value, [256])
- self._assertProtoEqual(
- example.features.feature['image/width'].int64_list.value, [256])
- self._assertProtoEqual(
- example.features.feature['image/filename'].bytes_list.value,
- [image_file_name])
- self._assertProtoEqual(
- example.features.feature['image/source_id'].bytes_list.value,
- [image_file_name])
- self._assertProtoEqual(
- example.features.feature['image/format'].bytes_list.value, ['jpeg'])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/xmin'].float_list.value,
- [0.25])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/ymin'].float_list.value,
- [0.25])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/xmax'].float_list.value,
- [0.75])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/ymax'].float_list.value,
- [0.75])
- self._assertProtoEqual(
- example.features.feature['image/object/class/text'].bytes_list.value,
- ['person'])
- self._assertProtoEqual(
- example.features.feature['image/object/class/label'].int64_list.value,
- [1])
- self._assertProtoEqual(
- example.features.feature['image/object/difficult'].int64_list.value,
- [1])
- self._assertProtoEqual(
- example.features.feature['image/object/truncated'].int64_list.value,
- [0])
- self._assertProtoEqual(
- example.features.feature['image/object/view'].bytes_list.value, [''])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_pet_tf_record.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_pet_tf_record.py
deleted file mode 100644
index 9b3b55c60009fb14d7384097d8c7fad02c5d345a..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_pet_tf_record.py
+++ /dev/null
@@ -1,318 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-r"""Convert the Oxford pet dataset to TFRecord for object_detection.
-
-See: O. M. Parkhi, A. Vedaldi, A. Zisserman, C. V. Jawahar
- Cats and Dogs
- IEEE Conference on Computer Vision and Pattern Recognition, 2012
- http://www.robots.ox.ac.uk/~vgg/data/pets/
-
-Example usage:
- python object_detection/dataset_tools/create_pet_tf_record.py \
- --data_dir=/home/user/pet \
- --output_dir=/home/user/pet/output
-"""
-
-import hashlib
-import io
-import logging
-import os
-import random
-import re
-
-import contextlib2
-from lxml import etree
-import numpy as np
-import PIL.Image
-import tensorflow as tf
-
-from object_detection.dataset_tools import tf_record_creation_util
-from object_detection.utils import dataset_util
-from object_detection.utils import label_map_util
-
-flags = tf.app.flags
-flags.DEFINE_string('data_dir', '', 'Root directory to raw pet dataset.')
-flags.DEFINE_string('output_dir', '', 'Path to directory to output TFRecords.')
-flags.DEFINE_string('label_map_path', 'data/pet_label_map.pbtxt',
- 'Path to label map proto')
-flags.DEFINE_boolean('faces_only', True, 'If True, generates bounding boxes '
-                     'for pet faces. Otherwise generates bounding boxes, as '
-                     'well as segmentation masks, for full pet bodies. Note '
-                     'that in the latter case the resulting files are much '
-                     'larger.')
-flags.DEFINE_string('mask_type', 'png', 'How to represent instance '
- 'segmentation masks. Options are "png" or "numerical".')
-flags.DEFINE_integer('num_shards', 10, 'Number of TFRecord shards')
-
-FLAGS = flags.FLAGS
-
-
-def get_class_name_from_filename(file_name):
- """Gets the class name from a file.
-
- Args:
- file_name: The file name to get the class name from.
- ie. "american_pit_bull_terrier_105.jpg"
-
- Returns:
- A string of the class name.
- """
- match = re.match(r'([A-Za-z_]+)(_[0-9]+\.jpg)', file_name, re.I)
- return match.groups()[0]
-
-
-def dict_to_tf_example(data,
- mask_path,
- label_map_dict,
- image_subdirectory,
- ignore_difficult_instances=False,
- faces_only=True,
- mask_type='png'):
- """Convert XML derived dict to tf.Example proto.
-
- Notice that this function normalizes the bounding box coordinates provided
- by the raw data.
-
- Args:
- data: dict holding PASCAL XML fields for a single image (obtained by
- running dataset_util.recursive_parse_xml_to_dict)
- mask_path: String path to PNG encoded mask.
-    label_map_dict: A map from string label names to integer ids.
- image_subdirectory: String specifying subdirectory within the
- Pascal dataset directory holding the actual image data.
- ignore_difficult_instances: Whether to skip difficult instances in the
- dataset (default: False).
-    faces_only: If True, generates bounding boxes for pet faces. Otherwise
-      generates bounding boxes, as well as segmentation masks, for full pet
-      bodies.
- mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
- smaller file sizes.
-
- Returns:
- example: The converted tf.Example.
-
- Raises:
- ValueError: if the image pointed to by data['filename'] is not a valid JPEG
- """
- img_path = os.path.join(image_subdirectory, data['filename'])
- with tf.gfile.GFile(img_path, 'rb') as fid:
- encoded_jpg = fid.read()
- encoded_jpg_io = io.BytesIO(encoded_jpg)
- image = PIL.Image.open(encoded_jpg_io)
- if image.format != 'JPEG':
- raise ValueError('Image format not JPEG')
- key = hashlib.sha256(encoded_jpg).hexdigest()
-
- with tf.gfile.GFile(mask_path, 'rb') as fid:
- encoded_mask_png = fid.read()
- encoded_png_io = io.BytesIO(encoded_mask_png)
- mask = PIL.Image.open(encoded_png_io)
- if mask.format != 'PNG':
- raise ValueError('Mask format not PNG')
-
- mask_np = np.asarray(mask)
- nonbackground_indices_x = np.any(mask_np != 2, axis=0)
- nonbackground_indices_y = np.any(mask_np != 2, axis=1)
- nonzero_x_indices = np.where(nonbackground_indices_x)
- nonzero_y_indices = np.where(nonbackground_indices_y)
-
- width = int(data['size']['width'])
- height = int(data['size']['height'])
-
- xmins = []
- ymins = []
- xmaxs = []
- ymaxs = []
- classes = []
- classes_text = []
- truncated = []
- poses = []
- difficult_obj = []
- masks = []
- if 'object' in data:
- for obj in data['object']:
- difficult = bool(int(obj['difficult']))
- if ignore_difficult_instances and difficult:
- continue
- difficult_obj.append(int(difficult))
-
- if faces_only:
- xmin = float(obj['bndbox']['xmin'])
- xmax = float(obj['bndbox']['xmax'])
- ymin = float(obj['bndbox']['ymin'])
- ymax = float(obj['bndbox']['ymax'])
- else:
- xmin = float(np.min(nonzero_x_indices))
- xmax = float(np.max(nonzero_x_indices))
- ymin = float(np.min(nonzero_y_indices))
- ymax = float(np.max(nonzero_y_indices))
-
- xmins.append(xmin / width)
- ymins.append(ymin / height)
- xmaxs.append(xmax / width)
- ymaxs.append(ymax / height)
- class_name = get_class_name_from_filename(data['filename'])
- classes_text.append(class_name.encode('utf8'))
- classes.append(label_map_dict[class_name])
- truncated.append(int(obj['truncated']))
- poses.append(obj['pose'].encode('utf8'))
- if not faces_only:
- mask_remapped = (mask_np != 2).astype(np.uint8)
- masks.append(mask_remapped)
-
- feature_dict = {
- 'image/height': dataset_util.int64_feature(height),
- 'image/width': dataset_util.int64_feature(width),
- 'image/filename': dataset_util.bytes_feature(
- data['filename'].encode('utf8')),
- 'image/source_id': dataset_util.bytes_feature(
- data['filename'].encode('utf8')),
- 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
- 'image/encoded': dataset_util.bytes_feature(encoded_jpg),
- 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
- 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
- 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
- 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
- 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
- 'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
- 'image/object/class/label': dataset_util.int64_list_feature(classes),
- 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
- 'image/object/truncated': dataset_util.int64_list_feature(truncated),
- 'image/object/view': dataset_util.bytes_list_feature(poses),
- }
- if not faces_only:
- if mask_type == 'numerical':
- mask_stack = np.stack(masks).astype(np.float32)
- masks_flattened = np.reshape(mask_stack, [-1])
- feature_dict['image/object/mask'] = (
- dataset_util.float_list_feature(masks_flattened.tolist()))
- elif mask_type == 'png':
- encoded_mask_png_list = []
- for mask in masks:
- img = PIL.Image.fromarray(mask)
- output = io.BytesIO()
- img.save(output, format='PNG')
- encoded_mask_png_list.append(output.getvalue())
- feature_dict['image/object/mask'] = (
- dataset_util.bytes_list_feature(encoded_mask_png_list))
-
- example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
- return example
-
-
-def create_tf_record(output_filename,
- num_shards,
- label_map_dict,
- annotations_dir,
- image_dir,
- examples,
- faces_only=True,
- mask_type='png'):
- """Creates a TFRecord file from examples.
-
- Args:
- output_filename: Path to where output file is saved.
- num_shards: Number of shards for output file.
- label_map_dict: The label map dictionary.
- annotations_dir: Directory where annotation files are stored.
- image_dir: Directory where image files are stored.
- examples: Examples to parse and save to tf record.
-    faces_only: If True, generates bounding boxes for pet faces. Otherwise
-      generates bounding boxes, as well as segmentation masks, for full pet
-      bodies.
- mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
- smaller file sizes.
- """
- with contextlib2.ExitStack() as tf_record_close_stack:
- output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
- tf_record_close_stack, output_filename, num_shards)
- for idx, example in enumerate(examples):
- if idx % 100 == 0:
- logging.info('On image %d of %d', idx, len(examples))
- xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')
- mask_path = os.path.join(annotations_dir, 'trimaps', example + '.png')
-
- if not os.path.exists(xml_path):
- logging.warning('Could not find %s, ignoring example.', xml_path)
- continue
- with tf.gfile.GFile(xml_path, 'r') as fid:
- xml_str = fid.read()
- xml = etree.fromstring(xml_str)
- data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
-
- try:
- tf_example = dict_to_tf_example(
- data,
- mask_path,
- label_map_dict,
- image_dir,
- faces_only=faces_only,
- mask_type=mask_type)
- if tf_example:
- shard_idx = idx % num_shards
- output_tfrecords[shard_idx].write(tf_example.SerializeToString())
- except ValueError:
- logging.warning('Invalid example: %s, ignoring.', xml_path)
-
-
-# TODO(derekjchow): Add test for pet/PASCAL main files.
-def main(_):
- data_dir = FLAGS.data_dir
- label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
-
- logging.info('Reading from Pet dataset.')
- image_dir = os.path.join(data_dir, 'images')
- annotations_dir = os.path.join(data_dir, 'annotations')
- examples_path = os.path.join(annotations_dir, 'trainval.txt')
- examples_list = dataset_util.read_examples_list(examples_path)
-
-  # Test images are not included in the downloaded data set, so we perform
-  # our own split.
- random.seed(42)
- random.shuffle(examples_list)
- num_examples = len(examples_list)
- num_train = int(0.7 * num_examples)
- train_examples = examples_list[:num_train]
- val_examples = examples_list[num_train:]
- logging.info('%d training and %d validation examples.',
- len(train_examples), len(val_examples))
-
- train_output_path = os.path.join(FLAGS.output_dir, 'pet_faces_train.record')
- val_output_path = os.path.join(FLAGS.output_dir, 'pet_faces_val.record')
- if not FLAGS.faces_only:
- train_output_path = os.path.join(FLAGS.output_dir,
- 'pets_fullbody_with_masks_train.record')
- val_output_path = os.path.join(FLAGS.output_dir,
- 'pets_fullbody_with_masks_val.record')
- create_tf_record(
- train_output_path,
- FLAGS.num_shards,
- label_map_dict,
- annotations_dir,
- image_dir,
- train_examples,
- faces_only=FLAGS.faces_only,
- mask_type=FLAGS.mask_type)
- create_tf_record(
- val_output_path,
- FLAGS.num_shards,
- label_map_dict,
- annotations_dir,
- image_dir,
- val_examples,
- faces_only=FLAGS.faces_only,
- mask_type=FLAGS.mask_type)
-
-
-if __name__ == '__main__':
- tf.app.run()
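In the Oxford-IIIT pet trimaps, pixel value 2 marks background, so the converter above derives a binary instance mask with (mask_np != 2). A tiny numpy sketch of that remapping on a toy trimap:

import numpy as np

trimap = np.array([[2, 2, 1],
                   [2, 1, 1],
                   [3, 3, 1]], dtype=np.uint8)  # toy 3x3 trimap
binary_mask = (trimap != 2).astype(np.uint8)
print(binary_mask)  # [[0 0 1] [0 1 1] [1 1 1]]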
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/download_and_preprocess_mscoco.sh b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/download_and_preprocess_mscoco.sh
deleted file mode 100644
index 843ba86938d35eed18dd6f7968ea87c90551fc13..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/download_and_preprocess_mscoco.sh
+++ /dev/null
@@ -1,106 +0,0 @@
-#!/bin/bash
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-# Script to download and preprocess the MSCOCO data set for detection.
-#
-# The outputs of this script are TFRecord files containing serialized
-# tf.Example protocol buffers. See create_coco_tf_record.py for details of how
-# the tf.Example protocol buffers are constructed and see
-# http://cocodataset.org/#overview for an overview of the dataset.
-#
-# usage:
-# bash object_detection/dataset_tools/download_and_preprocess_mscoco.sh \
-# /tmp/mscoco
-set -e
-
-if [ -z "$1" ]; then
- echo "usage download_and_preprocess_mscoco.sh [data dir]"
- exit
-fi
-
-if [ "$(uname)" == "Darwin" ]; then
- UNZIP="tar -xf"
-else
- UNZIP="unzip -nq"
-fi
-
-# Create the output directories.
-OUTPUT_DIR="${1%/}"
-SCRATCH_DIR="${OUTPUT_DIR}/raw-data"
-mkdir -p "${OUTPUT_DIR}"
-mkdir -p "${SCRATCH_DIR}"
-CURRENT_DIR=$(pwd)
-
-# Helper function to download and unpack a .zip file.
-function download_and_unzip() {
- local BASE_URL=${1}
- local FILENAME=${2}
-
-  if [ ! -f "${FILENAME}" ]; then
- echo "Downloading ${FILENAME} to $(pwd)"
- wget -nd -c "${BASE_URL}/${FILENAME}"
- else
- echo "Skipping download of ${FILENAME}"
- fi
- echo "Unzipping ${FILENAME}"
- ${UNZIP} ${FILENAME}
-}
-
-cd ${SCRATCH_DIR}
-
-# Download the images.
-BASE_IMAGE_URL="http://images.cocodataset.org/zips"
-
-TRAIN_IMAGE_FILE="train2017.zip"
-download_and_unzip ${BASE_IMAGE_URL} ${TRAIN_IMAGE_FILE}
-TRAIN_IMAGE_DIR="${SCRATCH_DIR}/train2017"
-
-VAL_IMAGE_FILE="val2017.zip"
-download_and_unzip ${BASE_IMAGE_URL} ${VAL_IMAGE_FILE}
-VAL_IMAGE_DIR="${SCRATCH_DIR}/val2017"
-
-TEST_IMAGE_FILE="test2017.zip"
-download_and_unzip ${BASE_IMAGE_URL} ${TEST_IMAGE_FILE}
-TEST_IMAGE_DIR="${SCRATCH_DIR}/test2017"
-
-# Download the annotations.
-BASE_INSTANCES_URL="http://images.cocodataset.org/annotations"
-INSTANCES_FILE="annotations_trainval2017.zip"
-download_and_unzip ${BASE_INSTANCES_URL} ${INSTANCES_FILE}
-
-TRAIN_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/instances_train2017.json"
-VAL_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/instances_val2017.json"
-
-# Download the test image info.
-BASE_IMAGE_INFO_URL="http://images.cocodataset.org/annotations"
-IMAGE_INFO_FILE="image_info_test2017.zip"
-download_and_unzip ${BASE_IMAGE_INFO_URL} ${IMAGE_INFO_FILE}
-
-TESTDEV_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/image_info_test-dev2017.json"
-
-# Build TFRecords of the image data.
-cd "${CURRENT_DIR}"
-python object_detection/dataset_tools/create_coco_tf_record.py \
- --logtostderr \
- --include_masks \
- --train_image_dir="${TRAIN_IMAGE_DIR}" \
- --val_image_dir="${VAL_IMAGE_DIR}" \
- --test_image_dir="${TEST_IMAGE_DIR}" \
- --train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
- --val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
- --testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
- --output_dir="${OUTPUT_DIR}"
-
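After the script finishes, the data dir passed as its argument should contain the three record files produced by create_coco_tf_record.py. A minimal check, assuming /tmp/mscoco was the argument:

import os

for name in ('coco_train.record', 'coco_val.record', 'coco_testdev.record'):
  path = os.path.join('/tmp/mscoco', name)
  print(path, os.path.exists(path))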
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/oid_hierarchical_labels_expansion.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/oid_hierarchical_labels_expansion.py
deleted file mode 100644
index 6c00ac429102841ccff77de78e5bf06a0d3d6a5a..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/oid_hierarchical_labels_expansion.py
+++ /dev/null
@@ -1,172 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""A class and executable to expand hierarchically image-level labels and boxes.
-
-Example usage:
- ./hierarchical_labels_expansion
-