diff --git a/research/mlperf_object_detection/Mask_RCNN/README.md b/research/mlperf_object_detection/Mask_RCNN/README.md
deleted file mode 100644
index 16bdc70c4bf9152560215bdd8a435940d20051df..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/README.md
+++ /dev/null
@@ -1 +0,0 @@
-Mask R-CNN implementation adapted from models/research/object_detection/
diff --git a/research/mlperf_object_detection/Mask_RCNN/configs/e2e_mask_rcnn_R-50-C4_atrous.config b/research/mlperf_object_detection/Mask_RCNN/configs/e2e_mask_rcnn_R-50-C4_atrous.config
deleted file mode 100644
index 6085c7838582260fad17858ab4f65bd08e6ea14f..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/configs/e2e_mask_rcnn_R-50-C4_atrous.config
+++ /dev/null
@@ -1,170 +0,0 @@
-# Mask R-CNN with Resnet-50 (v1), Atrous version
-# Configured for MSCOCO Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. The input and checkpoint paths below are machine-specific
-# examples and should be updated for your environment.
-
-model {
-  faster_rcnn {
-    num_classes: 81
-    image_resizer {
-      keep_aspect_ratio_resizer {
-        min_dimension: 800
-        max_dimension: 1365
-      }
-    }
-    number_of_stages: 3
-    feature_extractor {
-      type: 'faster_rcnn_resnet50'
-      first_stage_features_stride: 8
-    }
-    first_stage_anchor_generator {
-      grid_anchor_generator {
-        scales: [0.125, 0.25, 0.5, 1.0, 2.0]  # base size 256**2 => anchor sizes 32, 64, 128, 256, 512
-        aspect_ratios: [0.5, 1.0, 2.0]
-        height_stride: 8
-        width_stride: 8
-      }
-    }
-    first_stage_atrous_rate: 2
-    first_stage_box_predictor_conv_hyperparams {
-      op: CONV
-      regularizer {
-        l2_regularizer {
-          weight: 0.0
-        }
-      }
-      initializer {
-        truncated_normal_initializer {
-          stddev: 0.01
-        }
-      }
-    }
-    first_stage_nms_score_threshold: 0.0
-    first_stage_nms_iou_threshold: 0.7
-    first_stage_max_proposals: 512
-    first_stage_localization_loss_weight: 2.0
-    first_stage_objectness_loss_weight: 1.0
-    initial_crop_size: 14
-    maxpool_kernel_size: 2
-    maxpool_stride: 2
-    second_stage_batch_size: 512
-    second_stage_box_predictor {
-      mask_rcnn_box_predictor {
-        use_dropout: false
-        dropout_keep_probability: 1.0
-        predict_instance_masks: true
-        mask_height: 14
-        mask_width: 14
-        mask_prediction_conv_depth: 0
-        mask_prediction_num_conv_layers: 3  # from the Mask R-CNN heads
-        fc_hyperparams {
-          op: FC
-          regularizer {
-            l2_regularizer {
-              weight: 0.0
-            }
-          }
-          initializer {
-            variance_scaling_initializer {
-              factor: 1.0
-              uniform: true
-              mode: FAN_AVG
-            }
-          }
-        }
-        conv_hyperparams {
-          op: CONV
-          regularizer {
-            l2_regularizer {
-              weight: 0.0
-            }
-          }
-          initializer {
-            truncated_normal_initializer {
-              stddev: 0.01
-            }
-          }
-        }
-      }
-    }
-    second_stage_post_processing {
-      batch_non_max_suppression {
-        score_threshold: 0.0
-        iou_threshold: 0.6
-        max_detections_per_class: 2000
-        max_total_detections: 2000
-      }
-      score_converter: SOFTMAX
-    }
-    second_stage_localization_loss_weight: 2.0
-    second_stage_classification_loss_weight: 1.0
-    second_stage_mask_prediction_loss_weight: 4.0
-  }
-}
-
-train_config: {
-  batch_size: 4
-  optimizer {
-    momentum_optimizer: {
-      learning_rate: {
-        manual_step_learning_rate {
-          initial_learning_rate: 0.01
-          schedule {
-            step: 120000
-            learning_rate: .001
-          }
-          schedule {
-            step: 160000
-            learning_rate: .0001
-          }
-        }
-      }
-      momentum_optimizer_value: 0.9
-    }
-    use_moving_average: false
-  }
-  gradient_clipping_by_norm: 10.0
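-  # For reference, the manual_step_learning_rate schedule above is
-  # piecewise-constant: steps [0, 120000) use learning rate 0.01,
-  # steps [120000, 160000) use 0.001, and steps from 160000 on use 0.0001.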
-  #fine_tune_checkpoint: "/home/mehdisharif/data/coco/resnet_v1_50.ckpt"
-  #from_detection_checkpoint: True
-  # Note: num_steps below caps training at 20M steps, which is effectively
-  # unbounded for this configuration; the run loop in mask_rcnn_run_loop.py
-  # can stop earlier once its mAP stopping criteria are met. Remove the line
-  # to train indefinitely.
-  num_steps: 20000000
-  data_augmentation_options {
-    random_horizontal_flip {
-    }
-  }
-}
-
-train_input_reader: {
-  tf_record_input_reader {
-    input_path: "/home/mehdisharif/data/coco/output2017/coco_train.record"
-  }
-  label_map_path: "/home/mehdisharif/data/coco/output2017/mscoco_label_map.pbtxt"
-  load_instance_masks: true
-  mask_type: PNG_MASKS
-}
-
-eval_config: {
-  metrics_set: ['coco_detection_metrics', 'coco_mask_metrics']
-  num_examples: 50
-  # Note: The below line limits the evaluation process to a single evaluation.
-  # Remove the below line to evaluate indefinitely.
-  max_evals: 1
-  num_visualizations: 50
-  eval_interval_secs: 120
-}
-
-eval_input_reader: {
-  tf_record_input_reader {
-    input_path: "/home/mehdisharif/data/coco/output2017/coco_val.record"
-  }
-  label_map_path: "/home/mehdisharif/data/coco/output2017/mscoco_label_map.pbtxt"
-  load_instance_masks: true
-  mask_type: PNG_MASKS
-  shuffle: false
-  num_readers: 1
-}
diff --git a/research/mlperf_object_detection/Mask_RCNN/configs/mask_rcnn_resnet50_atrous_coco.config b/research/mlperf_object_detection/Mask_RCNN/configs/mask_rcnn_resnet50_atrous_coco.config
deleted file mode 100644
index 17c8bd5e7c1400938f14eb068bb6ff60f445f109..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/configs/mask_rcnn_resnet50_atrous_coco.config
+++ /dev/null
@@ -1,169 +0,0 @@
-# Mask R-CNN with Resnet-50 (v1), Atrous version
-# Configured for MSCOCO Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
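The placeholders in this config are meant to be swapped for real paths before training. One way to do that is with a short script; a minimal sketch, where the config path and data directory are purely illustrative:

```python
from pathlib import Path

# Illustrative locations; substitute your own config file and data directory.
config_path = Path('configs/mask_rcnn_resnet50_atrous_coco.config')
data_dir = '/data/coco'

# Replace every placeholder with the data directory and write the file back.
text = config_path.read_text()
config_path.write_text(text.replace('PATH_TO_BE_CONFIGURED', data_dir))
```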
-
-model {
-  faster_rcnn {
-    num_classes: 90
-    image_resizer {
-      keep_aspect_ratio_resizer {
-        min_dimension: 800
-        max_dimension: 1365
-      }
-    }
-    number_of_stages: 3
-    feature_extractor {
-      type: 'faster_rcnn_resnet50'
-      first_stage_features_stride: 8
-    }
-    first_stage_anchor_generator {
-      grid_anchor_generator {
-        scales: [0.25, 0.5, 1.0, 2.0]
-        aspect_ratios: [0.5, 1.0, 2.0]
-        height_stride: 8
-        width_stride: 8
-      }
-    }
-    first_stage_atrous_rate: 2
-    first_stage_box_predictor_conv_hyperparams {
-      op: CONV
-      regularizer {
-        l2_regularizer {
-          weight: 0.0
-        }
-      }
-      initializer {
-        truncated_normal_initializer {
-          stddev: 0.01
-        }
-      }
-    }
-    first_stage_nms_score_threshold: 0.0
-    first_stage_nms_iou_threshold: 0.7
-    first_stage_max_proposals: 300
-    first_stage_localization_loss_weight: 2.0
-    first_stage_objectness_loss_weight: 1.0
-    initial_crop_size: 14
-    maxpool_kernel_size: 2
-    maxpool_stride: 2
-    second_stage_box_predictor {
-      mask_rcnn_box_predictor {
-        use_dropout: false
-        dropout_keep_probability: 1.0
-        predict_instance_masks: true
-        mask_height: 33
-        mask_width: 33
-        mask_prediction_conv_depth: 0
-        mask_prediction_num_conv_layers: 4
-        fc_hyperparams {
-          op: FC
-          regularizer {
-            l2_regularizer {
-              weight: 0.0
-            }
-          }
-          initializer {
-            variance_scaling_initializer {
-              factor: 1.0
-              uniform: true
-              mode: FAN_AVG
-            }
-          }
-        }
-        conv_hyperparams {
-          op: CONV
-          regularizer {
-            l2_regularizer {
-              weight: 0.0
-            }
-          }
-          initializer {
-            truncated_normal_initializer {
-              stddev: 0.01
-            }
-          }
-        }
-      }
-    }
-    second_stage_post_processing {
-      batch_non_max_suppression {
-        score_threshold: 0.0
-        iou_threshold: 0.6
-        max_detections_per_class: 100
-        max_total_detections: 300
-      }
-      score_converter: SOFTMAX
-    }
-    second_stage_localization_loss_weight: 2.0
-    second_stage_classification_loss_weight: 1.0
-    second_stage_mask_prediction_loss_weight: 4.0
-  }
-}
-
-train_config: {
-  batch_size: 2
-  optimizer {
-    momentum_optimizer: {
-      learning_rate: {
-        manual_step_learning_rate {
-          initial_learning_rate: 0.0003
-          schedule {
-            step: 900000
-            learning_rate: .00003
-          }
-          schedule {
-            step: 1200000
-            learning_rate: .000003
-          }
-        }
-      }
-      momentum_optimizer_value: 0.9
-    }
-    use_moving_average: false
-  }
-  gradient_clipping_by_norm: 10.0
-  #fine_tune_checkpoint: ""
-  from_detection_checkpoint: false
-  # Note: Uncommenting the line below limits training to 200K steps. Since
-  # both decay steps (900K and 1.2M) lie beyond that, capping training there
-  # effectively bypasses the learning rate schedule (the learning rate would
-  # never decay). Leave the line commented out to train indefinitely.
-  #num_steps: 200000
-  data_augmentation_options {
-    random_horizontal_flip {
-    }
-  }
-}
-
-train_input_reader: {
-  tf_record_input_reader {
-    input_path: "PATH_TO_BE_CONFIGURED/coco_train.record"
-  }
-  label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
-  load_instance_masks: true
-  mask_type: PNG_MASKS
-}
-
-eval_config: {
-  metrics_set: ['coco_detection_metrics', 'coco_mask_metrics']
-  num_examples: 50
-  # Note: The below line limits the evaluation process to a single evaluation.
-  # Remove the below line to evaluate indefinitely.
-  max_evals: 1
-  num_visualizations: 50
-  eval_interval_secs: 120
-}
-
-eval_input_reader: {
-  tf_record_input_reader {
-    input_path: "PATH_TO_BE_CONFIGURED/coco_val.record"
-  }
-  label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
-  load_instance_masks: true
-  mask_type: PNG_MASKS
-  shuffle: true
-  num_readers: 1
-}
diff --git a/research/mlperf_object_detection/Mask_RCNN/mask_rcnn_run_loop.py b/research/mlperf_object_detection/Mask_RCNN/mask_rcnn_run_loop.py
deleted file mode 100644
index 70c9185c94d25b39c2f778ed6c6a769d8aa3108c..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/mask_rcnn_run_loop.py
+++ /dev/null
@@ -1,243 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Training and evaluation for Mask R-CNN.
-
-This module repeatedly runs one training epoch followed by an evaluation
-pass. The flags below control the cluster layout (master, task, num_clones,
-worker_replicas, parameter_server_tasks), the configuration and output paths
-(pipeline_config_path, train_dir, eval_dir), and the stopping criteria
-(box_min_ap, mask_min_ap, epochs_between_evals).
-"""
-
-import functools
-import json
-import os
-
-from object_detection import evaluator
-from object_detection import trainer
-from object_detection.builders import dataset_builder
-from object_detection.builders import graph_rewriter_builder
-from object_detection.builders import model_builder
-from object_detection.utils import config_util
-from object_detection.utils import dataset_util
-from object_detection.utils import label_map_util
-
-import tensorflow as tf
-
-tf.logging.set_verbosity(tf.logging.INFO)
-
-flags = tf.app.flags
-flags.DEFINE_string('master', '', 'Name of the TensorFlow master to use.')
-flags.DEFINE_integer('task', 0, 'task id')
-flags.DEFINE_integer('num_clones', 1, 'Number of clones to deploy per worker.')
-flags.DEFINE_boolean('clone_on_cpu', False,
-                     'Force clones to be deployed on CPU. Note that even if '
-                     'set to False (allowing ops to run on gpu), some ops may '
-                     'still be run on the CPU if they have no GPU kernel.')
-flags.DEFINE_integer('worker_replicas', 1, 'Number of worker+trainer '
-                     'replicas.')
-flags.DEFINE_integer('parameter_server_tasks', 0,
-                     'Number of parameter server tasks. If None, does not use '
-                     'a parameter server.')
-flags.DEFINE_string('train_dir', '',
-                    'Directory to save the checkpoints and training summaries.')
-
-flags.DEFINE_string('pipeline_config_path', '',
-                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
-                    'file. If provided, other configs are ignored.')
-
-flags.DEFINE_boolean('eval_training_data', False,
                     'If training data should be evaluated for this job.')
-
-flags.DEFINE_string('eval_dir', '',
-                    'Directory to write eval summaries to.')
-
-flags.DEFINE_boolean('run_once', False, 'Option to only run a single pass of '
-                     'evaluation. Overrides the `max_evals` parameter in the '
-                     'provided config.')
-flags.DEFINE_float('box_min_ap', -1, 'Option to run until the box average '
-                   'precision reaches this number.')
-flags.DEFINE_float('mask_min_ap', -1, 'Option to run until the mask average '
-                   'precision reaches this number.')
-flags.DEFINE_integer('epochs_between_evals', 1, 'Number of training epochs to '
-                     'run before running eval.')
-FLAGS = flags.FLAGS
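-
-# Example invocation (paths and thresholds are illustrative; see the flag
-# definitions above):
-#   python mask_rcnn_run_loop.py \
-#     --pipeline_config_path=configs/e2e_mask_rcnn_R-50-C4_atrous.config \
-#     --train_dir=/tmp/mask_rcnn/train --eval_dir=/tmp/mask_rcnn/eval \
-#     --box_min_ap=0.377 --mask_min_ap=0.339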
-
-
-def stopping_criteria_met(eval_metrics, mask_min_ap, box_min_ap):
-  """Returns True if the enabled minimum-precision criteria are met.
-
-  Args:
-    eval_metrics: dict of metric names as keys and their corresponding values,
-      containing the "DetectionMasks_Precision/mAP" and
-      "DetectionBoxes_Precision/mAP" fields.
-    mask_min_ap: minimum desired mask average precision; ignored if -1.
-    box_min_ap: minimum desired box average precision; ignored if -1.
-
-  Returns:
-    True if at least one criterion is enabled (not -1) and every enabled
-    criterion is met; False otherwise.
-  """
-  assert mask_min_ap == -1 or 0 < mask_min_ap < 1
-  assert box_min_ap == -1 or 0 < box_min_ap < 1
-  try:
-    mask_mAP_reached = eval_metrics['DetectionMasks_Precision/mAP']
-    box_mAP_reached = eval_metrics['DetectionBoxes_Precision/mAP']
-  except KeyError as err:
-    raise Exception('eval_metrics dict does not contain the mAP field') from err
-
-  return ((mask_min_ap == -1 or mask_mAP_reached > mask_min_ap) and
-          (box_min_ap == -1 or box_mAP_reached > box_min_ap) and
-          (mask_min_ap != -1 or box_min_ap != -1))
-
-
-def main(_):
-  assert FLAGS.train_dir, '`train_dir` is missing.'
-  assert FLAGS.pipeline_config_path, '`pipeline_config_path` is missing.'
-  assert FLAGS.eval_dir, '`eval_dir` is missing.'
-
-  configs = config_util.get_configs_from_pipeline_file(
-      FLAGS.pipeline_config_path)
-  if FLAGS.task == 0:
-    tf.gfile.MakeDirs(FLAGS.train_dir)
-    tf.gfile.Copy(FLAGS.pipeline_config_path,
-                  os.path.join(FLAGS.train_dir, 'pipeline.config'),
-                  overwrite=True)
-
-    tf.gfile.MakeDirs(FLAGS.eval_dir)
-    tf.gfile.Copy(FLAGS.pipeline_config_path,
-                  os.path.join(FLAGS.eval_dir, 'pipeline.config'),
-                  overwrite=True)
-
-  model_config = configs['model']
-
-  train_config = configs['train_config']
-  train_input_config = configs['train_input_config']
-
-  eval_config = configs['eval_config']
-  if FLAGS.eval_training_data:
-    eval_input_config = configs['train_input_config']
-  else:
-    eval_input_config = configs['eval_input_config']
-
-  # Run evaluation after every FLAGS.epochs_between_evals epochs of training;
-  # the total number of training epochs is taken from num_steps in the config.
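-  # For example, with num_steps: 20 in the config and --epochs_between_evals=4,
-  # the loop below runs total_training_cycle = 20 // 4 = 5 rounds of
-  # train-then-evaluate.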
-  if train_config.num_steps:
-    total_num_epochs = train_config.num_steps
-    train_config.num_steps = FLAGS.epochs_between_evals
-    total_training_cycle = total_num_epochs // train_config.num_steps
-  else:
-    # TODO(mehdi): make it run indefinitely instead of relying on a large cap.
-    total_num_epochs = 20000000
-    train_config.num_steps = FLAGS.epochs_between_evals
-    total_training_cycle = total_num_epochs // train_config.num_steps
-
-  train_model_fn = functools.partial(model_builder.build,
-                                     model_config=model_config,
-                                     is_training=True)
-  eval_model_fn = functools.partial(model_builder.build,
-                                    model_config=model_config,
-                                    is_training=False)
-
-  def get_next(config):
-    return dataset_util.make_initializable_iterator(
-        dataset_builder.build(config)).get_next()
-
-  # Functions that create a tensor input dictionary for training and evaluation.
-  train_input_dict_fn = functools.partial(get_next, train_input_config)
-  eval_input_dict_fn = functools.partial(get_next, eval_input_config)
-
-  # If not explicitly specified in the constructor and the TF_CONFIG
-  # environment variable is present, load cluster_spec from TF_CONFIG.
-  env = json.loads(os.environ.get('TF_CONFIG', '{}'))
-  cluster_data = env.get('cluster', None)
-  cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None
-  task_data = env.get('task', {'type': 'master', 'index': 0})
-  task_info = type('TaskSpec', (object,), task_data)
-
-  # Parameters for a single worker.
-  parameter_server_tasks = 0
-  worker_replicas = 1
-  worker_job_name = 'lonely_worker'
-  task = 0
-  is_chief = True
-  master = ''
-
-  if cluster_data and 'worker' in cluster_data:
-    # The total number of worker replicas includes the "worker" tasks and the
-    # "master" task.
-    worker_replicas = len(cluster_data['worker']) + 1
-  if cluster_data and 'ps' in cluster_data:
-    parameter_server_tasks = len(cluster_data['ps'])
-
-  if worker_replicas > 1 and parameter_server_tasks < 1:
-    raise ValueError('At least 1 ps task is needed for distributed training.')
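-
-  # For reference, TF_CONFIG for a small cluster looks roughly like this
-  # (hosts are illustrative):
-  #   {"cluster": {"master": ["host0:2222"], "worker": ["host1:2222"],
-  #                "ps": ["host2:2222"]},
-  #    "task": {"type": "worker", "index": 0}}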
-  if worker_replicas >= 1 and parameter_server_tasks > 0:
-    # Set up distributed training.
-    server = tf.train.Server(tf.train.ClusterSpec(cluster), protocol='grpc',
-                             job_name=task_info.type,
-                             task_index=task_info.index)
-    if task_info.type == 'ps':
-      server.join()
-      return
-
-    worker_job_name = '%s/task:%d' % (task_info.type, task_info.index)
-    task = task_info.index
-    is_chief = (task_info.type == 'master')
-    master = server.target
-
-  label_map = label_map_util.load_labelmap(eval_input_config.label_map_path)
-  max_num_classes = max([item.id for item in label_map.item])
-  categories = label_map_util.convert_label_map_to_categories(label_map,
-                                                              max_num_classes)
-
-  if FLAGS.run_once:
-    eval_config.max_evals = 1
-
-  train_graph_rewriter_fn = eval_graph_rewriter_fn = None
-  if 'graph_rewriter_config' in configs:
-    train_graph_rewriter_fn = graph_rewriter_builder.build(
-        configs['graph_rewriter_config'], is_training=True)
-    eval_graph_rewriter_fn = graph_rewriter_builder.build(
-        configs['graph_rewriter_config'], is_training=False)
-
-  def train():
-    return trainer.train(create_tensor_dict_fn=train_input_dict_fn,
-                         create_model_fn=train_model_fn,
-                         train_config=train_config, master=master, task=task,
-                         num_clones=FLAGS.num_clones,
-                         worker_replicas=worker_replicas,
-                         clone_on_cpu=FLAGS.clone_on_cpu,
-                         ps_tasks=parameter_server_tasks,
-                         worker_job_name=worker_job_name,
-                         is_chief=is_chief, train_dir=FLAGS.train_dir,
-                         graph_hook_fn=train_graph_rewriter_fn)
-
-  def evaluate():
-    return evaluator.evaluate(eval_input_dict_fn, eval_model_fn, eval_config,
-                              categories, FLAGS.train_dir, FLAGS.eval_dir,
-                              graph_hook_fn=eval_graph_rewriter_fn)
-
-  for cycle_index in range(total_training_cycle):
-    tf.logging.info('Starting a training cycle: %d/%d',
-                    cycle_index, total_training_cycle)
-    train()
-    tf.logging.info('Starting to evaluate.')
-    eval_metrics = evaluate()
-    if stopping_criteria_met(eval_metrics, FLAGS.mask_min_ap, FLAGS.box_min_ap):
-      tf.logging.info('Stopping criteria met. Training stopped.')
-      break
-
-
-if __name__ == '__main__':
-  tf.app.run()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/CONTRIBUTING.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/CONTRIBUTING.md
deleted file mode 100644
index e3d87e3ce90fb4dd22b00a2c5368bf17c3610661..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/CONTRIBUTING.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# Contributing to the Tensorflow Object Detection API
-
-Patches to the Tensorflow Object Detection API are welcome!
-
-We require contributors to fill out either the individual or corporate
-Contributor License Agreement (CLA).
-
-  * If you are an individual writing original source code and you're sure you own the intellectual property, then you'll need to sign an [individual CLA](http://code.google.com/legal/individual-cla-v1.0.html).
-  * If you work for a company that wants to allow you to contribute your work, then you'll need to sign a [corporate CLA](http://code.google.com/legal/corporate-cla-v1.0.html).
-
-Please follow the
-[Tensorflow contributing guidelines](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md)
-when submitting pull requests.
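The run loop above alternates training with evaluation until `stopping_criteria_met` returns True. A minimal, self-contained sketch of that predicate's semantics, with metric values invented purely for illustration:

```python
def stopping_criteria_met(eval_metrics, mask_min_ap, box_min_ap):
    # A criterion of -1 is disabled; at least one criterion must be enabled,
    # and every enabled criterion must be exceeded, for the loop to stop.
    mask_map = eval_metrics['DetectionMasks_Precision/mAP']
    box_map = eval_metrics['DetectionBoxes_Precision/mAP']
    return ((mask_min_ap == -1 or mask_map > mask_min_ap) and
            (box_min_ap == -1 or box_map > box_min_ap) and
            (mask_min_ap != -1 or box_min_ap != -1))

metrics = {'DetectionMasks_Precision/mAP': 0.34,   # invented values
           'DetectionBoxes_Precision/mAP': 0.38}
print(stopping_criteria_met(metrics, mask_min_ap=0.339, box_min_ap=0.377))  # True
print(stopping_criteria_met(metrics, mask_min_ap=-1, box_min_ap=-1))        # False
```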
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/README.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/README.md deleted file mode 100644 index 52bf3565ede8269b90bd148f86c2bb73b4fc112f..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/README.md +++ /dev/null @@ -1,190 +0,0 @@ - -# Tensorflow Object Detection API -Creating accurate machine learning models capable of localizing and identifying -multiple objects in a single image remains a core challenge in computer vision. -The TensorFlow Object Detection API is an open source framework built on top of -TensorFlow that makes it easy to construct, train and deploy object detection -models. At Google we’ve certainly found this codebase to be useful for our -computer vision needs, and we hope that you will as well. -

- -

-Contributions to the codebase are welcome and we would love to hear back from -you if you find this API useful. Finally if you use the Tensorflow Object -Detection API for a research publication, please consider citing: - -``` -"Speed/accuracy trade-offs for modern convolutional object detectors." -Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I, Wojna Z, -Song Y, Guadarrama S, Murphy K, CVPR 2017 -``` -\[[link](https://arxiv.org/abs/1611.10012)\]\[[bibtex]( -https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAWUIIlnPZ_L9jxvPwcC49kDlELtaeIyU-&scisf=4&ct=citation&cd=-1&hl=en&scfhb=1)\] - -

- -

- -## Maintainers - -* Jonathan Huang, github: [jch1](https://github.com/jch1) -* Vivek Rathod, github: [tombstone](https://github.com/tombstone) -* Ronny Votel, github: [ronnyvotel](https://github.com/ronnyvotel) -* Derek Chow, github: [derekjchow](https://github.com/derekjchow) -* Chen Sun, github: [jesu9](https://github.com/jesu9) -* Menglong Zhu, github: [dreamdragon](https://github.com/dreamdragon) -* Alireza Fathi, github: [afathi3](https://github.com/afathi3) -* Zhichao Lu, github: [pkulzc](https://github.com/pkulzc) - - -## Table of contents - -Quick Start: - - * - Quick Start: Jupyter notebook for off-the-shelf inference
- * Quick Start: Training a pet detector
- -Setup: - - * Installation
- * - Configuring an object detection pipeline
- * Preparing inputs
- -Running: - - * Running locally
- * Running on the cloud
- -Extras: - - * Tensorflow detection model zoo
- * - Exporting a trained model for inference
- * - Defining your own model architecture
- * - Bringing in your own dataset
- * - Supported object detection evaluation protocols
- * - Inference and evaluation on the Open Images dataset
- * - Run an instance segmentation model
- -## Getting Help - -To get help with issues you may encounter using the Tensorflow Object Detection -API, create a new question on [StackOverflow](https://stackoverflow.com/) with -the tags "tensorflow" and "object-detection". - -Please report bugs (actually broken code, not usage questions) to the -tensorflow/models GitHub -[issue tracker](https://github.com/tensorflow/models/issues), prefixing the -issue name with "object_detection". - -Please check [FAQ](g3doc/faq.md) for frequently asked questions before -reporting an issue. - - -## Release information - -### April 30, 2018 - -We have released a Faster R-CNN detector with ResNet-101 feature extractor trained on [AVA](https://research.google.com/ava/) v2.1. -Compared with other commonly used object detectors, it changes the action classification loss function to per-class Sigmoid loss to handle boxes with multiple labels. -The model is trained on the training split of AVA v2.1 for 1.5M iterations, it achieves mean AP of 11.25% over 60 classes on the validation split of AVA v2.1. -For more details please refer to this [paper](https://arxiv.org/abs/1705.08421). - -Thanks to contributors: Chen Sun, David Ross - -### April 2, 2018 - -Supercharge your mobile phones with the next generation mobile object detector! -We are adding support for MobileNet V2 with SSDLite presented in -[MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381). -This model is 35% faster than Mobilenet V1 SSD on a Google Pixel phone CPU (200ms vs. 270ms) at the same accuracy. -Along with the model definition, we are also releasing a model checkpoint trained on the COCO dataset. - -Thanks to contributors: Menglong Zhu, Mark Sandler, Zhichao Lu, Vivek Rathod, Jonathan Huang - -### February 9, 2018 - -We now support instance segmentation!! In this API update we support a number of instance segmentation models similar to those discussed in the [Mask R-CNN paper](https://arxiv.org/abs/1703.06870). For further details refer to -[our slides](http://presentations.cocodataset.org/Places17-GMRI.pdf) from the 2017 Coco + Places Workshop. -Refer to the section on [Running an Instance Segmentation Model](g3doc/instance_segmentation.md) for instructions on how to configure a model -that predicts masks in addition to object bounding boxes. - -Thanks to contributors: Alireza Fathi, Zhichao Lu, Vivek Rathod, Ronny Votel, Jonathan Huang - -### November 17, 2017 - -As a part of the Open Images V3 release we have released: - -* An implementation of the Open Images evaluation metric and the [protocol](g3doc/evaluation_protocols.md#open-images). -* Additional tools to separate inference of detection and evaluation (see [this tutorial](g3doc/oid_inference_and_evaluation.md)). -* A new detection model trained on the Open Images V2 data release (see [Open Images model](g3doc/detection_model_zoo.md#open-images-models)). - -See more information on the [Open Images website](https://github.com/openimages/dataset)! - -Thanks to contributors: Stefan Popov, Alina Kuznetsova - -### November 6, 2017 - -We have re-released faster versions of our (pre-trained) models in the -model zoo. In addition to what -was available before, we are also adding Faster R-CNN models trained on COCO -with Inception V2 and Resnet-50 feature extractors, as well as a Faster R-CNN -with Resnet-101 model trained on the KITTI dataset. - -Thanks to contributors: Jonathan Huang, Vivek Rathod, Derek Chow, -Tal Remez, Chen Sun. 
- -### October 31, 2017 - -We have released a new state-of-the-art model for object detection using -the Faster-RCNN with the -[NASNet-A image featurization](https://arxiv.org/abs/1707.07012). This -model achieves mAP of 43.1% on the test-dev validation dataset for COCO, -improving on the best available model in the zoo by 6% in terms -of absolute mAP. - -Thanks to contributors: Barret Zoph, Vijay Vasudevan, Jonathon Shlens, Quoc Le - -### August 11, 2017 - -We have released an update to the [Android Detect -demo](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android) -which will now run models trained using the Tensorflow Object -Detection API on an Android device. By default, it currently runs a -frozen SSD w/Mobilenet detector trained on COCO, but we encourage -you to try out other detection models! - -Thanks to contributors: Jonathan Huang, Andrew Harp - - -### June 15, 2017 - -In addition to our base Tensorflow detection model definitions, this -release includes: - -* A selection of trainable detection models, including: - * Single Shot Multibox Detector (SSD) with MobileNet, - * SSD with Inception V2, - * Region-Based Fully Convolutional Networks (R-FCN) with Resnet 101, - * Faster RCNN with Resnet 101, - * Faster RCNN with Inception Resnet v2 -* Frozen weights (trained on the COCO dataset) for each of the above models to - be used for out-of-the-box inference purposes. -* A [Jupyter notebook](object_detection_tutorial.ipynb) for performing - out-of-the-box inference with one of our released models -* Convenient [local training](g3doc/running_locally.md) scripts as well as - distributed training and evaluation pipelines via - [Google Cloud](g3doc/running_on_cloud.md). - - -Thanks to contributors: Jonathan Huang, Vivek Rathod, Derek Chow, -Chen Sun, Menglong Zhu, Matthew Tang, Anoop Korattikara, Alireza Fathi, Ian Fischer, Zbigniew Wojna, Yang Song, Sergio Guadarrama, Jasper Uijlings, -Viacheslav Kovalevskyi, Kevin Murphy - diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/grid_anchor_generator.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/grid_anchor_generator.py deleted file mode 100644 index ba43f0135481e433402b77e17a5db39a90ace8be..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/grid_anchor_generator.py +++ /dev/null @@ -1,205 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Generates grid anchors on the fly as used in Faster RCNN. - -Generates grid anchors on the fly as described in: -"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks" -Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. -""" - -import tensorflow as tf - -from object_detection.core import anchor_generator -from object_detection.core import box_list -from object_detection.utils import ops - - -class GridAnchorGenerator(anchor_generator.AnchorGenerator): - """Generates a grid of anchors at given scales and aspect ratios.""" - - def __init__(self, - scales=(0.5, 1.0, 2.0), - aspect_ratios=(0.5, 1.0, 2.0), - base_anchor_size=None, - anchor_stride=None, - anchor_offset=None): - """Constructs a GridAnchorGenerator. - - Args: - scales: a list of (float) scales, default=(0.5, 1.0, 2.0) - aspect_ratios: a list of (float) aspect ratios, default=(0.5, 1.0, 2.0) - base_anchor_size: base anchor size as height, width ( - (length-2 float32 list or tensor, default=[256, 256]) - anchor_stride: difference in centers between base anchors for adjacent - grid positions (length-2 float32 list or tensor, - default=[16, 16]) - anchor_offset: center of the anchor with scale and aspect ratio 1 for the - upper left element of the grid, this should be zero for - feature networks with only VALID padding and even receptive - field size, but may need additional calculation if other - padding is used (length-2 float32 list or tensor, - default=[0, 0]) - """ - # Handle argument defaults - if base_anchor_size is None: - base_anchor_size = [256, 256] - base_anchor_size = tf.to_float(tf.convert_to_tensor(base_anchor_size)) - if anchor_stride is None: - anchor_stride = [16, 16] - anchor_stride = tf.to_float(tf.convert_to_tensor(anchor_stride)) - if anchor_offset is None: - anchor_offset = [0, 0] - anchor_offset = tf.to_float(tf.convert_to_tensor(anchor_offset)) - - self._scales = scales - self._aspect_ratios = aspect_ratios - self._base_anchor_size = base_anchor_size - self._anchor_stride = anchor_stride - self._anchor_offset = anchor_offset - - def name_scope(self): - return 'GridAnchorGenerator' - - def num_anchors_per_location(self): - """Returns the number of anchors per spatial location. - - Returns: - a list of integers, one for each expected feature map to be passed to - the `generate` function. - """ - return [len(self._scales) * len(self._aspect_ratios)] - - def _generate(self, feature_map_shape_list): - """Generates a collection of bounding boxes to be used as anchors. - - Args: - feature_map_shape_list: list of pairs of convnet layer resolutions in the - format [(height_0, width_0)]. For example, setting - feature_map_shape_list=[(8, 8)] asks for anchors that correspond - to an 8x8 layer. For this anchor generator, only lists of length 1 are - allowed. - - Returns: - boxes_list: a list of BoxLists each holding anchor boxes corresponding to - the input feature map shapes. - - Raises: - ValueError: if feature_map_shape_list, box_specs_list do not have the same - length. 
- ValueError: if feature_map_shape_list does not consist of pairs of - integers - """ - if not (isinstance(feature_map_shape_list, list) - and len(feature_map_shape_list) == 1): - raise ValueError('feature_map_shape_list must be a list of length 1.') - if not all([isinstance(list_item, tuple) and len(list_item) == 2 - for list_item in feature_map_shape_list]): - raise ValueError('feature_map_shape_list must be a list of pairs.') - grid_height, grid_width = feature_map_shape_list[0] - scales_grid, aspect_ratios_grid = ops.meshgrid(self._scales, - self._aspect_ratios) - scales_grid = tf.reshape(scales_grid, [-1]) - aspect_ratios_grid = tf.reshape(aspect_ratios_grid, [-1]) - anchors = tile_anchors(grid_height, - grid_width, - scales_grid, - aspect_ratios_grid, - self._base_anchor_size, - self._anchor_stride, - self._anchor_offset) - - num_anchors = anchors.num_boxes_static() - if num_anchors is None: - num_anchors = anchors.num_boxes() - anchor_indices = tf.zeros([num_anchors]) - anchors.add_field('feature_map_index', anchor_indices) - return [anchors] - - -def tile_anchors(grid_height, - grid_width, - scales, - aspect_ratios, - base_anchor_size, - anchor_stride, - anchor_offset): - """Create a tiled set of anchors strided along a grid in image space. - - This op creates a set of anchor boxes by placing a "basis" collection of - boxes with user-specified scales and aspect ratios centered at evenly - distributed points along a grid. The basis collection is specified via the - scale and aspect_ratios arguments. For example, setting scales=[.1, .2, .2] - and aspect ratios = [2,2,1/2] means that we create three boxes: one with scale - .1, aspect ratio 2, one with scale .2, aspect ratio 2, and one with scale .2 - and aspect ratio 1/2. Each box is multiplied by "base_anchor_size" before - placing it over its respective center. - - Grid points are specified via grid_height, grid_width parameters as well as - the anchor_stride and anchor_offset parameters. - - Args: - grid_height: size of the grid in the y direction (int or int scalar tensor) - grid_width: size of the grid in the x direction (int or int scalar tensor) - scales: a 1-d (float) tensor representing the scale of each box in the - basis set. - aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each - box in the basis set. The length of the scales and aspect_ratios tensors - must be equal. 
- base_anchor_size: base anchor size as [height, width] - (float tensor of shape [2]) - anchor_stride: difference in centers between base anchors for adjacent grid - positions (float tensor of shape [2]) - anchor_offset: center of the anchor with scale and aspect ratio 1 for the - upper left element of the grid, this should be zero for - feature networks with only VALID padding and even receptive - field size, but may need some additional calculation if other - padding is used (float tensor of shape [2]) - Returns: - a BoxList holding a collection of N anchor boxes - """ - ratio_sqrts = tf.sqrt(aspect_ratios) - heights = scales / ratio_sqrts * base_anchor_size[0] - widths = scales * ratio_sqrts * base_anchor_size[1] - - # Get a grid of box centers - y_centers = tf.to_float(tf.range(grid_height)) - y_centers = y_centers * anchor_stride[0] + anchor_offset[0] - x_centers = tf.to_float(tf.range(grid_width)) - x_centers = x_centers * anchor_stride[1] + anchor_offset[1] - x_centers, y_centers = ops.meshgrid(x_centers, y_centers) - - widths_grid, x_centers_grid = ops.meshgrid(widths, x_centers) - heights_grid, y_centers_grid = ops.meshgrid(heights, y_centers) - bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=3) - bbox_sizes = tf.stack([heights_grid, widths_grid], axis=3) - bbox_centers = tf.reshape(bbox_centers, [-1, 2]) - bbox_sizes = tf.reshape(bbox_sizes, [-1, 2]) - bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes) - return box_list.BoxList(bbox_corners) - - -def _center_size_bbox_to_corners_bbox(centers, sizes): - """Converts bbox center-size representation to corners representation. - - Args: - centers: a tensor with shape [N, 2] representing bounding box centers - sizes: a tensor with shape [N, 2] representing bounding boxes - - Returns: - corners: tensor with shape [N, 4] representing bounding boxes in corners - representation - """ - return tf.concat([centers - .5 * sizes, centers + .5 * sizes], 1) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/grid_anchor_generator_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/grid_anchor_generator_test.py deleted file mode 100644 index 8de74aa7ede1c5d26bb72cff3d04e1a1a544f4f3..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/grid_anchor_generator_test.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.grid_anchor_generator.""" -import numpy as np -import tensorflow as tf - -from object_detection.anchor_generators import grid_anchor_generator -from object_detection.utils import test_case - - -class GridAnchorGeneratorTest(test_case.TestCase): - - def test_construct_single_anchor(self): - """Builds a 1x1 anchor grid to test the size of the output boxes.""" - def graph_fn(): - scales = [0.5, 1.0, 2.0] - aspect_ratios = [0.25, 1.0, 4.0] - anchor_offset = [7, -3] - anchor_generator = grid_anchor_generator.GridAnchorGenerator( - scales, aspect_ratios, anchor_offset=anchor_offset) - anchors_list = anchor_generator.generate(feature_map_shape_list=[(1, 1)]) - anchor_corners = anchors_list[0].get() - return (anchor_corners,) - exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61], - [-505, -131, 519, 125], [-57, -67, 71, 61], - [-121, -131, 135, 125], [-249, -259, 263, 253], - [-25, -131, 39, 125], [-57, -259, 71, 253], - [-121, -515, 135, 509]] - anchor_corners_out = self.execute(graph_fn, []) - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - def test_construct_anchor_grid(self): - def graph_fn(): - base_anchor_size = [10, 10] - anchor_stride = [19, 19] - anchor_offset = [0, 0] - scales = [0.5, 1.0, 2.0] - aspect_ratios = [1.0] - - anchor_generator = grid_anchor_generator.GridAnchorGenerator( - scales, - aspect_ratios, - base_anchor_size=base_anchor_size, - anchor_stride=anchor_stride, - anchor_offset=anchor_offset) - - anchors_list = anchor_generator.generate(feature_map_shape_list=[(2, 2)]) - anchor_corners = anchors_list[0].get() - return (anchor_corners,) - exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.], - [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5], - [-5., 14., 5, 24], [-10., 9., 10, 29], - [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5], - [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5], - [14., 14., 24, 24], [9., 9., 29, 29]] - anchor_corners_out = self.execute(graph_fn, []) - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - def test_construct_anchor_grid_with_dynamic_feature_map_shapes(self): - def graph_fn(feature_map_height, feature_map_width): - base_anchor_size = [10, 10] - anchor_stride = [19, 19] - anchor_offset = [0, 0] - scales = [0.5, 1.0, 2.0] - aspect_ratios = [1.0] - anchor_generator = grid_anchor_generator.GridAnchorGenerator( - scales, - aspect_ratios, - base_anchor_size=base_anchor_size, - anchor_stride=anchor_stride, - anchor_offset=anchor_offset) - - anchors_list = anchor_generator.generate( - feature_map_shape_list=[(feature_map_height, feature_map_width)]) - anchor_corners = anchors_list[0].get() - return (anchor_corners,) - - exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.], - [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5], - [-5., 14., 5, 24], [-10., 9., 10, 29], - [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5], - [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5], - [14., 14., 24, 24], [9., 9., 29, 29]] - anchor_corners_out = self.execute_cpu(graph_fn, - [np.array(2, dtype=np.int32), - np.array(2, dtype=np.int32)]) - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiple_grid_anchor_generator.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiple_grid_anchor_generator.py deleted file mode 100644 index 
bd785c171f686f1c524b78efbc7d03dbae4f7940..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiple_grid_anchor_generator.py +++ /dev/null @@ -1,336 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Generates grid anchors on the fly corresponding to multiple CNN layers. - -Generates grid anchors on the fly corresponding to multiple CNN layers as -described in: -"SSD: Single Shot MultiBox Detector" -Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, -Cheng-Yang Fu, Alexander C. Berg -(see Section 2.2: Choosing scales and aspect ratios for default boxes) -""" - -import numpy as np - -import tensorflow as tf - -from object_detection.anchor_generators import grid_anchor_generator -from object_detection.core import anchor_generator -from object_detection.core import box_list_ops - - -class MultipleGridAnchorGenerator(anchor_generator.AnchorGenerator): - """Generate a grid of anchors for multiple CNN layers.""" - - def __init__(self, - box_specs_list, - base_anchor_size=None, - anchor_strides=None, - anchor_offsets=None, - clip_window=None): - """Constructs a MultipleGridAnchorGenerator. - - To construct anchors, at multiple grid resolutions, one must provide a - list of feature_map_shape_list (e.g., [(8, 8), (4, 4)]), and for each grid - size, a corresponding list of (scale, aspect ratio) box specifications. - - For example: - box_specs_list = [[(.1, 1.0), (.1, 2.0)], # for 8x8 grid - [(.2, 1.0), (.3, 1.0), (.2, 2.0)]] # for 4x4 grid - - To support the fully convolutional setting, we pass grid sizes in at - generation time, while scale and aspect ratios are fixed at construction - time. - - Args: - box_specs_list: list of list of (scale, aspect ratio) pairs with the - outside list having the same number of entries as feature_map_shape_list - (which is passed in at generation time). - base_anchor_size: base anchor size as [height, width] - (length-2 float tensor, default=[1.0, 1.0]). - The height and width values are normalized to the - minimum dimension of the input height and width, so that - when the base anchor height equals the base anchor - width, the resulting anchor is square even if the input - image is not square. - anchor_strides: list of pairs of strides in pixels (in y and x directions - respectively). For example, setting anchor_strides=[(25, 25), (50, 50)] - means that we want the anchors corresponding to the first layer to be - strided by 25 pixels and those in the second layer to be strided by 50 - pixels in both y and x directions. If anchor_strides=None, they are set - to be the reciprocal of the corresponding feature map shapes. - anchor_offsets: list of pairs of offsets in pixels (in y and x directions - respectively). The offset specifies where we want the center of the - (0, 0)-th anchor to lie for each layer. 
For example, setting - anchor_offsets=[(10, 10), (20, 20)]) means that we want the - (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space - and likewise that we want the (0, 0)-th anchor of the second layer to - lie at (25, 25) in pixel space. If anchor_offsets=None, then they are - set to be half of the corresponding anchor stride. - clip_window: a tensor of shape [4] specifying a window to which all - anchors should be clipped. If clip_window is None, then no clipping - is performed. - - Raises: - ValueError: if box_specs_list is not a list of list of pairs - ValueError: if clip_window is not either None or a tensor of shape [4] - """ - if isinstance(box_specs_list, list) and all( - [isinstance(list_item, list) for list_item in box_specs_list]): - self._box_specs = box_specs_list - else: - raise ValueError('box_specs_list is expected to be a ' - 'list of lists of pairs') - if base_anchor_size is None: - base_anchor_size = tf.constant([256, 256], dtype=tf.float32) - self._base_anchor_size = base_anchor_size - self._anchor_strides = anchor_strides - self._anchor_offsets = anchor_offsets - if clip_window is not None and clip_window.get_shape().as_list() != [4]: - raise ValueError('clip_window must either be None or a shape [4] tensor') - self._clip_window = clip_window - self._scales = [] - self._aspect_ratios = [] - for box_spec in self._box_specs: - if not all([isinstance(entry, tuple) and len(entry) == 2 - for entry in box_spec]): - raise ValueError('box_specs_list is expected to be a ' - 'list of lists of pairs') - scales, aspect_ratios = zip(*box_spec) - self._scales.append(scales) - self._aspect_ratios.append(aspect_ratios) - - for arg, arg_name in zip([self._anchor_strides, self._anchor_offsets], - ['anchor_strides', 'anchor_offsets']): - if arg and not (isinstance(arg, list) and - len(arg) == len(self._box_specs)): - raise ValueError('%s must be a list with the same length ' - 'as self._box_specs' % arg_name) - if arg and not all([ - isinstance(list_item, tuple) and len(list_item) == 2 - for list_item in arg - ]): - raise ValueError('%s must be a list of pairs.' % arg_name) - - def name_scope(self): - return 'MultipleGridAnchorGenerator' - - def num_anchors_per_location(self): - """Returns the number of anchors per spatial location. - - Returns: - a list of integers, one for each expected feature map to be passed to - the Generate function. - """ - return [len(box_specs) for box_specs in self._box_specs] - - def _generate(self, feature_map_shape_list, im_height=1, im_width=1): - """Generates a collection of bounding boxes to be used as anchors. - - The number of anchors generated for a single grid with shape MxM where we - place k boxes over each grid center is k*M^2 and thus the total number of - anchors is the sum over all grids. In our box_specs_list example - (see the constructor docstring), we would place two boxes over each grid - point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and - thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the - output anchors follows the order of how the grid sizes and box_specs are - specified (with box_spec index varying the fastest, followed by width - index, then height index, then grid index). - - Args: - feature_map_shape_list: list of pairs of convnet layer resolutions in the - format [(height_0, width_0), (height_1, width_1), ...]. For example, - setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that - correspond to an 8x8 layer followed by a 7x7 layer. 
- im_height: the height of the image to generate the grid for. If both - im_height and im_width are 1, the generated anchors default to - normalized coordinates, otherwise absolute coordinates are used for the - grid. - im_width: the width of the image to generate the grid for. If both - im_height and im_width are 1, the generated anchors default to - normalized coordinates, otherwise absolute coordinates are used for the - grid. - - Returns: - boxes_list: a list of BoxLists each holding anchor boxes corresponding to - the input feature map shapes. - - Raises: - ValueError: if feature_map_shape_list, box_specs_list do not have the same - length. - ValueError: if feature_map_shape_list does not consist of pairs of - integers - """ - if not (isinstance(feature_map_shape_list, list) - and len(feature_map_shape_list) == len(self._box_specs)): - raise ValueError('feature_map_shape_list must be a list with the same ' - 'length as self._box_specs') - if not all([isinstance(list_item, tuple) and len(list_item) == 2 - for list_item in feature_map_shape_list]): - raise ValueError('feature_map_shape_list must be a list of pairs.') - - im_height = tf.to_float(im_height) - im_width = tf.to_float(im_width) - - if not self._anchor_strides: - anchor_strides = [(1.0 / tf.to_float(pair[0]), 1.0 / tf.to_float(pair[1])) - for pair in feature_map_shape_list] - else: - anchor_strides = [(tf.to_float(stride[0]) / im_height, - tf.to_float(stride[1]) / im_width) - for stride in self._anchor_strides] - if not self._anchor_offsets: - anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1]) - for stride in anchor_strides] - else: - anchor_offsets = [(tf.to_float(offset[0]) / im_height, - tf.to_float(offset[1]) / im_width) - for offset in self._anchor_offsets] - - for arg, arg_name in zip([anchor_strides, anchor_offsets], - ['anchor_strides', 'anchor_offsets']): - if not (isinstance(arg, list) and len(arg) == len(self._box_specs)): - raise ValueError('%s must be a list with the same length ' - 'as self._box_specs' % arg_name) - if not all([isinstance(list_item, tuple) and len(list_item) == 2 - for list_item in arg]): - raise ValueError('%s must be a list of pairs.' 
% arg_name) - - anchor_grid_list = [] - min_im_shape = tf.minimum(im_height, im_width) - scale_height = min_im_shape / im_height - scale_width = min_im_shape / im_width - base_anchor_size = [ - scale_height * self._base_anchor_size[0], - scale_width * self._base_anchor_size[1] - ] - for feature_map_index, (grid_size, scales, aspect_ratios, stride, - offset) in enumerate( - zip(feature_map_shape_list, self._scales, - self._aspect_ratios, anchor_strides, - anchor_offsets)): - tiled_anchors = grid_anchor_generator.tile_anchors( - grid_height=grid_size[0], - grid_width=grid_size[1], - scales=scales, - aspect_ratios=aspect_ratios, - base_anchor_size=base_anchor_size, - anchor_stride=stride, - anchor_offset=offset) - if self._clip_window is not None: - tiled_anchors = box_list_ops.clip_to_window( - tiled_anchors, self._clip_window, filter_nonoverlapping=False) - num_anchors_in_layer = tiled_anchors.num_boxes_static() - if num_anchors_in_layer is None: - num_anchors_in_layer = tiled_anchors.num_boxes() - anchor_indices = feature_map_index * tf.ones([num_anchors_in_layer]) - tiled_anchors.add_field('feature_map_index', anchor_indices) - anchor_grid_list.append(tiled_anchors) - - return anchor_grid_list - - -def create_ssd_anchors(num_layers=6, - min_scale=0.2, - max_scale=0.95, - scales=None, - aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3), - interpolated_scale_aspect_ratio=1.0, - base_anchor_size=None, - anchor_strides=None, - anchor_offsets=None, - reduce_boxes_in_lowest_layer=True): - """Creates MultipleGridAnchorGenerator for SSD anchors. - - This function instantiates a MultipleGridAnchorGenerator that reproduces - ``default box`` construction proposed by Liu et al in the SSD paper. - See Section 2.2 for details. Grid sizes are assumed to be passed in - at generation time from finest resolution to coarsest resolution --- this is - used to (linearly) interpolate scales of anchor boxes corresponding to the - intermediate grid sizes. - - Anchors that are returned by calling the `generate` method on the returned - MultipleGridAnchorGenerator object are always in normalized coordinates - and clipped to the unit square: (i.e. all coordinates lie in [0, 1]x[0, 1]). - - Args: - num_layers: integer number of grid layers to create anchors for (actual - grid sizes passed in at generation time) - min_scale: scale of anchors corresponding to finest resolution (float) - max_scale: scale of anchors corresponding to coarsest resolution (float) - scales: As list of anchor scales to use. When not None and not empty, - min_scale and max_scale are not used. - aspect_ratios: list or tuple of (float) aspect ratios to place on each - grid point. - interpolated_scale_aspect_ratio: An additional anchor is added with this - aspect ratio and a scale interpolated between the scale for a layer - and the scale for the next layer (1.0 for the last layer). - This anchor is not included if this value is 0. - base_anchor_size: base anchor size as [height, width]. - The height and width values are normalized to the minimum dimension of the - input height and width, so that when the base anchor height equals the - base anchor width, the resulting anchor is square even if the input image - is not square. - anchor_strides: list of pairs of strides in pixels (in y and x directions - respectively). 
For example, setting anchor_strides=[(25, 25), (50, 50)] - means that we want the anchors corresponding to the first layer to be - strided by 25 pixels and those in the second layer to be strided by 50 - pixels in both y and x directions. If anchor_strides=None, they are set to - be the reciprocal of the corresponding feature map shapes. - anchor_offsets: list of pairs of offsets in pixels (in y and x directions - respectively). The offset specifies where we want the center of the - (0, 0)-th anchor to lie for each layer. For example, setting - anchor_offsets=[(10, 10), (20, 20)]) means that we want the - (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space - and likewise that we want the (0, 0)-th anchor of the second layer to lie - at (25, 25) in pixel space. If anchor_offsets=None, then they are set to - be half of the corresponding anchor stride. - reduce_boxes_in_lowest_layer: a boolean to indicate whether the fixed 3 - boxes per location is used in the lowest layer. - - Returns: - a MultipleGridAnchorGenerator - """ - if base_anchor_size is None: - base_anchor_size = [1.0, 1.0] - base_anchor_size = tf.constant(base_anchor_size, dtype=tf.float32) - box_specs_list = [] - if scales is None or not scales: - scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1) - for i in range(num_layers)] + [1.0] - else: - # Add 1.0 to the end, which will only be used in scale_next below and used - # for computing an interpolated scale for the largest scale in the list. - scales += [1.0] - - for layer, scale, scale_next in zip( - range(num_layers), scales[:-1], scales[1:]): - layer_box_specs = [] - if layer == 0 and reduce_boxes_in_lowest_layer: - layer_box_specs = [(0.1, 1.0), (scale, 2.0), (scale, 0.5)] - else: - for aspect_ratio in aspect_ratios: - layer_box_specs.append((scale, aspect_ratio)) - # Add one more anchor, with a scale between the current scale, and the - # scale for the next layer, with a specified aspect ratio (1.0 by - # default). - if interpolated_scale_aspect_ratio > 0.0: - layer_box_specs.append((np.sqrt(scale*scale_next), - interpolated_scale_aspect_ratio)) - box_specs_list.append(layer_box_specs) - - return MultipleGridAnchorGenerator(box_specs_list, base_anchor_size, - anchor_strides, anchor_offsets) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiple_grid_anchor_generator_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiple_grid_anchor_generator_test.py deleted file mode 100644 index 070d81d36e79368c9fd46c7f3e03df7a93baee76..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiple_grid_anchor_generator_test.py +++ /dev/null @@ -1,289 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for anchor_generators.multiple_grid_anchor_generator.py.""" - -import numpy as np - -import tensorflow as tf - -from object_detection.anchor_generators import multiple_grid_anchor_generator as ag -from object_detection.utils import test_case - - -class MultipleGridAnchorGeneratorTest(test_case.TestCase): - - def test_construct_single_anchor_grid(self): - """Builds a 1x1 anchor grid to test the size of the output boxes.""" - def graph_fn(): - - box_specs_list = [[(.5, .25), (1.0, .25), (2.0, .25), - (.5, 1.0), (1.0, 1.0), (2.0, 1.0), - (.5, 4.0), (1.0, 4.0), (2.0, 4.0)]] - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([256, 256], dtype=tf.float32), - anchor_strides=[(16, 16)], - anchor_offsets=[(7, -3)]) - anchors_list = anchor_generator.generate(feature_map_shape_list=[(1, 1)]) - return anchors_list[0].get() - exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61], - [-505, -131, 519, 125], [-57, -67, 71, 61], - [-121, -131, 135, 125], [-249, -259, 263, 253], - [-25, -131, 39, 125], [-57, -259, 71, 253], - [-121, -515, 135, 509]] - - anchor_corners_out = self.execute(graph_fn, []) - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - def test_construct_anchor_grid(self): - def graph_fn(): - box_specs_list = [[(0.5, 1.0), (1.0, 1.0), (2.0, 1.0)]] - - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([10, 10], dtype=tf.float32), - anchor_strides=[(19, 19)], - anchor_offsets=[(0, 0)]) - anchors_list = anchor_generator.generate(feature_map_shape_list=[(2, 2)]) - return anchors_list[0].get() - exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.], - [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5], - [-5., 14., 5, 24], [-10., 9., 10, 29], - [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5], - [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5], - [14., 14., 24, 24], [9., 9., 29, 29]] - - anchor_corners_out = self.execute(graph_fn, []) - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - def test_construct_anchor_grid_non_square(self): - - def graph_fn(): - box_specs_list = [[(1.0, 1.0)]] - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, base_anchor_size=tf.constant([1, 1], - dtype=tf.float32)) - anchors_list = anchor_generator.generate(feature_map_shape_list=[( - tf.constant(1, dtype=tf.int32), tf.constant(2, dtype=tf.int32))]) - return anchors_list[0].get() - - exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]] - anchor_corners_out = self.execute(graph_fn, []) - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - def test_construct_dynamic_size_anchor_grid(self): - - def graph_fn(height, width): - box_specs_list = [[(1.0, 1.0)]] - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, base_anchor_size=tf.constant([1, 1], - dtype=tf.float32)) - anchors_list = anchor_generator.generate(feature_map_shape_list=[(height, - width)]) - return anchors_list[0].get() - - exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]] - - anchor_corners_out = self.execute_cpu(graph_fn, - [np.array(1, dtype=np.int32), - np.array(2, dtype=np.int32)]) - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - def test_construct_anchor_grid_normalized(self): - def graph_fn(): - box_specs_list = [[(1.0, 1.0)]] - - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list,
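# (Editor's illustration, not in the original file.) When im_height and
# im_width are passed to generate(), the base anchor is rescaled by
# (min_dim / height, min_dim / width); here that is (320/320, 320/640)
# = (1.0, 0.5), so the 1x2 grid below yields the two unit-square halves
# [0, 0, 1, 0.5] and [0, 0.5, 1, 1] asserted in exp_anchor_corners.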
base_anchor_size=tf.constant([1, 1], - dtype=tf.float32)) - anchors_list = anchor_generator.generate( - feature_map_shape_list=[(tf.constant(1, dtype=tf.int32), tf.constant( - 2, dtype=tf.int32))], - im_height=320, - im_width=640) - return anchors_list[0].get() - - exp_anchor_corners = [[0., 0., 1., 0.5], [0., 0.5, 1., 1.]] - anchor_corners_out = self.execute(graph_fn, []) - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - def test_construct_multiple_grids(self): - - def graph_fn(): - box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)], - [(1.0, 1.0), (1.0, 0.5)]] - - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), - anchor_strides=[(.25, .25), (.5, .5)], - anchor_offsets=[(.125, .125), (.25, .25)]) - anchors_list = anchor_generator.generate(feature_map_shape_list=[(4, 4), ( - 2, 2)]) - return [anchors.get() for anchors in anchors_list] - # height and width of box with .5 aspect ratio - h = np.sqrt(2) - w = 1.0/np.sqrt(2) - exp_small_grid_corners = [[-.25, -.25, .75, .75], - [.25-.5*h, .25-.5*w, .25+.5*h, .25+.5*w], - [-.25, .25, .75, 1.25], - [.25-.5*h, .75-.5*w, .25+.5*h, .75+.5*w], - [.25, -.25, 1.25, .75], - [.75-.5*h, .25-.5*w, .75+.5*h, .25+.5*w], - [.25, .25, 1.25, 1.25], - [.75-.5*h, .75-.5*w, .75+.5*h, .75+.5*w]] - # only test first entry of larger set of anchors - exp_big_grid_corners = [[.125-.5, .125-.5, .125+.5, .125+.5], - [.125-1.0, .125-1.0, .125+1.0, .125+1.0], - [.125-.5*h, .125-.5*w, .125+.5*h, .125+.5*w],] - - anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0) - self.assertEquals(anchor_corners_out.shape, (56, 4)) - big_grid_corners = anchor_corners_out[0:3, :] - small_grid_corners = anchor_corners_out[48:, :] - self.assertAllClose(small_grid_corners, exp_small_grid_corners) - self.assertAllClose(big_grid_corners, exp_big_grid_corners) - - def test_construct_multiple_grids_with_clipping(self): - - def graph_fn(): - box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)], - [(1.0, 1.0), (1.0, 0.5)]] - - clip_window = tf.constant([0, 0, 1, 1], dtype=tf.float32) - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), - clip_window=clip_window) - anchors_list = anchor_generator.generate(feature_map_shape_list=[(4, 4), ( - 2, 2)]) - return [anchors.get() for anchors in anchors_list] - # height and width of box with .5 aspect ratio - h = np.sqrt(2) - w = 1.0/np.sqrt(2) - exp_small_grid_corners = [[0, 0, .75, .75], - [0, 0, .25+.5*h, .25+.5*w], - [0, .25, .75, 1], - [0, .75-.5*w, .25+.5*h, 1], - [.25, 0, 1, .75], - [.75-.5*h, 0, 1, .25+.5*w], - [.25, .25, 1, 1], - [.75-.5*h, .75-.5*w, 1, 1]] - - anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0) - small_grid_corners = anchor_corners_out[48:, :] - self.assertAllClose(small_grid_corners, exp_small_grid_corners) - - def test_invalid_box_specs(self): - # not all box specs are pairs - box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)], - [(1.0, 1.0), (1.0, 0.5, .3)]] - with self.assertRaises(ValueError): - ag.MultipleGridAnchorGenerator(box_specs_list) - - # box_specs_list is not a list of lists - box_specs_list = [(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)] - with self.assertRaises(ValueError): - ag.MultipleGridAnchorGenerator(box_specs_list) - - def test_invalid_generate_arguments(self): - box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)], - [(1.0, 1.0), (1.0, 0.5)]] - - # incompatible lengths with box_specs_list 
- with self.assertRaises(ValueError): - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), - anchor_strides=[(.25, .25)], - anchor_offsets=[(.125, .125), (.25, .25)]) - anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)]) - with self.assertRaises(ValueError): - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), - anchor_strides=[(.25, .25), (.5, .5)], - anchor_offsets=[(.125, .125), (.25, .25)]) - anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2), (1, 1)]) - with self.assertRaises(ValueError): - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), - anchor_strides=[(.5, .5)], - anchor_offsets=[(.25, .25)]) - anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)]) - - # not pairs - with self.assertRaises(ValueError): - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), - anchor_strides=[(.25, .25), (.5, .5)], - anchor_offsets=[(.125, .125), (.25, .25)]) - anchor_generator.generate(feature_map_shape_list=[(4, 4, 4), (2, 2)]) - with self.assertRaises(ValueError): - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), - anchor_strides=[(.25, .25, .1), (.5, .5)], - anchor_offsets=[(.125, .125), (.25, .25)]) - anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)]) - with self.assertRaises(ValueError): - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), - anchor_strides=[(.25, .25), (.5, .5)], - anchor_offsets=[(.125, .125), (.25, .25)]) - anchor_generator.generate(feature_map_shape_list=[(4), (2, 2)]) - - -class CreateSSDAnchorsTest(test_case.TestCase): - - def test_create_ssd_anchors_returns_correct_shape(self): - - def graph_fn1(): - anchor_generator = ag.create_ssd_anchors( - num_layers=6, - min_scale=0.2, - max_scale=0.95, - aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3), - reduce_boxes_in_lowest_layer=True) - - feature_map_shape_list = [(38, 38), (19, 19), (10, 10), - (5, 5), (3, 3), (1, 1)] - anchors_list = anchor_generator.generate( - feature_map_shape_list=feature_map_shape_list) - return [anchors.get() for anchors in anchors_list] - anchor_corners_out = np.concatenate(self.execute(graph_fn1, []), axis=0) - self.assertEquals(anchor_corners_out.shape, (7308, 4)) - - def graph_fn2(): - anchor_generator = ag.create_ssd_anchors( - num_layers=6, min_scale=0.2, max_scale=0.95, - aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3), - reduce_boxes_in_lowest_layer=False) - - feature_map_shape_list = [(38, 38), (19, 19), (10, 10), - (5, 5), (3, 3), (1, 1)] - anchors_list = anchor_generator.generate( - feature_map_shape_list=feature_map_shape_list) - return [anchors.get() for anchors in anchors_list] - anchor_corners_out = np.concatenate(self.execute(graph_fn2, []), axis=0) - self.assertEquals(anchor_corners_out.shape, (11640, 4)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiscale_grid_anchor_generator.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiscale_grid_anchor_generator.py deleted file mode 100644 index 
a8d227c77155eb45eb737c86c416d2e3d1fdda83..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiscale_grid_anchor_generator.py +++ /dev/null @@ -1,138 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Generates grid anchors on the fly corresponding to multiple CNN layers. - -Generates grid anchors on the fly corresponding to multiple CNN layers as -described in: -"Focal Loss for Dense Object Detection" (https://arxiv.org/abs/1708.02002) -T.-Y. Lin, P. Goyal, R. Girshick, K. He, P. Dollar -""" - -from object_detection.anchor_generators import grid_anchor_generator -from object_detection.core import anchor_generator -from object_detection.core import box_list_ops - - -class MultiscaleGridAnchorGenerator(anchor_generator.AnchorGenerator): - """Generate a grid of anchors for multiple CNN layers of different scale.""" - - def __init__(self, min_level, max_level, anchor_scale, aspect_ratios, - scales_per_octave, normalize_coordinates=True): - """Constructs a MultiscaleGridAnchorGenerator. - - To construct anchors at multiple scale resolutions, one must provide the - minimum and maximum levels of a scale pyramid. To define the anchor size, - an anchor scale is provided that sets the size relative to the stride of - the corresponding feature map. The generator maps each pixel location on a - feature map to multiple anchors with different aspect ratios and - intermediate scales. - - Args: - min_level: minimum level in feature pyramid. - max_level: maximum level in feature pyramid. - anchor_scale: anchor scale and feature stride define the size of the base - anchor on an image. For example, given a feature pyramid with strides - [2^3, ..., 2^7] and anchor scale 4, the base anchor sizes are - 4 * [2^3, ..., 2^7]. - aspect_ratios: list or tuple of (float) aspect ratios to place on each - grid point. - scales_per_octave: integer number of intermediate scales per scale octave. - normalize_coordinates: whether to produce anchors in normalized - coordinates (defaults to True).
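For example (editor's illustration with assumed values, not from the
original file): with min_level=5, max_level=6, anchor_scale=4.0,
aspect_ratios=[1.0] and scales_per_octave=2, each level l uses base
anchor size 4 * 2^l (128 at level 5, 256 at level 6) at intermediate
scales [2^0, 2^(1/2)], i.e. square anchors of side roughly 128 and 181
at level 5, and 256 and 362 at level 6.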
- """ - self._anchor_grid_info = [] - self._aspect_ratios = aspect_ratios - self._scales_per_octave = scales_per_octave - self._normalize_coordinates = normalize_coordinates - - for level in range(min_level, max_level + 1): - anchor_stride = [2**level, 2**level] - scales = [] - aspects = [] - for scale in range(scales_per_octave): - scales.append(2**(float(scale) / scales_per_octave)) - for aspect_ratio in aspect_ratios: - aspects.append(aspect_ratio) - base_anchor_size = [2**level * anchor_scale, 2**level * anchor_scale] - self._anchor_grid_info.append({ - 'level': level, - 'info': [scales, aspects, base_anchor_size, anchor_stride] - }) - - def name_scope(self): - return 'MultiscaleGridAnchorGenerator' - - def num_anchors_per_location(self): - """Returns the number of anchors per spatial location. - - Returns: - a list of integers, one for each expected feature map to be passed to - the Generate function. - """ - return len(self._anchor_grid_info) * [ - len(self._aspect_ratios) * self._scales_per_octave] - - def _generate(self, feature_map_shape_list, im_height, im_width): - """Generates a collection of bounding boxes to be used as anchors. - - Currently we require the input image shape to be statically defined. That - is, im_height and im_width should be integers rather than tensors. - - Args: - feature_map_shape_list: list of pairs of convnet layer resolutions in the - format [(height_0, width_0), (height_1, width_1), ...]. For example, - setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that - correspond to an 8x8 layer followed by a 7x7 layer. - im_height: the height of the image to generate the grid for. - im_width: the width of the image to generate the grid for. - - Returns: - boxes_list: a list of BoxLists each holding anchor boxes corresponding to - the input feature map shapes. - Raises: - ValueError: if im_height and im_width are not integers. 
- """ - if not isinstance(im_height, int) or not isinstance(im_width, int): - raise ValueError('MultiscaleGridAnchorGenerator currently requires ' - 'input image shape to be statically defined.') - anchor_grid_list = [] - for feat_shape, grid_info in zip(feature_map_shape_list, - self._anchor_grid_info): - # TODO(rathodv) check the feature_map_shape_list is consistent with - # self._anchor_grid_info - level = grid_info['level'] - stride = 2**level - scales, aspect_ratios, base_anchor_size, anchor_stride = grid_info['info'] - feat_h = feat_shape[0] - feat_w = feat_shape[1] - anchor_offset = [0, 0] - if im_height % 2.0**level == 0: - anchor_offset[0] = stride / 2.0 - if im_width % 2.0**level == 0: - anchor_offset[1] = stride / 2.0 - ag = grid_anchor_generator.GridAnchorGenerator( - scales, - aspect_ratios, - base_anchor_size=base_anchor_size, - anchor_stride=anchor_stride, - anchor_offset=anchor_offset) - (anchor_grid,) = ag.generate(feature_map_shape_list=[(feat_h, feat_w)]) - - if self._normalize_coordinates: - anchor_grid = box_list_ops.to_normalized_coordinates( - anchor_grid, im_height, im_width, check_range=False) - anchor_grid_list.append(anchor_grid) - - return anchor_grid_list diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiscale_grid_anchor_generator_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiscale_grid_anchor_generator_test.py deleted file mode 100644 index c96bdae7b9bcb59d295350ac31a5f8f56b720280..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/anchor_generators/multiscale_grid_anchor_generator_test.py +++ /dev/null @@ -1,258 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for anchor_generators.multiscale_grid_anchor_generator.py.""" -import numpy as np -import tensorflow as tf - -from object_detection.anchor_generators import multiscale_grid_anchor_generator as mg -from object_detection.utils import test_case - - -class MultiscaleGridAnchorGeneratorTest(test_case.TestCase): - - def test_construct_single_anchor(self): - min_level = 5 - max_level = 5 - anchor_scale = 4.0 - aspect_ratios = [1.0] - scales_per_octave = 1 - im_height = 64 - im_width = 64 - feature_map_shape_list = [(2, 2)] - exp_anchor_corners = [[-48, -48, 80, 80], - [-48, -16, 80, 112], - [-16, -48, 112, 80], - [-16, -16, 112, 112]] - anchor_generator = mg.MultiscaleGridAnchorGenerator( - min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, - normalize_coordinates=False) - anchors_list = anchor_generator.generate( - feature_map_shape_list, im_height=im_height, im_width=im_width) - anchor_corners = anchors_list[0].get() - - with self.test_session(): - anchor_corners_out = anchor_corners.eval() - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - def test_construct_single_anchor_in_normalized_coordinates(self): - min_level = 5 - max_level = 5 - anchor_scale = 4.0 - aspect_ratios = [1.0] - scales_per_octave = 1 - im_height = 64 - im_width = 128 - feature_map_shape_list = [(2, 2)] - exp_anchor_corners = [[-48./64, -48./128, 80./64, 80./128], - [-48./64, -16./128, 80./64, 112./128], - [-16./64, -48./128, 112./64, 80./128], - [-16./64, -16./128, 112./64, 112./128]] - anchor_generator = mg.MultiscaleGridAnchorGenerator( - min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, - normalize_coordinates=True) - anchors_list = anchor_generator.generate( - feature_map_shape_list, im_height=im_height, im_width=im_width) - anchor_corners = anchors_list[0].get() - - with self.test_session(): - anchor_corners_out = anchor_corners.eval() - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - def test_num_anchors_per_location(self): - min_level = 5 - max_level = 6 - anchor_scale = 4.0 - aspect_ratios = [1.0, 2.0] - scales_per_octave = 3 - anchor_generator = mg.MultiscaleGridAnchorGenerator( - min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, - normalize_coordinates=False) - self.assertEqual(anchor_generator.num_anchors_per_location(), [6, 6]) - - def test_construct_single_anchor_fails_with_tensor_image_size(self): - min_level = 5 - max_level = 5 - anchor_scale = 4.0 - aspect_ratios = [1.0] - scales_per_octave = 1 - im_height = tf.constant(64) - im_width = tf.constant(64) - feature_map_shape_list = [(2, 2)] - anchor_generator = mg.MultiscaleGridAnchorGenerator( - min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, - normalize_coordinates=False) - with self.assertRaises(ValueError): - anchor_generator.generate( - feature_map_shape_list, im_height=im_height, im_width=im_width) - - def test_construct_single_anchor_with_odd_input_dimension(self): - - def graph_fn(): - min_level = 5 - max_level = 5 - anchor_scale = 4.0 - aspect_ratios = [1.0] - scales_per_octave = 1 - im_height = 65 - im_width = 65 - feature_map_shape_list = [(3, 3)] - anchor_generator = mg.MultiscaleGridAnchorGenerator( - min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, - normalize_coordinates=False) - anchors_list = anchor_generator.generate( - feature_map_shape_list, im_height=im_height, im_width=im_width) - anchor_corners =
anchors_list[0].get() - return (anchor_corners,) - anchor_corners_out = self.execute(graph_fn, []) - exp_anchor_corners = [[-64, -64, 64, 64], - [-64, -32, 64, 96], - [-64, 0, 64, 128], - [-32, -64, 96, 64], - [-32, -32, 96, 96], - [-32, 0, 96, 128], - [0, -64, 128, 64], - [0, -32, 128, 96], - [0, 0, 128, 128]] - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - def test_construct_single_anchor_on_two_feature_maps(self): - - def graph_fn(): - min_level = 5 - max_level = 6 - anchor_scale = 4.0 - aspect_ratios = [1.0] - scales_per_octave = 1 - im_height = 64 - im_width = 64 - feature_map_shape_list = [(2, 2), (1, 1)] - anchor_generator = mg.MultiscaleGridAnchorGenerator( - min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, - normalize_coordinates=False) - anchors_list = anchor_generator.generate(feature_map_shape_list, - im_height=im_height, - im_width=im_width) - anchor_corners = [anchors.get() for anchors in anchors_list] - return anchor_corners - - anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0) - exp_anchor_corners = [[-48, -48, 80, 80], - [-48, -16, 80, 112], - [-16, -48, 112, 80], - [-16, -16, 112, 112], - [-96, -96, 160, 160]] - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - def test_construct_single_anchor_with_two_scales_per_octave(self): - - def graph_fn(): - min_level = 6 - max_level = 6 - anchor_scale = 4.0 - aspect_ratios = [1.0] - scales_per_octave = 2 - im_height = 64 - im_width = 64 - feature_map_shape_list = [(1, 1)] - - anchor_generator = mg.MultiscaleGridAnchorGenerator( - min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, - normalize_coordinates=False) - anchors_list = anchor_generator.generate(feature_map_shape_list, - im_height=im_height, - im_width=im_width) - anchor_corners = [anchors.get() for anchors in anchors_list] - return anchor_corners - # There are 2 sets of anchors in this configuration. The order is: - # [[2**0.0 intermediate scale + 1.0 aspect], - # [2**0.5 intermediate scale + 1.0 aspect]] - exp_anchor_corners = [[-96., -96., 160., 160.], - [-149.0193, -149.0193, 213.0193, 213.0193]] - - anchor_corners_out = self.execute(graph_fn, []) - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - def test_construct_single_anchor_with_two_scales_per_octave_and_aspect(self): - def graph_fn(): - min_level = 6 - max_level = 6 - anchor_scale = 4.0 - aspect_ratios = [1.0, 2.0] - scales_per_octave = 2 - im_height = 64 - im_width = 64 - feature_map_shape_list = [(1, 1)] - anchor_generator = mg.MultiscaleGridAnchorGenerator( - min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, - normalize_coordinates=False) - anchors_list = anchor_generator.generate(feature_map_shape_list, - im_height=im_height, - im_width=im_width) - anchor_corners = [anchors.get() for anchors in anchors_list] - return anchor_corners - # There are 4 sets of anchors in this configuration.
The order is: - # [[2**0.0 intermediate scale + 1.0 aspect], - # [2**0.5 intermediate scale + 1.0 aspect], - # [2**0.0 intermediate scale + 2.0 aspect], - # [2**0.5 intermediate scale + 2.0 aspect]] - - exp_anchor_corners = [[-96., -96., 160., 160.], - [-149.0193, -149.0193, 213.0193, 213.0193], - [-58.50967, -149.0193, 122.50967, 213.0193], - [-96., -224., 160., 288.]] - anchor_corners_out = self.execute(graph_fn, []) - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - def test_construct_single_anchors_on_feature_maps_with_dynamic_shape(self): - - def graph_fn(feature_map1_height, feature_map1_width, feature_map2_height, - feature_map2_width): - min_level = 5 - max_level = 6 - anchor_scale = 4.0 - aspect_ratios = [1.0] - scales_per_octave = 1 - im_height = 64 - im_width = 64 - feature_map_shape_list = [(feature_map1_height, feature_map1_width), - (feature_map2_height, feature_map2_width)] - anchor_generator = mg.MultiscaleGridAnchorGenerator( - min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, - normalize_coordinates=False) - anchors_list = anchor_generator.generate(feature_map_shape_list, - im_height=im_height, - im_width=im_width) - anchor_corners = [anchors.get() for anchors in anchors_list] - return anchor_corners - - anchor_corners_out = np.concatenate( - self.execute_cpu(graph_fn, [ - np.array(2, dtype=np.int32), - np.array(2, dtype=np.int32), - np.array(1, dtype=np.int32), - np.array(1, dtype=np.int32) - ]), - axis=0) - exp_anchor_corners = [[-48, -48, 80, 80], - [-48, -16, 80, 112], - [-16, -48, 112, 80], - [-16, -16, 112, 112], - [-96, -96, 160, 160]] - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/faster_rcnn_box_coder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/faster_rcnn_box_coder.py deleted file mode 100644 index af25e21a105ffa85931d3f30a1ca41c89c5dde53..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/faster_rcnn_box_coder.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Faster RCNN box coder. - -Faster RCNN box coder follows the coding schema described below: - ty = (y - ya) / ha - tx = (x - xa) / wa - th = log(h / ha) - tw = log(w / wa) - where x, y, w, h denote the box's center coordinates, width and height - respectively. Similarly, xa, ya, wa, ha denote the anchor's center - coordinates, width and height. 
tx, ty, tw and th denote the anchor-encoded - center, width and height respectively. - - See http://arxiv.org/abs/1506.01497 for details. -""" - -import tensorflow as tf - -from object_detection.core import box_coder -from object_detection.core import box_list - -EPSILON = 1e-8 - - -class FasterRcnnBoxCoder(box_coder.BoxCoder): - """Faster RCNN box coder.""" - - def __init__(self, scale_factors=None): - """Constructor for FasterRcnnBoxCoder. - - Args: - scale_factors: List of 4 positive scalars to scale ty, tx, th and tw. - If set to None, does not perform scaling. For Faster RCNN, - the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0]. - """ - if scale_factors: - assert len(scale_factors) == 4 - for scalar in scale_factors: - assert scalar > 0 - self._scale_factors = scale_factors - - @property - def code_size(self): - return 4 - - def _encode(self, boxes, anchors): - """Encode a box collection with respect to anchor collection. - - Args: - boxes: BoxList holding N boxes to be encoded. - anchors: BoxList of anchors. - - Returns: - a tensor representing N anchor-encoded boxes of the format - [ty, tx, th, tw]. - """ - # Convert anchors to the center coordinate representation. - ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() - ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes() - # Avoid NaN in division and log below. - ha += EPSILON - wa += EPSILON - h += EPSILON - w += EPSILON - - tx = (xcenter - xcenter_a) / wa - ty = (ycenter - ycenter_a) / ha - tw = tf.log(w / wa) - th = tf.log(h / ha) - # Scales location targets as used in paper for joint training. - if self._scale_factors: - ty *= self._scale_factors[0] - tx *= self._scale_factors[1] - th *= self._scale_factors[2] - tw *= self._scale_factors[3] - return tf.transpose(tf.stack([ty, tx, th, tw])) - - def _decode(self, rel_codes, anchors): - """Decode relative codes to boxes. - - Args: - rel_codes: a tensor representing N anchor-encoded boxes. - anchors: BoxList of anchors. - - Returns: - boxes: BoxList holding N bounding boxes. - """ - ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() - - ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes)) - if self._scale_factors: - ty /= self._scale_factors[0] - tx /= self._scale_factors[1] - th /= self._scale_factors[2] - tw /= self._scale_factors[3] - w = tf.exp(tw) * wa - h = tf.exp(th) * ha - ycenter = ty * ha + ycenter_a - xcenter = tx * wa + xcenter_a - ymin = ycenter - h / 2. - xmin = xcenter - w / 2. - ymax = ycenter + h / 2. - xmax = xcenter + w / 2. - return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax]))) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/faster_rcnn_box_coder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/faster_rcnn_box_coder_test.py deleted file mode 100644 index b2135f06eea093110c9da17c1c46b7d247f8e806..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/faster_rcnn_box_coder_test.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
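# Editor's aside (a hand-check, not part of the deleted files): the encode
# schema above, applied to box [10, 10, 20, 15] and anchor [15, 12, 30, 18]
# in [ymin, xmin, ymax, xmax] form, reproduces the first row of
# expected_rel_codes in the test below.
import math
ycenter_a, xcenter_a, ha, wa = 22.5, 15.0, 15.0, 6.0  # anchor center/size
ycenter, xcenter, h, w = 15.0, 12.5, 10.0, 5.0        # box center/size
ty = (ycenter - ycenter_a) / ha   # -0.5
tx = (xcenter - xcenter_a) / wa   # -0.416666...
th = math.log(h / ha)             # -0.405465...
tw = math.log(w / wa)             # -0.182321...
print(ty, tx, th, tw)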
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.box_coder.faster_rcnn_box_coder.""" - -import tensorflow as tf - -from object_detection.box_coders import faster_rcnn_box_coder -from object_detection.core import box_list - - -class FasterRcnnBoxCoderTest(tf.test.TestCase): - - def test_get_correct_relative_codes_after_encoding(self): - boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] - anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] - expected_rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321], - [-0.083333, -0.222222, -0.693147, -1.098612]] - boxes = box_list.BoxList(tf.constant(boxes)) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = faster_rcnn_box_coder.FasterRcnnBoxCoder() - rel_codes = coder.encode(boxes, anchors) - with self.test_session() as sess: - rel_codes_out, = sess.run([rel_codes]) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - def test_get_correct_relative_codes_after_encoding_with_scaling(self): - boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] - anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] - scale_factors = [2, 3, 4, 5] - expected_rel_codes = [[-1., -1.25, -1.62186, -0.911608], - [-0.166667, -0.666667, -2.772588, -5.493062]] - boxes = box_list.BoxList(tf.constant(boxes)) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = faster_rcnn_box_coder.FasterRcnnBoxCoder( - scale_factors=scale_factors) - rel_codes = coder.encode(boxes, anchors) - with self.test_session() as sess: - rel_codes_out, = sess.run([rel_codes]) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - def test_get_correct_boxes_after_decoding(self): - anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] - rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321], - [-0.083333, -0.222222, -0.693147, -1.098612]] - expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] - anchors = box_list.BoxList(tf.constant(anchors)) - coder = faster_rcnn_box_coder.FasterRcnnBoxCoder() - boxes = coder.decode(rel_codes, anchors) - with self.test_session() as sess: - boxes_out, = sess.run([boxes.get()]) - self.assertAllClose(boxes_out, expected_boxes) - - def test_get_correct_boxes_after_decoding_with_scaling(self): - anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] - rel_codes = [[-1., -1.25, -1.62186, -0.911608], - [-0.166667, -0.666667, -2.772588, -5.493062]] - scale_factors = [2, 3, 4, 5] - expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] - anchors = box_list.BoxList(tf.constant(anchors)) - coder = faster_rcnn_box_coder.FasterRcnnBoxCoder( - scale_factors=scale_factors) - boxes = coder.decode(rel_codes, anchors) - with self.test_session() as sess: - boxes_out, = sess.run([boxes.get()]) - self.assertAllClose(boxes_out, expected_boxes) - - def test_very_small_Width_nan_after_encoding(self): - boxes = [[10.0, 10.0, 10.0000001, 20.0]] - anchors = [[15.0, 12.0, 30.0, 18.0]] - expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826]] - boxes = box_list.BoxList(tf.constant(boxes)) - anchors = box_list.BoxList(tf.constant(anchors)) - 
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder() - rel_codes = coder.encode(boxes, anchors) - with self.test_session() as sess: - rel_codes_out, = sess.run([rel_codes]) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/keypoint_box_coder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/keypoint_box_coder.py deleted file mode 100644 index 67df3b82ebd83308578bc850ebba2e7c074a9679..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/keypoint_box_coder.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Keypoint box coder. - -The keypoint box coder follows the coding schema described below (this is -similar to the FasterRcnnBoxCoder, except that it encodes keypoints in addition -to box coordinates): - ty = (y - ya) / ha - tx = (x - xa) / wa - th = log(h / ha) - tw = log(w / wa) - tky0 = (ky0 - ya) / ha - tkx0 = (kx0 - xa) / wa - tky1 = (ky1 - ya) / ha - tkx1 = (kx1 - xa) / wa - ... - where x, y, w, h denote the box's center coordinates, width and height - respectively. Similarly, xa, ya, wa, ha denote the anchor's center - coordinates, width and height. tx, ty, tw and th denote the anchor-encoded - center, width and height respectively. ky0, kx0, ky1, kx1, ... denote the - keypoints' coordinates, and tky0, tkx0, tky1, tkx1, ... denote the - anchor-encoded keypoint coordinates. -""" - -import tensorflow as tf - -from object_detection.core import box_coder -from object_detection.core import box_list -from object_detection.core import standard_fields as fields - -EPSILON = 1e-8 - - -class KeypointBoxCoder(box_coder.BoxCoder): - """Keypoint box coder.""" - - def __init__(self, num_keypoints, scale_factors=None): - """Constructor for KeypointBoxCoder. - - Args: - num_keypoints: Number of keypoints to encode/decode. - scale_factors: List of 4 positive scalars to scale ty, tx, th and tw. - In addition to scaling ty and tx, the first 2 scalars are used to scale - the y and x coordinates of the keypoints as well. If set to None, does - not perform scaling. - """ - self._num_keypoints = num_keypoints - - if scale_factors: - assert len(scale_factors) == 4 - for scalar in scale_factors: - assert scalar > 0 - self._scale_factors = scale_factors - self._keypoint_scale_factors = None - if scale_factors is not None: - self._keypoint_scale_factors = tf.expand_dims(tf.tile( - [tf.to_float(scale_factors[0]), tf.to_float(scale_factors[1])], - [num_keypoints]), 1) - - @property - def code_size(self): - return 4 + self._num_keypoints * 2 - - def _encode(self, boxes, anchors): - """Encode a box and keypoint collection with respect to anchor collection. 
- - Args: - boxes: BoxList holding N boxes and keypoints to be encoded. Boxes are - tensors with the shape [N, 4], and keypoints are tensors with the shape - [N, num_keypoints, 2]. - anchors: BoxList of anchors. - - Returns: - a tensor representing N anchor-encoded boxes of the format - [ty, tx, th, tw, tky0, tkx0, tky1, tkx1, ...] where tky0 and tkx0 - represent the y and x coordinates of the first keypoint, tky1 and tkx1 - represent the y and x coordinates of the second keypoint, and so on. - """ - # Convert anchors to the center coordinate representation. - ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() - ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes() - keypoints = boxes.get_field(fields.BoxListFields.keypoints) - keypoints = tf.transpose(tf.reshape(keypoints, - [-1, self._num_keypoints * 2])) - num_boxes = boxes.num_boxes() - - # Avoid NaN in division and log below. - ha += EPSILON - wa += EPSILON - h += EPSILON - w += EPSILON - - tx = (xcenter - xcenter_a) / wa - ty = (ycenter - ycenter_a) / ha - tw = tf.log(w / wa) - th = tf.log(h / ha) - - tiled_anchor_centers = tf.tile( - tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1]) - tiled_anchor_sizes = tf.tile( - tf.stack([ha, wa]), [self._num_keypoints, 1]) - tkeypoints = (keypoints - tiled_anchor_centers) / tiled_anchor_sizes - - # Scales location targets as used in paper for joint training. - if self._scale_factors: - ty *= self._scale_factors[0] - tx *= self._scale_factors[1] - th *= self._scale_factors[2] - tw *= self._scale_factors[3] - tkeypoints *= tf.tile(self._keypoint_scale_factors, [1, num_boxes]) - - tboxes = tf.stack([ty, tx, th, tw]) - return tf.transpose(tf.concat([tboxes, tkeypoints], 0)) - - def _decode(self, rel_codes, anchors): - """Decode relative codes to boxes and keypoints. - - Args: - rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N - anchor-encoded boxes and keypoints - anchors: BoxList of anchors. - - Returns: - boxes: BoxList holding N bounding boxes and keypoints. - """ - ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() - - num_codes = tf.shape(rel_codes)[0] - result = tf.unstack(tf.transpose(rel_codes)) - ty, tx, th, tw = result[:4] - tkeypoints = result[4:] - if self._scale_factors: - ty /= self._scale_factors[0] - tx /= self._scale_factors[1] - th /= self._scale_factors[2] - tw /= self._scale_factors[3] - tkeypoints /= tf.tile(self._keypoint_scale_factors, [1, num_codes]) - - w = tf.exp(tw) * wa - h = tf.exp(th) * ha - ycenter = ty * ha + ycenter_a - xcenter = tx * wa + xcenter_a - ymin = ycenter - h / 2. - xmin = xcenter - w / 2. - ymax = ycenter + h / 2. - xmax = xcenter + w / 2. 
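# (Editor's note, added for clarity; not in the original file.) Shape
# bookkeeping for the keypoint branch below: tkeypoints has shape
# [2 * num_keypoints, num_codes], laid out as [tky0, tkx0, tky1, tkx1, ...]
# along axis 0. tiled_anchor_centers and tiled_anchor_sizes repeat
# [ycenter_a, xcenter_a] and [ha, wa] once per keypoint, so de-normalizing
# the keypoints is a single elementwise multiply-add; the final
# transpose/reshape restores the [num_codes, num_keypoints, 2] layout.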
- decoded_boxes_keypoints = box_list.BoxList( - tf.transpose(tf.stack([ymin, xmin, ymax, xmax]))) - - tiled_anchor_centers = tf.tile( - tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1]) - tiled_anchor_sizes = tf.tile( - tf.stack([ha, wa]), [self._num_keypoints, 1]) - keypoints = tkeypoints * tiled_anchor_sizes + tiled_anchor_centers - keypoints = tf.reshape(tf.transpose(keypoints), - [-1, self._num_keypoints, 2]) - decoded_boxes_keypoints.add_field(fields.BoxListFields.keypoints, keypoints) - return decoded_boxes_keypoints diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/keypoint_box_coder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/keypoint_box_coder_test.py deleted file mode 100644 index 330641e586af98af5f4764fb08f5307458777458..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/keypoint_box_coder_test.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.box_coder.keypoint_box_coder.""" - -import tensorflow as tf - -from object_detection.box_coders import keypoint_box_coder -from object_detection.core import box_list -from object_detection.core import standard_fields as fields - - -class KeypointBoxCoderTest(tf.test.TestCase): - - def test_get_correct_relative_codes_after_encoding(self): - boxes = [[10., 10., 20., 15.], - [0.2, 0.1, 0.5, 0.4]] - keypoints = [[[15., 12.], [10., 15.]], - [[0.5, 0.3], [0.2, 0.4]]] - num_keypoints = len(keypoints[0]) - anchors = [[15., 12., 30., 18.], - [0.1, 0.0, 0.7, 0.9]] - expected_rel_codes = [ - [-0.5, -0.416666, -0.405465, -0.182321, - -0.5, -0.5, -0.833333, 0.], - [-0.083333, -0.222222, -0.693147, -1.098612, - 0.166667, -0.166667, -0.333333, -0.055556] - ] - boxes = box_list.BoxList(tf.constant(boxes)) - boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints)) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints) - rel_codes = coder.encode(boxes, anchors) - with self.test_session() as sess: - rel_codes_out, = sess.run([rel_codes]) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - def test_get_correct_relative_codes_after_encoding_with_scaling(self): - boxes = [[10., 10., 20., 15.], - [0.2, 0.1, 0.5, 0.4]] - keypoints = [[[15., 12.], [10., 15.]], - [[0.5, 0.3], [0.2, 0.4]]] - num_keypoints = len(keypoints[0]) - anchors = [[15., 12., 30., 18.], - [0.1, 0.0, 0.7, 0.9]] - scale_factors = [2, 3, 4, 5] - expected_rel_codes = [ - [-1., -1.25, -1.62186, -0.911608, - -1.0, -1.5, -1.666667, 0.], - [-0.166667, -0.666667, -2.772588, -5.493062, - 0.333333, -0.5, -0.666667, -0.166667] - ] - boxes = box_list.BoxList(tf.constant(boxes)) - boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints)) - anchors = 
box_list.BoxList(tf.constant(anchors)) - coder = keypoint_box_coder.KeypointBoxCoder( - num_keypoints, scale_factors=scale_factors) - rel_codes = coder.encode(boxes, anchors) - with self.test_session() as sess: - rel_codes_out, = sess.run([rel_codes]) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - def test_get_correct_boxes_after_decoding(self): - anchors = [[15., 12., 30., 18.], - [0.1, 0.0, 0.7, 0.9]] - rel_codes = [ - [-0.5, -0.416666, -0.405465, -0.182321, - -0.5, -0.5, -0.833333, 0.], - [-0.083333, -0.222222, -0.693147, -1.098612, - 0.166667, -0.166667, -0.333333, -0.055556] - ] - expected_boxes = [[10., 10., 20., 15.], - [0.2, 0.1, 0.5, 0.4]] - expected_keypoints = [[[15., 12.], [10., 15.]], - [[0.5, 0.3], [0.2, 0.4]]] - num_keypoints = len(expected_keypoints[0]) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints) - boxes = coder.decode(rel_codes, anchors) - with self.test_session() as sess: - boxes_out, keypoints_out = sess.run( - [boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)]) - self.assertAllClose(boxes_out, expected_boxes) - self.assertAllClose(keypoints_out, expected_keypoints) - - def test_get_correct_boxes_after_decoding_with_scaling(self): - anchors = [[15., 12., 30., 18.], - [0.1, 0.0, 0.7, 0.9]] - rel_codes = [ - [-1., -1.25, -1.62186, -0.911608, - -1.0, -1.5, -1.666667, 0.], - [-0.166667, -0.666667, -2.772588, -5.493062, - 0.333333, -0.5, -0.666667, -0.166667] - ] - scale_factors = [2, 3, 4, 5] - expected_boxes = [[10., 10., 20., 15.], - [0.2, 0.1, 0.5, 0.4]] - expected_keypoints = [[[15., 12.], [10., 15.]], - [[0.5, 0.3], [0.2, 0.4]]] - num_keypoints = len(expected_keypoints[0]) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = keypoint_box_coder.KeypointBoxCoder( - num_keypoints, scale_factors=scale_factors) - boxes = coder.decode(rel_codes, anchors) - with self.test_session() as sess: - boxes_out, keypoints_out = sess.run( - [boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)]) - self.assertAllClose(boxes_out, expected_boxes) - self.assertAllClose(keypoints_out, expected_keypoints) - - def test_very_small_width_nan_after_encoding(self): - boxes = [[10., 10., 10.0000001, 20.]] - keypoints = [[[10., 10.], [10.0000001, 20.]]] - anchors = [[15., 12., 30., 18.]] - expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826, - -0.833333, -0.833333, -0.833333, 0.833333]] - boxes = box_list.BoxList(tf.constant(boxes)) - boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints)) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = keypoint_box_coder.KeypointBoxCoder(2) - rel_codes = coder.encode(boxes, anchors) - with self.test_session() as sess: - rel_codes_out, = sess.run([rel_codes]) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/mean_stddev_box_coder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/mean_stddev_box_coder.py deleted file mode 100644 index 256f53fd036798cd7b3da8fcdd720c7e3c46e2e4..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/mean_stddev_box_coder.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
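# Editor's aside (illustrative, not part of the deleted files): the keypoint
# coder's code size is 4 + 2 * num_keypoints, which is why the two-keypoint
# tests above work with 8-element codes laid out as
# [ty, tx, th, tw, tky0, tkx0, tky1, tkx1].
num_keypoints = 2
code_size = 4 + 2 * num_keypoints
print(code_size)  # 8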
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Mean stddev box coder. - -This box coder uses the following coding schema to encode boxes: -rel_code = (box_corner - anchor_corner_mean) / anchor_corner_stddev. -""" -from object_detection.core import box_coder -from object_detection.core import box_list - - -class MeanStddevBoxCoder(box_coder.BoxCoder): - """Mean stddev box coder.""" - - def __init__(self, stddev=0.01): - """Constructor for MeanStddevBoxCoder. - - Args: - stddev: The standard deviation used to encode and decode boxes. - """ - self._stddev = stddev - - @property - def code_size(self): - return 4 - - def _encode(self, boxes, anchors): - """Encode a box collection with respect to anchor collection. - - Args: - boxes: BoxList holding N boxes to be encoded. - anchors: BoxList of N anchors. - - Returns: - a tensor representing N anchor-encoded boxes. - - Raises: - ValueError: if the anchors still have the deprecated stddev field. - """ - box_corners = boxes.get() - if anchors.has_field('stddev'): - raise ValueError("'stddev' is a parameter of MeanStddevBoxCoder and " - "should not be specified in the box list.") - means = anchors.get() - return (box_corners - means) / self._stddev - - def _decode(self, rel_codes, anchors): - """Decode relative codes to boxes. - - Args: - rel_codes: a tensor representing N anchor-encoded boxes. - anchors: BoxList of anchors. - - Returns: - boxes: BoxList holding N bounding boxes. - - Raises: - ValueError: if the anchors still have the deprecated stddev field and - expect the decode method to use the stddev value from that field. - """ - means = anchors.get() - if anchors.has_field('stddev'): - raise ValueError("'stddev' is a parameter of MeanStddevBoxCoder and " - "should not be specified in the box list.") - box_corners = rel_codes * self._stddev + means - return box_list.BoxList(box_corners) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/mean_stddev_box_coder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/mean_stddev_box_coder_test.py deleted file mode 100644 index 3e0eba936fe5a47e34501af73a926d8f83f9f163..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/mean_stddev_box_coder_test.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
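# Editor's aside (illustrative, not part of the deleted files): with
# stddev=0.1, encoding box [0.0, 0.0, 0.5, 0.5] against anchor
# [0.5, 0.5, 1.0, 0.8] gives the second row checked in the test below.
stddev = 0.1
box = [0.0, 0.0, 0.5, 0.5]
anchor = [0.5, 0.5, 1.0, 0.8]
codes = [(b - a) / stddev for b, a in zip(box, anchor)]
print(codes)  # [-5.0, -5.0, -5.0, -3.0] up to float rounding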
-# ============================================================================== - -"""Tests for object_detection.box_coder.mean_stddev_box_coder.""" - -import tensorflow as tf - -from object_detection.box_coders import mean_stddev_box_coder -from object_detection.core import box_list - - -class MeanStddevBoxCoderTest(tf.test.TestCase): - - def testGetCorrectRelativeCodesAfterEncoding(self): - box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]] - boxes = box_list.BoxList(tf.constant(box_corners)) - expected_rel_codes = [[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]] - prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]]) - priors = box_list.BoxList(prior_means) - - coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) - rel_codes = coder.encode(boxes, priors) - with self.test_session() as sess: - rel_codes_out = sess.run(rel_codes) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - def testGetCorrectBoxesAfterDecoding(self): - rel_codes = tf.constant([[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]]) - expected_box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]] - prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]]) - priors = box_list.BoxList(prior_means) - - coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) - decoded_boxes = coder.decode(rel_codes, priors) - decoded_box_corners = decoded_boxes.get() - with self.test_session() as sess: - decoded_out = sess.run(decoded_box_corners) - self.assertAllClose(decoded_out, expected_box_corners) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/square_box_coder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/square_box_coder.py deleted file mode 100644 index ee46b689524838518182ff0f9208168e78c8b2cf..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/square_box_coder.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Square box coder. - -Square box coder follows the coding schema described below: -l = sqrt(h * w) -la = sqrt(ha * wa) -ty = (y - ya) / la -tx = (x - xa) / la -tl = log(l / la) -where x, y, w, h denote the box's center coordinates, width, and height, -respectively. Similarly, xa, ya, wa, ha denote the anchor's center -coordinates, width and height. ty, tx and tl denote the anchor-encoded -center and length, respectively. Because the encoded box is a square, only -one length is encoded. - -This has been shown to provide performance improvements over the Faster RCNN -box coder when the objects being detected tend to be square (e.g. faces) and -when the input images are not distorted via resizing.
-""" - -import tensorflow as tf - -from object_detection.core import box_coder -from object_detection.core import box_list - -EPSILON = 1e-8 - - -class SquareBoxCoder(box_coder.BoxCoder): - """Encodes a 3-scalar representation of a square box.""" - - def __init__(self, scale_factors=None): - """Constructor for SquareBoxCoder. - - Args: - scale_factors: List of 3 positive scalars to scale ty, tx, and tl. - If set to None, does not perform scaling. For faster RCNN, - the open-source implementation recommends using [10.0, 10.0, 5.0]. - - Raises: - ValueError: If scale_factors is not length 3 or contains values less than - or equal to 0. - """ - if scale_factors: - if len(scale_factors) != 3: - raise ValueError('The argument scale_factors must be a list of length ' - '3.') - if any(scalar <= 0 for scalar in scale_factors): - raise ValueError('The values in scale_factors must all be greater ' - 'than 0.') - self._scale_factors = scale_factors - - @property - def code_size(self): - return 3 - - def _encode(self, boxes, anchors): - """Encodes a box collection with respect to an anchor collection. - - Args: - boxes: BoxList holding N boxes to be encoded. - anchors: BoxList of anchors. - - Returns: - a tensor representing N anchor-encoded boxes of the format - [ty, tx, tl]. - """ - # Convert anchors to the center coordinate representation. - ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() - la = tf.sqrt(ha * wa) - ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes() - l = tf.sqrt(h * w) - # Avoid NaN in division and log below. - la += EPSILON - l += EPSILON - - tx = (xcenter - xcenter_a) / la - ty = (ycenter - ycenter_a) / la - tl = tf.log(l / la) - # Scales location targets for joint training. - if self._scale_factors: - ty *= self._scale_factors[0] - tx *= self._scale_factors[1] - tl *= self._scale_factors[2] - return tf.transpose(tf.stack([ty, tx, tl])) - - def _decode(self, rel_codes, anchors): - """Decodes relative codes to boxes. - - Args: - rel_codes: a tensor representing N anchor-encoded boxes. - anchors: BoxList of anchors. - - Returns: - boxes: BoxList holding N bounding boxes. - """ - ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() - la = tf.sqrt(ha * wa) - - ty, tx, tl = tf.unstack(tf.transpose(rel_codes)) - if self._scale_factors: - ty /= self._scale_factors[0] - tx /= self._scale_factors[1] - tl /= self._scale_factors[2] - l = tf.exp(tl) * la - ycenter = ty * la + ycenter_a - xcenter = tx * la + xcenter_a - ymin = ycenter - l / 2. - xmin = xcenter - l / 2. - ymax = ycenter + l / 2. - xmax = xcenter + l / 2. - return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax]))) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/square_box_coder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/square_box_coder_test.py deleted file mode 100644 index 7f739c6b4f38de3d280cb91e9c8e04a661a621e4..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/box_coders/square_box_coder_test.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.box_coder.square_box_coder.""" - -import tensorflow as tf - -from object_detection.box_coders import square_box_coder -from object_detection.core import box_list - - -class SquareBoxCoderTest(tf.test.TestCase): - - def test_correct_relative_codes_with_default_scale(self): - boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] - anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] - scale_factors = None - expected_rel_codes = [[-0.790569, -0.263523, -0.293893], - [-0.068041, -0.272166, -0.89588]] - - boxes = box_list.BoxList(tf.constant(boxes)) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors) - rel_codes = coder.encode(boxes, anchors) - with self.test_session() as sess: - (rel_codes_out,) = sess.run([rel_codes]) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - def test_correct_relative_codes_with_non_default_scale(self): - boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] - anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] - scale_factors = [2, 3, 4] - expected_rel_codes = [[-1.581139, -0.790569, -1.175573], - [-0.136083, -0.816497, -3.583519]] - boxes = box_list.BoxList(tf.constant(boxes)) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors) - rel_codes = coder.encode(boxes, anchors) - with self.test_session() as sess: - (rel_codes_out,) = sess.run([rel_codes]) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - def test_correct_relative_codes_with_small_width(self): - boxes = [[10.0, 10.0, 10.0000001, 20.0]] - anchors = [[15.0, 12.0, 30.0, 18.0]] - scale_factors = None - expected_rel_codes = [[-1.317616, 0., -20.670586]] - boxes = box_list.BoxList(tf.constant(boxes)) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors) - rel_codes = coder.encode(boxes, anchors) - with self.test_session() as sess: - (rel_codes_out,) = sess.run([rel_codes]) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - def test_correct_boxes_with_default_scale(self): - anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] - rel_codes = [[-0.5, -0.416666, -0.405465], - [-0.083333, -0.222222, -0.693147]] - scale_factors = None - expected_boxes = [[14.594306, 7.884875, 20.918861, 14.209432], - [0.155051, 0.102989, 0.522474, 0.470412]] - anchors = box_list.BoxList(tf.constant(anchors)) - coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors) - boxes = coder.decode(rel_codes, anchors) - with self.test_session() as sess: - (boxes_out,) = sess.run([boxes.get()]) - self.assertAllClose(boxes_out, expected_boxes) - - def test_correct_boxes_with_non_default_scale(self): - anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] - rel_codes = [[-1., -1.25, -1.62186], [-0.166667, -0.666667, -2.772588]] - scale_factors = [2, 3, 4] - expected_boxes = [[14.594306, 7.884875, 20.918861, 14.209432], - [0.155051, 0.102989, 0.522474, 
0.470412]] - anchors = box_list.BoxList(tf.constant(anchors)) - coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors) - boxes = coder.decode(rel_codes, anchors) - with self.test_session() as sess: - (boxes_out,) = sess.run([boxes.get()]) - self.assertAllClose(boxes_out, expected_boxes) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/anchor_generator_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/anchor_generator_builder.py deleted file mode 100644 index 54cec3a1df57f06466cde5e2bd9c6b706133c174..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/anchor_generator_builder.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""A function to build an object detection anchor generator from config.""" - -from object_detection.anchor_generators import grid_anchor_generator -from object_detection.anchor_generators import multiple_grid_anchor_generator -from object_detection.anchor_generators import multiscale_grid_anchor_generator -from object_detection.protos import anchor_generator_pb2 - - -def build(anchor_generator_config): - """Builds an anchor generator based on the config. - - Args: - anchor_generator_config: An anchor_generator.proto object containing the - config for the desired anchor generator. - - Returns: - Anchor generator based on the config. - - Raises: - ValueError: On empty anchor generator proto. 
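As a usage sketch: `build()` is normally fed a text-format AnchorGenerator proto, as the builder tests that follow demonstrate. The field values here are illustrative, not prescribed defaults.

```python
# Minimal sketch: build a grid anchor generator from a text-format proto.
from google.protobuf import text_format
from object_detection.builders import anchor_generator_builder
from object_detection.protos import anchor_generator_pb2

anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
text_format.Merge("""
  grid_anchor_generator {
    scales: [0.25, 0.5, 1.0, 2.0]
    aspect_ratios: [0.5, 1.0, 2.0]
    height_stride: 16
    width_stride: 16
  }
""", anchor_generator_proto)
anchor_generator = anchor_generator_builder.build(anchor_generator_proto)
# 4 scales x 3 aspect ratios => 12 anchors per grid point.
print(anchor_generator.num_anchors_per_location())  # [12]
```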
- """ - if not isinstance(anchor_generator_config, - anchor_generator_pb2.AnchorGenerator): - raise ValueError('anchor_generator_config not of type ' - 'anchor_generator_pb2.AnchorGenerator') - if anchor_generator_config.WhichOneof( - 'anchor_generator_oneof') == 'grid_anchor_generator': - grid_anchor_generator_config = anchor_generator_config.grid_anchor_generator - return grid_anchor_generator.GridAnchorGenerator( - scales=[float(scale) for scale in grid_anchor_generator_config.scales], - aspect_ratios=[float(aspect_ratio) - for aspect_ratio - in grid_anchor_generator_config.aspect_ratios], - base_anchor_size=[grid_anchor_generator_config.height, - grid_anchor_generator_config.width], - anchor_stride=[grid_anchor_generator_config.height_stride, - grid_anchor_generator_config.width_stride], - anchor_offset=[grid_anchor_generator_config.height_offset, - grid_anchor_generator_config.width_offset]) - elif anchor_generator_config.WhichOneof( - 'anchor_generator_oneof') == 'ssd_anchor_generator': - ssd_anchor_generator_config = anchor_generator_config.ssd_anchor_generator - anchor_strides = None - if ssd_anchor_generator_config.height_stride: - anchor_strides = zip(ssd_anchor_generator_config.height_stride, - ssd_anchor_generator_config.width_stride) - anchor_offsets = None - if ssd_anchor_generator_config.height_offset: - anchor_offsets = zip(ssd_anchor_generator_config.height_offset, - ssd_anchor_generator_config.width_offset) - return multiple_grid_anchor_generator.create_ssd_anchors( - num_layers=ssd_anchor_generator_config.num_layers, - min_scale=ssd_anchor_generator_config.min_scale, - max_scale=ssd_anchor_generator_config.max_scale, - scales=[float(scale) for scale in ssd_anchor_generator_config.scales], - aspect_ratios=ssd_anchor_generator_config.aspect_ratios, - interpolated_scale_aspect_ratio=( - ssd_anchor_generator_config.interpolated_scale_aspect_ratio), - base_anchor_size=[ - ssd_anchor_generator_config.base_anchor_height, - ssd_anchor_generator_config.base_anchor_width - ], - anchor_strides=anchor_strides, - anchor_offsets=anchor_offsets, - reduce_boxes_in_lowest_layer=( - ssd_anchor_generator_config.reduce_boxes_in_lowest_layer)) - elif anchor_generator_config.WhichOneof( - 'anchor_generator_oneof') == 'multiscale_anchor_generator': - cfg = anchor_generator_config.multiscale_anchor_generator - return multiscale_grid_anchor_generator.MultiscaleGridAnchorGenerator( - cfg.min_level, - cfg.max_level, - cfg.anchor_scale, - [float(aspect_ratio) for aspect_ratio in cfg.aspect_ratios], - cfg.scales_per_octave, - cfg.normalize_coordinates - ) - else: - raise ValueError('Empty anchor generator.') diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/anchor_generator_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/anchor_generator_builder_test.py deleted file mode 100644 index 2a23c2d96b411634263ef7bd20ed045c6305c790..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/anchor_generator_builder_test.py +++ /dev/null @@ -1,300 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for anchor_generator_builder.""" - -import math - -import tensorflow as tf - -from google.protobuf import text_format -from object_detection.anchor_generators import grid_anchor_generator -from object_detection.anchor_generators import multiple_grid_anchor_generator -from object_detection.anchor_generators import multiscale_grid_anchor_generator -from object_detection.builders import anchor_generator_builder -from object_detection.protos import anchor_generator_pb2 - - -class AnchorGeneratorBuilderTest(tf.test.TestCase): - - def assert_almost_list_equal(self, expected_list, actual_list, delta=None): - self.assertEqual(len(expected_list), len(actual_list)) - for expected_item, actual_item in zip(expected_list, actual_list): - self.assertAlmostEqual(expected_item, actual_item, delta=delta) - - def test_build_grid_anchor_generator_with_defaults(self): - anchor_generator_text_proto = """ - grid_anchor_generator { - } - """ - anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() - text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) - anchor_generator_object = anchor_generator_builder.build( - anchor_generator_proto) - self.assertTrue(isinstance(anchor_generator_object, - grid_anchor_generator.GridAnchorGenerator)) - self.assertListEqual(anchor_generator_object._scales, []) - self.assertListEqual(anchor_generator_object._aspect_ratios, []) - with self.test_session() as sess: - base_anchor_size, anchor_offset, anchor_stride = sess.run( - [anchor_generator_object._base_anchor_size, - anchor_generator_object._anchor_offset, - anchor_generator_object._anchor_stride]) - self.assertAllEqual(anchor_offset, [0, 0]) - self.assertAllEqual(anchor_stride, [16, 16]) - self.assertAllEqual(base_anchor_size, [256, 256]) - - def test_build_grid_anchor_generator_with_non_default_parameters(self): - anchor_generator_text_proto = """ - grid_anchor_generator { - height: 128 - width: 512 - height_stride: 10 - width_stride: 20 - height_offset: 30 - width_offset: 40 - scales: [0.4, 2.2] - aspect_ratios: [0.3, 4.5] - } - """ - anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() - text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) - anchor_generator_object = anchor_generator_builder.build( - anchor_generator_proto) - self.assertTrue(isinstance(anchor_generator_object, - grid_anchor_generator.GridAnchorGenerator)) - self.assert_almost_list_equal(anchor_generator_object._scales, - [0.4, 2.2]) - self.assert_almost_list_equal(anchor_generator_object._aspect_ratios, - [0.3, 4.5]) - with self.test_session() as sess: - base_anchor_size, anchor_offset, anchor_stride = sess.run( - [anchor_generator_object._base_anchor_size, - anchor_generator_object._anchor_offset, - anchor_generator_object._anchor_stride]) - self.assertAllEqual(anchor_offset, [30, 40]) - self.assertAllEqual(anchor_stride, [10, 20]) - self.assertAllEqual(base_anchor_size, [128, 512]) - - def test_build_ssd_anchor_generator_with_defaults(self): - anchor_generator_text_proto = """ - ssd_anchor_generator { 
- aspect_ratios: [1.0] - } - """ - anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() - text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) - anchor_generator_object = anchor_generator_builder.build( - anchor_generator_proto) - self.assertTrue(isinstance(anchor_generator_object, - multiple_grid_anchor_generator. - MultipleGridAnchorGenerator)) - for actual_scales, expected_scales in zip( - list(anchor_generator_object._scales), - [(0.1, 0.2, 0.2), - (0.35, 0.418), - (0.499, 0.570), - (0.649, 0.721), - (0.799, 0.871), - (0.949, 0.974)]): - self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2) - for actual_aspect_ratio, expected_aspect_ratio in zip( - list(anchor_generator_object._aspect_ratios), - [(1.0, 2.0, 0.5)] + 5 * [(1.0, 1.0)]): - self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio) - - with self.test_session() as sess: - base_anchor_size = sess.run(anchor_generator_object._base_anchor_size) - self.assertAllClose(base_anchor_size, [1.0, 1.0]) - - def test_build_ssd_anchor_generator_with_custom_scales(self): - anchor_generator_text_proto = """ - ssd_anchor_generator { - aspect_ratios: [1.0] - scales: [0.1, 0.15, 0.2, 0.4, 0.6, 0.8] - reduce_boxes_in_lowest_layer: false - } - """ - anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() - text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) - anchor_generator_object = anchor_generator_builder.build( - anchor_generator_proto) - self.assertTrue(isinstance(anchor_generator_object, - multiple_grid_anchor_generator. - MultipleGridAnchorGenerator)) - for actual_scales, expected_scales in zip( - list(anchor_generator_object._scales), - [(0.1, math.sqrt(0.1 * 0.15)), - (0.15, math.sqrt(0.15 * 0.2)), - (0.2, math.sqrt(0.2 * 0.4)), - (0.4, math.sqrt(0.4 * 0.6)), - (0.6, math.sqrt(0.6 * 0.8)), - (0.8, math.sqrt(0.8 * 1.0))]): - self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2) - - def test_build_ssd_anchor_generator_with_custom_interpolated_scale(self): - anchor_generator_text_proto = """ - ssd_anchor_generator { - aspect_ratios: [0.5] - interpolated_scale_aspect_ratio: 0.5 - reduce_boxes_in_lowest_layer: false - } - """ - anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() - text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) - anchor_generator_object = anchor_generator_builder.build( - anchor_generator_proto) - self.assertTrue(isinstance(anchor_generator_object, - multiple_grid_anchor_generator. - MultipleGridAnchorGenerator)) - for actual_aspect_ratio, expected_aspect_ratio in zip( - list(anchor_generator_object._aspect_ratios), - 6 * [(0.5, 0.5)]): - self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio) - - def test_build_ssd_anchor_generator_without_reduced_boxes(self): - anchor_generator_text_proto = """ - ssd_anchor_generator { - aspect_ratios: [1.0] - reduce_boxes_in_lowest_layer: false - } - """ - anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() - text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) - anchor_generator_object = anchor_generator_builder.build( - anchor_generator_proto) - self.assertTrue(isinstance(anchor_generator_object, - multiple_grid_anchor_generator. 
-                               MultipleGridAnchorGenerator))
-
-    for actual_scales, expected_scales in zip(
-        list(anchor_generator_object._scales),
-        [(0.2, 0.264),
-         (0.35, 0.418),
-         (0.499, 0.570),
-         (0.649, 0.721),
-         (0.799, 0.871),
-         (0.949, 0.974)]):
-      self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2)
-
-    for actual_aspect_ratio, expected_aspect_ratio in zip(
-        list(anchor_generator_object._aspect_ratios),
-        6 * [(1.0, 1.0)]):
-      self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio)
-
-    with self.test_session() as sess:
-      base_anchor_size = sess.run(anchor_generator_object._base_anchor_size)
-      self.assertAllClose(base_anchor_size, [1.0, 1.0])
-
-  def test_build_ssd_anchor_generator_with_non_default_parameters(self):
-    anchor_generator_text_proto = """
-      ssd_anchor_generator {
-        num_layers: 2
-        min_scale: 0.3
-        max_scale: 0.8
-        aspect_ratios: [2.0]
-        height_stride: 16
-        height_stride: 32
-        width_stride: 20
-        width_stride: 30
-        height_offset: 8
-        height_offset: 16
-        width_offset: 0
-        width_offset: 10
-      }
-    """
-    anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
-    text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
-    anchor_generator_object = anchor_generator_builder.build(
-        anchor_generator_proto)
-    self.assertTrue(isinstance(anchor_generator_object,
-                               multiple_grid_anchor_generator.
-                               MultipleGridAnchorGenerator))
-
-    for actual_scales, expected_scales in zip(
-        list(anchor_generator_object._scales),
-        [(0.1, 0.3, 0.3), (0.8, 0.894)]):
-      self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2)
-
-    for actual_aspect_ratio, expected_aspect_ratio in zip(
-        list(anchor_generator_object._aspect_ratios),
-        [(1.0, 2.0, 0.5), (2.0, 1.0)]):
-      self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio)
-
-    for actual_strides, expected_strides in zip(
-        list(anchor_generator_object._anchor_strides), [(16, 20), (32, 30)]):
-      self.assert_almost_list_equal(expected_strides, actual_strides)
-
-    for actual_offsets, expected_offsets in zip(
-        list(anchor_generator_object._anchor_offsets), [(8, 0), (16, 10)]):
-      self.assert_almost_list_equal(expected_offsets, actual_offsets)
-
-    with self.test_session() as sess:
-      base_anchor_size = sess.run(anchor_generator_object._base_anchor_size)
-      self.assertAllClose(base_anchor_size, [1.0, 1.0])
-
-  def test_raise_value_error_on_empty_anchor_generator(self):
-    anchor_generator_text_proto = """
-    """
-    anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
-    text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
-    with self.assertRaises(ValueError):
-      anchor_generator_builder.build(anchor_generator_proto)
-
-  def test_build_multiscale_anchor_generator_custom_aspect_ratios(self):
-    anchor_generator_text_proto = """
-      multiscale_anchor_generator {
-        aspect_ratios: [1.0]
-      }
-    """
-    anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
-    text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
-    anchor_generator_object = anchor_generator_builder.build(
-        anchor_generator_proto)
-    self.assertTrue(isinstance(anchor_generator_object,
-                               multiscale_grid_anchor_generator.
-                               MultiscaleGridAnchorGenerator))
-    for level, anchor_grid_info in zip(
-        range(3, 8), anchor_generator_object._anchor_grid_info):
-      self.assertEqual(set(anchor_grid_info.keys()), set(['level', 'info']))
-      self.assertEqual(level, anchor_grid_info['level'])
-      self.assertEqual(len(anchor_grid_info['info']), 4)
-      self.assertAllClose(anchor_grid_info['info'][0], [2**0, 2**0.5])
-      self.assertAllClose(anchor_grid_info['info'][1], [1.0])
-      self.assertAllClose(anchor_grid_info['info'][2],
-                          [4.0 * 2**level, 4.0 * 2**level])
-      self.assertAllClose(anchor_grid_info['info'][3], [2**level, 2**level])
-    self.assertTrue(anchor_generator_object._normalize_coordinates)
-
-  def test_build_multiscale_anchor_generator_with_anchors_in_pixel_coordinates(
-      self):
-    anchor_generator_text_proto = """
-      multiscale_anchor_generator {
-        aspect_ratios: [1.0]
-        normalize_coordinates: false
-      }
-    """
-    anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
-    text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
-    anchor_generator_object = anchor_generator_builder.build(
-        anchor_generator_proto)
-    self.assertTrue(isinstance(anchor_generator_object,
-                               multiscale_grid_anchor_generator.
-                               MultiscaleGridAnchorGenerator))
-    self.assertFalse(anchor_generator_object._normalize_coordinates)
-
-
-if __name__ == '__main__':
-  tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_coder_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_coder_builder.py
deleted file mode 100644
index cc13d5a2f01c5a1f66e83abc5bb5ada542047d83..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_coder_builder.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""A function to build an object detection box coder from configuration."""
-from object_detection.box_coders import faster_rcnn_box_coder
-from object_detection.box_coders import keypoint_box_coder
-from object_detection.box_coders import mean_stddev_box_coder
-from object_detection.box_coders import square_box_coder
-from object_detection.protos import box_coder_pb2
-
-
-def build(box_coder_config):
-  """Builds a box coder object based on the box coder config.
-
-  Args:
-    box_coder_config: A box_coder.proto object containing the config for the
-      desired box coder.
-
-  Returns:
-    BoxCoder based on the config.
-
-  Raises:
-    ValueError: On empty box coder proto.
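A quick sketch of the intended call pattern, mirroring the builder tests further below (the scale values shown are the documented Faster R-CNN defaults, repeated here only for illustration):

```python
# Minimal sketch: build a Faster R-CNN box coder from a text-format proto.
from google.protobuf import text_format
from object_detection.builders import box_coder_builder
from object_detection.protos import box_coder_pb2

box_coder_proto = box_coder_pb2.BoxCoder()
text_format.Merge("""
  faster_rcnn_box_coder {
    y_scale: 10.0
    x_scale: 10.0
    height_scale: 5.0
    width_scale: 5.0
  }
""", box_coder_proto)
box_coder = box_coder_builder.build(box_coder_proto)
# FasterRcnnBoxCoder encodes boxes as [ty, tx, th, tw].
print(box_coder.code_size)  # 4
```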
- """ - if not isinstance(box_coder_config, box_coder_pb2.BoxCoder): - raise ValueError('box_coder_config not of type box_coder_pb2.BoxCoder.') - - if box_coder_config.WhichOneof('box_coder_oneof') == 'faster_rcnn_box_coder': - return faster_rcnn_box_coder.FasterRcnnBoxCoder(scale_factors=[ - box_coder_config.faster_rcnn_box_coder.y_scale, - box_coder_config.faster_rcnn_box_coder.x_scale, - box_coder_config.faster_rcnn_box_coder.height_scale, - box_coder_config.faster_rcnn_box_coder.width_scale - ]) - if box_coder_config.WhichOneof('box_coder_oneof') == 'keypoint_box_coder': - return keypoint_box_coder.KeypointBoxCoder( - box_coder_config.keypoint_box_coder.num_keypoints, - scale_factors=[ - box_coder_config.keypoint_box_coder.y_scale, - box_coder_config.keypoint_box_coder.x_scale, - box_coder_config.keypoint_box_coder.height_scale, - box_coder_config.keypoint_box_coder.width_scale - ]) - if (box_coder_config.WhichOneof('box_coder_oneof') == - 'mean_stddev_box_coder'): - return mean_stddev_box_coder.MeanStddevBoxCoder( - stddev=box_coder_config.mean_stddev_box_coder.stddev) - if box_coder_config.WhichOneof('box_coder_oneof') == 'square_box_coder': - return square_box_coder.SquareBoxCoder(scale_factors=[ - box_coder_config.square_box_coder.y_scale, - box_coder_config.square_box_coder.x_scale, - box_coder_config.square_box_coder.length_scale - ]) - raise ValueError('Empty box coder.') diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_coder_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_coder_builder_test.py deleted file mode 100644 index 286012e9de7661a5663e0ba2873818337f106985..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_coder_builder_test.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for box_coder_builder.""" - -import tensorflow as tf - -from google.protobuf import text_format -from object_detection.box_coders import faster_rcnn_box_coder -from object_detection.box_coders import keypoint_box_coder -from object_detection.box_coders import mean_stddev_box_coder -from object_detection.box_coders import square_box_coder -from object_detection.builders import box_coder_builder -from object_detection.protos import box_coder_pb2 - - -class BoxCoderBuilderTest(tf.test.TestCase): - - def test_build_faster_rcnn_box_coder_with_defaults(self): - box_coder_text_proto = """ - faster_rcnn_box_coder { - } - """ - box_coder_proto = box_coder_pb2.BoxCoder() - text_format.Merge(box_coder_text_proto, box_coder_proto) - box_coder_object = box_coder_builder.build(box_coder_proto) - self.assertIsInstance(box_coder_object, - faster_rcnn_box_coder.FasterRcnnBoxCoder) - self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0, 5.0]) - - def test_build_faster_rcnn_box_coder_with_non_default_parameters(self): - box_coder_text_proto = """ - faster_rcnn_box_coder { - y_scale: 6.0 - x_scale: 3.0 - height_scale: 7.0 - width_scale: 8.0 - } - """ - box_coder_proto = box_coder_pb2.BoxCoder() - text_format.Merge(box_coder_text_proto, box_coder_proto) - box_coder_object = box_coder_builder.build(box_coder_proto) - self.assertIsInstance(box_coder_object, - faster_rcnn_box_coder.FasterRcnnBoxCoder) - self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0, 8.0]) - - def test_build_keypoint_box_coder_with_defaults(self): - box_coder_text_proto = """ - keypoint_box_coder { - } - """ - box_coder_proto = box_coder_pb2.BoxCoder() - text_format.Merge(box_coder_text_proto, box_coder_proto) - box_coder_object = box_coder_builder.build(box_coder_proto) - self.assertIsInstance(box_coder_object, keypoint_box_coder.KeypointBoxCoder) - self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0, 5.0]) - - def test_build_keypoint_box_coder_with_non_default_parameters(self): - box_coder_text_proto = """ - keypoint_box_coder { - num_keypoints: 6 - y_scale: 6.0 - x_scale: 3.0 - height_scale: 7.0 - width_scale: 8.0 - } - """ - box_coder_proto = box_coder_pb2.BoxCoder() - text_format.Merge(box_coder_text_proto, box_coder_proto) - box_coder_object = box_coder_builder.build(box_coder_proto) - self.assertIsInstance(box_coder_object, keypoint_box_coder.KeypointBoxCoder) - self.assertEqual(box_coder_object._num_keypoints, 6) - self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0, 8.0]) - - def test_build_mean_stddev_box_coder(self): - box_coder_text_proto = """ - mean_stddev_box_coder { - } - """ - box_coder_proto = box_coder_pb2.BoxCoder() - text_format.Merge(box_coder_text_proto, box_coder_proto) - box_coder_object = box_coder_builder.build(box_coder_proto) - self.assertTrue( - isinstance(box_coder_object, - mean_stddev_box_coder.MeanStddevBoxCoder)) - - def test_build_square_box_coder_with_defaults(self): - box_coder_text_proto = """ - square_box_coder { - } - """ - box_coder_proto = box_coder_pb2.BoxCoder() - text_format.Merge(box_coder_text_proto, box_coder_proto) - box_coder_object = box_coder_builder.build(box_coder_proto) - self.assertTrue( - isinstance(box_coder_object, square_box_coder.SquareBoxCoder)) - self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0]) - - def test_build_square_box_coder_with_non_default_parameters(self): - box_coder_text_proto = """ - square_box_coder { 
-        y_scale: 6.0
-        x_scale: 3.0
-        length_scale: 7.0
-      }
-    """
-    box_coder_proto = box_coder_pb2.BoxCoder()
-    text_format.Merge(box_coder_text_proto, box_coder_proto)
-    box_coder_object = box_coder_builder.build(box_coder_proto)
-    self.assertTrue(
-        isinstance(box_coder_object, square_box_coder.SquareBoxCoder))
-    self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0])
-
-  def test_raise_error_on_empty_box_coder(self):
-    box_coder_text_proto = """
-    """
-    box_coder_proto = box_coder_pb2.BoxCoder()
-    text_format.Merge(box_coder_text_proto, box_coder_proto)
-    with self.assertRaises(ValueError):
-      box_coder_builder.build(box_coder_proto)
-
-
-if __name__ == '__main__':
-  tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_predictor_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_predictor_builder.py
deleted file mode 100644
index 2f311221ce4729599501c93d2192764d5ed8207b..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_predictor_builder.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Function to build box predictor from configuration."""
-
-from object_detection.core import box_predictor
-from object_detection.protos import box_predictor_pb2
-
-
-def build(argscope_fn, box_predictor_config, is_training, num_classes):
-  """Builds box predictor based on the configuration.
-
-  Builds box predictor based on the configuration. See box_predictor.proto for
-  configurable options. Also, see box_predictor.py for more details.
-
-  Args:
-    argscope_fn: A function that takes the following inputs:
-        * hyperparams_pb2.Hyperparams proto
-        * a boolean indicating if the model is in training mode.
-      and returns a tf slim argscope for Conv and FC hyperparameters.
-    box_predictor_config: box_predictor_pb2.BoxPredictor proto containing
-      configuration.
-    is_training: Whether the model is in training mode.
-    num_classes: Number of classes to predict.
-
-  Returns:
-    box_predictor: box_predictor.BoxPredictor object.
-
-  Raises:
-    ValueError: On unknown box predictor.
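A sketch of the `argscope_fn` contract: any callable that accepts a `hyperparams_pb2.Hyperparams` proto plus an `is_training` flag will do; the tests further below exercise both a mock and the real `hyperparams_builder.build`. The hyperparameter values here are illustrative only.

```python
# Minimal sketch: build a convolutional box predictor, passing the real
# hyperparams builder as argscope_fn.
from google.protobuf import text_format
from object_detection.builders import box_predictor_builder
from object_detection.builders import hyperparams_builder
from object_detection.protos import box_predictor_pb2

box_predictor_proto = box_predictor_pb2.BoxPredictor()
text_format.Merge("""
  convolutional_box_predictor {
    conv_hyperparams {
      regularizer { l2_regularizer { weight: 0.0004 } }
      initializer { truncated_normal_initializer { stddev: 0.03 } }
    }
  }
""", box_predictor_proto)
predictor = box_predictor_builder.build(
    argscope_fn=hyperparams_builder.build,
    box_predictor_config=box_predictor_proto,
    is_training=True,
    num_classes=90)
print(predictor.num_classes)  # 90
```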
- """ - if not isinstance(box_predictor_config, box_predictor_pb2.BoxPredictor): - raise ValueError('box_predictor_config not of type ' - 'box_predictor_pb2.BoxPredictor.') - - box_predictor_oneof = box_predictor_config.WhichOneof('box_predictor_oneof') - - if box_predictor_oneof == 'convolutional_box_predictor': - conv_box_predictor = box_predictor_config.convolutional_box_predictor - conv_hyperparams_fn = argscope_fn(conv_box_predictor.conv_hyperparams, - is_training) - box_predictor_object = box_predictor.ConvolutionalBoxPredictor( - is_training=is_training, - num_classes=num_classes, - conv_hyperparams_fn=conv_hyperparams_fn, - min_depth=conv_box_predictor.min_depth, - max_depth=conv_box_predictor.max_depth, - num_layers_before_predictor=(conv_box_predictor. - num_layers_before_predictor), - use_dropout=conv_box_predictor.use_dropout, - dropout_keep_prob=conv_box_predictor.dropout_keep_probability, - kernel_size=conv_box_predictor.kernel_size, - box_code_size=conv_box_predictor.box_code_size, - apply_sigmoid_to_scores=conv_box_predictor.apply_sigmoid_to_scores, - class_prediction_bias_init=(conv_box_predictor. - class_prediction_bias_init), - use_depthwise=conv_box_predictor.use_depthwise - ) - return box_predictor_object - - if box_predictor_oneof == 'weight_shared_convolutional_box_predictor': - conv_box_predictor = (box_predictor_config. - weight_shared_convolutional_box_predictor) - conv_hyperparams_fn = argscope_fn(conv_box_predictor.conv_hyperparams, - is_training) - box_predictor_object = box_predictor.WeightSharedConvolutionalBoxPredictor( - is_training=is_training, - num_classes=num_classes, - conv_hyperparams_fn=conv_hyperparams_fn, - depth=conv_box_predictor.depth, - num_layers_before_predictor=( - conv_box_predictor.num_layers_before_predictor), - kernel_size=conv_box_predictor.kernel_size, - box_code_size=conv_box_predictor.box_code_size, - class_prediction_bias_init=conv_box_predictor. 
- class_prediction_bias_init, - use_dropout=conv_box_predictor.use_dropout, - dropout_keep_prob=conv_box_predictor.dropout_keep_probability) - return box_predictor_object - - if box_predictor_oneof == 'mask_rcnn_box_predictor': - mask_rcnn_box_predictor = box_predictor_config.mask_rcnn_box_predictor - fc_hyperparams_fn = argscope_fn(mask_rcnn_box_predictor.fc_hyperparams, - is_training) - conv_hyperparams_fn = None - if mask_rcnn_box_predictor.HasField('conv_hyperparams'): - conv_hyperparams_fn = argscope_fn( - mask_rcnn_box_predictor.conv_hyperparams, is_training) - box_predictor_object = box_predictor.MaskRCNNBoxPredictor( - is_training=is_training, - num_classes=num_classes, - fc_hyperparams_fn=fc_hyperparams_fn, - use_dropout=mask_rcnn_box_predictor.use_dropout, - dropout_keep_prob=mask_rcnn_box_predictor.dropout_keep_probability, - box_code_size=mask_rcnn_box_predictor.box_code_size, - conv_hyperparams_fn=conv_hyperparams_fn, - predict_instance_masks=mask_rcnn_box_predictor.predict_instance_masks, - mask_height=mask_rcnn_box_predictor.mask_height, - mask_width=mask_rcnn_box_predictor.mask_width, - mask_prediction_num_conv_layers=( - mask_rcnn_box_predictor.mask_prediction_num_conv_layers), - mask_prediction_conv_depth=( - mask_rcnn_box_predictor.mask_prediction_conv_depth), - masks_are_class_agnostic=( - mask_rcnn_box_predictor.masks_are_class_agnostic), - predict_keypoints=mask_rcnn_box_predictor.predict_keypoints, - share_box_across_classes=( - mask_rcnn_box_predictor.share_box_across_classes)) - return box_predictor_object - - if box_predictor_oneof == 'rfcn_box_predictor': - rfcn_box_predictor = box_predictor_config.rfcn_box_predictor - conv_hyperparams_fn = argscope_fn(rfcn_box_predictor.conv_hyperparams, - is_training) - box_predictor_object = box_predictor.RfcnBoxPredictor( - is_training=is_training, - num_classes=num_classes, - conv_hyperparams_fn=conv_hyperparams_fn, - crop_size=[rfcn_box_predictor.crop_height, - rfcn_box_predictor.crop_width], - num_spatial_bins=[rfcn_box_predictor.num_spatial_bins_height, - rfcn_box_predictor.num_spatial_bins_width], - depth=rfcn_box_predictor.depth, - box_code_size=rfcn_box_predictor.box_code_size) - return box_predictor_object - raise ValueError('Unknown box predictor: {}'.format(box_predictor_oneof)) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_predictor_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_predictor_builder_test.py deleted file mode 100644 index 35ad57be9975bedb93a953d3189062ef0d8d5568..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/box_predictor_builder_test.py +++ /dev/null @@ -1,514 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
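Mirroring `test_build_box_predictor_with_mask_branch` in the test file that follows: enabling the mask head requires `predict_instance_masks` plus `conv_hyperparams` in addition to the always-required `fc_hyperparams`. As in the mocked tests, a stub arg-scope function is enough at construction time; a real model would pass `hyperparams_builder.build` instead.

```python
# Sketch: a Mask R-CNN box predictor with the mask branch enabled.
from object_detection.builders import box_predictor_builder
from object_detection.protos import box_predictor_pb2
from object_detection.protos import hyperparams_pb2

box_predictor_proto = box_predictor_pb2.BoxPredictor()
mask_rcnn_config = box_predictor_proto.mask_rcnn_box_predictor
mask_rcnn_config.fc_hyperparams.op = hyperparams_pb2.Hyperparams.FC
mask_rcnn_config.conv_hyperparams.op = hyperparams_pb2.Hyperparams.CONV
mask_rcnn_config.predict_instance_masks = True
mask_rcnn_config.mask_height = 14
mask_rcnn_config.mask_width = 14

predictor = box_predictor_builder.build(
    argscope_fn=lambda hyperparams, is_training: None,  # stub, as in the tests
    box_predictor_config=box_predictor_proto,
    is_training=True,
    num_classes=90)
print(predictor._predict_instance_masks)  # True
```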
-# ============================================================================== - -"""Tests for box_predictor_builder.""" -import mock -import tensorflow as tf - -from google.protobuf import text_format -from object_detection.builders import box_predictor_builder -from object_detection.builders import hyperparams_builder -from object_detection.protos import box_predictor_pb2 -from object_detection.protos import hyperparams_pb2 - - -class ConvolutionalBoxPredictorBuilderTest(tf.test.TestCase): - - def test_box_predictor_calls_conv_argscope_fn(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - weight: 0.0003 - } - } - initializer { - truncated_normal_initializer { - mean: 0.0 - stddev: 0.3 - } - } - activation: RELU_6 - """ - hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) - def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): - return (conv_hyperparams_arg, is_training) - - box_predictor_proto = box_predictor_pb2.BoxPredictor() - box_predictor_proto.convolutional_box_predictor.conv_hyperparams.CopyFrom( - hyperparams_proto) - box_predictor = box_predictor_builder.build( - argscope_fn=mock_conv_argscope_builder, - box_predictor_config=box_predictor_proto, - is_training=False, - num_classes=10) - (conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams_fn - self.assertAlmostEqual((hyperparams_proto.regularizer. - l1_regularizer.weight), - (conv_hyperparams_actual.regularizer.l1_regularizer. - weight)) - self.assertAlmostEqual((hyperparams_proto.initializer. - truncated_normal_initializer.stddev), - (conv_hyperparams_actual.initializer. - truncated_normal_initializer.stddev)) - self.assertAlmostEqual((hyperparams_proto.initializer. - truncated_normal_initializer.mean), - (conv_hyperparams_actual.initializer. 
- truncated_normal_initializer.mean)) - self.assertEqual(hyperparams_proto.activation, - conv_hyperparams_actual.activation) - self.assertFalse(is_training) - - def test_construct_non_default_conv_box_predictor(self): - box_predictor_text_proto = """ - convolutional_box_predictor { - min_depth: 2 - max_depth: 16 - num_layers_before_predictor: 2 - use_dropout: false - dropout_keep_probability: 0.4 - kernel_size: 3 - box_code_size: 3 - apply_sigmoid_to_scores: true - class_prediction_bias_init: 4.0 - use_depthwise: true - } - """ - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) - def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): - return (conv_hyperparams_arg, is_training) - - box_predictor_proto = box_predictor_pb2.BoxPredictor() - text_format.Merge(box_predictor_text_proto, box_predictor_proto) - box_predictor_proto.convolutional_box_predictor.conv_hyperparams.CopyFrom( - hyperparams_proto) - box_predictor = box_predictor_builder.build( - argscope_fn=mock_conv_argscope_builder, - box_predictor_config=box_predictor_proto, - is_training=False, - num_classes=10) - self.assertEqual(box_predictor._min_depth, 2) - self.assertEqual(box_predictor._max_depth, 16) - self.assertEqual(box_predictor._num_layers_before_predictor, 2) - self.assertFalse(box_predictor._use_dropout) - self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.4) - self.assertTrue(box_predictor._apply_sigmoid_to_scores) - self.assertAlmostEqual(box_predictor._class_prediction_bias_init, 4.0) - self.assertEqual(box_predictor.num_classes, 10) - self.assertFalse(box_predictor._is_training) - self.assertTrue(box_predictor._use_depthwise) - - def test_construct_default_conv_box_predictor(self): - box_predictor_text_proto = """ - convolutional_box_predictor { - conv_hyperparams { - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - }""" - box_predictor_proto = box_predictor_pb2.BoxPredictor() - text_format.Merge(box_predictor_text_proto, box_predictor_proto) - box_predictor = box_predictor_builder.build( - argscope_fn=hyperparams_builder.build, - box_predictor_config=box_predictor_proto, - is_training=True, - num_classes=90) - self.assertEqual(box_predictor._min_depth, 0) - self.assertEqual(box_predictor._max_depth, 0) - self.assertEqual(box_predictor._num_layers_before_predictor, 0) - self.assertTrue(box_predictor._use_dropout) - self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.8) - self.assertFalse(box_predictor._apply_sigmoid_to_scores) - self.assertEqual(box_predictor.num_classes, 90) - self.assertTrue(box_predictor._is_training) - self.assertFalse(box_predictor._use_depthwise) - - -class WeightSharedConvolutionalBoxPredictorBuilderTest(tf.test.TestCase): - - def test_box_predictor_calls_conv_argscope_fn(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - weight: 0.0003 - } - } - initializer { - truncated_normal_initializer { - mean: 0.0 - stddev: 0.3 - } - } - activation: RELU_6 - """ - hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) - def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): - return (conv_hyperparams_arg, is_training) - - box_predictor_proto = box_predictor_pb2.BoxPredictor() - 
(box_predictor_proto.weight_shared_convolutional_box_predictor - .conv_hyperparams.CopyFrom(hyperparams_proto)) - box_predictor = box_predictor_builder.build( - argscope_fn=mock_conv_argscope_builder, - box_predictor_config=box_predictor_proto, - is_training=False, - num_classes=10) - (conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams_fn - self.assertAlmostEqual((hyperparams_proto.regularizer. - l1_regularizer.weight), - (conv_hyperparams_actual.regularizer.l1_regularizer. - weight)) - self.assertAlmostEqual((hyperparams_proto.initializer. - truncated_normal_initializer.stddev), - (conv_hyperparams_actual.initializer. - truncated_normal_initializer.stddev)) - self.assertAlmostEqual((hyperparams_proto.initializer. - truncated_normal_initializer.mean), - (conv_hyperparams_actual.initializer. - truncated_normal_initializer.mean)) - self.assertEqual(hyperparams_proto.activation, - conv_hyperparams_actual.activation) - self.assertFalse(is_training) - - def test_construct_non_default_conv_box_predictor(self): - box_predictor_text_proto = """ - weight_shared_convolutional_box_predictor { - depth: 2 - num_layers_before_predictor: 2 - kernel_size: 7 - box_code_size: 3 - class_prediction_bias_init: 4.0 - } - """ - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) - def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): - return (conv_hyperparams_arg, is_training) - - box_predictor_proto = box_predictor_pb2.BoxPredictor() - text_format.Merge(box_predictor_text_proto, box_predictor_proto) - (box_predictor_proto.weight_shared_convolutional_box_predictor. 
- conv_hyperparams.CopyFrom(hyperparams_proto)) - box_predictor = box_predictor_builder.build( - argscope_fn=mock_conv_argscope_builder, - box_predictor_config=box_predictor_proto, - is_training=False, - num_classes=10) - self.assertEqual(box_predictor._depth, 2) - self.assertEqual(box_predictor._num_layers_before_predictor, 2) - self.assertAlmostEqual(box_predictor._class_prediction_bias_init, 4.0) - self.assertEqual(box_predictor.num_classes, 10) - self.assertFalse(box_predictor._is_training) - - def test_construct_default_conv_box_predictor(self): - box_predictor_text_proto = """ - weight_shared_convolutional_box_predictor { - conv_hyperparams { - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - }""" - box_predictor_proto = box_predictor_pb2.BoxPredictor() - text_format.Merge(box_predictor_text_proto, box_predictor_proto) - box_predictor = box_predictor_builder.build( - argscope_fn=hyperparams_builder.build, - box_predictor_config=box_predictor_proto, - is_training=True, - num_classes=90) - self.assertEqual(box_predictor._depth, 0) - self.assertEqual(box_predictor._num_layers_before_predictor, 0) - self.assertEqual(box_predictor.num_classes, 90) - self.assertTrue(box_predictor._is_training) - - -class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase): - - def test_box_predictor_builder_calls_fc_argscope_fn(self): - fc_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - weight: 0.0003 - } - } - initializer { - truncated_normal_initializer { - mean: 0.0 - stddev: 0.3 - } - } - activation: RELU_6 - op: FC - """ - hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(fc_hyperparams_text_proto, hyperparams_proto) - box_predictor_proto = box_predictor_pb2.BoxPredictor() - box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.CopyFrom( - hyperparams_proto) - mock_argscope_fn = mock.Mock(return_value='arg_scope') - box_predictor = box_predictor_builder.build( - argscope_fn=mock_argscope_fn, - box_predictor_config=box_predictor_proto, - is_training=False, - num_classes=10) - mock_argscope_fn.assert_called_with(hyperparams_proto, False) - self.assertEqual(box_predictor._fc_hyperparams_fn, 'arg_scope') - - def test_non_default_mask_rcnn_box_predictor(self): - fc_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - activation: RELU_6 - op: FC - """ - box_predictor_text_proto = """ - mask_rcnn_box_predictor { - use_dropout: true - dropout_keep_probability: 0.8 - box_code_size: 3 - share_box_across_classes: true - } - """ - hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(fc_hyperparams_text_proto, hyperparams_proto) - def mock_fc_argscope_builder(fc_hyperparams_arg, is_training): - return (fc_hyperparams_arg, is_training) - - box_predictor_proto = box_predictor_pb2.BoxPredictor() - text_format.Merge(box_predictor_text_proto, box_predictor_proto) - box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.CopyFrom( - hyperparams_proto) - box_predictor = box_predictor_builder.build( - argscope_fn=mock_fc_argscope_builder, - box_predictor_config=box_predictor_proto, - is_training=True, - num_classes=90) - self.assertTrue(box_predictor._use_dropout) - self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.8) - self.assertEqual(box_predictor.num_classes, 90) - self.assertTrue(box_predictor._is_training) - self.assertEqual(box_predictor._box_code_size, 3) - self.assertEqual(box_predictor._share_box_across_classes, 
True) - - def test_build_default_mask_rcnn_box_predictor(self): - box_predictor_proto = box_predictor_pb2.BoxPredictor() - box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.op = ( - hyperparams_pb2.Hyperparams.FC) - box_predictor = box_predictor_builder.build( - argscope_fn=mock.Mock(return_value='arg_scope'), - box_predictor_config=box_predictor_proto, - is_training=True, - num_classes=90) - self.assertFalse(box_predictor._use_dropout) - self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.5) - self.assertEqual(box_predictor.num_classes, 90) - self.assertTrue(box_predictor._is_training) - self.assertEqual(box_predictor._box_code_size, 4) - self.assertFalse(box_predictor._predict_instance_masks) - self.assertFalse(box_predictor._predict_keypoints) - - def test_build_box_predictor_with_mask_branch(self): - box_predictor_proto = box_predictor_pb2.BoxPredictor() - box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.op = ( - hyperparams_pb2.Hyperparams.FC) - box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams.op = ( - hyperparams_pb2.Hyperparams.CONV) - box_predictor_proto.mask_rcnn_box_predictor.predict_instance_masks = True - box_predictor_proto.mask_rcnn_box_predictor.mask_prediction_conv_depth = 512 - box_predictor_proto.mask_rcnn_box_predictor.mask_height = 16 - box_predictor_proto.mask_rcnn_box_predictor.mask_width = 16 - mock_argscope_fn = mock.Mock(return_value='arg_scope') - box_predictor = box_predictor_builder.build( - argscope_fn=mock_argscope_fn, - box_predictor_config=box_predictor_proto, - is_training=True, - num_classes=90) - mock_argscope_fn.assert_has_calls( - [mock.call(box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams, - True), - mock.call(box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams, - True)], any_order=True) - self.assertFalse(box_predictor._use_dropout) - self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.5) - self.assertEqual(box_predictor.num_classes, 90) - self.assertTrue(box_predictor._is_training) - self.assertEqual(box_predictor._box_code_size, 4) - self.assertTrue(box_predictor._predict_instance_masks) - self.assertEqual(box_predictor._mask_prediction_conv_depth, 512) - self.assertFalse(box_predictor._predict_keypoints) - - -class RfcnBoxPredictorBuilderTest(tf.test.TestCase): - - def test_box_predictor_calls_fc_argscope_fn(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - weight: 0.0003 - } - } - initializer { - truncated_normal_initializer { - mean: 0.0 - stddev: 0.3 - } - } - activation: RELU_6 - """ - hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) - def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): - return (conv_hyperparams_arg, is_training) - - box_predictor_proto = box_predictor_pb2.BoxPredictor() - box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom( - hyperparams_proto) - box_predictor = box_predictor_builder.build( - argscope_fn=mock_conv_argscope_builder, - box_predictor_config=box_predictor_proto, - is_training=False, - num_classes=10) - (conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams_fn - self.assertAlmostEqual((hyperparams_proto.regularizer. - l1_regularizer.weight), - (conv_hyperparams_actual.regularizer.l1_regularizer. - weight)) - self.assertAlmostEqual((hyperparams_proto.initializer. - truncated_normal_initializer.stddev), - (conv_hyperparams_actual.initializer. 
- truncated_normal_initializer.stddev)) - self.assertAlmostEqual((hyperparams_proto.initializer. - truncated_normal_initializer.mean), - (conv_hyperparams_actual.initializer. - truncated_normal_initializer.mean)) - self.assertEqual(hyperparams_proto.activation, - conv_hyperparams_actual.activation) - self.assertFalse(is_training) - - def test_non_default_rfcn_box_predictor(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - activation: RELU_6 - """ - box_predictor_text_proto = """ - rfcn_box_predictor { - num_spatial_bins_height: 4 - num_spatial_bins_width: 4 - depth: 4 - box_code_size: 3 - crop_height: 16 - crop_width: 16 - } - """ - hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) - def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): - return (conv_hyperparams_arg, is_training) - - box_predictor_proto = box_predictor_pb2.BoxPredictor() - text_format.Merge(box_predictor_text_proto, box_predictor_proto) - box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom( - hyperparams_proto) - box_predictor = box_predictor_builder.build( - argscope_fn=mock_conv_argscope_builder, - box_predictor_config=box_predictor_proto, - is_training=True, - num_classes=90) - self.assertEqual(box_predictor.num_classes, 90) - self.assertTrue(box_predictor._is_training) - self.assertEqual(box_predictor._box_code_size, 3) - self.assertEqual(box_predictor._num_spatial_bins, [4, 4]) - self.assertEqual(box_predictor._crop_size, [16, 16]) - - def test_default_rfcn_box_predictor(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - activation: RELU_6 - """ - hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) - def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): - return (conv_hyperparams_arg, is_training) - - box_predictor_proto = box_predictor_pb2.BoxPredictor() - box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom( - hyperparams_proto) - box_predictor = box_predictor_builder.build( - argscope_fn=mock_conv_argscope_builder, - box_predictor_config=box_predictor_proto, - is_training=True, - num_classes=90) - self.assertEqual(box_predictor.num_classes, 90) - self.assertTrue(box_predictor._is_training) - self.assertEqual(box_predictor._box_code_size, 4) - self.assertEqual(box_predictor._num_spatial_bins, [3, 3]) - self.assertEqual(box_predictor._crop_size, [12, 12]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/dataset_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/dataset_builder.py deleted file mode 100644 index 3628a85ea3ec33373e0642244a6a96984677358b..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/dataset_builder.py +++ /dev/null @@ -1,196 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""tf.data.Dataset builder.
-
-Creates data sources for DetectionModels from an InputReader config. See
-input_reader.proto for options.
-
-Note: If users wish to also use their own InputReaders with the Object
-Detection configuration framework, they should define their own builder
-function that wraps the build function.
-"""
-import functools
-import tensorflow as tf
-
-from object_detection.core import standard_fields as fields
-from object_detection.data_decoders import tf_example_decoder
-from object_detection.protos import input_reader_pb2
-from object_detection.utils import dataset_util
-
-
-def _get_padding_shapes(dataset, max_num_boxes=None, num_classes=None,
-                        spatial_image_shape=None):
-  """Returns shapes to pad dataset tensors to before batching.
-
-  Args:
-    dataset: tf.data.Dataset object.
-    max_num_boxes: Max number of groundtruth boxes needed to compute shapes
-      for padding.
-    num_classes: Number of classes in the dataset needed to compute shapes
-      for padding.
-    spatial_image_shape: A list of two integers of the form [height, width]
-      containing expected spatial shape of the image.
-
-  Returns:
-    A dictionary keyed by fields.InputDataFields containing padding shapes for
-    tensors in the dataset.
-
-  Raises:
-    ValueError: If the groundtruth classes tensor is neither rank 1 nor rank 2.
-  """
-
-  if not spatial_image_shape or spatial_image_shape == [-1, -1]:
-    height, width = None, None
-  else:
-    height, width = spatial_image_shape  # pylint: disable=unpacking-non-sequence
-
-  num_additional_channels = 0
-  if fields.InputDataFields.image_additional_channels in dataset.output_shapes:
-    num_additional_channels = dataset.output_shapes[
-        fields.InputDataFields.image_additional_channels].dims[2].value
-  padding_shapes = {
-      # Additional channels are merged before batching.
-      fields.InputDataFields.image: [
-          height, width, 3 + num_additional_channels
-      ],
-      fields.InputDataFields.image_additional_channels: [
-          height, width, num_additional_channels
-      ],
-      fields.InputDataFields.source_id: [],
-      fields.InputDataFields.filename: [],
-      fields.InputDataFields.key: [],
-      fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
-      fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
-      fields.InputDataFields.groundtruth_instance_masks: [
-          max_num_boxes, height, width
-      ],
-      fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes],
-      fields.InputDataFields.groundtruth_group_of: [max_num_boxes],
-      fields.InputDataFields.groundtruth_area: [max_num_boxes],
-      fields.InputDataFields.groundtruth_weights: [max_num_boxes],
-      fields.InputDataFields.num_groundtruth_boxes: [],
-      fields.InputDataFields.groundtruth_label_types: [max_num_boxes],
-      fields.InputDataFields.groundtruth_label_scores: [max_num_boxes],
-      fields.InputDataFields.true_image_shape: [3],
-      fields.InputDataFields.multiclass_scores: [
-          max_num_boxes, num_classes + 1 if num_classes is not None else None
-      ],
-  }
-  # Determine whether groundtruth_classes are integers or one-hot encodings,
-  # and apply batching appropriately.
- classes_shape = dataset.output_shapes[ - fields.InputDataFields.groundtruth_classes] - if len(classes_shape) == 1: # Class integers. - padding_shapes[fields.InputDataFields.groundtruth_classes] = [max_num_boxes] - elif len(classes_shape) == 2: # One-hot or k-hot encoding. - padding_shapes[fields.InputDataFields.groundtruth_classes] = [ - max_num_boxes, num_classes] - else: - raise ValueError('Groundtruth classes must be a rank 1 tensor (classes) or ' - 'rank 2 tensor (one-hot encodings)') - - if fields.InputDataFields.original_image in dataset.output_shapes: - padding_shapes[fields.InputDataFields.original_image] = [ - None, None, 3 + num_additional_channels - ] - if fields.InputDataFields.groundtruth_keypoints in dataset.output_shapes: - tensor_shape = dataset.output_shapes[fields.InputDataFields. - groundtruth_keypoints] - padding_shape = [max_num_boxes, tensor_shape[1].value, - tensor_shape[2].value] - padding_shapes[fields.InputDataFields.groundtruth_keypoints] = padding_shape - if (fields.InputDataFields.groundtruth_keypoint_visibilities - in dataset.output_shapes): - tensor_shape = dataset.output_shapes[fields.InputDataFields. - groundtruth_keypoint_visibilities] - padding_shape = [max_num_boxes, tensor_shape[1].value] - padding_shapes[fields.InputDataFields. - groundtruth_keypoint_visibilities] = padding_shape - return {tensor_key: padding_shapes[tensor_key] - for tensor_key, _ in dataset.output_shapes.items()} - - -def build(input_reader_config, - transform_input_data_fn=None, - batch_size=None, - max_num_boxes=None, - num_classes=None, - spatial_image_shape=None, - num_additional_channels=0): - """Builds a tf.data.Dataset. - - Builds a tf.data.Dataset by applying the `transform_input_data_fn` on all - records. Applies a padded batch to the resulting dataset. - - Args: - input_reader_config: A input_reader_pb2.InputReader object. - transform_input_data_fn: Function to apply to all records, or None if - no extra decoding is required. - batch_size: Batch size. If None, batching is not performed. - max_num_boxes: Max number of groundtruth boxes needed to compute shapes for - padding. If None, will use a dynamic shape. - num_classes: Number of classes in the dataset needed to compute shapes for - padding. If None, will use a dynamic shape. - spatial_image_shape: A list of two integers of the form [height, width] - containing expected spatial shape of the image after applying - transform_input_data_fn. If None, will use dynamic shapes. - num_additional_channels: Number of additional channels to use in the input. - - Returns: - A tf.data.Dataset based on the input_reader_config. - - Raises: - ValueError: On invalid input reader proto. - ValueError: If no input paths are specified. 
- """ - if not isinstance(input_reader_config, input_reader_pb2.InputReader): - raise ValueError('input_reader_config not of type ' - 'input_reader_pb2.InputReader.') - - if input_reader_config.WhichOneof('input_reader') == 'tf_record_input_reader': - config = input_reader_config.tf_record_input_reader - if not config.input_path: - raise ValueError('At least one input path must be specified in ' - '`input_reader_config`.') - - label_map_proto_file = None - if input_reader_config.HasField('label_map_path'): - label_map_proto_file = input_reader_config.label_map_path - decoder = tf_example_decoder.TfExampleDecoder( - load_instance_masks=input_reader_config.load_instance_masks, - instance_mask_type=input_reader_config.mask_type, - label_map_proto_file=label_map_proto_file, - use_display_name=input_reader_config.use_display_name, - num_additional_channels=num_additional_channels) - - def process_fn(value): - processed = decoder.decode(value) - if transform_input_data_fn is not None: - return transform_input_data_fn(processed) - return processed - - dataset = dataset_util.read_dataset( - functools.partial(tf.data.TFRecordDataset, buffer_size=8 * 1000 * 1000), - process_fn, config.input_path[:], input_reader_config) - - if batch_size: - padding_shapes = _get_padding_shapes(dataset, max_num_boxes, num_classes, - spatial_image_shape) - dataset = dataset.apply( - tf.contrib.data.padded_batch_and_drop_remainder(batch_size, - padding_shapes)) - return dataset - - raise ValueError('Unsupported input_reader_config.') diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/dataset_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/dataset_builder_test.py deleted file mode 100644 index 0f1360f5e18892ebf4155407e65b46e12e69a96a..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/dataset_builder_test.py +++ /dev/null @@ -1,260 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for dataset_builder.""" - -import os -import numpy as np -import tensorflow as tf - -from google.protobuf import text_format - -from tensorflow.core.example import example_pb2 -from tensorflow.core.example import feature_pb2 -from object_detection.builders import dataset_builder -from object_detection.core import standard_fields as fields -from object_detection.protos import input_reader_pb2 -from object_detection.utils import dataset_util - - -class DatasetBuilderTest(tf.test.TestCase): - - def create_tf_record(self, has_additional_channels=False): - path = os.path.join(self.get_temp_dir(), 'tfrecord') - writer = tf.python_io.TFRecordWriter(path) - - image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) - additional_channels_tensor = np.random.randint( - 255, size=(4, 5, 1)).astype(np.uint8) - flat_mask = (4 * 5) * [1.0] - with self.test_session(): - encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval() - encoded_additional_channels_jpeg = tf.image.encode_jpeg( - tf.constant(additional_channels_tensor)).eval() - features = { - 'image/encoded': - feature_pb2.Feature( - bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])), - 'image/format': - feature_pb2.Feature( - bytes_list=feature_pb2.BytesList(value=['jpeg'.encode('utf-8')]) - ), - 'image/height': - feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[4])), - 'image/width': - feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[5])), - 'image/object/bbox/xmin': - feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[0.0])), - 'image/object/bbox/xmax': - feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[1.0])), - 'image/object/bbox/ymin': - feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[0.0])), - 'image/object/bbox/ymax': - feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[1.0])), - 'image/object/class/label': - feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[2])), - 'image/object/mask': - feature_pb2.Feature( - float_list=feature_pb2.FloatList(value=flat_mask)), - } - if has_additional_channels: - features['image/additional_channels/encoded'] = feature_pb2.Feature( - bytes_list=feature_pb2.BytesList( - value=[encoded_additional_channels_jpeg] * 2)) - example = example_pb2.Example( - features=feature_pb2.Features(feature=features)) - writer.write(example.SerializeToString()) - writer.close() - - return path - - def test_build_tf_record_input_reader(self): - tf_record_path = self.create_tf_record() - - input_reader_text_proto = """ - shuffle: false - num_readers: 1 - tf_record_input_reader {{ - input_path: '{0}' - }} - """.format(tf_record_path) - input_reader_proto = input_reader_pb2.InputReader() - text_format.Merge(input_reader_text_proto, input_reader_proto) - tensor_dict = dataset_util.make_initializable_iterator( - dataset_builder.build(input_reader_proto, batch_size=1)).get_next() - - sv = tf.train.Supervisor(logdir=self.get_temp_dir()) - with sv.prepare_or_wait_for_session() as sess: - sv.start_queue_runners(sess) - output_dict = sess.run(tensor_dict) - - self.assertTrue( - fields.InputDataFields.groundtruth_instance_masks not in output_dict) - self.assertEquals((1, 4, 5, 3), - output_dict[fields.InputDataFields.image].shape) - self.assertAllEqual([[2]], - output_dict[fields.InputDataFields.groundtruth_classes]) - self.assertEquals( - (1, 1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape) - self.assertAllEqual( - [0.0, 0.0, 
1.0, 1.0], - output_dict[fields.InputDataFields.groundtruth_boxes][0][0]) - - def test_build_tf_record_input_reader_and_load_instance_masks(self): - tf_record_path = self.create_tf_record() - - input_reader_text_proto = """ - shuffle: false - num_readers: 1 - load_instance_masks: true - tf_record_input_reader {{ - input_path: '{0}' - }} - """.format(tf_record_path) - input_reader_proto = input_reader_pb2.InputReader() - text_format.Merge(input_reader_text_proto, input_reader_proto) - tensor_dict = dataset_util.make_initializable_iterator( - dataset_builder.build(input_reader_proto, batch_size=1)).get_next() - - sv = tf.train.Supervisor(logdir=self.get_temp_dir()) - with sv.prepare_or_wait_for_session() as sess: - sv.start_queue_runners(sess) - output_dict = sess.run(tensor_dict) - self.assertAllEqual( - (1, 1, 4, 5), - output_dict[fields.InputDataFields.groundtruth_instance_masks].shape) - - def test_build_tf_record_input_reader_with_batch_size_two(self): - tf_record_path = self.create_tf_record() - - input_reader_text_proto = """ - shuffle: false - num_readers: 1 - tf_record_input_reader {{ - input_path: '{0}' - }} - """.format(tf_record_path) - input_reader_proto = input_reader_pb2.InputReader() - text_format.Merge(input_reader_text_proto, input_reader_proto) - - def one_hot_class_encoding_fn(tensor_dict): - tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot( - tensor_dict[fields.InputDataFields.groundtruth_classes] - 1, depth=3) - return tensor_dict - - tensor_dict = dataset_util.make_initializable_iterator( - dataset_builder.build( - input_reader_proto, - transform_input_data_fn=one_hot_class_encoding_fn, - batch_size=2, - max_num_boxes=2, - num_classes=3, - spatial_image_shape=[4, 5])).get_next() - - sv = tf.train.Supervisor(logdir=self.get_temp_dir()) - with sv.prepare_or_wait_for_session() as sess: - sv.start_queue_runners(sess) - output_dict = sess.run(tensor_dict) - - self.assertAllEqual([2, 4, 5, 3], - output_dict[fields.InputDataFields.image].shape) - self.assertAllEqual([2, 2, 3], - output_dict[fields.InputDataFields.groundtruth_classes]. - shape) - self.assertAllEqual([2, 2, 4], - output_dict[fields.InputDataFields.groundtruth_boxes]. 
- shape) - self.assertAllEqual( - [[[0.0, 0.0, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0]], - [[0.0, 0.0, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0]]], - output_dict[fields.InputDataFields.groundtruth_boxes]) - - def test_build_tf_record_input_reader_with_batch_size_two_and_masks(self): - tf_record_path = self.create_tf_record() - - input_reader_text_proto = """ - shuffle: false - num_readers: 1 - load_instance_masks: true - tf_record_input_reader {{ - input_path: '{0}' - }} - """.format(tf_record_path) - input_reader_proto = input_reader_pb2.InputReader() - text_format.Merge(input_reader_text_proto, input_reader_proto) - - def one_hot_class_encoding_fn(tensor_dict): - tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot( - tensor_dict[fields.InputDataFields.groundtruth_classes] - 1, depth=3) - return tensor_dict - - tensor_dict = dataset_util.make_initializable_iterator( - dataset_builder.build( - input_reader_proto, - transform_input_data_fn=one_hot_class_encoding_fn, - batch_size=2, - max_num_boxes=2, - num_classes=3, - spatial_image_shape=[4, 5])).get_next() - - sv = tf.train.Supervisor(logdir=self.get_temp_dir()) - with sv.prepare_or_wait_for_session() as sess: - sv.start_queue_runners(sess) - output_dict = sess.run(tensor_dict) - - self.assertAllEqual( - [2, 2, 4, 5], - output_dict[fields.InputDataFields.groundtruth_instance_masks].shape) - - def test_build_tf_record_input_reader_with_additional_channels(self): - tf_record_path = self.create_tf_record(has_additional_channels=True) - - input_reader_text_proto = """ - shuffle: false - num_readers: 1 - tf_record_input_reader {{ - input_path: '{0}' - }} - """.format(tf_record_path) - input_reader_proto = input_reader_pb2.InputReader() - text_format.Merge(input_reader_text_proto, input_reader_proto) - tensor_dict = dataset_util.make_initializable_iterator( - dataset_builder.build( - input_reader_proto, batch_size=2, - num_additional_channels=2)).get_next() - - sv = tf.train.Supervisor(logdir=self.get_temp_dir()) - with sv.prepare_or_wait_for_session() as sess: - sv.start_queue_runners(sess) - output_dict = sess.run(tensor_dict) - - self.assertEquals((2, 4, 5, 5), - output_dict[fields.InputDataFields.image].shape) - - def test_raises_error_with_no_input_paths(self): - input_reader_text_proto = """ - shuffle: false - num_readers: 1 - load_instance_masks: true - """ - input_reader_proto = input_reader_pb2.InputReader() - text_format.Merge(input_reader_text_proto, input_reader_proto) - with self.assertRaises(ValueError): - dataset_builder.build(input_reader_proto) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/graph_rewriter_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/graph_rewriter_builder.py deleted file mode 100644 index 77e60479bd8f6e6267acabcec9a4995ed1622959..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/graph_rewriter_builder.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Functions for quantized training and evaluation."""
-
-import tensorflow as tf
-
-
-def build(graph_rewriter_config, is_training):
-  """Returns a function that modifies default graph based on options.
-
-  Args:
-    graph_rewriter_config: graph_rewriter_pb2.GraphRewriter proto.
-    is_training: whether in training or eval mode.
-  """
-  def graph_rewrite_fn():
-    """Function to quantize weights and activation of the default graph."""
-    if (graph_rewriter_config.quantization.weight_bits != 8 or
-        graph_rewriter_config.quantization.activation_bits != 8):
-      raise ValueError('Only 8bit quantization is supported')
-
-    # Quantize the graph by inserting quantize ops for weights and activations
-    if is_training:
-      tf.contrib.quantize.create_training_graph(
-          input_graph=tf.get_default_graph(),
-          quant_delay=graph_rewriter_config.quantization.delay)
-    else:
-      tf.contrib.quantize.create_eval_graph(input_graph=tf.get_default_graph())
-
-    tf.contrib.layers.summarize_collection('quant_vars')
-  return graph_rewrite_fn
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/graph_rewriter_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/graph_rewriter_builder_test.py
deleted file mode 100644
index 5f38d5a27df1e74674e74748687efdef191781f0..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/graph_rewriter_builder_test.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
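A quick sketch of how the graph rewriter above is driven (the quant delay of 2000 steps is a made-up value; the field names mirror the test below):

from object_detection.builders import graph_rewriter_builder
from object_detection.protos import graph_rewriter_pb2

graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter()
graph_rewriter_proto.quantization.delay = 2000
graph_rewriter_proto.quantization.weight_bits = 8
graph_rewriter_proto.quantization.activation_bits = 8

# Calling the returned function inserts fake-quant ops into the default graph.
graph_rewrite_fn = graph_rewriter_builder.build(
    graph_rewriter_proto, is_training=True)
graph_rewrite_fn()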
-# ============================================================================== -"""Tests for graph_rewriter_builder.""" -import mock -import tensorflow as tf -from object_detection.builders import graph_rewriter_builder -from object_detection.protos import graph_rewriter_pb2 - - -class QuantizationBuilderTest(tf.test.TestCase): - - def testQuantizationBuilderSetsUpCorrectTrainArguments(self): - with mock.patch.object( - tf.contrib.quantize, 'create_training_graph') as mock_quant_fn: - with mock.patch.object(tf.contrib.layers, - 'summarize_collection') as mock_summarize_col: - graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter() - graph_rewriter_proto.quantization.delay = 10 - graph_rewriter_proto.quantization.weight_bits = 8 - graph_rewriter_proto.quantization.activation_bits = 8 - graph_rewrite_fn = graph_rewriter_builder.build( - graph_rewriter_proto, is_training=True) - graph_rewrite_fn() - _, kwargs = mock_quant_fn.call_args - self.assertEqual(kwargs['input_graph'], tf.get_default_graph()) - self.assertEqual(kwargs['quant_delay'], 10) - mock_summarize_col.assert_called_with('quant_vars') - - def testQuantizationBuilderSetsUpCorrectEvalArguments(self): - with mock.patch.object(tf.contrib.quantize, - 'create_eval_graph') as mock_quant_fn: - with mock.patch.object(tf.contrib.layers, - 'summarize_collection') as mock_summarize_col: - graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter() - graph_rewriter_proto.quantization.delay = 10 - graph_rewrite_fn = graph_rewriter_builder.build( - graph_rewriter_proto, is_training=False) - graph_rewrite_fn() - _, kwargs = mock_quant_fn.call_args - self.assertEqual(kwargs['input_graph'], tf.get_default_graph()) - mock_summarize_col.assert_called_with('quant_vars') - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/hyperparams_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/hyperparams_builder.py deleted file mode 100644 index 05addddaafa5785aa0995fb58181841511a250bc..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/hyperparams_builder.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Builder function to construct tf-slim arg_scope for convolution, fc ops.""" -import tensorflow as tf - -from object_detection.protos import hyperparams_pb2 -from object_detection.utils import context_manager - -slim = tf.contrib.slim - - -def build(hyperparams_config, is_training): - """Builds tf-slim arg_scope for convolution ops based on the config. - - Returns an arg_scope to use for convolution ops containing weights - initializer, weights regularizer, activation function, batch norm function - and batch norm parameters based on the configuration. 
-
-  Note that if the batch_norm parameters are not specified in the config
-  (i.e. left to default) then batch norm is excluded from the arg_scope.
-
-  The batch norm parameters are set for updates based on `is_training` argument
-  and conv_hyperparams_config.batch_norm.train parameter. During training, they
-  are updated only if batch_norm.train parameter is true. However, during eval,
-  no updates are made to the batch norm variables. In both cases, their current
-  values are used during forward pass.
-
-  Args:
-    hyperparams_config: hyperparams.proto object containing
-      hyperparameters.
-    is_training: Whether the network is in training mode.
-
-  Returns:
-    arg_scope_fn: A function to construct tf-slim arg_scope containing
-      hyperparameters for ops.
-
-  Raises:
-    ValueError: if hyperparams_config is not of type hyperparams.Hyperparams.
-  """
-  if not isinstance(hyperparams_config,
-                    hyperparams_pb2.Hyperparams):
-    raise ValueError('hyperparams_config not of type '
-                     'hyperparams_pb2.Hyperparams.')
-
-  batch_norm = None
-  batch_norm_params = None
-  if hyperparams_config.HasField('batch_norm'):
-    batch_norm = slim.batch_norm
-    batch_norm_params = _build_batch_norm_params(
-        hyperparams_config.batch_norm, is_training)
-
-  affected_ops = [slim.conv2d, slim.separable_conv2d, slim.conv2d_transpose]
-  if hyperparams_config.HasField('op') and (
-      hyperparams_config.op == hyperparams_pb2.Hyperparams.FC):
-    affected_ops = [slim.fully_connected]
-  def scope_fn():
-    with (slim.arg_scope([slim.batch_norm], **batch_norm_params)
-          if batch_norm_params is not None else
-          context_manager.IdentityContextManager()):
-      with slim.arg_scope(
-          affected_ops,
-          weights_regularizer=_build_regularizer(
-              hyperparams_config.regularizer),
-          weights_initializer=_build_initializer(
-              hyperparams_config.initializer),
-          activation_fn=_build_activation_fn(hyperparams_config.activation),
-          normalizer_fn=batch_norm) as sc:
-        return sc
-
-  return scope_fn
-
-
-def _build_activation_fn(activation_fn):
-  """Builds a callable activation from config.
-
-  Args:
-    activation_fn: hyperparams_pb2.Hyperparams.activation
-
-  Returns:
-    Callable activation function.
-
-  Raises:
-    ValueError: On unknown activation function.
-  """
-  if activation_fn == hyperparams_pb2.Hyperparams.NONE:
-    return None
-  if activation_fn == hyperparams_pb2.Hyperparams.RELU:
-    return tf.nn.relu
-  if activation_fn == hyperparams_pb2.Hyperparams.RELU_6:
-    return tf.nn.relu6
-  raise ValueError('Unknown activation function: {}'.format(activation_fn))
-
-
-def _build_regularizer(regularizer):
-  """Builds a tf-slim regularizer from config.
-
-  Args:
-    regularizer: hyperparams_pb2.Hyperparams.regularizer proto.
-
-  Returns:
-    tf-slim regularizer.
-
-  Raises:
-    ValueError: On unknown regularizer.
-  """
-  regularizer_oneof = regularizer.WhichOneof('regularizer_oneof')
-  if regularizer_oneof == 'l1_regularizer':
-    return slim.l1_regularizer(scale=float(regularizer.l1_regularizer.weight))
-  if regularizer_oneof == 'l2_regularizer':
-    return slim.l2_regularizer(scale=float(regularizer.l2_regularizer.weight))
-  raise ValueError('Unknown regularizer function: {}'.format(regularizer_oneof))
-
-
-def _build_initializer(initializer):
-  """Build a tf initializer from config.
-
-  Args:
-    initializer: hyperparams_pb2.Hyperparams.initializer proto.
-
-  Returns:
-    tf initializer.
-
-  Raises:
-    ValueError: On unknown initializer.
- """ - initializer_oneof = initializer.WhichOneof('initializer_oneof') - if initializer_oneof == 'truncated_normal_initializer': - return tf.truncated_normal_initializer( - mean=initializer.truncated_normal_initializer.mean, - stddev=initializer.truncated_normal_initializer.stddev) - if initializer_oneof == 'random_normal_initializer': - return tf.random_normal_initializer( - mean=initializer.random_normal_initializer.mean, - stddev=initializer.random_normal_initializer.stddev) - if initializer_oneof == 'variance_scaling_initializer': - enum_descriptor = (hyperparams_pb2.VarianceScalingInitializer. - DESCRIPTOR.enum_types_by_name['Mode']) - mode = enum_descriptor.values_by_number[initializer. - variance_scaling_initializer. - mode].name - return slim.variance_scaling_initializer( - factor=initializer.variance_scaling_initializer.factor, - mode=mode, - uniform=initializer.variance_scaling_initializer.uniform) - raise ValueError('Unknown initializer function: {}'.format( - initializer_oneof)) - - -def _build_batch_norm_params(batch_norm, is_training): - """Build a dictionary of batch_norm params from config. - - Args: - batch_norm: hyperparams_pb2.ConvHyperparams.batch_norm proto. - is_training: Whether the models is in training mode. - - Returns: - A dictionary containing batch_norm parameters. - """ - batch_norm_params = { - 'decay': batch_norm.decay, - 'center': batch_norm.center, - 'scale': batch_norm.scale, - 'epsilon': batch_norm.epsilon, - # Remove is_training parameter from here and deprecate it in the proto - # once we refactor Faster RCNN models to set is_training through an outer - # arg_scope in the meta architecture. - 'is_training': is_training and batch_norm.train, - } - return batch_norm_params diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/hyperparams_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/hyperparams_builder_test.py deleted file mode 100644 index 943532fbebca2870e5035fa39becd994f6d0b1ca..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/hyperparams_builder_test.py +++ /dev/null @@ -1,509 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests object_detection.core.hyperparams_builder.""" - -import numpy as np -import tensorflow as tf - -from google.protobuf import text_format - -from object_detection.builders import hyperparams_builder -from object_detection.protos import hyperparams_pb2 - -slim = tf.contrib.slim - - -def _get_scope_key(op): - return getattr(op, '_key_op', str(op)) - - -class HyperparamsBuilderTest(tf.test.TestCase): - - def test_default_arg_scope_has_conv2d_op(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - self.assertTrue(_get_scope_key(slim.conv2d) in scope) - - def test_default_arg_scope_has_separable_conv2d_op(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - self.assertTrue(_get_scope_key(slim.separable_conv2d) in scope) - - def test_default_arg_scope_has_conv2d_transpose_op(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - self.assertTrue(_get_scope_key(slim.conv2d_transpose) in scope) - - def test_explicit_fc_op_arg_scope_has_fully_connected_op(self): - conv_hyperparams_text_proto = """ - op: FC - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - self.assertTrue(_get_scope_key(slim.fully_connected) in scope) - - def test_separable_conv2d_and_conv2d_and_transpose_have_same_parameters(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - kwargs_1, kwargs_2, kwargs_3 = scope.values() - self.assertDictEqual(kwargs_1, kwargs_2) - self.assertDictEqual(kwargs_1, kwargs_3) - - def test_return_l1_regularized_weights(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - weight: 0.5 - } - } - initializer { - truncated_normal_initializer { - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - 
conv_scope_arguments = scope.values()[0] - regularizer = conv_scope_arguments['weights_regularizer'] - weights = np.array([1., -1, 4., 2.]) - with self.test_session() as sess: - result = sess.run(regularizer(tf.constant(weights))) - self.assertAllClose(np.abs(weights).sum() * 0.5, result) - - def test_return_l2_regularizer_weights(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - weight: 0.42 - } - } - initializer { - truncated_normal_initializer { - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] - - regularizer = conv_scope_arguments['weights_regularizer'] - weights = np.array([1., -1, 4., 2.]) - with self.test_session() as sess: - result = sess.run(regularizer(tf.constant(weights))) - self.assertAllClose(np.power(weights, 2).sum() / 2.0 * 0.42, result) - - def test_return_non_default_batch_norm_params_with_train_during_train(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - batch_norm { - decay: 0.7 - center: false - scale: true - epsilon: 0.03 - train: true - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] - self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm) - batch_norm_params = scope[_get_scope_key(slim.batch_norm)] - self.assertAlmostEqual(batch_norm_params['decay'], 0.7) - self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03) - self.assertFalse(batch_norm_params['center']) - self.assertTrue(batch_norm_params['scale']) - self.assertTrue(batch_norm_params['is_training']) - - def test_return_batch_norm_params_with_notrain_during_eval(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - batch_norm { - decay: 0.7 - center: false - scale: true - epsilon: 0.03 - train: true - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=False) - scope = scope_fn() - conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] - self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm) - batch_norm_params = scope[_get_scope_key(slim.batch_norm)] - self.assertAlmostEqual(batch_norm_params['decay'], 0.7) - self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03) - self.assertFalse(batch_norm_params['center']) - self.assertTrue(batch_norm_params['scale']) - self.assertFalse(batch_norm_params['is_training']) - - def test_return_batch_norm_params_with_notrain_when_train_is_false(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - batch_norm { - decay: 0.7 - center: false - scale: true - epsilon: 0.03 - train: false - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = 
hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] - self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm) - batch_norm_params = scope[_get_scope_key(slim.batch_norm)] - self.assertAlmostEqual(batch_norm_params['decay'], 0.7) - self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03) - self.assertFalse(batch_norm_params['center']) - self.assertTrue(batch_norm_params['scale']) - self.assertFalse(batch_norm_params['is_training']) - - def test_do_not_use_batch_norm_if_default(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] - self.assertEqual(conv_scope_arguments['normalizer_fn'], None) - - def test_use_none_activation(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - activation: NONE - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] - self.assertEqual(conv_scope_arguments['activation_fn'], None) - - def test_use_relu_activation(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - activation: RELU - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] - self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu) - - def test_use_relu_6_activation(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - activation: RELU_6 - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] - self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu6) - - def _assert_variance_in_range(self, initializer, shape, variance, - tol=1e-2): - with tf.Graph().as_default() as g: - with self.test_session(graph=g) as sess: - var = tf.get_variable( - name='test', - shape=shape, - dtype=tf.float32, - initializer=initializer) - sess.run(tf.global_variables_initializer()) - values = sess.run(var) - self.assertAllClose(np.var(values), variance, tol, tol) - - def test_variance_in_range_with_variance_scaling_initializer_fan_in(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - variance_scaling_initializer { - factor: 2.0 - mode: FAN_IN - uniform: false - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - 
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] - initializer = conv_scope_arguments['weights_initializer'] - self._assert_variance_in_range(initializer, shape=[100, 40], - variance=2. / 100.) - - def test_variance_in_range_with_variance_scaling_initializer_fan_out(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - variance_scaling_initializer { - factor: 2.0 - mode: FAN_OUT - uniform: false - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] - initializer = conv_scope_arguments['weights_initializer'] - self._assert_variance_in_range(initializer, shape=[100, 40], - variance=2. / 40.) - - def test_variance_in_range_with_variance_scaling_initializer_fan_avg(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - variance_scaling_initializer { - factor: 2.0 - mode: FAN_AVG - uniform: false - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] - initializer = conv_scope_arguments['weights_initializer'] - self._assert_variance_in_range(initializer, shape=[100, 40], - variance=4. / (100. + 40.)) - - def test_variance_in_range_with_variance_scaling_initializer_uniform(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - variance_scaling_initializer { - factor: 2.0 - mode: FAN_IN - uniform: true - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] - initializer = conv_scope_arguments['weights_initializer'] - self._assert_variance_in_range(initializer, shape=[100, 40], - variance=2. / 100.) 
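The expected variances asserted in the tests above follow directly from the variance-scaling rule. A tiny illustrative helper (not part of the original tests) makes the arithmetic explicit for a [fan_in, fan_out] weight shape such as [100, 40]:

def expected_variance(factor, fan_in, fan_out, mode):
  """Variance targeted by slim.variance_scaling_initializer per mode."""
  if mode == 'FAN_IN':
    return factor / fan_in                    # 2.0 / 100 in the tests above
  if mode == 'FAN_OUT':
    return factor / fan_out                   # 2.0 / 40
  return 2.0 * factor / (fan_in + fan_out)    # FAN_AVG: 4.0 / (100 + 40)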
- - def test_variance_in_range_with_truncated_normal_initializer(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - mean: 0.0 - stddev: 0.8 - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] - initializer = conv_scope_arguments['weights_initializer'] - self._assert_variance_in_range(initializer, shape=[100, 40], - variance=0.49, tol=1e-1) - - def test_variance_in_range_with_random_normal_initializer(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - random_normal_initializer { - mean: 0.0 - stddev: 0.8 - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope_fn = hyperparams_builder.build(conv_hyperparams_proto, - is_training=True) - scope = scope_fn() - conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] - initializer = conv_scope_arguments['weights_initializer'] - self._assert_variance_in_range(initializer, shape=[100, 40], - variance=0.64, tol=1e-1) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/image_resizer_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/image_resizer_builder.py deleted file mode 100644 index 3b3014f727e13d2bf671ea12f3ff30972cc67684..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/image_resizer_builder.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Builder function for image resizing operations.""" -import functools -import tensorflow as tf - -from object_detection.core import preprocessor -from object_detection.protos import image_resizer_pb2 - - -def _tf_resize_method(resize_method): - """Maps image resize method from enumeration type to TensorFlow. - - Args: - resize_method: The resize_method attribute of keep_aspect_ratio_resizer or - fixed_shape_resizer. - - Returns: - method: The corresponding TensorFlow ResizeMethod. - - Raises: - ValueError: if `resize_method` is of unknown type. 
- """ - dict_method = { - image_resizer_pb2.BILINEAR: - tf.image.ResizeMethod.BILINEAR, - image_resizer_pb2.NEAREST_NEIGHBOR: - tf.image.ResizeMethod.NEAREST_NEIGHBOR, - image_resizer_pb2.BICUBIC: - tf.image.ResizeMethod.BICUBIC, - image_resizer_pb2.AREA: - tf.image.ResizeMethod.AREA - } - if resize_method in dict_method: - return dict_method[resize_method] - else: - raise ValueError('Unknown resize_method') - - -def build(image_resizer_config): - """Builds callable for image resizing operations. - - Args: - image_resizer_config: image_resizer.proto object containing parameters for - an image resizing operation. - - Returns: - image_resizer_fn: Callable for image resizing. This callable always takes - a rank-3 image tensor (corresponding to a single image) and returns a - rank-3 image tensor, possibly with new spatial dimensions. - - Raises: - ValueError: if `image_resizer_config` is of incorrect type. - ValueError: if `image_resizer_config.image_resizer_oneof` is of expected - type. - ValueError: if min_dimension > max_dimension when keep_aspect_ratio_resizer - is used. - """ - if not isinstance(image_resizer_config, image_resizer_pb2.ImageResizer): - raise ValueError('image_resizer_config not of type ' - 'image_resizer_pb2.ImageResizer.') - - image_resizer_oneof = image_resizer_config.WhichOneof('image_resizer_oneof') - if image_resizer_oneof == 'keep_aspect_ratio_resizer': - keep_aspect_ratio_config = image_resizer_config.keep_aspect_ratio_resizer - if not (keep_aspect_ratio_config.min_dimension <= - keep_aspect_ratio_config.max_dimension): - raise ValueError('min_dimension > max_dimension') - method = _tf_resize_method(keep_aspect_ratio_config.resize_method) - per_channel_pad_value = (0, 0, 0) - if keep_aspect_ratio_config.per_channel_pad_value: - per_channel_pad_value = tuple(keep_aspect_ratio_config. - per_channel_pad_value) - image_resizer_fn = functools.partial( - preprocessor.resize_to_range, - min_dimension=keep_aspect_ratio_config.min_dimension, - max_dimension=keep_aspect_ratio_config.max_dimension, - method=method, - pad_to_max_dimension=keep_aspect_ratio_config.pad_to_max_dimension, - per_channel_pad_value=per_channel_pad_value) - if not keep_aspect_ratio_config.convert_to_grayscale: - return image_resizer_fn - elif image_resizer_oneof == 'fixed_shape_resizer': - fixed_shape_resizer_config = image_resizer_config.fixed_shape_resizer - method = _tf_resize_method(fixed_shape_resizer_config.resize_method) - image_resizer_fn = functools.partial( - preprocessor.resize_image, - new_height=fixed_shape_resizer_config.height, - new_width=fixed_shape_resizer_config.width, - method=method) - if not fixed_shape_resizer_config.convert_to_grayscale: - return image_resizer_fn - else: - raise ValueError( - 'Invalid image resizer option: \'%s\'.' 
% image_resizer_oneof) - - def grayscale_image_resizer(image): - [resized_image, resized_image_shape] = image_resizer_fn(image) - grayscale_image = preprocessor.rgb_to_gray(resized_image) - grayscale_image_shape = tf.concat([resized_image_shape[:-1], [1]], 0) - return [grayscale_image, grayscale_image_shape] - - return functools.partial(grayscale_image_resizer) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/image_resizer_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/image_resizer_builder_test.py deleted file mode 100644 index 38f620e04050888c5f3b1c73cdab8942a99b9d57..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/image_resizer_builder_test.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for object_detection.builders.image_resizer_builder.""" -import numpy as np -import tensorflow as tf -from google.protobuf import text_format -from object_detection.builders import image_resizer_builder -from object_detection.protos import image_resizer_pb2 - - -class ImageResizerBuilderTest(tf.test.TestCase): - - def _shape_of_resized_random_image_given_text_proto(self, input_shape, - text_proto): - image_resizer_config = image_resizer_pb2.ImageResizer() - text_format.Merge(text_proto, image_resizer_config) - image_resizer_fn = image_resizer_builder.build(image_resizer_config) - images = tf.to_float( - tf.random_uniform(input_shape, minval=0, maxval=255, dtype=tf.int32)) - resized_images, _ = image_resizer_fn(images) - with self.test_session() as sess: - return sess.run(resized_images).shape - - def test_build_keep_aspect_ratio_resizer_returns_expected_shape(self): - image_resizer_text_proto = """ - keep_aspect_ratio_resizer { - min_dimension: 10 - max_dimension: 20 - } - """ - input_shape = (50, 25, 3) - expected_output_shape = (20, 10, 3) - output_shape = self._shape_of_resized_random_image_given_text_proto( - input_shape, image_resizer_text_proto) - self.assertEqual(output_shape, expected_output_shape) - - def test_build_keep_aspect_ratio_resizer_with_padding(self): - image_resizer_text_proto = """ - keep_aspect_ratio_resizer { - min_dimension: 10 - max_dimension: 20 - pad_to_max_dimension: true - per_channel_pad_value: 3 - per_channel_pad_value: 4 - per_channel_pad_value: 5 - } - """ - input_shape = (50, 25, 3) - expected_output_shape = (20, 20, 3) - output_shape = self._shape_of_resized_random_image_given_text_proto( - input_shape, image_resizer_text_proto) - self.assertEqual(output_shape, expected_output_shape) - - def test_built_fixed_shape_resizer_returns_expected_shape(self): - image_resizer_text_proto = """ - fixed_shape_resizer { - height: 10 - width: 20 - } - """ - input_shape = (50, 25, 3) - expected_output_shape = (10, 20, 3) - output_shape = 
self._shape_of_resized_random_image_given_text_proto(
-        input_shape, image_resizer_text_proto)
-    self.assertEqual(output_shape, expected_output_shape)
-
-  def test_raises_error_on_invalid_input(self):
-    invalid_input = 'invalid_input'
-    with self.assertRaises(ValueError):
-      image_resizer_builder.build(invalid_input)
-
-  def _resized_image_given_text_proto(self, image, text_proto):
-    image_resizer_config = image_resizer_pb2.ImageResizer()
-    text_format.Merge(text_proto, image_resizer_config)
-    image_resizer_fn = image_resizer_builder.build(image_resizer_config)
-    image_placeholder = tf.placeholder(tf.uint8, [1, None, None, 3])
-    resized_image, _ = image_resizer_fn(image_placeholder)
-    with self.test_session() as sess:
-      return sess.run(resized_image, feed_dict={image_placeholder: image})
-
-  def test_fixed_shape_resizer_nearest_neighbor_method(self):
-    image_resizer_text_proto = """
-      fixed_shape_resizer {
-        height: 1
-        width: 1
-        resize_method: NEAREST_NEIGHBOR
-      }
-    """
-    image = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
-    image = np.expand_dims(image, axis=2)
-    image = np.tile(image, (1, 1, 3))
-    image = np.expand_dims(image, axis=0)
-    resized_image = self._resized_image_given_text_proto(
-        image, image_resizer_text_proto)
-    vals = np.unique(resized_image).tolist()
-    self.assertEqual(len(vals), 1)
-    self.assertEqual(vals[0], 1)
-
-
-if __name__ == '__main__':
-  tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/input_reader_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/input_reader_builder.py
deleted file mode 100644
index 8cb5e2f05448f1817a7644f1a553eac1ee98ba17..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/input_reader_builder.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Input reader builder.
-
-Creates data sources for DetectionModels from an InputReader config. See
-input_reader.proto for options.
-
-Note: If users wish to also use their own InputReaders with the Object
-Detection configuration framework, they should define their own builder function
-that wraps the build function.
-"""
-
-import tensorflow as tf
-
-from object_detection.data_decoders import tf_example_decoder
-from object_detection.protos import input_reader_pb2
-
-parallel_reader = tf.contrib.slim.parallel_reader
-
-
-def build(input_reader_config):
-  """Builds a tensor dictionary based on the InputReader config.
-
-  Args:
-    input_reader_config: An input_reader_pb2.InputReader object.
-
-  Returns:
-    A tensor dict based on the input_reader_config.
-
-  Raises:
-    ValueError: On invalid input reader proto.
-    ValueError: If no input paths are specified.
- """ - if not isinstance(input_reader_config, input_reader_pb2.InputReader): - raise ValueError('input_reader_config not of type ' - 'input_reader_pb2.InputReader.') - - if input_reader_config.WhichOneof('input_reader') == 'tf_record_input_reader': - config = input_reader_config.tf_record_input_reader - if not config.input_path: - raise ValueError('At least one input path must be specified in ' - '`input_reader_config`.') - _, string_tensor = parallel_reader.parallel_read( - config.input_path[:], # Convert `RepeatedScalarContainer` to list. - reader_class=tf.TFRecordReader, - num_epochs=(input_reader_config.num_epochs - if input_reader_config.num_epochs else None), - num_readers=input_reader_config.num_readers, - shuffle=input_reader_config.shuffle, - dtypes=[tf.string, tf.string], - capacity=input_reader_config.queue_capacity, - min_after_dequeue=input_reader_config.min_after_dequeue) - - label_map_proto_file = None - if input_reader_config.HasField('label_map_path'): - label_map_proto_file = input_reader_config.label_map_path - decoder = tf_example_decoder.TfExampleDecoder( - load_instance_masks=input_reader_config.load_instance_masks, - instance_mask_type=input_reader_config.mask_type, - label_map_proto_file=label_map_proto_file) - return decoder.decode(string_tensor) - - raise ValueError('Unsupported input_reader_config.') diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/input_reader_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/input_reader_builder_test.py deleted file mode 100644 index f09f60e5777b133e5fa50840d63728f2de55c147..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/input_reader_builder_test.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for input_reader_builder.""" - -import os -import numpy as np -import tensorflow as tf - -from google.protobuf import text_format - -from tensorflow.core.example import example_pb2 -from tensorflow.core.example import feature_pb2 -from object_detection.builders import input_reader_builder -from object_detection.core import standard_fields as fields -from object_detection.protos import input_reader_pb2 - - -class InputReaderBuilderTest(tf.test.TestCase): - - def create_tf_record(self): - path = os.path.join(self.get_temp_dir(), 'tfrecord') - writer = tf.python_io.TFRecordWriter(path) - - image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) - flat_mask = (4 * 5) * [1.0] - with self.test_session(): - encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval() - example = example_pb2.Example(features=feature_pb2.Features(feature={ - 'image/encoded': feature_pb2.Feature( - bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])), - 'image/format': feature_pb2.Feature( - bytes_list=feature_pb2.BytesList(value=['jpeg'.encode('utf-8')])), - 'image/height': feature_pb2.Feature( - int64_list=feature_pb2.Int64List(value=[4])), - 'image/width': feature_pb2.Feature( - int64_list=feature_pb2.Int64List(value=[5])), - 'image/object/bbox/xmin': feature_pb2.Feature( - float_list=feature_pb2.FloatList(value=[0.0])), - 'image/object/bbox/xmax': feature_pb2.Feature( - float_list=feature_pb2.FloatList(value=[1.0])), - 'image/object/bbox/ymin': feature_pb2.Feature( - float_list=feature_pb2.FloatList(value=[0.0])), - 'image/object/bbox/ymax': feature_pb2.Feature( - float_list=feature_pb2.FloatList(value=[1.0])), - 'image/object/class/label': feature_pb2.Feature( - int64_list=feature_pb2.Int64List(value=[2])), - 'image/object/mask': feature_pb2.Feature( - float_list=feature_pb2.FloatList(value=flat_mask)), - })) - writer.write(example.SerializeToString()) - writer.close() - - return path - - def test_build_tf_record_input_reader(self): - tf_record_path = self.create_tf_record() - - input_reader_text_proto = """ - shuffle: false - num_readers: 1 - tf_record_input_reader {{ - input_path: '{0}' - }} - """.format(tf_record_path) - input_reader_proto = input_reader_pb2.InputReader() - text_format.Merge(input_reader_text_proto, input_reader_proto) - tensor_dict = input_reader_builder.build(input_reader_proto) - - sv = tf.train.Supervisor(logdir=self.get_temp_dir()) - with sv.prepare_or_wait_for_session() as sess: - sv.start_queue_runners(sess) - output_dict = sess.run(tensor_dict) - - self.assertTrue(fields.InputDataFields.groundtruth_instance_masks - not in output_dict) - self.assertEquals( - (4, 5, 3), output_dict[fields.InputDataFields.image].shape) - self.assertEquals( - [2], output_dict[fields.InputDataFields.groundtruth_classes]) - self.assertEquals( - (1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape) - self.assertAllEqual( - [0.0, 0.0, 1.0, 1.0], - output_dict[fields.InputDataFields.groundtruth_boxes][0]) - - def test_build_tf_record_input_reader_and_load_instance_masks(self): - tf_record_path = self.create_tf_record() - - input_reader_text_proto = """ - shuffle: false - num_readers: 1 - load_instance_masks: true - tf_record_input_reader {{ - input_path: '{0}' - }} - """.format(tf_record_path) - input_reader_proto = input_reader_pb2.InputReader() - text_format.Merge(input_reader_text_proto, input_reader_proto) - tensor_dict = input_reader_builder.build(input_reader_proto) - - 
sv = tf.train.Supervisor(logdir=self.get_temp_dir()) - with sv.prepare_or_wait_for_session() as sess: - sv.start_queue_runners(sess) - output_dict = sess.run(tensor_dict) - - self.assertEquals( - (4, 5, 3), output_dict[fields.InputDataFields.image].shape) - self.assertEquals( - [2], output_dict[fields.InputDataFields.groundtruth_classes]) - self.assertEquals( - (1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape) - self.assertAllEqual( - [0.0, 0.0, 1.0, 1.0], - output_dict[fields.InputDataFields.groundtruth_boxes][0]) - self.assertAllEqual( - (1, 4, 5), - output_dict[fields.InputDataFields.groundtruth_instance_masks].shape) - - def test_raises_error_with_no_input_paths(self): - input_reader_text_proto = """ - shuffle: false - num_readers: 1 - load_instance_masks: true - """ - input_reader_proto = input_reader_pb2.InputReader() - text_format.Merge(input_reader_text_proto, input_reader_proto) - with self.assertRaises(ValueError): - input_reader_builder.build(input_reader_proto) - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/losses_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/losses_builder.py deleted file mode 100644 index e4f7a12400fc3ce8c90407943c4530da1cef9594..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/losses_builder.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""A function to build localization and classification losses from config.""" - -from object_detection.core import balanced_positive_negative_sampler as sampler -from object_detection.core import losses -from object_detection.protos import losses_pb2 - - -def build(loss_config): - """Build losses based on the config. - - Builds classification, localization losses and optionally a hard example miner - based on the config. - - Args: - loss_config: A losses_pb2.Loss object. - - Returns: - classification_loss: Classification loss object. - localization_loss: Localization loss object. - classification_weight: Classification loss weight. - localization_weight: Localization loss weight. - hard_example_miner: Hard example miner object. - random_example_sampler: BalancedPositiveNegativeSampler object. - - Raises: - ValueError: If hard_example_miner is used with sigmoid_focal_loss. - ValueError: If random_example_sampler is getting non-positive value as - desired positive example fraction. 
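-
-  Example (a minimal usage sketch mirroring the tests below; text_format is
-  google.protobuf.text_format, field names follow losses.proto):
-      losses_proto = losses_pb2.Loss()
-      text_format.Merge('''
-          classification_loss { weighted_softmax { } }
-          localization_loss { weighted_l2 { } }
-          classification_weight: 1.0
-          localization_weight: 1.0''', losses_proto)
-      (cls_loss, loc_loss, cls_weight, loc_weight, miner,
-       sampler) = build(losses_proto)
-      # miner and sampler are None unless configured above.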
- """ - classification_loss = _build_classification_loss( - loss_config.classification_loss) - localization_loss = _build_localization_loss( - loss_config.localization_loss) - classification_weight = loss_config.classification_weight - localization_weight = loss_config.localization_weight - hard_example_miner = None - if loss_config.HasField('hard_example_miner'): - if (loss_config.classification_loss.WhichOneof('classification_loss') == - 'weighted_sigmoid_focal'): - raise ValueError('HardExampleMiner should not be used with sigmoid focal ' - 'loss') - hard_example_miner = build_hard_example_miner( - loss_config.hard_example_miner, - classification_weight, - localization_weight) - random_example_sampler = None - if loss_config.HasField('random_example_sampler'): - if loss_config.random_example_sampler.positive_sample_fraction <= 0: - raise ValueError('RandomExampleSampler should not use non-positive' - 'value as positive sample fraction.') - random_example_sampler = sampler.BalancedPositiveNegativeSampler( - positive_fraction=loss_config.random_example_sampler. - positive_sample_fraction) - return (classification_loss, localization_loss, classification_weight, - localization_weight, hard_example_miner, random_example_sampler) - - -def build_hard_example_miner(config, - classification_weight, - localization_weight): - """Builds hard example miner based on the config. - - Args: - config: A losses_pb2.HardExampleMiner object. - classification_weight: Classification loss weight. - localization_weight: Localization loss weight. - - Returns: - Hard example miner. - - """ - loss_type = None - if config.loss_type == losses_pb2.HardExampleMiner.BOTH: - loss_type = 'both' - if config.loss_type == losses_pb2.HardExampleMiner.CLASSIFICATION: - loss_type = 'cls' - if config.loss_type == losses_pb2.HardExampleMiner.LOCALIZATION: - loss_type = 'loc' - - max_negatives_per_positive = None - num_hard_examples = None - if config.max_negatives_per_positive > 0: - max_negatives_per_positive = config.max_negatives_per_positive - if config.num_hard_examples > 0: - num_hard_examples = config.num_hard_examples - hard_example_miner = losses.HardExampleMiner( - num_hard_examples=num_hard_examples, - iou_threshold=config.iou_threshold, - loss_type=loss_type, - cls_loss_weight=classification_weight, - loc_loss_weight=localization_weight, - max_negatives_per_positive=max_negatives_per_positive, - min_negatives_per_image=config.min_negatives_per_image) - return hard_example_miner - - -def build_faster_rcnn_classification_loss(loss_config): - """Builds a classification loss for Faster RCNN based on the loss config. - - Args: - loss_config: A losses_pb2.ClassificationLoss object. - - Returns: - Loss based on the config. - - Raises: - ValueError: On invalid loss_config. - """ - if not isinstance(loss_config, losses_pb2.ClassificationLoss): - raise ValueError('loss_config not of type losses_pb2.ClassificationLoss.') - - loss_type = loss_config.WhichOneof('classification_loss') - - if loss_type == 'weighted_sigmoid': - return losses.WeightedSigmoidClassificationLoss() - if loss_type == 'weighted_softmax': - config = loss_config.weighted_softmax - return losses.WeightedSoftmaxClassificationLoss( - logit_scale=config.logit_scale) - if loss_type == 'weighted_logits_softmax': - config = loss_config.weighted_logits_softmax - return losses.WeightedSoftmaxClassificationAgainstLogitsLoss( - logit_scale=config.logit_scale) - - # By default, Faster RCNN second stage classifier uses Softmax loss - # with anchor-wise outputs. 
- config = loss_config.weighted_softmax - return losses.WeightedSoftmaxClassificationLoss( - logit_scale=config.logit_scale) - - -def _build_localization_loss(loss_config): - """Builds a localization loss based on the loss config. - - Args: - loss_config: A losses_pb2.LocalizationLoss object. - - Returns: - Loss based on the config. - - Raises: - ValueError: On invalid loss_config. - """ - if not isinstance(loss_config, losses_pb2.LocalizationLoss): - raise ValueError('loss_config not of type losses_pb2.LocalizationLoss.') - - loss_type = loss_config.WhichOneof('localization_loss') - - if loss_type == 'weighted_l2': - return losses.WeightedL2LocalizationLoss() - - if loss_type == 'weighted_smooth_l1': - return losses.WeightedSmoothL1LocalizationLoss( - loss_config.weighted_smooth_l1.delta) - - if loss_type == 'weighted_iou': - return losses.WeightedIOULocalizationLoss() - - raise ValueError('Empty loss config.') - - -def _build_classification_loss(loss_config): - """Builds a classification loss based on the loss config. - - Args: - loss_config: A losses_pb2.ClassificationLoss object. - - Returns: - Loss based on the config. - - Raises: - ValueError: On invalid loss_config. - """ - if not isinstance(loss_config, losses_pb2.ClassificationLoss): - raise ValueError('loss_config not of type losses_pb2.ClassificationLoss.') - - loss_type = loss_config.WhichOneof('classification_loss') - - if loss_type == 'weighted_sigmoid': - return losses.WeightedSigmoidClassificationLoss() - - if loss_type == 'weighted_sigmoid_focal': - config = loss_config.weighted_sigmoid_focal - alpha = None - if config.HasField('alpha'): - alpha = config.alpha - return losses.SigmoidFocalClassificationLoss( - gamma=config.gamma, - alpha=alpha) - - if loss_type == 'weighted_softmax': - config = loss_config.weighted_softmax - return losses.WeightedSoftmaxClassificationLoss( - logit_scale=config.logit_scale) - - if loss_type == 'weighted_logits_softmax': - config = loss_config.weighted_logits_softmax - return losses.WeightedSoftmaxClassificationAgainstLogitsLoss( - logit_scale=config.logit_scale) - - if loss_type == 'bootstrapped_sigmoid': - config = loss_config.bootstrapped_sigmoid - return losses.BootstrappedSigmoidClassificationLoss( - alpha=config.alpha, - bootstrap_type=('hard' if config.hard_bootstrap else 'soft')) - - raise ValueError('Empty loss config.') diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/losses_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/losses_builder_test.py deleted file mode 100644 index 4dc4a754eca9a2180963da8dbb75afd9a520225f..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/losses_builder_test.py +++ /dev/null @@ -1,488 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for losses_builder.""" - -import tensorflow as tf - -from google.protobuf import text_format -from object_detection.builders import losses_builder -from object_detection.core import losses -from object_detection.protos import losses_pb2 - - -class LocalizationLossBuilderTest(tf.test.TestCase): - - def test_build_weighted_l2_localization_loss(self): - losses_text_proto = """ - localization_loss { - weighted_l2 { - } - } - classification_loss { - weighted_softmax { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - _, localization_loss, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(localization_loss, - losses.WeightedL2LocalizationLoss)) - - def test_build_weighted_smooth_l1_localization_loss_default_delta(self): - losses_text_proto = """ - localization_loss { - weighted_smooth_l1 { - } - } - classification_loss { - weighted_softmax { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - _, localization_loss, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(localization_loss, - losses.WeightedSmoothL1LocalizationLoss)) - self.assertAlmostEqual(localization_loss._delta, 1.0) - - def test_build_weighted_smooth_l1_localization_loss_non_default_delta(self): - losses_text_proto = """ - localization_loss { - weighted_smooth_l1 { - delta: 0.1 - } - } - classification_loss { - weighted_softmax { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - _, localization_loss, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(localization_loss, - losses.WeightedSmoothL1LocalizationLoss)) - self.assertAlmostEqual(localization_loss._delta, 0.1) - - def test_build_weighted_iou_localization_loss(self): - losses_text_proto = """ - localization_loss { - weighted_iou { - } - } - classification_loss { - weighted_softmax { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - _, localization_loss, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(localization_loss, - losses.WeightedIOULocalizationLoss)) - - def test_anchorwise_output(self): - losses_text_proto = """ - localization_loss { - weighted_smooth_l1 { - } - } - classification_loss { - weighted_softmax { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - _, localization_loss, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(localization_loss, - losses.WeightedSmoothL1LocalizationLoss)) - predictions = tf.constant([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]]) - targets = tf.constant([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]]) - weights = tf.constant([[1.0, 1.0]]) - loss = localization_loss(predictions, targets, weights=weights) - self.assertEqual(loss.shape, [1, 2]) - - def test_raise_error_on_empty_localization_config(self): - losses_text_proto = """ - classification_loss { - weighted_softmax { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - with self.assertRaises(ValueError): - losses_builder._build_localization_loss(losses_proto) - - -class ClassificationLossBuilderTest(tf.test.TestCase): - - def test_build_weighted_sigmoid_classification_loss(self): - losses_text_proto = """ - classification_loss { - 
weighted_sigmoid { - } - } - localization_loss { - weighted_l2 { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss, _, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.WeightedSigmoidClassificationLoss)) - - def test_build_weighted_sigmoid_focal_classification_loss(self): - losses_text_proto = """ - classification_loss { - weighted_sigmoid_focal { - } - } - localization_loss { - weighted_l2 { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss, _, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.SigmoidFocalClassificationLoss)) - self.assertAlmostEqual(classification_loss._alpha, None) - self.assertAlmostEqual(classification_loss._gamma, 2.0) - - def test_build_weighted_sigmoid_focal_loss_non_default(self): - losses_text_proto = """ - classification_loss { - weighted_sigmoid_focal { - alpha: 0.25 - gamma: 3.0 - } - } - localization_loss { - weighted_l2 { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss, _, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.SigmoidFocalClassificationLoss)) - self.assertAlmostEqual(classification_loss._alpha, 0.25) - self.assertAlmostEqual(classification_loss._gamma, 3.0) - - def test_build_weighted_softmax_classification_loss(self): - losses_text_proto = """ - classification_loss { - weighted_softmax { - } - } - localization_loss { - weighted_l2 { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss, _, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.WeightedSoftmaxClassificationLoss)) - - def test_build_weighted_logits_softmax_classification_loss(self): - losses_text_proto = """ - classification_loss { - weighted_logits_softmax { - } - } - localization_loss { - weighted_l2 { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss, _, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue( - isinstance(classification_loss, - losses.WeightedSoftmaxClassificationAgainstLogitsLoss)) - - def test_build_weighted_softmax_classification_loss_with_logit_scale(self): - losses_text_proto = """ - classification_loss { - weighted_softmax { - logit_scale: 2.0 - } - } - localization_loss { - weighted_l2 { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss, _, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.WeightedSoftmaxClassificationLoss)) - - def test_build_bootstrapped_sigmoid_classification_loss(self): - losses_text_proto = """ - classification_loss { - bootstrapped_sigmoid { - alpha: 0.5 - } - } - localization_loss { - weighted_l2 { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss, _, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.BootstrappedSigmoidClassificationLoss)) - - def test_anchorwise_output(self): - losses_text_proto = """ - classification_loss { - weighted_sigmoid { - anchorwise_output: true - } - } - 
localization_loss { - weighted_l2 { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss, _, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.WeightedSigmoidClassificationLoss)) - predictions = tf.constant([[[0.0, 1.0, 0.0], [0.0, 0.5, 0.5]]]) - targets = tf.constant([[[0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]]) - weights = tf.constant([[1.0, 1.0]]) - loss = classification_loss(predictions, targets, weights=weights) - self.assertEqual(loss.shape, [1, 2, 3]) - - def test_raise_error_on_empty_config(self): - losses_text_proto = """ - localization_loss { - weighted_l2 { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - with self.assertRaises(ValueError): - losses_builder.build(losses_proto) - - -class HardExampleMinerBuilderTest(tf.test.TestCase): - - def test_do_not_build_hard_example_miner_by_default(self): - losses_text_proto = """ - localization_loss { - weighted_l2 { - } - } - classification_loss { - weighted_softmax { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - _, _, _, _, hard_example_miner, _ = losses_builder.build(losses_proto) - self.assertEqual(hard_example_miner, None) - - def test_build_hard_example_miner_for_classification_loss(self): - losses_text_proto = """ - localization_loss { - weighted_l2 { - } - } - classification_loss { - weighted_softmax { - } - } - hard_example_miner { - loss_type: CLASSIFICATION - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - _, _, _, _, hard_example_miner, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner)) - self.assertEqual(hard_example_miner._loss_type, 'cls') - - def test_build_hard_example_miner_for_localization_loss(self): - losses_text_proto = """ - localization_loss { - weighted_l2 { - } - } - classification_loss { - weighted_softmax { - } - } - hard_example_miner { - loss_type: LOCALIZATION - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - _, _, _, _, hard_example_miner, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner)) - self.assertEqual(hard_example_miner._loss_type, 'loc') - - def test_build_hard_example_miner_with_non_default_values(self): - losses_text_proto = """ - localization_loss { - weighted_l2 { - } - } - classification_loss { - weighted_softmax { - } - } - hard_example_miner { - num_hard_examples: 32 - iou_threshold: 0.5 - loss_type: LOCALIZATION - max_negatives_per_positive: 10 - min_negatives_per_image: 3 - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - _, _, _, _, hard_example_miner, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner)) - self.assertEqual(hard_example_miner._num_hard_examples, 32) - self.assertAlmostEqual(hard_example_miner._iou_threshold, 0.5) - self.assertEqual(hard_example_miner._max_negatives_per_positive, 10) - self.assertEqual(hard_example_miner._min_negatives_per_image, 3) - - -class LossBuilderTest(tf.test.TestCase): - - def test_build_all_loss_parameters(self): - losses_text_proto = """ - localization_loss { - weighted_l2 { - } - } - classification_loss { - weighted_softmax { - } - } - hard_example_miner { - } - classification_weight: 
0.8 - localization_weight: 0.2 - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - (classification_loss, localization_loss, - classification_weight, localization_weight, - hard_example_miner, _) = losses_builder.build(losses_proto) - self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner)) - self.assertTrue(isinstance(classification_loss, - losses.WeightedSoftmaxClassificationLoss)) - self.assertTrue(isinstance(localization_loss, - losses.WeightedL2LocalizationLoss)) - self.assertAlmostEqual(classification_weight, 0.8) - self.assertAlmostEqual(localization_weight, 0.2) - - def test_raise_error_when_both_focal_loss_and_hard_example_miner(self): - losses_text_proto = """ - localization_loss { - weighted_l2 { - } - } - classification_loss { - weighted_sigmoid_focal { - } - } - hard_example_miner { - } - classification_weight: 0.8 - localization_weight: 0.2 - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - with self.assertRaises(ValueError): - losses_builder.build(losses_proto) - - -class FasterRcnnClassificationLossBuilderTest(tf.test.TestCase): - - def test_build_sigmoid_loss(self): - losses_text_proto = """ - weighted_sigmoid { - } - """ - losses_proto = losses_pb2.ClassificationLoss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss = losses_builder.build_faster_rcnn_classification_loss( - losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.WeightedSigmoidClassificationLoss)) - - def test_build_softmax_loss(self): - losses_text_proto = """ - weighted_softmax { - } - """ - losses_proto = losses_pb2.ClassificationLoss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss = losses_builder.build_faster_rcnn_classification_loss( - losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.WeightedSoftmaxClassificationLoss)) - - def test_build_logits_softmax_loss(self): - losses_text_proto = """ - weighted_logits_softmax { - } - """ - losses_proto = losses_pb2.ClassificationLoss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss = losses_builder.build_faster_rcnn_classification_loss( - losses_proto) - self.assertTrue( - isinstance(classification_loss, - losses.WeightedSoftmaxClassificationAgainstLogitsLoss)) - - def test_build_softmax_loss_by_default(self): - losses_text_proto = """ - """ - losses_proto = losses_pb2.ClassificationLoss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss = losses_builder.build_faster_rcnn_classification_loss( - losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.WeightedSoftmaxClassificationLoss)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/matcher_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/matcher_builder.py deleted file mode 100644 index d334f435372984eb78265d72b2bcdf63c45bde5b..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/matcher_builder.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""A function to build an object detection matcher from configuration.""" - -from object_detection.matchers import argmax_matcher -from object_detection.matchers import bipartite_matcher -from object_detection.protos import matcher_pb2 - - -def build(matcher_config): - """Builds a matcher object based on the matcher config. - - Args: - matcher_config: A matcher.proto object containing the config for the desired - Matcher. - - Returns: - Matcher based on the config. - - Raises: - ValueError: On empty matcher proto. - """ - if not isinstance(matcher_config, matcher_pb2.Matcher): - raise ValueError('matcher_config not of type matcher_pb2.Matcher.') - if matcher_config.WhichOneof('matcher_oneof') == 'argmax_matcher': - matcher = matcher_config.argmax_matcher - matched_threshold = unmatched_threshold = None - if not matcher.ignore_thresholds: - matched_threshold = matcher.matched_threshold - unmatched_threshold = matcher.unmatched_threshold - return argmax_matcher.ArgMaxMatcher( - matched_threshold=matched_threshold, - unmatched_threshold=unmatched_threshold, - negatives_lower_than_unmatched=matcher.negatives_lower_than_unmatched, - force_match_for_each_row=matcher.force_match_for_each_row, - use_matmul_gather=matcher.use_matmul_gather) - if matcher_config.WhichOneof('matcher_oneof') == 'bipartite_matcher': - matcher = matcher_config.bipartite_matcher - return bipartite_matcher.GreedyBipartiteMatcher(matcher.use_matmul_gather) - raise ValueError('Empty matcher.') diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/matcher_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/matcher_builder_test.py deleted file mode 100644 index 66854491192c1739855b9f2a428a2f29005ad866..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/matcher_builder_test.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for matcher_builder.""" - -import tensorflow as tf - -from google.protobuf import text_format -from object_detection.builders import matcher_builder -from object_detection.matchers import argmax_matcher -from object_detection.matchers import bipartite_matcher -from object_detection.protos import matcher_pb2 - - -class MatcherBuilderTest(tf.test.TestCase): - - def test_build_arg_max_matcher_with_defaults(self): - matcher_text_proto = """ - argmax_matcher { - } - """ - matcher_proto = matcher_pb2.Matcher() - text_format.Merge(matcher_text_proto, matcher_proto) - matcher_object = matcher_builder.build(matcher_proto) - self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher)) - self.assertAlmostEqual(matcher_object._matched_threshold, 0.5) - self.assertAlmostEqual(matcher_object._unmatched_threshold, 0.5) - self.assertTrue(matcher_object._negatives_lower_than_unmatched) - self.assertFalse(matcher_object._force_match_for_each_row) - - def test_build_arg_max_matcher_without_thresholds(self): - matcher_text_proto = """ - argmax_matcher { - ignore_thresholds: true - } - """ - matcher_proto = matcher_pb2.Matcher() - text_format.Merge(matcher_text_proto, matcher_proto) - matcher_object = matcher_builder.build(matcher_proto) - self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher)) - self.assertEqual(matcher_object._matched_threshold, None) - self.assertEqual(matcher_object._unmatched_threshold, None) - self.assertTrue(matcher_object._negatives_lower_than_unmatched) - self.assertFalse(matcher_object._force_match_for_each_row) - - def test_build_arg_max_matcher_with_non_default_parameters(self): - matcher_text_proto = """ - argmax_matcher { - matched_threshold: 0.7 - unmatched_threshold: 0.3 - negatives_lower_than_unmatched: false - force_match_for_each_row: true - use_matmul_gather: true - } - """ - matcher_proto = matcher_pb2.Matcher() - text_format.Merge(matcher_text_proto, matcher_proto) - matcher_object = matcher_builder.build(matcher_proto) - self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher)) - self.assertAlmostEqual(matcher_object._matched_threshold, 0.7) - self.assertAlmostEqual(matcher_object._unmatched_threshold, 0.3) - self.assertFalse(matcher_object._negatives_lower_than_unmatched) - self.assertTrue(matcher_object._force_match_for_each_row) - self.assertTrue(matcher_object._use_matmul_gather) - - def test_build_bipartite_matcher(self): - matcher_text_proto = """ - bipartite_matcher { - } - """ - matcher_proto = matcher_pb2.Matcher() - text_format.Merge(matcher_text_proto, matcher_proto) - matcher_object = matcher_builder.build(matcher_proto) - self.assertTrue( - isinstance(matcher_object, bipartite_matcher.GreedyBipartiteMatcher)) - - def test_raise_error_on_empty_matcher(self): - matcher_text_proto = """ - """ - matcher_proto = matcher_pb2.Matcher() - text_format.Merge(matcher_text_proto, matcher_proto) - with self.assertRaises(ValueError): - matcher_builder.build(matcher_proto) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/model_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/model_builder.py deleted file mode 100644 index 1ebdcb79f391726f2af9c85888d36a5f881f590e..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/model_builder.py +++ /dev/null @@ -1,377 +0,0 
@@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""A function to build a DetectionModel from configuration.""" -from object_detection.builders import anchor_generator_builder -from object_detection.builders import box_coder_builder -from object_detection.builders import box_predictor_builder -from object_detection.builders import hyperparams_builder -from object_detection.builders import image_resizer_builder -from object_detection.builders import losses_builder -from object_detection.builders import matcher_builder -from object_detection.builders import post_processing_builder -from object_detection.builders import region_similarity_calculator_builder as sim_calc -from object_detection.core import box_predictor -from object_detection.meta_architectures import faster_rcnn_meta_arch -from object_detection.meta_architectures import rfcn_meta_arch -from object_detection.meta_architectures import ssd_meta_arch -from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res -from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2 -from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas -from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas -from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1 -from object_detection.models import ssd_resnet_v1_fpn_feature_extractor as ssd_resnet_v1_fpn -from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor -from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor -from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor -from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor -from object_detection.models.ssd_mobilenet_v2_feature_extractor import SSDMobileNetV2FeatureExtractor -from object_detection.protos import model_pb2 - -# A map of names to SSD feature extractors. -SSD_FEATURE_EXTRACTOR_CLASS_MAP = { - 'ssd_inception_v2': SSDInceptionV2FeatureExtractor, - 'ssd_inception_v3': SSDInceptionV3FeatureExtractor, - 'ssd_mobilenet_v1': SSDMobileNetV1FeatureExtractor, - 'ssd_mobilenet_v2': SSDMobileNetV2FeatureExtractor, - 'ssd_resnet50_v1_fpn': ssd_resnet_v1_fpn.SSDResnet50V1FpnFeatureExtractor, - 'ssd_resnet101_v1_fpn': ssd_resnet_v1_fpn.SSDResnet101V1FpnFeatureExtractor, - 'ssd_resnet152_v1_fpn': ssd_resnet_v1_fpn.SSDResnet152V1FpnFeatureExtractor, - 'embedded_ssd_mobilenet_v1': EmbeddedSSDMobileNetV1FeatureExtractor, -} - -# A map of names to Faster R-CNN feature extractors. 
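-# Keys correspond to the feature_extractor.type strings used in
-# faster_rcnn model configs.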
-FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = { - 'faster_rcnn_nas': - frcnn_nas.FasterRCNNNASFeatureExtractor, - 'faster_rcnn_pnas': - frcnn_pnas.FasterRCNNPNASFeatureExtractor, - 'faster_rcnn_inception_resnet_v2': - frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor, - 'faster_rcnn_inception_v2': - frcnn_inc_v2.FasterRCNNInceptionV2FeatureExtractor, - 'faster_rcnn_resnet50': - frcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor, - 'faster_rcnn_resnet101': - frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor, - 'faster_rcnn_resnet152': - frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor, -} - - -def build(model_config, is_training, add_summaries=True, - add_background_class=True): - """Builds a DetectionModel based on the model config. - - Args: - model_config: A model.proto object containing the config for the desired - DetectionModel. - is_training: True if this model is being built for training purposes. - add_summaries: Whether to add tensorflow summaries in the model graph. - add_background_class: Whether to add an implicit background class to one-hot - encodings of groundtruth labels. Set to false if using groundtruth labels - with an explicit background class or using multiclass scores instead of - truth in the case of distillation. Ignored in the case of faster_rcnn. - Returns: - DetectionModel based on the config. - - Raises: - ValueError: On invalid meta architecture or model. - """ - if not isinstance(model_config, model_pb2.DetectionModel): - raise ValueError('model_config not of type model_pb2.DetectionModel.') - meta_architecture = model_config.WhichOneof('model') - if meta_architecture == 'ssd': - return _build_ssd_model(model_config.ssd, is_training, add_summaries, - add_background_class) - if meta_architecture == 'faster_rcnn': - return _build_faster_rcnn_model(model_config.faster_rcnn, is_training, - add_summaries) - raise ValueError('Unknown meta architecture: {}'.format(meta_architecture)) - - -def _build_ssd_feature_extractor(feature_extractor_config, is_training, - reuse_weights=None): - """Builds a ssd_meta_arch.SSDFeatureExtractor based on config. - - Args: - feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto. - is_training: True if this feature extractor is being built for training. - reuse_weights: if the feature extractor should reuse weights. - - Returns: - ssd_meta_arch.SSDFeatureExtractor based on config. - - Raises: - ValueError: On invalid feature extractor type. 
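-
-  For example, a config with type: 'ssd_mobilenet_v1' resolves through
-  SSD_FEATURE_EXTRACTOR_CLASS_MAP to SSDMobileNetV1FeatureExtractor.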
- """ - feature_type = feature_extractor_config.type - depth_multiplier = feature_extractor_config.depth_multiplier - min_depth = feature_extractor_config.min_depth - pad_to_multiple = feature_extractor_config.pad_to_multiple - use_explicit_padding = feature_extractor_config.use_explicit_padding - use_depthwise = feature_extractor_config.use_depthwise - conv_hyperparams = hyperparams_builder.build( - feature_extractor_config.conv_hyperparams, is_training) - override_base_feature_extractor_hyperparams = ( - feature_extractor_config.override_base_feature_extractor_hyperparams) - - if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP: - raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type)) - - feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type] - return feature_extractor_class( - is_training, depth_multiplier, min_depth, pad_to_multiple, - conv_hyperparams, reuse_weights, use_explicit_padding, use_depthwise, - override_base_feature_extractor_hyperparams) - - -def _build_ssd_model(ssd_config, is_training, add_summaries, - add_background_class=True): - """Builds an SSD detection model based on the model config. - - Args: - ssd_config: A ssd.proto object containing the config for the desired - SSDMetaArch. - is_training: True if this model is being built for training purposes. - add_summaries: Whether to add tf summaries in the model. - add_background_class: Whether to add an implicit background class to one-hot - encodings of groundtruth labels. Set to false if using groundtruth labels - with an explicit background class or using multiclass scores instead of - truth in the case of distillation. - Returns: - SSDMetaArch based on the config. - - Raises: - ValueError: If ssd_config.type is not recognized (i.e. not registered in - model_class_map). 
- """ - num_classes = ssd_config.num_classes - - # Feature extractor - feature_extractor = _build_ssd_feature_extractor( - feature_extractor_config=ssd_config.feature_extractor, - is_training=is_training) - - box_coder = box_coder_builder.build(ssd_config.box_coder) - matcher = matcher_builder.build(ssd_config.matcher) - region_similarity_calculator = sim_calc.build( - ssd_config.similarity_calculator) - encode_background_as_zeros = ssd_config.encode_background_as_zeros - negative_class_weight = ssd_config.negative_class_weight - ssd_box_predictor = box_predictor_builder.build(hyperparams_builder.build, - ssd_config.box_predictor, - is_training, num_classes) - anchor_generator = anchor_generator_builder.build( - ssd_config.anchor_generator) - image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer) - non_max_suppression_fn, score_conversion_fn = post_processing_builder.build( - ssd_config.post_processing) - (classification_loss, localization_loss, classification_weight, - localization_weight, hard_example_miner, - random_example_sampler) = losses_builder.build(ssd_config.loss) - normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches - normalize_loc_loss_by_codesize = ssd_config.normalize_loc_loss_by_codesize - - return ssd_meta_arch.SSDMetaArch( - is_training, - anchor_generator, - ssd_box_predictor, - box_coder, - feature_extractor, - matcher, - region_similarity_calculator, - encode_background_as_zeros, - negative_class_weight, - image_resizer_fn, - non_max_suppression_fn, - score_conversion_fn, - classification_loss, - localization_loss, - classification_weight, - localization_weight, - normalize_loss_by_num_matches, - hard_example_miner, - add_summaries=add_summaries, - normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize, - freeze_batchnorm=ssd_config.freeze_batchnorm, - inplace_batchnorm_update=ssd_config.inplace_batchnorm_update, - add_background_class=add_background_class, - random_example_sampler=random_example_sampler) - - -def _build_faster_rcnn_feature_extractor( - feature_extractor_config, is_training, reuse_weights=None, - inplace_batchnorm_update=False): - """Builds a faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config. - - Args: - feature_extractor_config: A FasterRcnnFeatureExtractor proto config from - faster_rcnn.proto. - is_training: True if this feature extractor is being built for training. - reuse_weights: if the feature extractor should reuse weights. - inplace_batchnorm_update: Whether to update batch_norm inplace during - training. This is required for batch norm to work correctly on TPUs. When - this is false, user must add a control dependency on - tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch - norm moving average parameters. - - Returns: - faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config. - - Raises: - ValueError: On invalid feature extractor type. 
- """ - if inplace_batchnorm_update: - raise ValueError('inplace batchnorm updates not supported.') - feature_type = feature_extractor_config.type - first_stage_features_stride = ( - feature_extractor_config.first_stage_features_stride) - batch_norm_trainable = feature_extractor_config.batch_norm_trainable - - if feature_type not in FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP: - raise ValueError('Unknown Faster R-CNN feature_extractor: {}'.format( - feature_type)) - feature_extractor_class = FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP[ - feature_type] - return feature_extractor_class( - is_training, first_stage_features_stride, - batch_norm_trainable, reuse_weights) - - -def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries): - """Builds a Faster R-CNN or R-FCN detection model based on the model config. - - Builds R-FCN model if the second_stage_box_predictor in the config is of type - `rfcn_box_predictor` else builds a Faster R-CNN model. - - Args: - frcnn_config: A faster_rcnn.proto object containing the config for the - desired FasterRCNNMetaArch or RFCNMetaArch. - is_training: True if this model is being built for training purposes. - add_summaries: Whether to add tf summaries in the model. - - Returns: - FasterRCNNMetaArch based on the config. - - Raises: - ValueError: If frcnn_config.type is not recognized (i.e. not registered in - model_class_map). - """ - num_classes = frcnn_config.num_classes - image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer) - - feature_extractor = _build_faster_rcnn_feature_extractor( - frcnn_config.feature_extractor, is_training, - frcnn_config.inplace_batchnorm_update) - - number_of_stages = frcnn_config.number_of_stages - first_stage_anchor_generator = anchor_generator_builder.build( - frcnn_config.first_stage_anchor_generator) - - first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate - first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build( - frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training) - first_stage_box_predictor_kernel_size = ( - frcnn_config.first_stage_box_predictor_kernel_size) - first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth - first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size - first_stage_positive_balance_fraction = ( - frcnn_config.first_stage_positive_balance_fraction) - first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold - first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold - first_stage_max_proposals = frcnn_config.first_stage_max_proposals - first_stage_loc_loss_weight = ( - frcnn_config.first_stage_localization_loss_weight) - first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight - - initial_crop_size = frcnn_config.initial_crop_size - maxpool_kernel_size = frcnn_config.maxpool_kernel_size - maxpool_stride = frcnn_config.maxpool_stride - - second_stage_box_predictor = box_predictor_builder.build( - hyperparams_builder.build, - frcnn_config.second_stage_box_predictor, - is_training=is_training, - num_classes=num_classes) - second_stage_batch_size = frcnn_config.second_stage_batch_size - second_stage_balance_fraction = frcnn_config.second_stage_balance_fraction - (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn - ) = post_processing_builder.build(frcnn_config.second_stage_post_processing) - second_stage_localization_loss_weight = ( - frcnn_config.second_stage_localization_loss_weight) - second_stage_classification_loss = ( - 
losses_builder.build_faster_rcnn_classification_loss( - frcnn_config.second_stage_classification_loss)) - second_stage_classification_loss_weight = ( - frcnn_config.second_stage_classification_loss_weight) - second_stage_mask_prediction_loss_weight = ( - frcnn_config.second_stage_mask_prediction_loss_weight) - - hard_example_miner = None - if frcnn_config.HasField('hard_example_miner'): - hard_example_miner = losses_builder.build_hard_example_miner( - frcnn_config.hard_example_miner, - second_stage_classification_loss_weight, - second_stage_localization_loss_weight) - - common_kwargs = { - 'is_training': is_training, - 'num_classes': num_classes, - 'image_resizer_fn': image_resizer_fn, - 'feature_extractor': feature_extractor, - 'number_of_stages': number_of_stages, - 'first_stage_anchor_generator': first_stage_anchor_generator, - 'first_stage_atrous_rate': first_stage_atrous_rate, - 'first_stage_box_predictor_arg_scope_fn': - first_stage_box_predictor_arg_scope_fn, - 'first_stage_box_predictor_kernel_size': - first_stage_box_predictor_kernel_size, - 'first_stage_box_predictor_depth': first_stage_box_predictor_depth, - 'first_stage_minibatch_size': first_stage_minibatch_size, - 'first_stage_positive_balance_fraction': - first_stage_positive_balance_fraction, - 'first_stage_nms_score_threshold': first_stage_nms_score_threshold, - 'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold, - 'first_stage_max_proposals': first_stage_max_proposals, - 'first_stage_localization_loss_weight': first_stage_loc_loss_weight, - 'first_stage_objectness_loss_weight': first_stage_obj_loss_weight, - 'second_stage_batch_size': second_stage_batch_size, - 'second_stage_balance_fraction': second_stage_balance_fraction, - 'second_stage_non_max_suppression_fn': - second_stage_non_max_suppression_fn, - 'second_stage_score_conversion_fn': second_stage_score_conversion_fn, - 'second_stage_localization_loss_weight': - second_stage_localization_loss_weight, - 'second_stage_classification_loss': - second_stage_classification_loss, - 'second_stage_classification_loss_weight': - second_stage_classification_loss_weight, - 'hard_example_miner': hard_example_miner, - 'add_summaries': add_summaries} - - if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor): - return rfcn_meta_arch.RFCNMetaArch( - second_stage_rfcn_box_predictor=second_stage_box_predictor, - **common_kwargs) - else: - return faster_rcnn_meta_arch.FasterRCNNMetaArch( - initial_crop_size=initial_crop_size, - maxpool_kernel_size=maxpool_kernel_size, - maxpool_stride=maxpool_stride, - second_stage_mask_rcnn_box_predictor=second_stage_box_predictor, - second_stage_mask_prediction_loss_weight=( - second_stage_mask_prediction_loss_weight), - **common_kwargs) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/model_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/model_builder_test.py deleted file mode 100644 index 225e1d50b1c229b7d7b2017661df55973098eb99..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/model_builder_test.py +++ /dev/null @@ -1,1056 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.models.model_builder.""" - -import tensorflow as tf - -from google.protobuf import text_format -from object_detection.builders import model_builder -from object_detection.meta_architectures import faster_rcnn_meta_arch -from object_detection.meta_architectures import rfcn_meta_arch -from object_detection.meta_architectures import ssd_meta_arch -from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res -from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2 -from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas -from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas -from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1 -from object_detection.models import ssd_resnet_v1_fpn_feature_extractor as ssd_resnet_v1_fpn -from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor -from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor -from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor -from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor -from object_detection.models.ssd_mobilenet_v2_feature_extractor import SSDMobileNetV2FeatureExtractor -from object_detection.protos import model_pb2 - -FRCNN_RESNET_FEAT_MAPS = { - 'faster_rcnn_resnet50': - frcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor, - 'faster_rcnn_resnet101': - frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor, - 'faster_rcnn_resnet152': - frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor -} - -SSD_RESNET_V1_FPN_FEAT_MAPS = { - 'ssd_resnet50_v1_fpn': - ssd_resnet_v1_fpn.SSDResnet50V1FpnFeatureExtractor, - 'ssd_resnet101_v1_fpn': - ssd_resnet_v1_fpn.SSDResnet101V1FpnFeatureExtractor, - 'ssd_resnet152_v1_fpn': - ssd_resnet_v1_fpn.SSDResnet152V1FpnFeatureExtractor -} - - -class ModelBuilderTest(tf.test.TestCase): - - def create_model(self, model_config): - """Builds a DetectionModel based on the model config. - - Args: - model_config: A model.proto object containing the config for the desired - DetectionModel. - - Returns: - DetectionModel based on the config. 
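-
-    Note: the model is always built with is_training=True.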
- """ - return model_builder.build(model_config, is_training=True) - - def test_create_ssd_inception_v2_model_from_config(self): - model_text_proto = """ - ssd { - feature_extractor { - type: 'ssd_inception_v2' - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - override_base_feature_extractor_hyperparams: true - } - box_coder { - faster_rcnn_box_coder { - } - } - matcher { - argmax_matcher { - } - } - similarity_calculator { - iou_similarity { - } - } - anchor_generator { - ssd_anchor_generator { - aspect_ratios: 1.0 - } - } - image_resizer { - fixed_shape_resizer { - height: 320 - width: 320 - } - } - box_predictor { - convolutional_box_predictor { - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - loss { - classification_loss { - weighted_softmax { - } - } - localization_loss { - weighted_smooth_l1 { - } - } - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = self.create_model(model_proto) - self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch) - self.assertIsInstance(model._feature_extractor, - SSDInceptionV2FeatureExtractor) - - def test_create_ssd_inception_v3_model_from_config(self): - model_text_proto = """ - ssd { - feature_extractor { - type: 'ssd_inception_v3' - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - override_base_feature_extractor_hyperparams: true - } - box_coder { - faster_rcnn_box_coder { - } - } - matcher { - argmax_matcher { - } - } - similarity_calculator { - iou_similarity { - } - } - anchor_generator { - ssd_anchor_generator { - aspect_ratios: 1.0 - } - } - image_resizer { - fixed_shape_resizer { - height: 320 - width: 320 - } - } - box_predictor { - convolutional_box_predictor { - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - loss { - classification_loss { - weighted_softmax { - } - } - localization_loss { - weighted_smooth_l1 { - } - } - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = self.create_model(model_proto) - self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch) - self.assertIsInstance(model._feature_extractor, - SSDInceptionV3FeatureExtractor) - - def test_create_ssd_resnet_v1_fpn_model_from_config(self): - model_text_proto = """ - ssd { - feature_extractor { - type: 'ssd_resnet50_v1_fpn' - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - box_coder { - faster_rcnn_box_coder { - } - } - matcher { - argmax_matcher { - } - } - similarity_calculator { - iou_similarity { - } - } - encode_background_as_zeros: true - anchor_generator { - multiscale_anchor_generator { - aspect_ratios: [1.0, 2.0, 0.5] - scales_per_octave: 2 - } - } - image_resizer { - fixed_shape_resizer { - height: 320 - width: 320 - } - } - box_predictor { - weight_shared_convolutional_box_predictor { - depth: 32 - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - random_normal_initializer { - } - } - } - num_layers_before_predictor: 1 - } - } - normalize_loss_by_num_matches: true - normalize_loc_loss_by_codesize: true - loss { - classification_loss { - weighted_sigmoid_focal { - alpha: 0.25 - gamma: 2.0 - } - } - localization_loss { - 
weighted_smooth_l1 { - delta: 0.1 - } - } - classification_weight: 1.0 - localization_weight: 1.0 - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - - for extractor_type, extractor_class in SSD_RESNET_V1_FPN_FEAT_MAPS.items(): - model_proto.ssd.feature_extractor.type = extractor_type - model = model_builder.build(model_proto, is_training=True) - self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch) - self.assertIsInstance(model._feature_extractor, extractor_class) - - def test_create_ssd_mobilenet_v1_model_from_config(self): - model_text_proto = """ - ssd { - freeze_batchnorm: true - inplace_batchnorm_update: true - feature_extractor { - type: 'ssd_mobilenet_v1' - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - box_coder { - faster_rcnn_box_coder { - } - } - matcher { - argmax_matcher { - } - } - similarity_calculator { - iou_similarity { - } - } - anchor_generator { - ssd_anchor_generator { - aspect_ratios: 1.0 - } - } - image_resizer { - fixed_shape_resizer { - height: 320 - width: 320 - } - } - box_predictor { - convolutional_box_predictor { - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - normalize_loc_loss_by_codesize: true - loss { - classification_loss { - weighted_softmax { - } - } - localization_loss { - weighted_smooth_l1 { - } - } - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = self.create_model(model_proto) - self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch) - self.assertIsInstance(model._feature_extractor, - SSDMobileNetV1FeatureExtractor) - self.assertTrue(model._normalize_loc_loss_by_codesize) - self.assertTrue(model._freeze_batchnorm) - self.assertTrue(model._inplace_batchnorm_update) - - def test_create_ssd_mobilenet_v2_model_from_config(self): - model_text_proto = """ - ssd { - feature_extractor { - type: 'ssd_mobilenet_v2' - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - box_coder { - faster_rcnn_box_coder { - } - } - matcher { - argmax_matcher { - } - } - similarity_calculator { - iou_similarity { - } - } - anchor_generator { - ssd_anchor_generator { - aspect_ratios: 1.0 - } - } - image_resizer { - fixed_shape_resizer { - height: 320 - width: 320 - } - } - box_predictor { - convolutional_box_predictor { - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - normalize_loc_loss_by_codesize: true - loss { - classification_loss { - weighted_softmax { - } - } - localization_loss { - weighted_smooth_l1 { - } - } - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = self.create_model(model_proto) - self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch) - self.assertIsInstance(model._feature_extractor, - SSDMobileNetV2FeatureExtractor) - self.assertTrue(model._normalize_loc_loss_by_codesize) - - def test_create_embedded_ssd_mobilenet_v1_model_from_config(self): - model_text_proto = """ - ssd { - feature_extractor { - type: 'embedded_ssd_mobilenet_v1' - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - box_coder { - faster_rcnn_box_coder { - } - } - matcher { - argmax_matcher { - } - } - 
similarity_calculator { - iou_similarity { - } - } - anchor_generator { - ssd_anchor_generator { - aspect_ratios: 1.0 - } - } - image_resizer { - fixed_shape_resizer { - height: 256 - width: 256 - } - } - box_predictor { - convolutional_box_predictor { - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - loss { - classification_loss { - weighted_softmax { - } - } - localization_loss { - weighted_smooth_l1 { - } - } - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = self.create_model(model_proto) - self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch) - self.assertIsInstance(model._feature_extractor, - EmbeddedSSDMobileNetV1FeatureExtractor) - - def test_create_faster_rcnn_resnet_v1_models_from_config(self): - model_text_proto = """ - faster_rcnn { - inplace_batchnorm_update: true - num_classes: 3 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_resnet101' - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - initial_crop_size: 14 - maxpool_kernel_size: 2 - maxpool_stride: 2 - second_stage_box_predictor { - mask_rcnn_box_predictor { - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.01 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - for extractor_type, extractor_class in FRCNN_RESNET_FEAT_MAPS.items(): - model_proto.faster_rcnn.feature_extractor.type = extractor_type - model = model_builder.build(model_proto, is_training=True) - self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch) - self.assertIsInstance(model._feature_extractor, extractor_class) - - def test_create_faster_rcnn_resnet101_with_mask_prediction_enabled(self): - model_text_proto = """ - faster_rcnn { - num_classes: 3 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_resnet101' - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - initial_crop_size: 14 - maxpool_kernel_size: 2 - maxpool_stride: 2 - second_stage_box_predictor { - mask_rcnn_box_predictor { - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - predict_instance_masks: true - } - } - second_stage_mask_prediction_loss_weight: 3.0 - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.01 - iou_threshold: 0.6 - 
max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = model_builder.build(model_proto, is_training=True) - self.assertAlmostEqual(model._second_stage_mask_loss_weight, 3.0) - - def test_create_faster_rcnn_nas_model_from_config(self): - model_text_proto = """ - faster_rcnn { - num_classes: 3 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_nas' - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - initial_crop_size: 17 - maxpool_kernel_size: 1 - maxpool_stride: 1 - second_stage_box_predictor { - mask_rcnn_box_predictor { - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.01 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = model_builder.build(model_proto, is_training=True) - self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch) - self.assertIsInstance( - model._feature_extractor, - frcnn_nas.FasterRCNNNASFeatureExtractor) - - def test_create_faster_rcnn_pnas_model_from_config(self): - model_text_proto = """ - faster_rcnn { - num_classes: 3 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_pnas' - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - initial_crop_size: 17 - maxpool_kernel_size: 1 - maxpool_stride: 1 - second_stage_box_predictor { - mask_rcnn_box_predictor { - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.01 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = model_builder.build(model_proto, is_training=True) - self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch) - self.assertIsInstance( - model._feature_extractor, - frcnn_pnas.FasterRCNNPNASFeatureExtractor) - - def test_create_faster_rcnn_inception_resnet_v2_model_from_config(self): - model_text_proto = """ - faster_rcnn { - num_classes: 3 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_inception_resnet_v2' - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - 
height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - initial_crop_size: 17 - maxpool_kernel_size: 1 - maxpool_stride: 1 - second_stage_box_predictor { - mask_rcnn_box_predictor { - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.01 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = model_builder.build(model_proto, is_training=True) - self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch) - self.assertIsInstance( - model._feature_extractor, - frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor) - - def test_create_faster_rcnn_inception_v2_model_from_config(self): - model_text_proto = """ - faster_rcnn { - num_classes: 3 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_inception_v2' - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - initial_crop_size: 14 - maxpool_kernel_size: 2 - maxpool_stride: 2 - second_stage_box_predictor { - mask_rcnn_box_predictor { - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.01 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = model_builder.build(model_proto, is_training=True) - self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch) - self.assertIsInstance(model._feature_extractor, - frcnn_inc_v2.FasterRCNNInceptionV2FeatureExtractor) - - def test_create_faster_rcnn_model_from_config_with_example_miner(self): - model_text_proto = """ - faster_rcnn { - num_classes: 3 - feature_extractor { - type: 'faster_rcnn_inception_resnet_v2' - } - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - second_stage_box_predictor { - mask_rcnn_box_predictor { - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - hard_example_miner { - num_hard_examples: 10 - iou_threshold: 0.99 - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = model_builder.build(model_proto, is_training=True) - self.assertIsNotNone(model._hard_example_miner) - - def 
test_create_rfcn_resnet_v1_model_from_config(self): - model_text_proto = """ - faster_rcnn { - num_classes: 3 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_resnet101' - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - initial_crop_size: 14 - maxpool_kernel_size: 2 - maxpool_stride: 2 - second_stage_box_predictor { - rfcn_box_predictor { - conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.01 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - for extractor_type, extractor_class in FRCNN_RESNET_FEAT_MAPS.items(): - model_proto.faster_rcnn.feature_extractor.type = extractor_type - model = model_builder.build(model_proto, is_training=True) - self.assertIsInstance(model, rfcn_meta_arch.RFCNMetaArch) - self.assertIsInstance(model._feature_extractor, extractor_class) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/optimizer_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/optimizer_builder.py deleted file mode 100644 index e3a437f0d9a9442dfb1fff3013b250e4e854a2c2..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/optimizer_builder.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Functions to build DetectionModel training optimizers.""" - -import tensorflow as tf -from object_detection.utils import learning_schedules - - -def build(optimizer_config): - """Create optimizer based on config. - - Args: - optimizer_config: An Optimizer proto message. - - Returns: - An optimizer and a list of variables for summary. - - Raises: - ValueError: when an unsupported optimizer type is specified.
- """ - optimizer_type = optimizer_config.WhichOneof('optimizer') - optimizer = None - - summary_vars = [] - if optimizer_type == 'rms_prop_optimizer': - config = optimizer_config.rms_prop_optimizer - learning_rate = _create_learning_rate(config.learning_rate) - summary_vars.append(learning_rate) - optimizer = tf.train.RMSPropOptimizer( - learning_rate, - decay=config.decay, - momentum=config.momentum_optimizer_value, - epsilon=config.epsilon) - - if optimizer_type == 'momentum_optimizer': - config = optimizer_config.momentum_optimizer - learning_rate = _create_learning_rate(config.learning_rate) - summary_vars.append(learning_rate) - optimizer = tf.train.MomentumOptimizer( - learning_rate, - momentum=config.momentum_optimizer_value) - - if optimizer_type == 'adam_optimizer': - config = optimizer_config.adam_optimizer - learning_rate = _create_learning_rate(config.learning_rate) - summary_vars.append(learning_rate) - optimizer = tf.train.AdamOptimizer(learning_rate) - - if optimizer is None: - raise ValueError('Optimizer %s not supported.' % optimizer_type) - - if optimizer_config.use_moving_average: - optimizer = tf.contrib.opt.MovingAverageOptimizer( - optimizer, average_decay=optimizer_config.moving_average_decay) - - return optimizer, summary_vars - - -def _create_learning_rate(learning_rate_config): - """Create optimizer learning rate based on config. - - Args: - learning_rate_config: A LearningRate proto message. - - Returns: - A learning rate. - - Raises: - ValueError: when using an unsupported input data type. - """ - learning_rate = None - learning_rate_type = learning_rate_config.WhichOneof('learning_rate') - if learning_rate_type == 'constant_learning_rate': - config = learning_rate_config.constant_learning_rate - learning_rate = tf.constant(config.learning_rate, dtype=tf.float32, - name='learning_rate') - - if learning_rate_type == 'exponential_decay_learning_rate': - config = learning_rate_config.exponential_decay_learning_rate - learning_rate = tf.train.exponential_decay( - config.initial_learning_rate, - tf.train.get_or_create_global_step(), - config.decay_steps, - config.decay_factor, - staircase=config.staircase, name='learning_rate') - - if learning_rate_type == 'manual_step_learning_rate': - config = learning_rate_config.manual_step_learning_rate - if not config.schedule: - raise ValueError('Empty learning rate schedule.') - learning_rate_step_boundaries = [x.step for x in config.schedule] - learning_rate_sequence = [config.initial_learning_rate] - learning_rate_sequence += [x.learning_rate for x in config.schedule] - learning_rate = learning_schedules.manual_stepping( - tf.train.get_or_create_global_step(), learning_rate_step_boundaries, - learning_rate_sequence, config.warmup) - - if learning_rate_type == 'cosine_decay_learning_rate': - config = learning_rate_config.cosine_decay_learning_rate - learning_rate = learning_schedules.cosine_decay_with_warmup( - tf.train.get_or_create_global_step(), - config.learning_rate_base, - config.total_steps, - config.warmup_learning_rate, - config.warmup_steps, - config.hold_base_rate_steps) - - if learning_rate is None: - raise ValueError('Learning_rate %s not supported.' 
% learning_rate_type) - - return learning_rate diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/optimizer_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/optimizer_builder_test.py deleted file mode 100644 index 343a858fb90b223d7f82b1d11466a6478d73f3e5..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/optimizer_builder_test.py +++ /dev/null @@ -1,208 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for optimizer_builder.""" - -import tensorflow as tf - -from google.protobuf import text_format - -from object_detection.builders import optimizer_builder -from object_detection.protos import optimizer_pb2 - - -class LearningRateBuilderTest(tf.test.TestCase): - - def testBuildConstantLearningRate(self): - learning_rate_text_proto = """ - constant_learning_rate { - learning_rate: 0.004 - } - """ - learning_rate_proto = optimizer_pb2.LearningRate() - text_format.Merge(learning_rate_text_proto, learning_rate_proto) - learning_rate = optimizer_builder._create_learning_rate( - learning_rate_proto) - self.assertTrue(learning_rate.op.name.endswith('learning_rate')) - with self.test_session(): - learning_rate_out = learning_rate.eval() - self.assertAlmostEqual(learning_rate_out, 0.004) - - def testBuildExponentialDecayLearningRate(self): - learning_rate_text_proto = """ - exponential_decay_learning_rate { - initial_learning_rate: 0.004 - decay_steps: 99999 - decay_factor: 0.85 - staircase: false - } - """ - learning_rate_proto = optimizer_pb2.LearningRate() - text_format.Merge(learning_rate_text_proto, learning_rate_proto) - learning_rate = optimizer_builder._create_learning_rate( - learning_rate_proto) - self.assertTrue(learning_rate.op.name.endswith('learning_rate')) - self.assertTrue(isinstance(learning_rate, tf.Tensor)) - - def testBuildManualStepLearningRate(self): - learning_rate_text_proto = """ - manual_step_learning_rate { - initial_learning_rate: 0.002 - schedule { - step: 100 - learning_rate: 0.006 - } - schedule { - step: 90000 - learning_rate: 0.00006 - } - warmup: true - } - """ - learning_rate_proto = optimizer_pb2.LearningRate() - text_format.Merge(learning_rate_text_proto, learning_rate_proto) - learning_rate = optimizer_builder._create_learning_rate( - learning_rate_proto) - self.assertTrue(isinstance(learning_rate, tf.Tensor)) - - def testBuildCosineDecayLearningRate(self): - learning_rate_text_proto = """ - cosine_decay_learning_rate { - learning_rate_base: 0.002 - total_steps: 20000 - warmup_learning_rate: 0.0001 - warmup_steps: 1000 - hold_base_rate_steps: 20000 - } - """ - learning_rate_proto = optimizer_pb2.LearningRate() - text_format.Merge(learning_rate_text_proto, learning_rate_proto) - learning_rate = optimizer_builder._create_learning_rate( - learning_rate_proto) - 
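As an aside, a minimal standalone sketch of the merge-then-build pattern these LearningRateBuilderTest cases exercise; the schedule values below are illustrative only, not values prescribed by any test:

```python
from google.protobuf import text_format

from object_detection.builders import optimizer_builder
from object_detection.protos import optimizer_pb2

# Any LearningRate oneof variant exercised in these tests (constant,
# exponential_decay, manual_step, cosine_decay) merges the same way.
lr_text_proto = """
  cosine_decay_learning_rate {
    learning_rate_base: 0.002
    total_steps: 20000
    warmup_learning_rate: 0.0001
    warmup_steps: 1000
    hold_base_rate_steps: 20000
  }
"""
lr_proto = optimizer_pb2.LearningRate()
text_format.Merge(lr_text_proto, lr_proto)

# _create_learning_rate is a private helper, used here exactly as the
# tests use it. It returns a tf.Tensor wired to the global step, which
# is why the tests assert on the tensor's type rather than a value.
learning_rate = optimizer_builder._create_learning_rate(lr_proto)
```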
self.assertTrue(isinstance(learning_rate, tf.Tensor)) - - def testRaiseErrorOnEmptyLearningRate(self): - learning_rate_text_proto = """ - """ - learning_rate_proto = optimizer_pb2.LearningRate() - text_format.Merge(learning_rate_text_proto, learning_rate_proto) - with self.assertRaises(ValueError): - optimizer_builder._create_learning_rate(learning_rate_proto) - - -class OptimizerBuilderTest(tf.test.TestCase): - - def testBuildRMSPropOptimizer(self): - optimizer_text_proto = """ - rms_prop_optimizer: { - learning_rate: { - exponential_decay_learning_rate { - initial_learning_rate: 0.004 - decay_steps: 800720 - decay_factor: 0.95 - } - } - momentum_optimizer_value: 0.9 - decay: 0.9 - epsilon: 1.0 - } - use_moving_average: false - """ - optimizer_proto = optimizer_pb2.Optimizer() - text_format.Merge(optimizer_text_proto, optimizer_proto) - optimizer, _ = optimizer_builder.build(optimizer_proto) - self.assertTrue(isinstance(optimizer, tf.train.RMSPropOptimizer)) - - def testBuildMomentumOptimizer(self): - optimizer_text_proto = """ - momentum_optimizer: { - learning_rate: { - constant_learning_rate { - learning_rate: 0.001 - } - } - momentum_optimizer_value: 0.99 - } - use_moving_average: false - """ - optimizer_proto = optimizer_pb2.Optimizer() - text_format.Merge(optimizer_text_proto, optimizer_proto) - optimizer, _ = optimizer_builder.build(optimizer_proto) - self.assertTrue(isinstance(optimizer, tf.train.MomentumOptimizer)) - - def testBuildAdamOptimizer(self): - optimizer_text_proto = """ - adam_optimizer: { - learning_rate: { - constant_learning_rate { - learning_rate: 0.002 - } - } - } - use_moving_average: false - """ - optimizer_proto = optimizer_pb2.Optimizer() - text_format.Merge(optimizer_text_proto, optimizer_proto) - optimizer, _ = optimizer_builder.build(optimizer_proto) - self.assertTrue(isinstance(optimizer, tf.train.AdamOptimizer)) - - def testBuildMovingAverageOptimizer(self): - optimizer_text_proto = """ - adam_optimizer: { - learning_rate: { - constant_learning_rate { - learning_rate: 0.002 - } - } - } - use_moving_average: True - """ - optimizer_proto = optimizer_pb2.Optimizer() - text_format.Merge(optimizer_text_proto, optimizer_proto) - optimizer, _ = optimizer_builder.build(optimizer_proto) - self.assertTrue( - isinstance(optimizer, tf.contrib.opt.MovingAverageOptimizer)) - - def testBuildMovingAverageOptimizerWithNonDefaultDecay(self): - optimizer_text_proto = """ - adam_optimizer: { - learning_rate: { - constant_learning_rate { - learning_rate: 0.002 - } - } - } - use_moving_average: True - moving_average_decay: 0.2 - """ - optimizer_proto = optimizer_pb2.Optimizer() - text_format.Merge(optimizer_text_proto, optimizer_proto) - optimizer, _ = optimizer_builder.build(optimizer_proto) - self.assertTrue( - isinstance(optimizer, tf.contrib.opt.MovingAverageOptimizer)) - # TODO(rathodv): Find a way to not depend on the private members. 
- self.assertAlmostEqual(optimizer._ema._decay, 0.2) - - def testBuildEmptyOptimizer(self): - optimizer_text_proto = """ - """ - optimizer_proto = optimizer_pb2.Optimizer() - text_format.Merge(optimizer_text_proto, optimizer_proto) - with self.assertRaises(ValueError): - optimizer_builder.build(optimizer_proto) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/post_processing_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/post_processing_builder.py deleted file mode 100644 index fa3a772896dd1a1b8146677dc862549970a6fecd..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/post_processing_builder.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Builder function for post processing operations.""" -import functools - -import tensorflow as tf -from object_detection.core import post_processing -from object_detection.protos import post_processing_pb2 - - -def build(post_processing_config): - """Builds callables for post-processing operations. - - Builds callables for non-max suppression and score conversion based on the - configuration. - - Non-max suppression callable takes `boxes`, `scores`, and optionally - `clip_window`, `parallel_iterations`, `masks`, and `scope` as inputs. It returns - `nms_boxes`, `nms_scores`, `nms_classes`, `nms_masks` and `num_detections`. See - post_processing.batch_multiclass_non_max_suppression for the type and shape - of these tensors. - - Score converter callable should be called with `input` tensor. The callable - returns the output from one of 3 tf operations based on the configuration - - tf.identity, tf.sigmoid or tf.nn.softmax. See TensorFlow documentation for - argument and return value descriptions. - - Args: - post_processing_config: post_processing.proto object containing the - parameters for the post-processing operations. - - Returns: - non_max_suppressor_fn: Callable for non-max suppression. - score_converter_fn: Callable for score conversion. - - Raises: - ValueError: if the post_processing_config is of incorrect type. - """ - if not isinstance(post_processing_config, post_processing_pb2.PostProcessing): - raise ValueError('post_processing_config not of type ' - 'post_processing_pb2.PostProcessing.') - non_max_suppressor_fn = _build_non_max_suppressor( - post_processing_config.batch_non_max_suppression) - score_converter_fn = _build_score_converter( - post_processing_config.score_converter, - post_processing_config.logit_scale) - return non_max_suppressor_fn, score_converter_fn - - -def _build_non_max_suppressor(nms_config): - """Builds non-max suppression based on the nms config. - - Args: - nms_config: post_processing_pb2.PostProcessing.BatchNonMaxSuppression proto.
- - Returns: - non_max_suppressor_fn: Callable non-max suppressor. - - Raises: - ValueError: On incorrect iou_threshold or on incompatible values of - max_total_detections and max_detections_per_class. - """ - if nms_config.iou_threshold < 0 or nms_config.iou_threshold > 1.0: - raise ValueError('iou_threshold not in [0, 1.0].') - if nms_config.max_detections_per_class > nms_config.max_total_detections: - raise ValueError('max_detections_per_class should be no greater than ' - 'max_total_detections.') - - non_max_suppressor_fn = functools.partial( - post_processing.batch_multiclass_non_max_suppression, - score_thresh=nms_config.score_threshold, - iou_thresh=nms_config.iou_threshold, - max_size_per_class=nms_config.max_detections_per_class, - max_total_size=nms_config.max_total_detections) - return non_max_suppressor_fn - - -def _score_converter_fn_with_logit_scale(tf_score_converter_fn, logit_scale): - """Create a function to scale logits then apply a TensorFlow function.""" - def score_converter_fn(logits): - scaled_logits = tf.divide(logits, logit_scale, name='scale_logits') - return tf_score_converter_fn(scaled_logits, name='convert_scores') - score_converter_fn.__name__ = '%s_with_logit_scale' % ( - tf_score_converter_fn.__name__) - return score_converter_fn - - -def _build_score_converter(score_converter_config, logit_scale): - """Builds score converter based on the config. - - Builds one of [tf.identity, tf.sigmoid, tf.nn.softmax] score converters based on - the config. - - Args: - score_converter_config: post_processing_pb2.PostProcessing.score_converter. - logit_scale: temperature to use for SOFTMAX score_converter. - - Returns: - Callable score converter op. - - Raises: - ValueError: On unknown score converter. - """ - if score_converter_config == post_processing_pb2.PostProcessing.IDENTITY: - return _score_converter_fn_with_logit_scale(tf.identity, logit_scale) - if score_converter_config == post_processing_pb2.PostProcessing.SIGMOID: - return _score_converter_fn_with_logit_scale(tf.sigmoid, logit_scale) - if score_converter_config == post_processing_pb2.PostProcessing.SOFTMAX: - return _score_converter_fn_with_logit_scale(tf.nn.softmax, logit_scale) - raise ValueError('Unknown score converter.') diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/post_processing_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/post_processing_builder_test.py deleted file mode 100644 index c39fbfb417db148d756c3e8a2b51948ed13d07b3..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/post_processing_builder_test.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
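Before the unit tests for this builder, a minimal sketch of obtaining the two callables from post_processing_builder.build; the thresholds and converter choice below are illustrative, not taken from any config above:

```python
from google.protobuf import text_format

from object_detection.builders import post_processing_builder
from object_detection.protos import post_processing_pb2

pp_text_proto = """
  batch_non_max_suppression {
    score_threshold: 0.5
    iou_threshold: 0.6
    max_detections_per_class: 100
    max_total_detections: 300
  }
  score_converter: SIGMOID
"""
pp_config = post_processing_pb2.PostProcessing()
text_format.Merge(pp_text_proto, pp_config)

# nms_fn is a functools.partial over batch_multiclass_non_max_suppression
# with the four thresholds bound; score_fn divides logits by the configured
# logit_scale and applies tf.sigmoid, so its __name__ is
# 'sigmoid_with_logit_scale', as the tests below verify.
nms_fn, score_fn = post_processing_builder.build(pp_config)
```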
-# ============================================================================== - -"""Tests for post_processing_builder.""" - -import tensorflow as tf -from google.protobuf import text_format -from object_detection.builders import post_processing_builder -from object_detection.protos import post_processing_pb2 - - -class PostProcessingBuilderTest(tf.test.TestCase): - - def test_build_non_max_suppressor_with_correct_parameters(self): - post_processing_text_proto = """ - batch_non_max_suppression { - score_threshold: 0.7 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - """ - post_processing_config = post_processing_pb2.PostProcessing() - text_format.Merge(post_processing_text_proto, post_processing_config) - non_max_suppressor, _ = post_processing_builder.build( - post_processing_config) - self.assertEqual(non_max_suppressor.keywords['max_size_per_class'], 100) - self.assertEqual(non_max_suppressor.keywords['max_total_size'], 300) - self.assertAlmostEqual(non_max_suppressor.keywords['score_thresh'], 0.7) - self.assertAlmostEqual(non_max_suppressor.keywords['iou_thresh'], 0.6) - - def test_build_identity_score_converter(self): - post_processing_text_proto = """ - score_converter: IDENTITY - """ - post_processing_config = post_processing_pb2.PostProcessing() - text_format.Merge(post_processing_text_proto, post_processing_config) - _, score_converter = post_processing_builder.build(post_processing_config) - self.assertEqual(score_converter.__name__, 'identity_with_logit_scale') - - inputs = tf.constant([1, 1], tf.float32) - outputs = score_converter(inputs) - with self.test_session() as sess: - converted_scores = sess.run(outputs) - expected_converted_scores = sess.run(inputs) - self.assertAllClose(converted_scores, expected_converted_scores) - - def test_build_identity_score_converter_with_logit_scale(self): - post_processing_text_proto = """ - score_converter: IDENTITY - logit_scale: 2.0 - """ - post_processing_config = post_processing_pb2.PostProcessing() - text_format.Merge(post_processing_text_proto, post_processing_config) - _, score_converter = post_processing_builder.build(post_processing_config) - self.assertEqual(score_converter.__name__, 'identity_with_logit_scale') - - inputs = tf.constant([1, 1], tf.float32) - outputs = score_converter(inputs) - with self.test_session() as sess: - converted_scores = sess.run(outputs) - expected_converted_scores = sess.run(tf.constant([.5, .5], tf.float32)) - self.assertAllClose(converted_scores, expected_converted_scores) - - def test_build_sigmoid_score_converter(self): - post_processing_text_proto = """ - score_converter: SIGMOID - """ - post_processing_config = post_processing_pb2.PostProcessing() - text_format.Merge(post_processing_text_proto, post_processing_config) - _, score_converter = post_processing_builder.build(post_processing_config) - self.assertEqual(score_converter.__name__, 'sigmoid_with_logit_scale') - - def test_build_softmax_score_converter(self): - post_processing_text_proto = """ - score_converter: SOFTMAX - """ - post_processing_config = post_processing_pb2.PostProcessing() - text_format.Merge(post_processing_text_proto, post_processing_config) - _, score_converter = post_processing_builder.build(post_processing_config) - self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale') - - def test_build_softmax_score_converter_with_temperature(self): - post_processing_text_proto = """ - score_converter: SOFTMAX - logit_scale: 2.0 - """ - post_processing_config = 
post_processing_pb2.PostProcessing() - text_format.Merge(post_processing_text_proto, post_processing_config) - _, score_converter = post_processing_builder.build(post_processing_config) - self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale') - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/preprocessor_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/preprocessor_builder.py deleted file mode 100644 index 10b92532fc3ef5a533b7f317082436b0052eb166..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/preprocessor_builder.py +++ /dev/null @@ -1,322 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Builder for preprocessing steps.""" - -import tensorflow as tf - -from object_detection.core import preprocessor -from object_detection.protos import preprocessor_pb2 - - -def _get_step_config_from_proto(preprocessor_step_config, step_name): - """Returns the value of a field named step_name from proto. - - Args: - preprocessor_step_config: A preprocessor_pb2.PreprocessingStep object. - step_name: Name of the field to get value from. - - Returns: - result_dict: a sub proto message from preprocessor_step_config which will be - later converted to a dictionary. - - Raises: - ValueError: If field does not exist in proto. - """ - for field, value in preprocessor_step_config.ListFields(): - if field.name == step_name: - return value - - raise ValueError('Could not get field %s from proto!' % step_name) - - -def _get_dict_from_proto(config): - """Helper function to put all proto fields into a dictionary. - - For many preprocessing steps, there's a trivial 1-1 mapping from proto fields - to function arguments. This function automatically populates a dictionary with - the arguments from the proto. - - Protos that CANNOT be trivially populated include: - * nested messages. - * steps that check if an optional field is set (i.e. where None != 0). - * protos that don't map 1-1 to arguments (i.e. list should be reshaped). - * fields requiring additional validation (i.e. repeated field has n elements). - - Args: - config: A protobuf object that does not violate the conditions above. - - Returns: - result_dict: |config| converted into a Python dictionary. - """ - result_dict = {} - for field, value in config.ListFields(): - result_dict[field.name] = value - return result_dict - - -# A map from a PreprocessingStep proto config field name to the preprocessing -# function that should be used. The PreprocessingStep proto should be parsable -# with _get_dict_from_proto.
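As a sketch of the round trip enabled by the dispatch map defined just below, using random_adjust_brightness, one of the trivially mapped steps (the max_delta value is illustrative):

```python
from google.protobuf import text_format

from object_detection.builders import preprocessor_builder
from object_detection.protos import preprocessor_pb2

step_text_proto = """
  random_adjust_brightness {
    max_delta: 0.2
  }
"""
step_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(step_text_proto, step_proto)

# For steps listed in PREPROCESSING_FUNCTION_MAP, build() returns the
# mapped function plus the kwargs recovered by _get_dict_from_proto,
# i.e. (preprocessor.random_adjust_brightness, {'max_delta': 0.2}).
function, args = preprocessor_builder.build(step_proto)
```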
-PREPROCESSING_FUNCTION_MAP = { - 'normalize_image': preprocessor.normalize_image, - 'random_pixel_value_scale': preprocessor.random_pixel_value_scale, - 'random_image_scale': preprocessor.random_image_scale, - 'random_rgb_to_gray': preprocessor.random_rgb_to_gray, - 'random_adjust_brightness': preprocessor.random_adjust_brightness, - 'random_adjust_contrast': preprocessor.random_adjust_contrast, - 'random_adjust_hue': preprocessor.random_adjust_hue, - 'random_adjust_saturation': preprocessor.random_adjust_saturation, - 'random_distort_color': preprocessor.random_distort_color, - 'random_jitter_boxes': preprocessor.random_jitter_boxes, - 'random_crop_to_aspect_ratio': preprocessor.random_crop_to_aspect_ratio, - 'random_black_patches': preprocessor.random_black_patches, - 'rgb_to_gray': preprocessor.rgb_to_gray, - 'scale_boxes_to_pixel_coordinates': ( - preprocessor.scale_boxes_to_pixel_coordinates), - 'subtract_channel_mean': preprocessor.subtract_channel_mean, -} - - -# A map to convert from preprocessor_pb2.ResizeImage.Method enum to -# tf.image.ResizeMethod. -RESIZE_METHOD_MAP = { - preprocessor_pb2.ResizeImage.AREA: tf.image.ResizeMethod.AREA, - preprocessor_pb2.ResizeImage.BICUBIC: tf.image.ResizeMethod.BICUBIC, - preprocessor_pb2.ResizeImage.BILINEAR: tf.image.ResizeMethod.BILINEAR, - preprocessor_pb2.ResizeImage.NEAREST_NEIGHBOR: ( - tf.image.ResizeMethod.NEAREST_NEIGHBOR), -} - - -def build(preprocessor_step_config): - """Builds preprocessing step based on the configuration. - - Args: - preprocessor_step_config: PreprocessingStep configuration proto. - - Returns: - function, argmap: A callable function and an argument map to call function - with. - - Raises: - ValueError: On invalid configuration. - """ - step_type = preprocessor_step_config.WhichOneof('preprocessing_step') - - if step_type in PREPROCESSING_FUNCTION_MAP: - preprocessing_function = PREPROCESSING_FUNCTION_MAP[step_type] - step_config = _get_step_config_from_proto(preprocessor_step_config, - step_type) - function_args = _get_dict_from_proto(step_config) - return (preprocessing_function, function_args) - - if step_type == 'random_horizontal_flip': - config = preprocessor_step_config.random_horizontal_flip - return (preprocessor.random_horizontal_flip, - { - 'keypoint_flip_permutation': tuple( - config.keypoint_flip_permutation), - }) - - if step_type == 'random_vertical_flip': - config = preprocessor_step_config.random_vertical_flip - return (preprocessor.random_vertical_flip, - { - 'keypoint_flip_permutation': tuple( - config.keypoint_flip_permutation), - }) - - if step_type == 'random_rotation90': - return (preprocessor.random_rotation90, {}) - - if step_type == 'random_crop_image': - config = preprocessor_step_config.random_crop_image - return (preprocessor.random_crop_image, - { - 'min_object_covered': config.min_object_covered, - 'aspect_ratio_range': (config.min_aspect_ratio, - config.max_aspect_ratio), - 'area_range': (config.min_area, config.max_area), - 'overlap_thresh': config.overlap_thresh, - 'random_coef': config.random_coef, - }) - - if step_type == 'random_pad_image': - config = preprocessor_step_config.random_pad_image - min_image_size = None - if (config.HasField('min_image_height') != - config.HasField('min_image_width')): - raise ValueError('min_image_height and min_image_width should be either ' - 'both set or both unset.') - if config.HasField('min_image_height'): - min_image_size = (config.min_image_height, config.min_image_width) - - max_image_size = None - if (config.HasField('max_image_height') 
!= - config.HasField('max_image_width')): - raise ValueError('max_image_height and max_image_width should be either ' - 'both set or both unset.') - if config.HasField('max_image_height'): - max_image_size = (config.max_image_height, config.max_image_width) - - pad_color = config.pad_color - if pad_color and len(pad_color) != 3: - raise ValueError('pad_color should have 3 elements (RGB) if set!') - if not pad_color: - pad_color = None - return (preprocessor.random_pad_image, - { - 'min_image_size': min_image_size, - 'max_image_size': max_image_size, - 'pad_color': pad_color, - }) - - if step_type == 'random_crop_pad_image': - config = preprocessor_step_config.random_crop_pad_image - min_padded_size_ratio = config.min_padded_size_ratio - if min_padded_size_ratio and len(min_padded_size_ratio) != 2: - raise ValueError('min_padded_size_ratio should have 2 elements if set!') - max_padded_size_ratio = config.max_padded_size_ratio - if max_padded_size_ratio and len(max_padded_size_ratio) != 2: - raise ValueError('max_padded_size_ratio should have 2 elements if set!') - pad_color = config.pad_color - if pad_color and len(pad_color) != 3: - raise ValueError('pad_color should have 3 elements if set!') - kwargs = { - 'min_object_covered': config.min_object_covered, - 'aspect_ratio_range': (config.min_aspect_ratio, - config.max_aspect_ratio), - 'area_range': (config.min_area, config.max_area), - 'overlap_thresh': config.overlap_thresh, - 'random_coef': config.random_coef, - } - if min_padded_size_ratio: - kwargs['min_padded_size_ratio'] = tuple(min_padded_size_ratio) - if max_padded_size_ratio: - kwargs['max_padded_size_ratio'] = tuple(max_padded_size_ratio) - if pad_color: - kwargs['pad_color'] = tuple(pad_color) - return (preprocessor.random_crop_pad_image, kwargs) - - if step_type == 'random_resize_method': - config = preprocessor_step_config.random_resize_method - return (preprocessor.random_resize_method, - { - 'target_size': [config.target_height, config.target_width], - }) - - if step_type == 'resize_image': - config = preprocessor_step_config.resize_image - method = RESIZE_METHOD_MAP[config.method] - return (preprocessor.resize_image, - { - 'new_height': config.new_height, - 'new_width': config.new_width, - 'method': method - }) - - if step_type == 'ssd_random_crop': - config = preprocessor_step_config.ssd_random_crop - if config.operations: - min_object_covered = [op.min_object_covered for op in config.operations] - aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio) - for op in config.operations] - area_range = [(op.min_area, op.max_area) for op in config.operations] - overlap_thresh = [op.overlap_thresh for op in config.operations] - random_coef = [op.random_coef for op in config.operations] - return (preprocessor.ssd_random_crop, - { - 'min_object_covered': min_object_covered, - 'aspect_ratio_range': aspect_ratio_range, - 'area_range': area_range, - 'overlap_thresh': overlap_thresh, - 'random_coef': random_coef, - }) - return (preprocessor.ssd_random_crop, {}) - - if step_type == 'ssd_random_crop_pad': - config = preprocessor_step_config.ssd_random_crop_pad - if config.operations: - min_object_covered = [op.min_object_covered for op in config.operations] - aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio) - for op in config.operations] - area_range = [(op.min_area, op.max_area) for op in config.operations] - overlap_thresh = [op.overlap_thresh for op in config.operations] - random_coef = [op.random_coef for op in config.operations] - min_padded_size_ratio = 
[tuple(op.min_padded_size_ratio) - for op in config.operations] - max_padded_size_ratio = [tuple(op.max_padded_size_ratio) - for op in config.operations] - pad_color = [(op.pad_color_r, op.pad_color_g, op.pad_color_b) - for op in config.operations] - return (preprocessor.ssd_random_crop_pad, - { - 'min_object_covered': min_object_covered, - 'aspect_ratio_range': aspect_ratio_range, - 'area_range': area_range, - 'overlap_thresh': overlap_thresh, - 'random_coef': random_coef, - 'min_padded_size_ratio': min_padded_size_ratio, - 'max_padded_size_ratio': max_padded_size_ratio, - 'pad_color': pad_color, - }) - return (preprocessor.ssd_random_crop_pad, {}) - - if step_type == 'ssd_random_crop_fixed_aspect_ratio': - config = preprocessor_step_config.ssd_random_crop_fixed_aspect_ratio - if config.operations: - min_object_covered = [op.min_object_covered for op in config.operations] - area_range = [(op.min_area, op.max_area) for op in config.operations] - overlap_thresh = [op.overlap_thresh for op in config.operations] - random_coef = [op.random_coef for op in config.operations] - return (preprocessor.ssd_random_crop_fixed_aspect_ratio, - { - 'min_object_covered': min_object_covered, - 'aspect_ratio': config.aspect_ratio, - 'area_range': area_range, - 'overlap_thresh': overlap_thresh, - 'random_coef': random_coef, - }) - return (preprocessor.ssd_random_crop_fixed_aspect_ratio, {}) - - if step_type == 'ssd_random_crop_pad_fixed_aspect_ratio': - config = preprocessor_step_config.ssd_random_crop_pad_fixed_aspect_ratio - kwargs = {} - aspect_ratio = config.aspect_ratio - if aspect_ratio: - kwargs['aspect_ratio'] = aspect_ratio - min_padded_size_ratio = config.min_padded_size_ratio - if min_padded_size_ratio: - if len(min_padded_size_ratio) != 2: - raise ValueError('min_padded_size_ratio should have 2 elements if set!') - kwargs['min_padded_size_ratio'] = tuple(min_padded_size_ratio) - max_padded_size_ratio = config.max_padded_size_ratio - if max_padded_size_ratio: - if len(max_padded_size_ratio) != 2: - raise ValueError('max_padded_size_ratio should have 2 elements if set!') - kwargs['max_padded_size_ratio'] = tuple(max_padded_size_ratio) - if config.operations: - kwargs['min_object_covered'] = [op.min_object_covered - for op in config.operations] - kwargs['aspect_ratio_range'] = [(op.min_aspect_ratio, op.max_aspect_ratio) - for op in config.operations] - kwargs['area_range'] = [(op.min_area, op.max_area) - for op in config.operations] - kwargs['overlap_thresh'] = [op.overlap_thresh for op in config.operations] - kwargs['random_coef'] = [op.random_coef for op in config.operations] - return (preprocessor.ssd_random_crop_pad_fixed_aspect_ratio, kwargs) - - raise ValueError('Unknown preprocessing step.') diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/preprocessor_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/preprocessor_builder_test.py deleted file mode 100644 index 9e5d8de8e9ab84836c918b40cd17345543e18d19..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/preprocessor_builder_test.py +++ /dev/null @@ -1,566 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for preprocessor_builder.""" - -import tensorflow as tf - -from google.protobuf import text_format - -from object_detection.builders import preprocessor_builder -from object_detection.core import preprocessor -from object_detection.protos import preprocessor_pb2 - - -class PreprocessorBuilderTest(tf.test.TestCase): - - def assert_dictionary_close(self, dict1, dict2): - """Helper to check if two dicts with floats or integers are close.""" - self.assertEqual(sorted(dict1.keys()), sorted(dict2.keys())) - for key in dict1: - value = dict1[key] - if isinstance(value, float): - self.assertAlmostEqual(value, dict2[key]) - else: - self.assertEqual(value, dict2[key]) - - def test_build_normalize_image(self): - preprocessor_text_proto = """ - normalize_image { - original_minval: 0.0 - original_maxval: 255.0 - target_minval: -1.0 - target_maxval: 1.0 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.normalize_image) - self.assertEqual(args, { - 'original_minval': 0.0, - 'original_maxval': 255.0, - 'target_minval': -1.0, - 'target_maxval': 1.0, - }) - - def test_build_random_horizontal_flip(self): - preprocessor_text_proto = """ - random_horizontal_flip { - keypoint_flip_permutation: 1 - keypoint_flip_permutation: 0 - keypoint_flip_permutation: 2 - keypoint_flip_permutation: 3 - keypoint_flip_permutation: 5 - keypoint_flip_permutation: 4 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_horizontal_flip) - self.assertEqual(args, {'keypoint_flip_permutation': (1, 0, 2, 3, 5, 4)}) - - def test_build_random_vertical_flip(self): - preprocessor_text_proto = """ - random_vertical_flip { - keypoint_flip_permutation: 1 - keypoint_flip_permutation: 0 - keypoint_flip_permutation: 2 - keypoint_flip_permutation: 3 - keypoint_flip_permutation: 5 - keypoint_flip_permutation: 4 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_vertical_flip) - self.assertEqual(args, {'keypoint_flip_permutation': (1, 0, 2, 3, 5, 4)}) - - def test_build_random_rotation90(self): - preprocessor_text_proto = """ - random_rotation90 {} - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_rotation90) - self.assertEqual(args, {}) - - def test_build_random_pixel_value_scale(self): - preprocessor_text_proto = """ - random_pixel_value_scale { - minval: 0.8 - maxval: 1.2 -
} - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_pixel_value_scale) - self.assert_dictionary_close(args, {'minval': 0.8, 'maxval': 1.2}) - - def test_build_random_image_scale(self): - preprocessor_text_proto = """ - random_image_scale { - min_scale_ratio: 0.8 - max_scale_ratio: 2.2 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_image_scale) - self.assert_dictionary_close(args, {'min_scale_ratio': 0.8, - 'max_scale_ratio': 2.2}) - - def test_build_random_rgb_to_gray(self): - preprocessor_text_proto = """ - random_rgb_to_gray { - probability: 0.8 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_rgb_to_gray) - self.assert_dictionary_close(args, {'probability': 0.8}) - - def test_build_random_adjust_brightness(self): - preprocessor_text_proto = """ - random_adjust_brightness { - max_delta: 0.2 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_adjust_brightness) - self.assert_dictionary_close(args, {'max_delta': 0.2}) - - def test_build_random_adjust_contrast(self): - preprocessor_text_proto = """ - random_adjust_contrast { - min_delta: 0.7 - max_delta: 1.1 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_adjust_contrast) - self.assert_dictionary_close(args, {'min_delta': 0.7, 'max_delta': 1.1}) - - def test_build_random_adjust_hue(self): - preprocessor_text_proto = """ - random_adjust_hue { - max_delta: 0.01 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_adjust_hue) - self.assert_dictionary_close(args, {'max_delta': 0.01}) - - def test_build_random_adjust_saturation(self): - preprocessor_text_proto = """ - random_adjust_saturation { - min_delta: 0.75 - max_delta: 1.15 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_adjust_saturation) - self.assert_dictionary_close(args, {'min_delta': 0.75, 'max_delta': 1.15}) - - def test_build_random_distort_color(self): - preprocessor_text_proto = """ - random_distort_color { - color_ordering: 1 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_distort_color) - 
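A brief note on consumption: callers receive (function, args) pairs like the ones these tests assert on; binding them with functools.partial is a plausible pattern, sketched here as an assumption rather than the library's documented API:

```python
import functools

from google.protobuf import text_format

from object_detection.builders import preprocessor_builder
from object_detection.protos import preprocessor_pb2

step_text_proto = """
  normalize_image {
    original_minval: 0.0
    original_maxval: 255.0
    target_minval: -1.0
    target_maxval: 1.0
  }
"""
step_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(step_text_proto, step_proto)
function, args = preprocessor_builder.build(step_proto)

# Bind the built kwargs so only the tensors remain to be supplied. How
# the resulting callable is invoked (argument names, tensor ranks) is an
# assumption about preprocessor.normalize_image's signature.
normalize_fn = functools.partial(function, **args)
```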
self.assertEqual(args, {'color_ordering': 1}) - - def test_build_random_jitter_boxes(self): - preprocessor_text_proto = """ - random_jitter_boxes { - ratio: 0.1 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_jitter_boxes) - self.assert_dictionary_close(args, {'ratio': 0.1}) - - def test_build_random_crop_image(self): - preprocessor_text_proto = """ - random_crop_image { - min_object_covered: 0.75 - min_aspect_ratio: 0.75 - max_aspect_ratio: 1.5 - min_area: 0.25 - max_area: 0.875 - overlap_thresh: 0.5 - random_coef: 0.125 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_crop_image) - self.assertEqual(args, { - 'min_object_covered': 0.75, - 'aspect_ratio_range': (0.75, 1.5), - 'area_range': (0.25, 0.875), - 'overlap_thresh': 0.5, - 'random_coef': 0.125, - }) - - def test_build_random_pad_image(self): - preprocessor_text_proto = """ - random_pad_image { - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_pad_image) - self.assertEqual(args, { - 'min_image_size': None, - 'max_image_size': None, - 'pad_color': None, - }) - - def test_build_random_crop_pad_image(self): - preprocessor_text_proto = """ - random_crop_pad_image { - min_object_covered: 0.75 - min_aspect_ratio: 0.75 - max_aspect_ratio: 1.5 - min_area: 0.25 - max_area: 0.875 - overlap_thresh: 0.5 - random_coef: 0.125 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_crop_pad_image) - self.assertEqual(args, { - 'min_object_covered': 0.75, - 'aspect_ratio_range': (0.75, 1.5), - 'area_range': (0.25, 0.875), - 'overlap_thresh': 0.5, - 'random_coef': 0.125, - }) - - def test_build_random_crop_pad_image_with_optional_parameters(self): - preprocessor_text_proto = """ - random_crop_pad_image { - min_object_covered: 0.75 - min_aspect_ratio: 0.75 - max_aspect_ratio: 1.5 - min_area: 0.25 - max_area: 0.875 - overlap_thresh: 0.5 - random_coef: 0.125 - min_padded_size_ratio: 0.5 - min_padded_size_ratio: 0.75 - max_padded_size_ratio: 0.5 - max_padded_size_ratio: 0.75 - pad_color: 0.5 - pad_color: 0.5 - pad_color: 1.0 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_crop_pad_image) - self.assertEqual(args, { - 'min_object_covered': 0.75, - 'aspect_ratio_range': (0.75, 1.5), - 'area_range': (0.25, 0.875), - 'overlap_thresh': 0.5, - 'random_coef': 0.125, - 'min_padded_size_ratio': (0.5, 0.75), - 'max_padded_size_ratio': (0.5, 0.75), - 'pad_color': (0.5, 0.5, 1.0) - }) - - def test_build_random_crop_to_aspect_ratio(self): - preprocessor_text_proto = """ - random_crop_to_aspect_ratio { - aspect_ratio: 0.85 - overlap_thresh: 0.35 - } - """ - preprocessor_proto = 
preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_crop_to_aspect_ratio) - self.assert_dictionary_close(args, {'aspect_ratio': 0.85, - 'overlap_thresh': 0.35}) - - def test_build_random_black_patches(self): - preprocessor_text_proto = """ - random_black_patches { - max_black_patches: 20 - probability: 0.95 - size_to_image_ratio: 0.12 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_black_patches) - self.assert_dictionary_close(args, {'max_black_patches': 20, - 'probability': 0.95, - 'size_to_image_ratio': 0.12}) - - def test_build_random_resize_method(self): - preprocessor_text_proto = """ - random_resize_method { - target_height: 75 - target_width: 100 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_resize_method) - self.assert_dictionary_close(args, {'target_size': [75, 100]}) - - def test_build_scale_boxes_to_pixel_coordinates(self): - preprocessor_text_proto = """ - scale_boxes_to_pixel_coordinates {} - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.scale_boxes_to_pixel_coordinates) - self.assertEqual(args, {}) - - def test_build_resize_image(self): - preprocessor_text_proto = """ - resize_image { - new_height: 75 - new_width: 100 - method: BICUBIC - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.resize_image) - self.assertEqual(args, {'new_height': 75, - 'new_width': 100, - 'method': tf.image.ResizeMethod.BICUBIC}) - - def test_build_rgb_to_gray(self): - preprocessor_text_proto = """ - rgb_to_gray {} - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.rgb_to_gray) - self.assertEqual(args, {}) - - def test_build_subtract_channel_mean(self): - preprocessor_text_proto = """ - subtract_channel_mean { - means: [1.0, 2.0, 3.0] - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.subtract_channel_mean) - self.assertEqual(args, {'means': [1.0, 2.0, 3.0]}) - - def test_build_ssd_random_crop(self): - preprocessor_text_proto = """ - ssd_random_crop { - operations { - min_object_covered: 0.0 - min_aspect_ratio: 0.875 - max_aspect_ratio: 1.125 - min_area: 0.5 - max_area: 1.0 - overlap_thresh: 0.0 - random_coef: 0.375 - } - operations { - min_object_covered: 0.25 - min_aspect_ratio: 0.75 - max_aspect_ratio: 1.5 - min_area: 0.5 - max_area: 1.0 - overlap_thresh: 0.25 - random_coef: 0.375 - } - } 
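Outside of tests, the (function, args) pair returned by preprocessor_builder.build is typically handed to preprocessor.preprocess. A minimal sketch, assuming a TF 1.x runtime and an importable object_detection package (the step and tensor below are illustrative, not taken from the tests):

import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import preprocessor_builder
from object_detection.core import preprocessor
from object_detection.protos import preprocessor_pb2

# Parse one preprocessing step from its text-proto form.
step = preprocessor_pb2.PreprocessingStep()
text_format.Merge('random_adjust_brightness { max_delta: 0.2 }', step)

# build() maps the oneof field to a preprocessing function and its kwargs.
function, args = preprocessor_builder.build(step)

# preprocessor.preprocess applies a list of (function, kwargs) pairs to a
# tensor dictionary keyed by standard field names such as 'image'.
tensor_dict = {'image': tf.placeholder(tf.float32, [None, None, 3])}
tensor_dict = preprocessor.preprocess(tensor_dict, [(function, args)])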
- """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.ssd_random_crop) - self.assertEqual(args, {'min_object_covered': [0.0, 0.25], - 'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)], - 'area_range': [(0.5, 1.0), (0.5, 1.0)], - 'overlap_thresh': [0.0, 0.25], - 'random_coef': [0.375, 0.375]}) - - def test_build_ssd_random_crop_empty_operations(self): - preprocessor_text_proto = """ - ssd_random_crop { - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.ssd_random_crop) - self.assertEqual(args, {}) - - def test_build_ssd_random_crop_pad(self): - preprocessor_text_proto = """ - ssd_random_crop_pad { - operations { - min_object_covered: 0.0 - min_aspect_ratio: 0.875 - max_aspect_ratio: 1.125 - min_area: 0.5 - max_area: 1.0 - overlap_thresh: 0.0 - random_coef: 0.375 - min_padded_size_ratio: [1.0, 1.0] - max_padded_size_ratio: [2.0, 2.0] - pad_color_r: 0.5 - pad_color_g: 0.5 - pad_color_b: 0.5 - } - operations { - min_object_covered: 0.25 - min_aspect_ratio: 0.75 - max_aspect_ratio: 1.5 - min_area: 0.5 - max_area: 1.0 - overlap_thresh: 0.25 - random_coef: 0.375 - min_padded_size_ratio: [1.0, 1.0] - max_padded_size_ratio: [2.0, 2.0] - pad_color_r: 0.5 - pad_color_g: 0.5 - pad_color_b: 0.5 - } - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.ssd_random_crop_pad) - self.assertEqual(args, {'min_object_covered': [0.0, 0.25], - 'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)], - 'area_range': [(0.5, 1.0), (0.5, 1.0)], - 'overlap_thresh': [0.0, 0.25], - 'random_coef': [0.375, 0.375], - 'min_padded_size_ratio': [(1.0, 1.0), (1.0, 1.0)], - 'max_padded_size_ratio': [(2.0, 2.0), (2.0, 2.0)], - 'pad_color': [(0.5, 0.5, 0.5), (0.5, 0.5, 0.5)]}) - - def test_build_ssd_random_crop_fixed_aspect_ratio(self): - preprocessor_text_proto = """ - ssd_random_crop_fixed_aspect_ratio { - operations { - min_object_covered: 0.0 - min_area: 0.5 - max_area: 1.0 - overlap_thresh: 0.0 - random_coef: 0.375 - } - operations { - min_object_covered: 0.25 - min_area: 0.5 - max_area: 1.0 - overlap_thresh: 0.25 - random_coef: 0.375 - } - aspect_ratio: 0.875 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.ssd_random_crop_fixed_aspect_ratio) - self.assertEqual(args, {'min_object_covered': [0.0, 0.25], - 'aspect_ratio': 0.875, - 'area_range': [(0.5, 1.0), (0.5, 1.0)], - 'overlap_thresh': [0.0, 0.25], - 'random_coef': [0.375, 0.375]}) - - def test_build_ssd_random_crop_pad_fixed_aspect_ratio(self): - preprocessor_text_proto = """ - ssd_random_crop_pad_fixed_aspect_ratio { - operations { - min_object_covered: 0.0 - min_aspect_ratio: 0.875 - max_aspect_ratio: 1.125 - min_area: 0.5 - max_area: 1.0 - overlap_thresh: 0.0 - random_coef: 0.375 - } - operations { - min_object_covered: 0.25 - min_aspect_ratio: 0.75 - max_aspect_ratio: 1.5 - min_area: 0.5 - max_area: 1.0 - overlap_thresh: 
0.25 - random_coef: 0.375 - } - aspect_ratio: 0.875 - min_padded_size_ratio: [1.0, 1.0] - max_padded_size_ratio: [2.0, 2.0] - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, - preprocessor.ssd_random_crop_pad_fixed_aspect_ratio) - self.assertEqual(args, {'min_object_covered': [0.0, 0.25], - 'aspect_ratio': 0.875, - 'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)], - 'area_range': [(0.5, 1.0), (0.5, 1.0)], - 'overlap_thresh': [0.0, 0.25], - 'random_coef': [0.375, 0.375], - 'min_padded_size_ratio': (1.0, 1.0), - 'max_padded_size_ratio': (2.0, 2.0)}) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/region_similarity_calculator_builder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/region_similarity_calculator_builder.py deleted file mode 100644 index fa1d671754df07043957ccf9e04f651c114c1cf9..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/region_similarity_calculator_builder.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Builder for region similarity calculators.""" - -from object_detection.core import region_similarity_calculator -from object_detection.protos import region_similarity_calculator_pb2 - - -def build(region_similarity_calculator_config): - """Builds region similarity calculator based on the configuration. - - Builds one of [IouSimilarity, IoaSimilarity, NegSqDistSimilarity] objects. See - core/region_similarity_calculator.proto for details. - - Args: - region_similarity_calculator_config: RegionSimilarityCalculator - configuration proto. - - Returns: - region_similarity_calculator: RegionSimilarityCalculator object. - - Raises: - ValueError: On unknown region similarity calculator. 
- """ - - if not isinstance( - region_similarity_calculator_config, - region_similarity_calculator_pb2.RegionSimilarityCalculator): - raise ValueError( - 'region_similarity_calculator_config not of type ' - 'region_similarity_calculator_pb2.RegionsSimilarityCalculator') - - similarity_calculator = region_similarity_calculator_config.WhichOneof( - 'region_similarity') - if similarity_calculator == 'iou_similarity': - return region_similarity_calculator.IouSimilarity() - if similarity_calculator == 'ioa_similarity': - return region_similarity_calculator.IoaSimilarity() - if similarity_calculator == 'neg_sq_dist_similarity': - return region_similarity_calculator.NegSqDistSimilarity() - - raise ValueError('Unknown region similarity calculator.') - diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/region_similarity_calculator_builder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/region_similarity_calculator_builder_test.py deleted file mode 100644 index ca3a5512e374fc03f39de1f3f77cf22bc6f6556e..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/builders/region_similarity_calculator_builder_test.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for region_similarity_calculator_builder.""" - -import tensorflow as tf - -from google.protobuf import text_format -from object_detection.builders import region_similarity_calculator_builder -from object_detection.core import region_similarity_calculator -from object_detection.protos import region_similarity_calculator_pb2 as sim_calc_pb2 - - -class RegionSimilarityCalculatorBuilderTest(tf.test.TestCase): - - def testBuildIoaSimilarityCalculator(self): - similarity_calc_text_proto = """ - ioa_similarity { - } - """ - similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator() - text_format.Merge(similarity_calc_text_proto, similarity_calc_proto) - similarity_calc = region_similarity_calculator_builder.build( - similarity_calc_proto) - self.assertTrue(isinstance(similarity_calc, - region_similarity_calculator.IoaSimilarity)) - - def testBuildIouSimilarityCalculator(self): - similarity_calc_text_proto = """ - iou_similarity { - } - """ - similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator() - text_format.Merge(similarity_calc_text_proto, similarity_calc_proto) - similarity_calc = region_similarity_calculator_builder.build( - similarity_calc_proto) - self.assertTrue(isinstance(similarity_calc, - region_similarity_calculator.IouSimilarity)) - - def testBuildNegSqDistSimilarityCalculator(self): - similarity_calc_text_proto = """ - neg_sq_dist_similarity { - } - """ - similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator() - text_format.Merge(similarity_calc_text_proto, similarity_calc_proto) - similarity_calc = region_similarity_calculator_builder.build( - similarity_calc_proto) - self.assertTrue(isinstance(similarity_calc, - region_similarity_calculator. - NegSqDistSimilarity)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/__init__.py deleted file mode 100644 index 8b137891791fe96927ad78e64b0aad7bded08bdc..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/anchor_generator.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/anchor_generator.py deleted file mode 100644 index f2797ef77d3e83597e18db10e5ba87f24364d8aa..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/anchor_generator.py +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Base anchor generator. - -The job of the anchor generator is to create (or load) a collection -of bounding boxes to be used as anchors. 
-
-Generated anchors are assumed to match some convolutional grid or list of grid
-shapes. For example, we might want to generate anchors matching an 8x8
-feature map and a 4x4 feature map. If we place 3 anchors per grid location
-on the first feature map and 6 anchors per grid location on the second feature
-map, then 3*8*8 + 6*4*4 = 288 anchors are generated in total.
-
-To support fully convolutional settings, feature map shapes are passed
-dynamically at generation time. The number of anchors to place at each location
-is static --- implementations of AnchorGenerator must always be able to return
-the number of anchors that they use per location for each feature map.
-"""
-from abc import ABCMeta
-from abc import abstractmethod
-
-import tensorflow as tf
-
-
-class AnchorGenerator(object):
-  """Abstract base class for anchor generators."""
-  __metaclass__ = ABCMeta
-
-  @abstractmethod
-  def name_scope(self):
-    """Name scope.
-
-    Must be defined by implementations.
-
-    Returns:
-      a string representing the name scope of the anchor generation operation.
-    """
-    pass
-
-  @property
-  def check_num_anchors(self):
-    """Whether to dynamically check the number of anchors generated.
-
-    Can be overridden by implementations that would like to disable this
-    behavior.
-
-    Returns:
-      a boolean controlling whether the Generate function should dynamically
-      check the number of anchors generated against the mathematically
-      expected number of anchors.
-    """
-    return True
-
-  @abstractmethod
-  def num_anchors_per_location(self):
-    """Returns the number of anchors per spatial location.
-
-    Returns:
-      a list of integers, one for each expected feature map to be passed to
-      the `generate` function.
-    """
-    pass
-
-  def generate(self, feature_map_shape_list, **params):
-    """Generates a collection of bounding boxes to be used as anchors.
-
-    TODO(rathodv): remove **params from argument list and make stride and
-    offsets (for multiple_grid_anchor_generator) constructor arguments.
-
-    Args:
-      feature_map_shape_list: list of (height, width) pairs in the format
-        [(height_0, width_0), (height_1, width_1), ...] that the generated
-        anchors must align with. Pairs can be provided as 1-dimensional
-        integer tensors of length 2 or simply as tuples of integers.
-      **params: parameters for anchor generation op
-
-    Returns:
-      boxes_list: a list of BoxLists each holding anchor boxes corresponding to
-        the input feature map shapes.
-
-    Raises:
-      ValueError: if the number of feature map shapes does not match the length
-        of `num_anchors_per_location`.
-    """
-    if self.check_num_anchors and (
-        len(feature_map_shape_list) != len(self.num_anchors_per_location())):
-      raise ValueError('Number of feature maps is expected to equal the length '
-                       'of `num_anchors_per_location`.')
-    with tf.name_scope(self.name_scope()):
-      anchors_list = self._generate(feature_map_shape_list, **params)
-      if self.check_num_anchors:
-        with tf.control_dependencies([
-            self._assert_correct_number_of_anchors(
-                anchors_list, feature_map_shape_list)]):
-          for item in anchors_list:
-            item.set(tf.identity(item.get()))
-      return anchors_list
-
-  @abstractmethod
-  def _generate(self, feature_map_shape_list, **params):
-    """To be overridden by implementations.
-
-    Args:
-      feature_map_shape_list: list of (height, width) pairs in the format
-        [(height_0, width_0), (height_1, width_1), ...] that the generated
-        anchors must align with.
-      **params: parameters for anchor generation op
-
-    Returns:
-      boxes_list: a list of BoxList, each holding a collection of N anchor
-        boxes.
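A minimal conforming subclass may make the generate/_generate contract concrete. The sketch below is illustrative only (it assumes TF 1.x; the anchor geometry is arbitrary): it places one fixed-size anchor at the center of each grid cell, so num_anchors_per_location() returns [1] and generate([(8, 8)]) yields a single BoxList of 64 anchors.

import tensorflow as tf

from object_detection.core import anchor_generator
from object_detection.core import box_list


class CenteredAnchorGenerator(anchor_generator.AnchorGenerator):
  """Illustrative generator: one fixed-size anchor per grid cell."""

  def name_scope(self):
    return 'CenteredAnchorGenerator'

  def num_anchors_per_location(self):
    # Exactly one feature map is expected, with one anchor per cell.
    return [1]

  def _generate(self, feature_map_shape_list, **params):
    height, width = feature_map_shape_list[0]
    # Normalized cell-center coordinates for every grid location.
    y = (tf.to_float(tf.range(height)) + 0.5) / tf.to_float(height)
    x = (tf.to_float(tf.range(width)) + 0.5) / tf.to_float(width)
    ycenter, xcenter = tf.meshgrid(y, x, indexing='ij')
    ycenter = tf.reshape(ycenter, [-1])
    xcenter = tf.reshape(xcenter, [-1])
    half = 0.05 * tf.ones_like(ycenter)
    boxes = tf.stack(
        [ycenter - half, xcenter - half, ycenter + half, xcenter + half],
        axis=1)
    return [box_list.BoxList(boxes)]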
- """ - pass - - def _assert_correct_number_of_anchors(self, anchors_list, - feature_map_shape_list): - """Assert that correct number of anchors was generated. - - Args: - anchors_list: A list of box_list.BoxList object holding anchors generated. - feature_map_shape_list: list of (height, width) pairs in the format - [(height_0, width_0), (height_1, width_1), ...] that the generated - anchors must align with. - Returns: - Op that raises InvalidArgumentError if the number of anchors does not - match the number of expected anchors. - """ - expected_num_anchors = 0 - actual_num_anchors = 0 - for num_anchors_per_location, feature_map_shape, anchors in zip( - self.num_anchors_per_location(), feature_map_shape_list, anchors_list): - expected_num_anchors += (num_anchors_per_location - * feature_map_shape[0] - * feature_map_shape[1]) - actual_num_anchors += anchors.num_boxes() - return tf.assert_equal(expected_num_anchors, actual_num_anchors) - diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/balanced_positive_negative_sampler.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/balanced_positive_negative_sampler.py deleted file mode 100644 index 7042c40fffbef3126fc90a81114693ac4c0c8bf6..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/balanced_positive_negative_sampler.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Class to subsample minibatches by balancing positives and negatives. - -Subsamples minibatches based on a pre-specified positive fraction in range -[0,1]. The class presumes there are many more negatives than positive examples: -if the desired batch_size cannot be achieved with the pre-specified positive -fraction, it fills the rest with negative examples. If this is not sufficient -for obtaining the desired batch_size, it returns fewer examples. - -The main function to call is Subsample(self, indicator, labels). For convenience -one can also call SubsampleWeights(self, weights, labels) which is defined in -the minibatch_sampler base class. -""" - -import tensorflow as tf - -from object_detection.core import minibatch_sampler - - -class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): - """Subsamples minibatches to a desired balance of positives and negatives.""" - - def __init__(self, positive_fraction=0.5): - """Constructs a minibatch sampler. - - Args: - positive_fraction: desired fraction of positive examples (scalar in [0,1]) - in the batch. - - Raises: - ValueError: if positive_fraction < 0, or positive_fraction > 1 - """ - if positive_fraction < 0 or positive_fraction > 1: - raise ValueError('positive_fraction should be in range [0,1]. ' - 'Received: %s.' 
% positive_fraction) - self._positive_fraction = positive_fraction - - def subsample(self, indicator, batch_size, labels): - """Returns subsampled minibatch. - - Args: - indicator: boolean tensor of shape [N] whose True entries can be sampled. - batch_size: desired batch size. If None, keeps all positive samples and - randomly selects negative samples so that the positive sample fraction - matches self._positive_fraction. - labels: boolean tensor of shape [N] denoting positive(=True) and negative - (=False) examples. - - Returns: - is_sampled: boolean tensor of shape [N], True for entries which are - sampled. - - Raises: - ValueError: if labels and indicator are not 1D boolean tensors. - """ - if len(indicator.get_shape().as_list()) != 1: - raise ValueError('indicator must be 1 dimensional, got a tensor of ' - 'shape %s' % indicator.get_shape()) - if len(labels.get_shape().as_list()) != 1: - raise ValueError('labels must be 1 dimensional, got a tensor of ' - 'shape %s' % labels.get_shape()) - if labels.dtype != tf.bool: - raise ValueError('labels should be of type bool. Received: %s' % - labels.dtype) - if indicator.dtype != tf.bool: - raise ValueError('indicator should be of type bool. Received: %s' % - indicator.dtype) - - # Only sample from indicated samples - negative_idx = tf.logical_not(labels) - positive_idx = tf.logical_and(labels, indicator) - negative_idx = tf.logical_and(negative_idx, indicator) - - # Sample positive and negative samples separately - if batch_size is None: - max_num_pos = tf.reduce_sum(tf.to_int32(positive_idx)) - else: - max_num_pos = int(self._positive_fraction * batch_size) - sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos) - num_sampled_pos = tf.reduce_sum(tf.cast(sampled_pos_idx, tf.int32)) - if batch_size is None: - negative_positive_ratio = ( - 1 - self._positive_fraction) / self._positive_fraction - max_num_neg = tf.to_int32( - negative_positive_ratio * tf.to_float(num_sampled_pos)) - else: - max_num_neg = batch_size - num_sampled_pos - sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg) - - sampled_idx = tf.logical_or(sampled_pos_idx, sampled_neg_idx) - return sampled_idx diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/balanced_positive_negative_sampler_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/balanced_positive_negative_sampler_test.py deleted file mode 100644 index e39de5342c4f01afa38725a56ee543c6eec27d13..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/balanced_positive_negative_sampler_test.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
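A usage sketch for the sampler (assuming TF 1.x; the label layout is illustrative):

import numpy as np
import tensorflow as tf

from object_detection.core import balanced_positive_negative_sampler

# 100 eligible entries, the last 20 of which are positive.
indicator = tf.constant(np.ones(100, dtype=bool))
labels = tf.constant(np.arange(100) >= 80)

sampler = balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
    positive_fraction=0.5)

# With batch_size=64 and positive_fraction=0.5, at most 32 positives are
# drawn; since only 20 exist, the remaining 44 slots are filled with
# negatives, matching the fill-with-negatives behavior described above.
is_sampled = sampler.subsample(indicator, 64, labels)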
-# ============================================================================== - -"""Tests for object_detection.core.balanced_positive_negative_sampler.""" - -import numpy as np -import tensorflow as tf - -from object_detection.core import balanced_positive_negative_sampler -from object_detection.utils import test_case - - -class BalancedPositiveNegativeSamplerTest(test_case.TestCase): - - def test_subsample_all_examples(self): - numpy_labels = np.random.permutation(300) - indicator = tf.constant(np.ones(300) == 1) - numpy_labels = (numpy_labels - 200) > 0 - - labels = tf.constant(numpy_labels) - - sampler = (balanced_positive_negative_sampler. - BalancedPositiveNegativeSampler()) - is_sampled = sampler.subsample(indicator, 64, labels) - with self.test_session() as sess: - is_sampled = sess.run(is_sampled) - self.assertTrue(sum(is_sampled) == 64) - self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 32) - self.assertTrue(sum(np.logical_and( - np.logical_not(numpy_labels), is_sampled)) == 32) - - def test_subsample_selection(self): - # Test random sampling when only some examples can be sampled: - # 100 samples, 20 positives, 10 positives cannot be sampled - numpy_labels = np.arange(100) - numpy_indicator = numpy_labels < 90 - indicator = tf.constant(numpy_indicator) - numpy_labels = (numpy_labels - 80) >= 0 - - labels = tf.constant(numpy_labels) - - sampler = (balanced_positive_negative_sampler. - BalancedPositiveNegativeSampler()) - is_sampled = sampler.subsample(indicator, 64, labels) - with self.test_session() as sess: - is_sampled = sess.run(is_sampled) - self.assertTrue(sum(is_sampled) == 64) - self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 10) - self.assertTrue(sum(np.logical_and( - np.logical_not(numpy_labels), is_sampled)) == 54) - self.assertAllEqual(is_sampled, np.logical_and(is_sampled, - numpy_indicator)) - - def test_subsample_selection_no_batch_size(self): - # Test random sampling when only some examples can be sampled: - # 1000 samples, 6 positives (5 can be sampled). - numpy_labels = np.arange(1000) - numpy_indicator = numpy_labels < 999 - indicator = tf.constant(numpy_indicator) - numpy_labels = (numpy_labels - 994) >= 0 - - labels = tf.constant(numpy_labels) - - sampler = (balanced_positive_negative_sampler. - BalancedPositiveNegativeSampler(0.01)) - is_sampled = sampler.subsample(indicator, None, labels) - with self.test_session() as sess: - is_sampled = sess.run(is_sampled) - self.assertTrue(sum(is_sampled) == 500) - self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 5) - self.assertTrue(sum(np.logical_and( - np.logical_not(numpy_labels), is_sampled)) == 495) - self.assertAllEqual(is_sampled, np.logical_and(is_sampled, - numpy_indicator)) - - def test_raises_error_with_incorrect_label_shape(self): - labels = tf.constant([[True, False, False]]) - indicator = tf.constant([True, False, True]) - sampler = (balanced_positive_negative_sampler. - BalancedPositiveNegativeSampler()) - with self.assertRaises(ValueError): - sampler.subsample(indicator, 64, labels) - - def test_raises_error_with_incorrect_indicator_shape(self): - labels = tf.constant([True, False, False]) - indicator = tf.constant([[True, False, True]]) - sampler = (balanced_positive_negative_sampler. 
- BalancedPositiveNegativeSampler()) - with self.assertRaises(ValueError): - sampler.subsample(indicator, 64, labels) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/batcher.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/batcher.py deleted file mode 100644 index c5dfb712108d0f9ec797ef04c9a4a3620b189fea..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/batcher.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Provides functions to batch a dictionary of input tensors.""" -import collections - -import tensorflow as tf - -from object_detection.core import prefetcher - -rt_shape_str = '_runtime_shapes' - - -class BatchQueue(object): - """BatchQueue class. - - This class creates a batch queue to asynchronously enqueue tensors_dict. - It also adds a FIFO prefetcher so that the batches are readily available - for the consumers. Dequeue ops for a BatchQueue object can be created via - the Dequeue method which evaluates to a batch of tensor_dict. - - Example input pipeline with batching: - ------------------------------------ - key, string_tensor = slim.parallel_reader.parallel_read(...) - tensor_dict = decoder.decode(string_tensor) - tensor_dict = preprocessor.preprocess(tensor_dict, ...) - batch_queue = batcher.BatchQueue(tensor_dict, - batch_size=32, - batch_queue_capacity=2000, - num_batch_queue_threads=8, - prefetch_queue_capacity=20) - tensor_dict = batch_queue.dequeue() - outputs = Model(tensor_dict) - ... - ----------------------------------- - - Notes: - ----- - This class batches tensors of unequal sizes by zero padding and unpadding - them after generating a batch. This can be computationally expensive when - batching tensors (such as images) that are of vastly different sizes. So it is - recommended that the shapes of such tensors be fully defined in tensor_dict - while other lightweight tensors such as bounding box corners and class labels - can be of varying sizes. Use either crop or resize operations to fully define - the shape of an image in tensor_dict. - - It is also recommended to perform any preprocessing operations on tensors - before passing to BatchQueue and subsequently calling the Dequeue method. - - Another caveat is that this class does not read the last batch if it is not - full. The current implementation makes it hard to support that use case. So, - for evaluation, when it is critical to run all the examples through your - network use the input pipeline example mentioned in core/prefetcher.py. - """ - - def __init__(self, tensor_dict, batch_size, batch_queue_capacity, - num_batch_queue_threads, prefetch_queue_capacity): - """Constructs a batch queue holding tensor_dict. - - Args: - tensor_dict: dictionary of tensors to batch. 
- batch_size: batch size. - batch_queue_capacity: max capacity of the queue from which the tensors are - batched. - num_batch_queue_threads: number of threads to use for batching. - prefetch_queue_capacity: max capacity of the queue used to prefetch - assembled batches. - """ - # Remember static shapes to set shapes of batched tensors. - static_shapes = collections.OrderedDict( - {key: tensor.get_shape() for key, tensor in tensor_dict.items()}) - # Remember runtime shapes to unpad tensors after batching. - runtime_shapes = collections.OrderedDict( - {(key + rt_shape_str): tf.shape(tensor) - for key, tensor in tensor_dict.items()}) - - all_tensors = tensor_dict - all_tensors.update(runtime_shapes) - batched_tensors = tf.train.batch( - all_tensors, - capacity=batch_queue_capacity, - batch_size=batch_size, - dynamic_pad=True, - num_threads=num_batch_queue_threads) - - self._queue = prefetcher.prefetch(batched_tensors, - prefetch_queue_capacity) - self._static_shapes = static_shapes - self._batch_size = batch_size - - def dequeue(self): - """Dequeues a batch of tensor_dict from the BatchQueue. - - TODO: use allow_smaller_final_batch to allow running over the whole eval set - - Returns: - A list of tensor_dicts of the requested batch_size. - """ - batched_tensors = self._queue.dequeue() - # Separate input tensors from tensors containing their runtime shapes. - tensors = {} - shapes = {} - for key, batched_tensor in batched_tensors.items(): - unbatched_tensor_list = tf.unstack(batched_tensor) - for i, unbatched_tensor in enumerate(unbatched_tensor_list): - if rt_shape_str in key: - shapes[(key[:-len(rt_shape_str)], i)] = unbatched_tensor - else: - tensors[(key, i)] = unbatched_tensor - - # Undo that padding using shapes and create a list of size `batch_size` that - # contains tensor dictionaries. - tensor_dict_list = [] - batch_size = self._batch_size - for batch_id in range(batch_size): - tensor_dict = {} - for key in self._static_shapes: - tensor_dict[key] = tf.slice(tensors[(key, batch_id)], - tf.zeros_like(shapes[(key, batch_id)]), - shapes[(key, batch_id)]) - tensor_dict[key].set_shape(self._static_shapes[key]) - tensor_dict_list.append(tensor_dict) - - return tensor_dict_list diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/batcher_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/batcher_test.py deleted file mode 100644 index 61b4390b4cdcff146b721872ee98f9a48c6f67f0..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/batcher_test.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
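The essential trick in BatchQueue is batching each tensor together with its runtime shape, so the zero padding introduced by dynamic_pad batching can be sliced away after dequeue. The same bookkeeping in isolation, as a plain numpy sketch (illustrative, with the queue machinery stripped out):

import numpy as np

tensors = [np.arange(4).reshape(2, 2), np.arange(9).reshape(3, 3)]
shapes = [t.shape for t in tensors]  # the recorded "runtime shapes"

# Zero-pad every tensor to the largest shape in the batch, as dynamic_pad
# batching does.
max_shape = np.max([t.shape for t in tensors], axis=0)
batch = np.stack([
    np.pad(t, [(0, max_shape[0] - t.shape[0]),
               (0, max_shape[1] - t.shape[1])], mode='constant')
    for t in tensors])

# After "dequeue", slice each padded tensor back to its recorded shape.
unpadded = [batch[i][:s[0], :s[1]] for i, s in enumerate(shapes)]
assert all(np.array_equal(u, t) for u, t in zip(unpadded, tensors))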
-# ============================================================================== - -"""Tests for object_detection.core.batcher.""" - -import numpy as np -import tensorflow as tf - -from object_detection.core import batcher - -slim = tf.contrib.slim - - -class BatcherTest(tf.test.TestCase): - - def test_batch_and_unpad_2d_tensors_of_different_sizes_in_1st_dimension(self): - with self.test_session() as sess: - batch_size = 3 - num_batches = 2 - examples = tf.Variable(tf.constant(2, dtype=tf.int32)) - counter = examples.count_up_to(num_batches * batch_size + 2) - boxes = tf.tile( - tf.reshape(tf.range(4), [1, 4]), tf.stack([counter, tf.constant(1)])) - batch_queue = batcher.BatchQueue( - tensor_dict={'boxes': boxes}, - batch_size=batch_size, - batch_queue_capacity=100, - num_batch_queue_threads=1, - prefetch_queue_capacity=100) - batch = batch_queue.dequeue() - - for tensor_dict in batch: - for tensor in tensor_dict.values(): - self.assertAllEqual([None, 4], tensor.get_shape().as_list()) - - tf.initialize_all_variables().run() - with slim.queues.QueueRunners(sess): - i = 2 - for _ in range(num_batches): - batch_np = sess.run(batch) - for tensor_dict in batch_np: - for tensor in tensor_dict.values(): - self.assertAllEqual(tensor, np.tile(np.arange(4), (i, 1))) - i += 1 - with self.assertRaises(tf.errors.OutOfRangeError): - sess.run(batch) - - def test_batch_and_unpad_2d_tensors_of_different_sizes_in_all_dimensions( - self): - with self.test_session() as sess: - batch_size = 3 - num_batches = 2 - examples = tf.Variable(tf.constant(2, dtype=tf.int32)) - counter = examples.count_up_to(num_batches * batch_size + 2) - image = tf.reshape( - tf.range(counter * counter), tf.stack([counter, counter])) - batch_queue = batcher.BatchQueue( - tensor_dict={'image': image}, - batch_size=batch_size, - batch_queue_capacity=100, - num_batch_queue_threads=1, - prefetch_queue_capacity=100) - batch = batch_queue.dequeue() - - for tensor_dict in batch: - for tensor in tensor_dict.values(): - self.assertAllEqual([None, None], tensor.get_shape().as_list()) - - tf.initialize_all_variables().run() - with slim.queues.QueueRunners(sess): - i = 2 - for _ in range(num_batches): - batch_np = sess.run(batch) - for tensor_dict in batch_np: - for tensor in tensor_dict.values(): - self.assertAllEqual(tensor, np.arange(i * i).reshape((i, i))) - i += 1 - with self.assertRaises(tf.errors.OutOfRangeError): - sess.run(batch) - - def test_batch_and_unpad_2d_tensors_of_same_size_in_all_dimensions(self): - with self.test_session() as sess: - batch_size = 3 - num_batches = 2 - examples = tf.Variable(tf.constant(1, dtype=tf.int32)) - counter = examples.count_up_to(num_batches * batch_size + 1) - image = tf.reshape(tf.range(1, 13), [4, 3]) * counter - batch_queue = batcher.BatchQueue( - tensor_dict={'image': image}, - batch_size=batch_size, - batch_queue_capacity=100, - num_batch_queue_threads=1, - prefetch_queue_capacity=100) - batch = batch_queue.dequeue() - - for tensor_dict in batch: - for tensor in tensor_dict.values(): - self.assertAllEqual([4, 3], tensor.get_shape().as_list()) - - tf.initialize_all_variables().run() - with slim.queues.QueueRunners(sess): - i = 1 - for _ in range(num_batches): - batch_np = sess.run(batch) - for tensor_dict in batch_np: - for tensor in tensor_dict.values(): - self.assertAllEqual(tensor, np.arange(1, 13).reshape((4, 3)) * i) - i += 1 - with self.assertRaises(tf.errors.OutOfRangeError): - sess.run(batch) - - def test_batcher_when_batch_size_is_one(self): - with self.test_session() as sess: - 
batch_size = 1 - num_batches = 2 - examples = tf.Variable(tf.constant(2, dtype=tf.int32)) - counter = examples.count_up_to(num_batches * batch_size + 2) - image = tf.reshape( - tf.range(counter * counter), tf.stack([counter, counter])) - batch_queue = batcher.BatchQueue( - tensor_dict={'image': image}, - batch_size=batch_size, - batch_queue_capacity=100, - num_batch_queue_threads=1, - prefetch_queue_capacity=100) - batch = batch_queue.dequeue() - - for tensor_dict in batch: - for tensor in tensor_dict.values(): - self.assertAllEqual([None, None], tensor.get_shape().as_list()) - - tf.initialize_all_variables().run() - with slim.queues.QueueRunners(sess): - i = 2 - for _ in range(num_batches): - batch_np = sess.run(batch) - for tensor_dict in batch_np: - for tensor in tensor_dict.values(): - self.assertAllEqual(tensor, np.arange(i * i).reshape((i, i))) - i += 1 - with self.assertRaises(tf.errors.OutOfRangeError): - sess.run(batch) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_coder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_coder.py deleted file mode 100644 index f20ac956dfbce1fa69d1b9e6f5b023b704e1ec8a..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_coder.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Base box coder. - -Box coders convert between coordinate frames, namely image-centric -(with (0,0) on the top left of image) and anchor-centric (with (0,0) being -defined by a specific anchor). - -Users of a BoxCoder can call two methods: - encode: which encodes a box with respect to a given anchor - (or rather, a tensor of boxes wrt a corresponding tensor of anchors) and - decode: which inverts this encoding with a decode operation. -In both cases, the arguments are assumed to be in 1-1 correspondence already; -it is not the job of a BoxCoder to perform matching. -""" -from abc import ABCMeta -from abc import abstractmethod -from abc import abstractproperty - -import tensorflow as tf - - -# Box coder types. -FASTER_RCNN = 'faster_rcnn' -KEYPOINT = 'keypoint' -MEAN_STDDEV = 'mean_stddev' -SQUARE = 'square' - - -class BoxCoder(object): - """Abstract base class for box coder.""" - __metaclass__ = ABCMeta - - @abstractproperty - def code_size(self): - """Return the size of each code. - - This number is a constant and should agree with the output of the `encode` - op (e.g. if rel_codes is the output of self.encode(...), then it should have - shape [N, code_size()]). This abstractproperty should be overridden by - implementations. - - Returns: - an integer constant - """ - pass - - def encode(self, boxes, anchors): - """Encode a box list relative to an anchor collection. 
-
-    Args:
-      boxes: BoxList holding N boxes to be encoded
-      anchors: BoxList of N anchors
-
-    Returns:
-      a tensor representing N relative-encoded boxes
-    """
-    with tf.name_scope('Encode'):
-      return self._encode(boxes, anchors)
-
-  def decode(self, rel_codes, anchors):
-    """Decode boxes that are encoded relative to an anchor collection.
-
-    Args:
-      rel_codes: a tensor representing N relative-encoded boxes
-      anchors: BoxList of anchors
-
-    Returns:
-      boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
-        with corners y_min, x_min, y_max, x_max)
-    """
-    with tf.name_scope('Decode'):
-      return self._decode(rel_codes, anchors)
-
-  @abstractmethod
-  def _encode(self, boxes, anchors):
-    """Method to be overridden by implementations.
-
-    Args:
-      boxes: BoxList holding N boxes to be encoded
-      anchors: BoxList of N anchors
-
-    Returns:
-      a tensor representing N relative-encoded boxes
-    """
-    pass
-
-  @abstractmethod
-  def _decode(self, rel_codes, anchors):
-    """Method to be overridden by implementations.
-
-    Args:
-      rel_codes: a tensor representing N relative-encoded boxes
-      anchors: BoxList of anchors
-
-    Returns:
-      boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
-        with corners y_min, x_min, y_max, x_max)
-    """
-    pass
-
-
-def batch_decode(encoded_boxes, box_coder, anchors):
-  """Decode a batch of encoded boxes.
-
-  This op takes a batch of encoded bounding boxes and transforms
-  them to a batch of bounding boxes specified by their corners in
-  the order of [y_min, x_min, y_max, x_max].
-
-  Args:
-    encoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
-      code_size] representing the location of the objects.
-    box_coder: a BoxCoder object.
-    anchors: a BoxList of anchors used to encode `encoded_boxes`.
-
-  Returns:
-    decoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
-      code_size] representing the corners of the objects in the order
-      of [y_min, x_min, y_max, x_max].
-
-  Raises:
-    ValueError: if batch sizes of the inputs are inconsistent, or if
-      the number of anchors inferred from encoded_boxes and anchors is
-      inconsistent.
-  """
-  encoded_boxes.get_shape().assert_has_rank(3)
-  if encoded_boxes.get_shape()[1].value != anchors.num_boxes_static():
-    raise ValueError('The number of anchors inferred from encoded_boxes'
-                     ' and anchors is inconsistent: shape[1] of encoded_boxes'
-                     ' %s should be equal to the number of anchors: %s.' %
-                     (encoded_boxes.get_shape()[1].value,
-                      anchors.num_boxes_static()))
-
-  decoded_boxes = tf.stack([
-      box_coder.decode(boxes, anchors).get()
-      for boxes in tf.unstack(encoded_boxes)
-  ])
-  return decoded_boxes
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_coder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_coder_test.py
deleted file mode 100644
index c087a325275f84604a114d064e050147001d32d0..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_coder_test.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
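The contract above implies that decode inverts encode with respect to the same anchors. A sketch with a deliberately trivial coder (illustrative; real coders, such as the one behind the FASTER_RCNN constant, parameterize boxes relative to their anchors):

import tensorflow as tf

from object_detection.core import box_coder
from object_detection.core import box_list


class ScaleBoxCoder(box_coder.BoxCoder):
  """Illustrative coder whose codes are box corners scaled by a constant."""

  @property
  def code_size(self):
    return 4

  def _encode(self, boxes, anchors):
    return 4.0 * boxes.get()

  def _decode(self, rel_codes, anchors):
    return box_list.BoxList(rel_codes / 4.0)


anchors = box_list.BoxList(tf.constant([[0., 0., 1., 1.]]))
boxes = box_list.BoxList(tf.constant([[0.1, 0.2, 0.5, 0.6]]))
coder = ScaleBoxCoder()
# Round trip: decode(encode(boxes, anchors), anchors) recovers the corners.
round_trip = coder.decode(coder.encode(boxes, anchors), anchors).get()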
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.core.box_coder.""" - -import tensorflow as tf - -from object_detection.core import box_coder -from object_detection.core import box_list - - -class MockBoxCoder(box_coder.BoxCoder): - """Test BoxCoder that encodes/decodes using the multiply-by-two function.""" - - def code_size(self): - return 4 - - def _encode(self, boxes, anchors): - return 2.0 * boxes.get() - - def _decode(self, rel_codes, anchors): - return box_list.BoxList(rel_codes / 2.0) - - -class BoxCoderTest(tf.test.TestCase): - - def test_batch_decode(self): - mock_anchor_corners = tf.constant( - [[0, 0.1, 0.2, 0.3], [0.2, 0.4, 0.4, 0.6]], tf.float32) - mock_anchors = box_list.BoxList(mock_anchor_corners) - mock_box_coder = MockBoxCoder() - - expected_boxes = [[[0.0, 0.1, 0.5, 0.6], [0.5, 0.6, 0.7, 0.8]], - [[0.1, 0.2, 0.3, 0.4], [0.7, 0.8, 0.9, 1.0]]] - - encoded_boxes_list = [mock_box_coder.encode( - box_list.BoxList(tf.constant(boxes)), mock_anchors) - for boxes in expected_boxes] - encoded_boxes = tf.stack(encoded_boxes_list) - decoded_boxes = box_coder.batch_decode( - encoded_boxes, mock_box_coder, mock_anchors) - - with self.test_session() as sess: - decoded_boxes_result = sess.run(decoded_boxes) - self.assertAllClose(expected_boxes, decoded_boxes_result) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list.py deleted file mode 100644 index c0196f053030b103a6021ac159f6203f77ba1eed..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list.py +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Bounding Box List definition. - -BoxList represents a list of bounding boxes as tensorflow -tensors, where each bounding box is represented as a row of 4 numbers, -[y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes -within a given list correspond to a single image. See also -box_list_ops.py for common box related operations (such as area, iou, etc). - -Optionally, users can add additional related fields (such as weights). 
-We assume the following things to be true about fields:
-* they correspond to boxes in the box_list along the 0th dimension
-* they have inferrable rank at graph construction time
-* all dimensions except for possibly the 0th can be inferred
-  (i.e., not None) at graph construction time.
-
-Some other notes:
-  * Following tensorflow conventions, we use height, width ordering,
-  and correspondingly, y,x (or ymin, xmin, ymax, xmax) ordering
-  * Tensors are always provided as (flat) [N, 4] tensors.
-"""
-
-import tensorflow as tf
-
-
-class BoxList(object):
-  """Box collection."""
-
-  def __init__(self, boxes):
-    """Constructs box collection.
-
-    Args:
-      boxes: a tensor of shape [N, 4] representing box corners
-
-    Raises:
-      ValueError: if invalid dimensions for bbox data or if bbox data is not in
-        float32 format.
-    """
-    if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
-      raise ValueError('Invalid dimensions for box data.')
-    if boxes.dtype != tf.float32:
-      raise ValueError('Invalid tensor type: should be tf.float32')
-    self.data = {'boxes': boxes}
-
-  def num_boxes(self):
-    """Returns number of boxes held in collection.
-
-    Returns:
-      a tensor representing the number of boxes held in the collection.
-    """
-    return tf.shape(self.data['boxes'])[0]
-
-  def num_boxes_static(self):
-    """Returns number of boxes held in collection.
-
-    This number is inferred at graph construction time rather than run-time.
-
-    Returns:
-      Number of boxes held in collection (integer) or None if this is not
-        inferrable at graph construction time.
-    """
-    return self.data['boxes'].get_shape()[0].value
-
-  def get_all_fields(self):
-    """Returns all fields."""
-    return self.data.keys()
-
-  def get_extra_fields(self):
-    """Returns all non-box fields (i.e., everything not named 'boxes')."""
-    return [k for k in self.data.keys() if k != 'boxes']
-
-  def add_field(self, field, field_data):
-    """Add field to box list.
-
-    This method can be used to add related box data such as
-    weights/labels, etc.
-
-    Args:
-      field: a string key to access the data via `get`
-      field_data: a tensor containing the data to store in the BoxList
-    """
-    self.data[field] = field_data
-
-  def has_field(self, field):
-    """Returns whether the box list contains the given field."""
-    return field in self.data
-
-  def get(self):
-    """Convenience function for accessing box coordinates.
-
-    Returns:
-      a tensor with shape [N, 4] representing box coordinates.
-    """
-    return self.get_field('boxes')
-
-  def set(self, boxes):
-    """Convenience function for setting box coordinates.
-
-    Args:
-      boxes: a tensor of shape [N, 4] representing box corners
-
-    Raises:
-      ValueError: if invalid dimensions for bbox data
-    """
-    if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
-      raise ValueError('Invalid dimensions for box data.')
-    self.data['boxes'] = boxes
-
-  def get_field(self, field):
-    """Accesses data associated with the specified field in the box collection.
-
-    Args:
-      field: a string parameter specifying the field to be accessed
-        ('boxes' for the box coordinates, or the name of a field previously
-        added via `add_field`).
-
-    Returns:
-      a tensor representing the requested field.
-
-    Raises:
-      ValueError: if invalid field
-    """
-    if not self.has_field(field):
-      raise ValueError('field ' + str(field) + ' does not exist')
-    return self.data[field]
-
-  def set_field(self, field, value):
-    """Sets the value of a field.
-
-    Updates the field of a box_list with a given value.
-
-    Args:
-      field: (string) name of the field to set value.
- value: the value to assign to the field. - - Raises: - ValueError: if the box_list does not have specified field. - """ - if not self.has_field(field): - raise ValueError('field %s does not exist' % field) - self.data[field] = value - - def get_center_coordinates_and_sizes(self, scope=None): - """Computes the center coordinates, height and width of the boxes. - - Args: - scope: name scope of the function. - - Returns: - a list of 4 1-D tensors [ycenter, xcenter, height, width]. - """ - with tf.name_scope(scope, 'get_center_coordinates_and_sizes'): - box_corners = self.get() - ymin, xmin, ymax, xmax = tf.unstack(tf.transpose(box_corners)) - width = xmax - xmin - height = ymax - ymin - ycenter = ymin + height / 2. - xcenter = xmin + width / 2. - return [ycenter, xcenter, height, width] - - def transpose_coordinates(self, scope=None): - """Transpose the coordinate representation in a boxlist. - - Args: - scope: name scope of the function. - """ - with tf.name_scope(scope, 'transpose_coordinates'): - y_min, x_min, y_max, x_max = tf.split( - value=self.get(), num_or_size_splits=4, axis=1) - self.set(tf.concat([x_min, y_min, x_max, y_max], 1)) - - def as_tensor_dict(self, fields=None): - """Retrieves specified fields as a dictionary of tensors. - - Args: - fields: (optional) list of fields to return in the dictionary. - If None (default), all fields are returned. - - Returns: - tensor_dict: A dictionary of tensors specified by fields. - - Raises: - ValueError: if specified field is not contained in boxlist. - """ - tensor_dict = {} - if fields is None: - fields = self.get_all_fields() - for field in fields: - if not self.has_field(field): - raise ValueError('boxlist must contain all specified fields') - tensor_dict[field] = self.get_field(field) - return tensor_dict diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list_ops.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list_ops.py deleted file mode 100644 index a755ef68ec81d6802f0ea6e8d1e3f613aff8fdef..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list_ops.py +++ /dev/null @@ -1,1061 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Bounding Box List operations. - -Example box operations that are supported: - * areas: compute bounding box areas - * iou: pairwise intersection-over-union scores - * sq_dist: pairwise distances between bounding boxes - -Whenever box_list_ops functions output a BoxList, the fields of the incoming -BoxList are retained unless documented otherwise. -""" -import tensorflow as tf - -from object_detection.core import box_list -from object_detection.utils import shape_utils - - -class SortOrder(object): - """Enum class for sort order. - - Attributes: - ascend: ascend order. - descend: descend order. 
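A short usage sketch of the container defined above (assuming TF 1.x; values illustrative):

import tensorflow as tf

from object_detection.core import box_list

# Boxes are [N, 4] float32 tensors in [ymin, xmin, ymax, xmax] order.
boxes = tf.constant([[0.0, 0.0, 0.5, 0.5],
                     [0.2, 0.2, 0.9, 0.8]], tf.float32)
boxlist = box_list.BoxList(boxes)

# Extra fields ride along the 0th (box) dimension.
boxlist.add_field('scores', tf.constant([0.9, 0.4]))

ycenter, xcenter, height, width = boxlist.get_center_coordinates_and_sizes()
scores = boxlist.get_field('scores')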
- """ - ascend = 1 - descend = 2 - - -def area(boxlist, scope=None): - """Computes area of boxes. - - Args: - boxlist: BoxList holding N boxes - scope: name scope. - - Returns: - a tensor with shape [N] representing box areas. - """ - with tf.name_scope(scope, 'Area'): - y_min, x_min, y_max, x_max = tf.split( - value=boxlist.get(), num_or_size_splits=4, axis=1) - return tf.squeeze((y_max - y_min) * (x_max - x_min), [1]) - - -def height_width(boxlist, scope=None): - """Computes height and width of boxes in boxlist. - - Args: - boxlist: BoxList holding N boxes - scope: name scope. - - Returns: - Height: A tensor with shape [N] representing box heights. - Width: A tensor with shape [N] representing box widths. - """ - with tf.name_scope(scope, 'HeightWidth'): - y_min, x_min, y_max, x_max = tf.split( - value=boxlist.get(), num_or_size_splits=4, axis=1) - return tf.squeeze(y_max - y_min, [1]), tf.squeeze(x_max - x_min, [1]) - - -def scale(boxlist, y_scale, x_scale, scope=None): - """scale box coordinates in x and y dimensions. - - Args: - boxlist: BoxList holding N boxes - y_scale: (float) scalar tensor - x_scale: (float) scalar tensor - scope: name scope. - - Returns: - boxlist: BoxList holding N boxes - """ - with tf.name_scope(scope, 'Scale'): - y_scale = tf.cast(y_scale, tf.float32) - x_scale = tf.cast(x_scale, tf.float32) - y_min, x_min, y_max, x_max = tf.split( - value=boxlist.get(), num_or_size_splits=4, axis=1) - y_min = y_scale * y_min - y_max = y_scale * y_max - x_min = x_scale * x_min - x_max = x_scale * x_max - scaled_boxlist = box_list.BoxList( - tf.concat([y_min, x_min, y_max, x_max], 1)) - return _copy_extra_fields(scaled_boxlist, boxlist) - - -def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None): - """Clip bounding boxes to a window. - - This op clips any input bounding boxes (represented by bounding box - corners) to a window, optionally filtering out boxes that do not - overlap at all with the window. - - Args: - boxlist: BoxList holding M_in boxes - window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max] - window to which the op should clip boxes. - filter_nonoverlapping: whether to filter out boxes that do not overlap at - all with the window. - scope: name scope. - - Returns: - a BoxList holding M_out boxes where M_out <= M_in - """ - with tf.name_scope(scope, 'ClipToWindow'): - y_min, x_min, y_max, x_max = tf.split( - value=boxlist.get(), num_or_size_splits=4, axis=1) - win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) - y_min_clipped = tf.maximum(tf.minimum(y_min, win_y_max), win_y_min) - y_max_clipped = tf.maximum(tf.minimum(y_max, win_y_max), win_y_min) - x_min_clipped = tf.maximum(tf.minimum(x_min, win_x_max), win_x_min) - x_max_clipped = tf.maximum(tf.minimum(x_max, win_x_max), win_x_min) - clipped = box_list.BoxList( - tf.concat([y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped], - 1)) - clipped = _copy_extra_fields(clipped, boxlist) - if filter_nonoverlapping: - areas = area(clipped) - nonzero_area_indices = tf.cast( - tf.reshape(tf.where(tf.greater(areas, 0.0)), [-1]), tf.int32) - clipped = gather(clipped, nonzero_area_indices) - return clipped - - -def prune_outside_window(boxlist, window, scope=None): - """Prunes bounding boxes that fall outside a given window. - - This function prunes bounding boxes that even partially fall outside the given - window. 
See also clip_to_window which only prunes bounding boxes that fall
-  completely outside the window, and clips any bounding boxes that partially
-  overflow.
-
-  Args:
-    boxlist: a BoxList holding M_in boxes.
-    window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
-      of the window
-    scope: name scope.
-
-  Returns:
-    pruned_boxlist: a BoxList holding M_out boxes, where M_out <= M_in
-    valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
-      in the input tensor.
-  """
-  with tf.name_scope(scope, 'PruneOutsideWindow'):
-    y_min, x_min, y_max, x_max = tf.split(
-        value=boxlist.get(), num_or_size_splits=4, axis=1)
-    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
-    coordinate_violations = tf.concat([
-        tf.less(y_min, win_y_min), tf.less(x_min, win_x_min),
-        tf.greater(y_max, win_y_max), tf.greater(x_max, win_x_max)
-    ], 1)
-    valid_indices = tf.reshape(
-        tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
-    return gather(boxlist, valid_indices), valid_indices
-
-
-def prune_completely_outside_window(boxlist, window, scope=None):
-  """Prunes bounding boxes that fall completely outside of the given window.
-
-  The function clip_to_window prunes bounding boxes that fall
-  completely outside the window, but also clips any bounding boxes that
-  partially overflow. This function does not clip partially overflowing boxes.
-
-  Args:
-    boxlist: a BoxList holding M_in boxes.
-    window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
-      of the window
-    scope: name scope.
-
-  Returns:
-    pruned_boxlist: a new BoxList with all bounding boxes partially or fully in
-      the window.
-    valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
-      in the input tensor.
-  """
-  with tf.name_scope(scope, 'PruneCompletelyOutsideWindow'):
-    y_min, x_min, y_max, x_max = tf.split(
-        value=boxlist.get(), num_or_size_splits=4, axis=1)
-    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
-    coordinate_violations = tf.concat([
-        tf.greater_equal(y_min, win_y_max), tf.greater_equal(x_min, win_x_max),
-        tf.less_equal(y_max, win_y_min), tf.less_equal(x_max, win_x_min)
-    ], 1)
-    valid_indices = tf.reshape(
-        tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
-    return gather(boxlist, valid_indices), valid_indices
-
-
-def intersection(boxlist1, boxlist2, scope=None):
-  """Compute pairwise intersection areas between boxes.
-
-  Args:
-    boxlist1: BoxList holding N boxes
-    boxlist2: BoxList holding M boxes
-    scope: name scope.
-
-  Returns:
-    a tensor with shape [N, M] representing pairwise intersections
-  """
-  with tf.name_scope(scope, 'Intersection'):
-    y_min1, x_min1, y_max1, x_max1 = tf.split(
-        value=boxlist1.get(), num_or_size_splits=4, axis=1)
-    y_min2, x_min2, y_max2, x_max2 = tf.split(
-        value=boxlist2.get(), num_or_size_splits=4, axis=1)
-    all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2))
-    all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2))
-    intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin)
-    all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2))
-    all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2))
-    intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin)
-    return intersect_heights * intersect_widths
-
-
-def matched_intersection(boxlist1, boxlist2, scope=None):
-  """Compute intersection areas between corresponding boxes in two boxlists.
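-
-  Unlike intersection, which returns an [N, M] matrix of all pairwise
-  overlaps, this op assumes the two boxlists hold the same number of boxes
-  and compares box i of boxlist1 with box i of boxlist2.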
- - Args: - boxlist1: BoxList holding N boxes - boxlist2: BoxList holding N boxes - scope: name scope. - - Returns: - a tensor with shape [N] representing pairwise intersections - """ - with tf.name_scope(scope, 'MatchedIntersection'): - y_min1, x_min1, y_max1, x_max1 = tf.split( - value=boxlist1.get(), num_or_size_splits=4, axis=1) - y_min2, x_min2, y_max2, x_max2 = tf.split( - value=boxlist2.get(), num_or_size_splits=4, axis=1) - min_ymax = tf.minimum(y_max1, y_max2) - max_ymin = tf.maximum(y_min1, y_min2) - intersect_heights = tf.maximum(0.0, min_ymax - max_ymin) - min_xmax = tf.minimum(x_max1, x_max2) - max_xmin = tf.maximum(x_min1, x_min2) - intersect_widths = tf.maximum(0.0, min_xmax - max_xmin) - return tf.reshape(intersect_heights * intersect_widths, [-1]) - - -def iou(boxlist1, boxlist2, scope=None): - """Computes pairwise intersection-over-union between box collections. - - Args: - boxlist1: BoxList holding N boxes - boxlist2: BoxList holding M boxes - scope: name scope. - - Returns: - a tensor with shape [N, M] representing pairwise iou scores. - """ - with tf.name_scope(scope, 'IOU'): - intersections = intersection(boxlist1, boxlist2) - areas1 = area(boxlist1) - areas2 = area(boxlist2) - unions = ( - tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections) - return tf.where( - tf.equal(intersections, 0.0), - tf.zeros_like(intersections), tf.truediv(intersections, unions)) - - -def matched_iou(boxlist1, boxlist2, scope=None): - """Compute intersection-over-union between corresponding boxes in boxlists. - - Args: - boxlist1: BoxList holding N boxes - boxlist2: BoxList holding N boxes - scope: name scope. - - Returns: - a tensor with shape [N] representing pairwise iou scores. - """ - with tf.name_scope(scope, 'MatchedIOU'): - intersections = matched_intersection(boxlist1, boxlist2) - areas1 = area(boxlist1) - areas2 = area(boxlist2) - unions = areas1 + areas2 - intersections - return tf.where( - tf.equal(intersections, 0.0), - tf.zeros_like(intersections), tf.truediv(intersections, unions)) - - -def ioa(boxlist1, boxlist2, scope=None): - """Computes pairwise intersection-over-area between box collections. - - intersection-over-area (IOA) between two boxes box1 and box2 is defined as - their intersection area over box2's area. Note that ioa is not symmetric, - that is, ioa(box1, box2) != ioa(box2, box1). - - Args: - boxlist1: BoxList holding N boxes - boxlist2: BoxList holding M boxes - scope: name scope. - - Returns: - a tensor with shape [N, M] representing pairwise ioa scores. - """ - with tf.name_scope(scope, 'IOA'): - intersections = intersection(boxlist1, boxlist2) - areas = tf.expand_dims(area(boxlist2), 0) - return tf.truediv(intersections, areas) - - -def prune_non_overlapping_boxes( - boxlist1, boxlist2, min_overlap=0.0, scope=None): - """Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2. - - For each box in boxlist1, we want its IOA to be more than minoverlap with - at least one of the boxes in boxlist2. If it does not, we remove it. - - Args: - boxlist1: BoxList holding N boxes. - boxlist2: BoxList holding M boxes. - min_overlap: Minimum required overlap between boxes, to count them as - overlapping. - scope: name scope. - - Returns: - new_boxlist1: A pruned boxlist with size [N', 4]. - keep_inds: A tensor with shape [N'] indexing kept bounding boxes in the - first input BoxList `boxlist1`. 
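-
-  A minimal usage sketch (illustrative, not from the original file):
-    pruned, kept = prune_non_overlapping_boxes(boxes1, boxes2, min_overlap=0.5)
-  keeps only the boxes in boxes1 whose IOA with some box in boxes2 is at
-  least 0.5.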
- """ - with tf.name_scope(scope, 'PruneNonOverlappingBoxes'): - ioa_ = ioa(boxlist2, boxlist1) # [M, N] tensor - ioa_ = tf.reduce_max(ioa_, reduction_indices=[0]) # [N] tensor - keep_bool = tf.greater_equal(ioa_, tf.constant(min_overlap)) - keep_inds = tf.squeeze(tf.where(keep_bool), squeeze_dims=[1]) - new_boxlist1 = gather(boxlist1, keep_inds) - return new_boxlist1, keep_inds - - -def prune_small_boxes(boxlist, min_side, scope=None): - """Prunes small boxes in the boxlist which have a side smaller than min_side. - - Args: - boxlist: BoxList holding N boxes. - min_side: Minimum width AND height of box to survive pruning. - scope: name scope. - - Returns: - A pruned boxlist. - """ - with tf.name_scope(scope, 'PruneSmallBoxes'): - height, width = height_width(boxlist) - is_valid = tf.logical_and(tf.greater_equal(width, min_side), - tf.greater_equal(height, min_side)) - return gather(boxlist, tf.reshape(tf.where(is_valid), [-1])) - - -def change_coordinate_frame(boxlist, window, scope=None): - """Change coordinate frame of the boxlist to be relative to window's frame. - - Given a window of the form [ymin, xmin, ymax, xmax], - changes bounding box coordinates from boxlist to be relative to this window - (e.g., the min corner maps to (0,0) and the max corner maps to (1,1)). - - An example use case is data augmentation: where we are given groundtruth - boxes (boxlist) and would like to randomly crop the image to some - window (window). In this case we need to change the coordinate frame of - each groundtruth box to be relative to this new window. - - Args: - boxlist: A BoxList object holding N boxes. - window: A rank 1 tensor [4]. - scope: name scope. - - Returns: - Returns a BoxList object with N boxes. - """ - with tf.name_scope(scope, 'ChangeCoordinateFrame'): - win_height = window[2] - window[0] - win_width = window[3] - window[1] - boxlist_new = scale(box_list.BoxList( - boxlist.get() - [window[0], window[1], window[0], window[1]]), - 1.0 / win_height, 1.0 / win_width) - boxlist_new = _copy_extra_fields(boxlist_new, boxlist) - return boxlist_new - - -def sq_dist(boxlist1, boxlist2, scope=None): - """Computes the pairwise squared distances between box corners. - - This op treats each box as if it were a point in a 4d Euclidean space and - computes pairwise squared distances. - - Mathematically, we are given two matrices of box coordinates X and Y, - where X(i,:) is the i'th row of X, containing the 4 numbers defining the - corners of the i'th box in boxlist1. Similarly Y(j,:) corresponds to - boxlist2. We compute - Z(i,j) = ||X(i,:) - Y(j,:)||^2 - = ||X(i,:)||^2 + ||Y(j,:)||^2 - 2 X(i,:)' * Y(j,:), - - Args: - boxlist1: BoxList holding N boxes - boxlist2: BoxList holding M boxes - scope: name scope. - - Returns: - a tensor with shape [N, M] representing pairwise distances - """ - with tf.name_scope(scope, 'SqDist'): - sqnorm1 = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True) - sqnorm2 = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True) - innerprod = tf.matmul(boxlist1.get(), boxlist2.get(), - transpose_a=False, transpose_b=True) - return sqnorm1 + tf.transpose(sqnorm2) - 2.0 * innerprod - - -def boolean_mask(boxlist, indicator, fields=None, scope=None): - """Select boxes from BoxList according to indicator and return new BoxList. - - `boolean_mask` returns the subset of boxes that are marked as "True" by the - indicator tensor. 
By default, `boolean_mask` returns the boxes selected by the indicator,
-  as well as all additional fields stored in the boxlist (masking along the
-  first dimension). However one can optionally mask only a subset of fields.
-
-  Args:
-    boxlist: BoxList holding N boxes
-    indicator: a rank-1 boolean tensor
-    fields: (optional) list of fields to also gather from. If None (default),
-      all fields are gathered from. Pass an empty fields list to only gather
-      the box coordinates.
-    scope: name scope.
-
-  Returns:
-    subboxlist: a BoxList corresponding to the subset of the input BoxList
-      specified by indicator
-  Raises:
-    ValueError: if `indicator` is not a rank-1 boolean tensor.
-    ValueError: if boxlist does not contain all of the specified fields.
-  """
-  with tf.name_scope(scope, 'BooleanMask'):
-    if indicator.shape.ndims != 1:
-      raise ValueError('indicator should have rank 1')
-    if indicator.dtype != tf.bool:
-      raise ValueError('indicator should be a boolean tensor')
-    subboxlist = box_list.BoxList(tf.boolean_mask(boxlist.get(), indicator))
-    if fields is None:
-      fields = boxlist.get_extra_fields()
-    for field in fields:
-      if not boxlist.has_field(field):
-        raise ValueError('boxlist must contain all specified fields')
-      subfieldlist = tf.boolean_mask(boxlist.get_field(field), indicator)
-      subboxlist.add_field(field, subfieldlist)
-    return subboxlist
-
-
-def gather(boxlist, indices, fields=None, scope=None):
-  """Gather boxes from BoxList according to indices and return new BoxList.
-
-  By default, `gather` returns boxes corresponding to the input index list, as
-  well as all additional fields stored in the boxlist (indexing into the
-  first dimension). However one can optionally only gather from a
-  subset of fields.
-
-  Args:
-    boxlist: BoxList holding N boxes
-    indices: a rank-1 tensor of type int32 / int64
-    fields: (optional) list of fields to also gather from. If None (default),
-      all fields are gathered from. Pass an empty fields list to only gather
-      the box coordinates.
-    scope: name scope.
-
-  Returns:
-    subboxlist: a BoxList corresponding to the subset of the input BoxList
-      specified by indices
-  Raises:
-    ValueError: if specified field is not contained in boxlist or if the
-      indices are not of type int32 / int64
-  """
-  with tf.name_scope(scope, 'Gather'):
-    if len(indices.shape.as_list()) != 1:
-      raise ValueError('indices should have rank 1')
-    if indices.dtype != tf.int32 and indices.dtype != tf.int64:
-      raise ValueError('indices should be an int32 / int64 tensor')
-    subboxlist = box_list.BoxList(tf.gather(boxlist.get(), indices))
-    if fields is None:
-      fields = boxlist.get_extra_fields()
-    for field in fields:
-      if not boxlist.has_field(field):
-        raise ValueError('boxlist must contain all specified fields')
-      subfieldlist = tf.gather(boxlist.get_field(field), indices)
-      subboxlist.add_field(field, subfieldlist)
-    return subboxlist
-
-
-def concatenate(boxlists, fields=None, scope=None):
-  """Concatenate list of BoxLists.
-
-  This op concatenates a list of input BoxLists into a larger BoxList. It also
-  handles concatenation of BoxList fields as long as the field tensor shapes
-  are equal except for the first dimension.
-
-  Args:
-    boxlists: list of BoxList objects
-    fields: optional list of fields to also concatenate. By default, all
-      fields from the first BoxList in the list are included in the
-      concatenation.
-    scope: name scope.
- - Returns: - a BoxList with number of boxes equal to - sum([boxlist.num_boxes() for boxlist in BoxList]) - Raises: - ValueError: if boxlists is invalid (i.e., is not a list, is empty, or - contains non BoxList objects), or if requested fields are not contained in - all boxlists - """ - with tf.name_scope(scope, 'Concatenate'): - if not isinstance(boxlists, list): - raise ValueError('boxlists should be a list') - if not boxlists: - raise ValueError('boxlists should have nonzero length') - for boxlist in boxlists: - if not isinstance(boxlist, box_list.BoxList): - raise ValueError('all elements of boxlists should be BoxList objects') - concatenated = box_list.BoxList( - tf.concat([boxlist.get() for boxlist in boxlists], 0)) - if fields is None: - fields = boxlists[0].get_extra_fields() - for field in fields: - first_field_shape = boxlists[0].get_field(field).get_shape().as_list() - first_field_shape[0] = -1 - if None in first_field_shape: - raise ValueError('field %s must have fully defined shape except for the' - ' 0th dimension.' % field) - for boxlist in boxlists: - if not boxlist.has_field(field): - raise ValueError('boxlist must contain all requested fields') - field_shape = boxlist.get_field(field).get_shape().as_list() - field_shape[0] = -1 - if field_shape != first_field_shape: - raise ValueError('field %s must have same shape for all boxlists ' - 'except for the 0th dimension.' % field) - concatenated_field = tf.concat( - [boxlist.get_field(field) for boxlist in boxlists], 0) - concatenated.add_field(field, concatenated_field) - return concatenated - - -def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None): - """Sort boxes and associated fields according to a scalar field. - - A common use case is reordering the boxes according to descending scores. - - Args: - boxlist: BoxList holding N boxes. - field: A BoxList field for sorting and reordering the BoxList. - order: (Optional) descend or ascend. Default is descend. - scope: name scope. - - Returns: - sorted_boxlist: A sorted BoxList with the field in the specified order. - - Raises: - ValueError: if specified field does not exist - ValueError: if the order is not either descend or ascend - """ - with tf.name_scope(scope, 'SortByField'): - if order != SortOrder.descend and order != SortOrder.ascend: - raise ValueError('Invalid sort order') - - field_to_sort = boxlist.get_field(field) - if len(field_to_sort.shape.as_list()) != 1: - raise ValueError('Field should have rank 1') - - num_boxes = boxlist.num_boxes() - num_entries = tf.size(field_to_sort) - length_assert = tf.Assert( - tf.equal(num_boxes, num_entries), - ['Incorrect field size: actual vs expected.', num_entries, num_boxes]) - - with tf.control_dependencies([length_assert]): - # TODO(derekjchow): Remove with tf.device when top_k operation runs - # correctly on GPU. - with tf.device('/cpu:0'): - _, sorted_indices = tf.nn.top_k(field_to_sort, num_boxes, sorted=True) - - if order == SortOrder.ascend: - sorted_indices = tf.reverse_v2(sorted_indices, [0]) - - return gather(boxlist, sorted_indices) - - -def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None): - """Overlay bounding box list on image. - - Currently this visualization plots a 1 pixel thick red bounding box on top - of the image. Note that tf.image.draw_bounding_boxes essentially is - 1 indexed. 
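-  The input image is not modified in place; tf.image.draw_bounding_boxes
-  returns a new tensor with the boxes drawn on top.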
- - Args: - image: an image tensor with shape [height, width, 3] - boxlist: a BoxList - normalized: (boolean) specify whether corners are to be interpreted - as absolute coordinates in image space or normalized with respect to the - image size. - scope: name scope. - - Returns: - image_and_boxes: an image tensor with shape [height, width, 3] - """ - with tf.name_scope(scope, 'VisualizeBoxesInImage'): - if not normalized: - height, width, _ = tf.unstack(tf.shape(image)) - boxlist = scale(boxlist, - 1.0 / tf.cast(height, tf.float32), - 1.0 / tf.cast(width, tf.float32)) - corners = tf.expand_dims(boxlist.get(), 0) - image = tf.expand_dims(image, 0) - return tf.squeeze(tf.image.draw_bounding_boxes(image, corners), [0]) - - -def filter_field_value_equals(boxlist, field, value, scope=None): - """Filter to keep only boxes with field entries equal to the given value. - - Args: - boxlist: BoxList holding N boxes. - field: field name for filtering. - value: scalar value. - scope: name scope. - - Returns: - a BoxList holding M boxes where M <= N - - Raises: - ValueError: if boxlist not a BoxList object or if it does not have - the specified field. - """ - with tf.name_scope(scope, 'FilterFieldValueEquals'): - if not isinstance(boxlist, box_list.BoxList): - raise ValueError('boxlist must be a BoxList') - if not boxlist.has_field(field): - raise ValueError('boxlist must contain the specified field') - filter_field = boxlist.get_field(field) - gather_index = tf.reshape(tf.where(tf.equal(filter_field, value)), [-1]) - return gather(boxlist, gather_index) - - -def filter_greater_than(boxlist, thresh, scope=None): - """Filter to keep only boxes with score exceeding a given threshold. - - This op keeps the collection of boxes whose corresponding scores are - greater than the input threshold. - - TODO(jonathanhuang): Change function name to filter_scores_greater_than - - Args: - boxlist: BoxList holding N boxes. Must contain a 'scores' field - representing detection scores. - thresh: scalar threshold - scope: name scope. - - Returns: - a BoxList holding M boxes where M <= N - - Raises: - ValueError: if boxlist not a BoxList object or if it does not - have a scores field - """ - with tf.name_scope(scope, 'FilterGreaterThan'): - if not isinstance(boxlist, box_list.BoxList): - raise ValueError('boxlist must be a BoxList') - if not boxlist.has_field('scores'): - raise ValueError('input boxlist must have \'scores\' field') - scores = boxlist.get_field('scores') - if len(scores.shape.as_list()) > 2: - raise ValueError('Scores should have rank 1 or 2') - if len(scores.shape.as_list()) == 2 and scores.shape.as_list()[1] != 1: - raise ValueError('Scores should have rank 1 or have shape ' - 'consistent with [None, 1]') - high_score_indices = tf.cast(tf.reshape( - tf.where(tf.greater(scores, thresh)), - [-1]), tf.int32) - return gather(boxlist, high_score_indices) - - -def non_max_suppression(boxlist, thresh, max_output_size, scope=None): - """Non maximum suppression. - - This op greedily selects a subset of detection bounding boxes, pruning - away boxes that have high IOU (intersection over union) overlap (> thresh) - with already selected boxes. Note that this only works for a single class --- - to apply NMS to multi-class predictions, use MultiClassNonMaxSuppression. - - Args: - boxlist: BoxList holding N boxes. Must contain a 'scores' field - representing detection scores. - thresh: scalar threshold - max_output_size: maximum number of retained boxes - scope: name scope. 
- - Returns: - a BoxList holding M boxes where M <= max_output_size - Raises: - ValueError: if thresh is not in [0, 1] - """ - with tf.name_scope(scope, 'NonMaxSuppression'): - if not 0 <= thresh <= 1.0: - raise ValueError('thresh must be between 0 and 1') - if not isinstance(boxlist, box_list.BoxList): - raise ValueError('boxlist must be a BoxList') - if not boxlist.has_field('scores'): - raise ValueError('input boxlist must have \'scores\' field') - selected_indices = tf.image.non_max_suppression( - boxlist.get(), boxlist.get_field('scores'), - max_output_size, iou_threshold=thresh) - return gather(boxlist, selected_indices) - - -def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from): - """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to. - - Args: - boxlist_to_copy_to: BoxList to which extra fields are copied. - boxlist_to_copy_from: BoxList from which fields are copied. - - Returns: - boxlist_to_copy_to with extra fields. - """ - for field in boxlist_to_copy_from.get_extra_fields(): - boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field)) - return boxlist_to_copy_to - - -def to_normalized_coordinates(boxlist, height, width, - check_range=True, scope=None): - """Converts absolute box coordinates to normalized coordinates in [0, 1]. - - Usually one uses the dynamic shape of the image or conv-layer tensor: - boxlist = box_list_ops.to_normalized_coordinates(boxlist, - tf.shape(images)[1], - tf.shape(images)[2]), - - This function raises an assertion failed error at graph execution time when - the maximum coordinate is smaller than 1.01 (which means that coordinates are - already normalized). The value 1.01 is to deal with small rounding errors. - - Args: - boxlist: BoxList with coordinates in terms of pixel-locations. - height: Maximum value for height of absolute box coordinates. - width: Maximum value for width of absolute box coordinates. - check_range: If True, checks if the coordinates are normalized or not. - scope: name scope. - - Returns: - boxlist with normalized coordinates in [0, 1]. - """ - with tf.name_scope(scope, 'ToNormalizedCoordinates'): - height = tf.cast(height, tf.float32) - width = tf.cast(width, tf.float32) - - if check_range: - max_val = tf.reduce_max(boxlist.get()) - max_assert = tf.Assert(tf.greater(max_val, 1.01), - ['max value is lower than 1.01: ', max_val]) - with tf.control_dependencies([max_assert]): - width = tf.identity(width) - - return scale(boxlist, 1 / height, 1 / width) - - -def to_absolute_coordinates(boxlist, - height, - width, - check_range=True, - maximum_normalized_coordinate=1.1, - scope=None): - """Converts normalized box coordinates to absolute pixel coordinates. - - This function raises an assertion failed error when the maximum box coordinate - value is larger than maximum_normalized_coordinate (in which case coordinates - are already absolute). - - Args: - boxlist: BoxList with coordinates in range [0, 1]. - height: Maximum value for height of absolute box coordinates. - width: Maximum value for width of absolute box coordinates. - check_range: If True, checks if the coordinates are normalized or not. - maximum_normalized_coordinate: Maximum coordinate value to be considered - as normalized, default to 1.1. - scope: name scope. - - Returns: - boxlist with absolute coordinates in terms of the image size. - - """ - with tf.name_scope(scope, 'ToAbsoluteCoordinates'): - height = tf.cast(height, tf.float32) - width = tf.cast(width, tf.float32) - - # Ensure range of input boxes is correct. 
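-    # Note (added for clarity): the assert below fails at run time if any box
-    # coordinate exceeds maximum_normalized_coordinate, which guards against
-    # converting boxes that are already in absolute pixel units.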
- if check_range: - box_maximum = tf.reduce_max(boxlist.get()) - max_assert = tf.Assert( - tf.greater_equal(maximum_normalized_coordinate, box_maximum), - ['maximum box coordinate value is larger ' - 'than %f: ' % maximum_normalized_coordinate, box_maximum]) - with tf.control_dependencies([max_assert]): - width = tf.identity(width) - - return scale(boxlist, height, width) - - -def refine_boxes_multi_class(pool_boxes, - num_classes, - nms_iou_thresh, - nms_max_detections, - voting_iou_thresh=0.5): - """Refines a pool of boxes using non max suppression and box voting. - - Box refinement is done independently for each class. - - Args: - pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must - have a rank 1 'scores' field and a rank 1 'classes' field. - num_classes: (int scalar) Number of classes. - nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS). - nms_max_detections: (int scalar) maximum output size for NMS. - voting_iou_thresh: (float scalar) iou threshold for box voting. - - Returns: - BoxList of refined boxes. - - Raises: - ValueError: if - a) nms_iou_thresh or voting_iou_thresh is not in [0, 1]. - b) pool_boxes is not a BoxList. - c) pool_boxes does not have a scores and classes field. - """ - if not 0.0 <= nms_iou_thresh <= 1.0: - raise ValueError('nms_iou_thresh must be between 0 and 1') - if not 0.0 <= voting_iou_thresh <= 1.0: - raise ValueError('voting_iou_thresh must be between 0 and 1') - if not isinstance(pool_boxes, box_list.BoxList): - raise ValueError('pool_boxes must be a BoxList') - if not pool_boxes.has_field('scores'): - raise ValueError('pool_boxes must have a \'scores\' field') - if not pool_boxes.has_field('classes'): - raise ValueError('pool_boxes must have a \'classes\' field') - - refined_boxes = [] - for i in range(num_classes): - boxes_class = filter_field_value_equals(pool_boxes, 'classes', i) - refined_boxes_class = refine_boxes(boxes_class, nms_iou_thresh, - nms_max_detections, voting_iou_thresh) - refined_boxes.append(refined_boxes_class) - return sort_by_field(concatenate(refined_boxes), 'scores') - - -def refine_boxes(pool_boxes, - nms_iou_thresh, - nms_max_detections, - voting_iou_thresh=0.5): - """Refines a pool of boxes using non max suppression and box voting. - - Args: - pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must - have a rank 1 'scores' field. - nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS). - nms_max_detections: (int scalar) maximum output size for NMS. - voting_iou_thresh: (float scalar) iou threshold for box voting. - - Returns: - BoxList of refined boxes. - - Raises: - ValueError: if - a) nms_iou_thresh or voting_iou_thresh is not in [0, 1]. - b) pool_boxes is not a BoxList. - c) pool_boxes does not have a scores field. - """ - if not 0.0 <= nms_iou_thresh <= 1.0: - raise ValueError('nms_iou_thresh must be between 0 and 1') - if not 0.0 <= voting_iou_thresh <= 1.0: - raise ValueError('voting_iou_thresh must be between 0 and 1') - if not isinstance(pool_boxes, box_list.BoxList): - raise ValueError('pool_boxes must be a BoxList') - if not pool_boxes.has_field('scores'): - raise ValueError('pool_boxes must have a \'scores\' field') - - nms_boxes = non_max_suppression( - pool_boxes, nms_iou_thresh, nms_max_detections) - return box_voting(nms_boxes, pool_boxes, voting_iou_thresh) - - -def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5): - """Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015. 
- - Performs box voting as described in 'Object detection via a multi-region & - semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For - each box 'B' in selected_boxes, we find the set 'S' of boxes in pool_boxes - with iou overlap >= iou_thresh. The location of B is set to the weighted - average location of boxes in S (scores are used for weighting). And the score - of B is set to the average score of boxes in S. - - Args: - selected_boxes: BoxList containing a subset of boxes in pool_boxes. These - boxes are usually selected from pool_boxes using non max suppression. - pool_boxes: BoxList containing a set of (possibly redundant) boxes. - iou_thresh: (float scalar) iou threshold for matching boxes in - selected_boxes and pool_boxes. - - Returns: - BoxList containing averaged locations and scores for each box in - selected_boxes. - - Raises: - ValueError: if - a) selected_boxes or pool_boxes is not a BoxList. - b) if iou_thresh is not in [0, 1]. - c) pool_boxes does not have a scores field. - """ - if not 0.0 <= iou_thresh <= 1.0: - raise ValueError('iou_thresh must be between 0 and 1') - if not isinstance(selected_boxes, box_list.BoxList): - raise ValueError('selected_boxes must be a BoxList') - if not isinstance(pool_boxes, box_list.BoxList): - raise ValueError('pool_boxes must be a BoxList') - if not pool_boxes.has_field('scores'): - raise ValueError('pool_boxes must have a \'scores\' field') - - iou_ = iou(selected_boxes, pool_boxes) - match_indicator = tf.to_float(tf.greater(iou_, iou_thresh)) - num_matches = tf.reduce_sum(match_indicator, 1) - # TODO(kbanoop): Handle the case where some boxes in selected_boxes do not - # match to any boxes in pool_boxes. For such boxes without any matches, we - # should return the original boxes without voting. - match_assert = tf.Assert( - tf.reduce_all(tf.greater(num_matches, 0)), - ['Each box in selected_boxes must match with at least one box ' - 'in pool_boxes.']) - - scores = tf.expand_dims(pool_boxes.get_field('scores'), 1) - scores_assert = tf.Assert( - tf.reduce_all(tf.greater_equal(scores, 0)), - ['Scores must be non negative.']) - - with tf.control_dependencies([scores_assert, match_assert]): - sum_scores = tf.matmul(match_indicator, scores) - averaged_scores = tf.reshape(sum_scores, [-1]) / num_matches - - box_locations = tf.matmul(match_indicator, - pool_boxes.get() * scores) / sum_scores - averaged_boxes = box_list.BoxList(box_locations) - _copy_extra_fields(averaged_boxes, selected_boxes) - averaged_boxes.add_field('scores', averaged_scores) - return averaged_boxes - - -def pad_or_clip_box_list(boxlist, num_boxes, scope=None): - """Pads or clips all fields of a BoxList. - - Args: - boxlist: A BoxList with arbitrary of number of boxes. - num_boxes: First num_boxes in boxlist are kept. - The fields are zero-padded if num_boxes is bigger than the - actual number of boxes. - scope: name scope. - - Returns: - BoxList with all fields padded or clipped. - """ - with tf.name_scope(scope, 'PadOrClipBoxList'): - subboxlist = box_list.BoxList(shape_utils.pad_or_clip_tensor( - boxlist.get(), num_boxes)) - for field in boxlist.get_extra_fields(): - subfield = shape_utils.pad_or_clip_tensor( - boxlist.get_field(field), num_boxes) - subboxlist.add_field(field, subfield) - return subboxlist - - -def select_random_box(boxlist, - default_box=None, - seed=None, - scope=None): - """Selects a random bounding box from a `BoxList`. - - Args: - boxlist: A BoxList. - default_box: A [1, 4] float32 tensor. 
If no boxes are present in `boxlist`, - this default box will be returned. If None, will use a default box of - [[-1., -1., -1., -1.]]. - seed: Random seed. - scope: Name scope. - - Returns: - bbox: A [1, 4] tensor with a random bounding box. - valid: A bool tensor indicating whether a valid bounding box is returned - (True) or whether the default box is returned (False). - """ - with tf.name_scope(scope, 'SelectRandomBox'): - bboxes = boxlist.get() - combined_shape = shape_utils.combined_static_and_dynamic_shape(bboxes) - number_of_boxes = combined_shape[0] - default_box = default_box or tf.constant([[-1., -1., -1., -1.]]) - - def select_box(): - random_index = tf.random_uniform([], - maxval=number_of_boxes, - dtype=tf.int32, - seed=seed) - return tf.expand_dims(bboxes[random_index], axis=0), tf.constant(True) - - return tf.cond( - tf.greater_equal(number_of_boxes, 1), - true_fn=select_box, - false_fn=lambda: (default_box, tf.constant(False))) - - -def get_minimal_coverage_box(boxlist, - default_box=None, - scope=None): - """Creates a single bounding box which covers all boxes in the boxlist. - - Args: - boxlist: A Boxlist. - default_box: A [1, 4] float32 tensor. If no boxes are present in `boxlist`, - this default box will be returned. If None, will use a default box of - [[0., 0., 1., 1.]]. - scope: Name scope. - - Returns: - A [1, 4] float32 tensor with a bounding box that tightly covers all the - boxes in the box list. If the boxlist does not contain any boxes, the - default box is returned. - """ - with tf.name_scope(scope, 'CreateCoverageBox'): - num_boxes = boxlist.num_boxes() - - def coverage_box(bboxes): - y_min, x_min, y_max, x_max = tf.split( - value=bboxes, num_or_size_splits=4, axis=1) - y_min_coverage = tf.reduce_min(y_min, axis=0) - x_min_coverage = tf.reduce_min(x_min, axis=0) - y_max_coverage = tf.reduce_max(y_max, axis=0) - x_max_coverage = tf.reduce_max(x_max, axis=0) - return tf.stack( - [y_min_coverage, x_min_coverage, y_max_coverage, x_max_coverage], - axis=1) - - default_box = default_box or tf.constant([[0., 0., 1., 1.]]) - return tf.cond( - tf.greater_equal(num_boxes, 1), - true_fn=lambda: coverage_box(boxlist.get()), - false_fn=lambda: default_box) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list_ops_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list_ops_test.py deleted file mode 100644 index bb76cfd35af1a077debdf6945c13b04aaac37eca..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list_ops_test.py +++ /dev/null @@ -1,1036 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.core.box_list_ops.""" -import numpy as np -import tensorflow as tf -from tensorflow.python.framework import errors -from tensorflow.python.framework import ops - -from object_detection.core import box_list -from object_detection.core import box_list_ops - - -class BoxListOpsTest(tf.test.TestCase): - """Tests for common bounding box operations.""" - - def test_area(self): - corners = tf.constant([[0.0, 0.0, 10.0, 20.0], [1.0, 2.0, 3.0, 4.0]]) - exp_output = [200.0, 4.0] - boxes = box_list.BoxList(corners) - areas = box_list_ops.area(boxes) - with self.test_session() as sess: - areas_output = sess.run(areas) - self.assertAllClose(areas_output, exp_output) - - def test_height_width(self): - corners = tf.constant([[0.0, 0.0, 10.0, 20.0], [1.0, 2.0, 3.0, 4.0]]) - exp_output_heights = [10., 2.] - exp_output_widths = [20., 2.] - boxes = box_list.BoxList(corners) - heights, widths = box_list_ops.height_width(boxes) - with self.test_session() as sess: - output_heights, output_widths = sess.run([heights, widths]) - self.assertAllClose(output_heights, exp_output_heights) - self.assertAllClose(output_widths, exp_output_widths) - - def test_scale(self): - corners = tf.constant([[0, 0, 100, 200], [50, 120, 100, 140]], - dtype=tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('extra_data', tf.constant([[1], [2]])) - - y_scale = tf.constant(1.0/100) - x_scale = tf.constant(1.0/200) - scaled_boxes = box_list_ops.scale(boxes, y_scale, x_scale) - exp_output = [[0, 0, 1, 1], [0.5, 0.6, 1.0, 0.7]] - with self.test_session() as sess: - scaled_corners_out = sess.run(scaled_boxes.get()) - self.assertAllClose(scaled_corners_out, exp_output) - extra_data_out = sess.run(scaled_boxes.get_field('extra_data')) - self.assertAllEqual(extra_data_out, [[1], [2]]) - - def test_clip_to_window_filter_boxes_which_fall_outside_the_window( - self): - window = tf.constant([0, 0, 9, 14], tf.float32) - corners = tf.constant([[5.0, 5.0, 6.0, 6.0], - [-1.0, -2.0, 4.0, 5.0], - [2.0, 3.0, 5.0, 9.0], - [0.0, 0.0, 9.0, 14.0], - [-100.0, -100.0, 300.0, 600.0], - [-10.0, -10.0, -9.0, -9.0]]) - boxes = box_list.BoxList(corners) - boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]])) - exp_output = [[5.0, 5.0, 6.0, 6.0], [0.0, 0.0, 4.0, 5.0], - [2.0, 3.0, 5.0, 9.0], [0.0, 0.0, 9.0, 14.0], - [0.0, 0.0, 9.0, 14.0]] - pruned = box_list_ops.clip_to_window( - boxes, window, filter_nonoverlapping=True) - with self.test_session() as sess: - pruned_output = sess.run(pruned.get()) - self.assertAllClose(pruned_output, exp_output) - extra_data_out = sess.run(pruned.get_field('extra_data')) - self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [5]]) - - def test_clip_to_window_without_filtering_boxes_which_fall_outside_the_window( - self): - window = tf.constant([0, 0, 9, 14], tf.float32) - corners = tf.constant([[5.0, 5.0, 6.0, 6.0], - [-1.0, -2.0, 4.0, 5.0], - [2.0, 3.0, 5.0, 9.0], - [0.0, 0.0, 9.0, 14.0], - [-100.0, -100.0, 300.0, 600.0], - [-10.0, -10.0, -9.0, -9.0]]) - boxes = box_list.BoxList(corners) - boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]])) - exp_output = [[5.0, 5.0, 6.0, 6.0], [0.0, 0.0, 4.0, 5.0], - [2.0, 3.0, 5.0, 9.0], [0.0, 0.0, 9.0, 14.0], - [0.0, 0.0, 9.0, 14.0], [0.0, 0.0, 0.0, 0.0]] - pruned = box_list_ops.clip_to_window( - boxes, window, filter_nonoverlapping=False) - with self.test_session() as sess: - pruned_output = sess.run(pruned.get()) - 
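-      # The box lying entirely outside the window is clipped to the degenerate
-      # zero-area box [0, 0, 0, 0] instead of being filtered out.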
self.assertAllClose(pruned_output, exp_output) - extra_data_out = sess.run(pruned.get_field('extra_data')) - self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [5], [6]]) - - def test_prune_outside_window_filters_boxes_which_fall_outside_the_window( - self): - window = tf.constant([0, 0, 9, 14], tf.float32) - corners = tf.constant([[5.0, 5.0, 6.0, 6.0], - [-1.0, -2.0, 4.0, 5.0], - [2.0, 3.0, 5.0, 9.0], - [0.0, 0.0, 9.0, 14.0], - [-10.0, -10.0, -9.0, -9.0], - [-100.0, -100.0, 300.0, 600.0]]) - boxes = box_list.BoxList(corners) - boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]])) - exp_output = [[5.0, 5.0, 6.0, 6.0], - [2.0, 3.0, 5.0, 9.0], - [0.0, 0.0, 9.0, 14.0]] - pruned, keep_indices = box_list_ops.prune_outside_window(boxes, window) - with self.test_session() as sess: - pruned_output = sess.run(pruned.get()) - self.assertAllClose(pruned_output, exp_output) - keep_indices_out = sess.run(keep_indices) - self.assertAllEqual(keep_indices_out, [0, 2, 3]) - extra_data_out = sess.run(pruned.get_field('extra_data')) - self.assertAllEqual(extra_data_out, [[1], [3], [4]]) - - def test_prune_completely_outside_window(self): - window = tf.constant([0, 0, 9, 14], tf.float32) - corners = tf.constant([[5.0, 5.0, 6.0, 6.0], - [-1.0, -2.0, 4.0, 5.0], - [2.0, 3.0, 5.0, 9.0], - [0.0, 0.0, 9.0, 14.0], - [-10.0, -10.0, -9.0, -9.0], - [-100.0, -100.0, 300.0, 600.0]]) - boxes = box_list.BoxList(corners) - boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]])) - exp_output = [[5.0, 5.0, 6.0, 6.0], - [-1.0, -2.0, 4.0, 5.0], - [2.0, 3.0, 5.0, 9.0], - [0.0, 0.0, 9.0, 14.0], - [-100.0, -100.0, 300.0, 600.0]] - pruned, keep_indices = box_list_ops.prune_completely_outside_window(boxes, - window) - with self.test_session() as sess: - pruned_output = sess.run(pruned.get()) - self.assertAllClose(pruned_output, exp_output) - keep_indices_out = sess.run(keep_indices) - self.assertAllEqual(keep_indices_out, [0, 1, 2, 3, 5]) - extra_data_out = sess.run(pruned.get_field('extra_data')) - self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [6]]) - - def test_prune_completely_outside_window_with_empty_boxlist(self): - window = tf.constant([0, 0, 9, 14], tf.float32) - corners = tf.zeros(shape=[0, 4], dtype=tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('extra_data', tf.zeros(shape=[0], dtype=tf.int32)) - pruned, keep_indices = box_list_ops.prune_completely_outside_window(boxes, - window) - pruned_boxes = pruned.get() - extra = pruned.get_field('extra_data') - - exp_pruned_boxes = np.zeros(shape=[0, 4], dtype=np.float32) - exp_extra = np.zeros(shape=[0], dtype=np.int32) - with self.test_session() as sess: - pruned_boxes_out, keep_indices_out, extra_out = sess.run( - [pruned_boxes, keep_indices, extra]) - self.assertAllClose(exp_pruned_boxes, pruned_boxes_out) - self.assertAllEqual([], keep_indices_out) - self.assertAllEqual(exp_extra, extra_out) - - def test_intersection(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]]) - exp_output = [[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]] - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - intersect = box_list_ops.intersection(boxes1, boxes2) - with self.test_session() as sess: - intersect_output = sess.run(intersect) - self.assertAllClose(intersect_output, exp_output) - - def test_matched_intersection(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 
7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]]) - exp_output = [2.0, 0.0] - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - intersect = box_list_ops.matched_intersection(boxes1, boxes2) - with self.test_session() as sess: - intersect_output = sess.run(intersect) - self.assertAllClose(intersect_output, exp_output) - - def test_iou(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]]) - exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]] - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - iou = box_list_ops.iou(boxes1, boxes2) - with self.test_session() as sess: - iou_output = sess.run(iou) - self.assertAllClose(iou_output, exp_output) - - def test_matched_iou(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]]) - exp_output = [2.0 / 16.0, 0] - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - iou = box_list_ops.matched_iou(boxes1, boxes2) - with self.test_session() as sess: - iou_output = sess.run(iou) - self.assertAllClose(iou_output, exp_output) - - def test_iouworks_on_empty_inputs(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]]) - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - boxes_empty = box_list.BoxList(tf.zeros((0, 4))) - iou_empty_1 = box_list_ops.iou(boxes1, boxes_empty) - iou_empty_2 = box_list_ops.iou(boxes_empty, boxes2) - iou_empty_3 = box_list_ops.iou(boxes_empty, boxes_empty) - with self.test_session() as sess: - iou_output_1, iou_output_2, iou_output_3 = sess.run( - [iou_empty_1, iou_empty_2, iou_empty_3]) - self.assertAllEqual(iou_output_1.shape, (2, 0)) - self.assertAllEqual(iou_output_2.shape, (0, 3)) - self.assertAllEqual(iou_output_3.shape, (0, 0)) - - def test_ioa(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]]) - exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0], - [1.0 / 12.0, 0.0, 5.0 / 400.0]] - exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0], - [0, 0], - [6.0 / 6.0, 5.0 / 5.0]] - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - ioa_1 = box_list_ops.ioa(boxes1, boxes2) - ioa_2 = box_list_ops.ioa(boxes2, boxes1) - with self.test_session() as sess: - ioa_output_1, ioa_output_2 = sess.run([ioa_1, ioa_2]) - self.assertAllClose(ioa_output_1, exp_output_1) - self.assertAllClose(ioa_output_2, exp_output_2) - - def test_prune_non_overlapping_boxes(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]]) - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - minoverlap = 0.5 - - exp_output_1 = boxes1 - exp_output_2 = box_list.BoxList(tf.constant(0.0, shape=[0, 4])) - output_1, keep_indices_1 = box_list_ops.prune_non_overlapping_boxes( - boxes1, boxes2, min_overlap=minoverlap) - output_2, keep_indices_2 = box_list_ops.prune_non_overlapping_boxes( - boxes2, boxes1, min_overlap=minoverlap) - with self.test_session() as sess: - (output_1_, keep_indices_1_, output_2_, 
keep_indices_2_, exp_output_1_, - exp_output_2_) = sess.run( - [output_1.get(), keep_indices_1, - output_2.get(), keep_indices_2, - exp_output_1.get(), exp_output_2.get()]) - self.assertAllClose(output_1_, exp_output_1_) - self.assertAllClose(output_2_, exp_output_2_) - self.assertAllEqual(keep_indices_1_, [0, 1]) - self.assertAllEqual(keep_indices_2_, []) - - def test_prune_small_boxes(self): - boxes = tf.constant([[4.0, 3.0, 7.0, 5.0], - [5.0, 6.0, 10.0, 7.0], - [3.0, 4.0, 6.0, 8.0], - [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]]) - exp_boxes = [[3.0, 4.0, 6.0, 8.0], - [0.0, 0.0, 20.0, 20.0]] - boxes = box_list.BoxList(boxes) - pruned_boxes = box_list_ops.prune_small_boxes(boxes, 3) - with self.test_session() as sess: - pruned_boxes = sess.run(pruned_boxes.get()) - self.assertAllEqual(pruned_boxes, exp_boxes) - - def test_prune_small_boxes_prunes_boxes_with_negative_side(self): - boxes = tf.constant([[4.0, 3.0, 7.0, 5.0], - [5.0, 6.0, 10.0, 7.0], - [3.0, 4.0, 6.0, 8.0], - [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0], - [2.0, 3.0, 1.5, 7.0], # negative height - [2.0, 3.0, 5.0, 1.7]]) # negative width - exp_boxes = [[3.0, 4.0, 6.0, 8.0], - [0.0, 0.0, 20.0, 20.0]] - boxes = box_list.BoxList(boxes) - pruned_boxes = box_list_ops.prune_small_boxes(boxes, 3) - with self.test_session() as sess: - pruned_boxes = sess.run(pruned_boxes.get()) - self.assertAllEqual(pruned_boxes, exp_boxes) - - def test_change_coordinate_frame(self): - corners = tf.constant([[0.25, 0.5, 0.75, 0.75], [0.5, 0.0, 1.0, 1.0]]) - window = tf.constant([0.25, 0.25, 0.75, 0.75]) - boxes = box_list.BoxList(corners) - - expected_corners = tf.constant([[0, 0.5, 1.0, 1.0], [0.5, -0.5, 1.5, 1.5]]) - expected_boxes = box_list.BoxList(expected_corners) - output = box_list_ops.change_coordinate_frame(boxes, window) - - with self.test_session() as sess: - output_, expected_boxes_ = sess.run([output.get(), expected_boxes.get()]) - self.assertAllClose(output_, expected_boxes_) - - def test_ioaworks_on_empty_inputs(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]]) - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - boxes_empty = box_list.BoxList(tf.zeros((0, 4))) - ioa_empty_1 = box_list_ops.ioa(boxes1, boxes_empty) - ioa_empty_2 = box_list_ops.ioa(boxes_empty, boxes2) - ioa_empty_3 = box_list_ops.ioa(boxes_empty, boxes_empty) - with self.test_session() as sess: - ioa_output_1, ioa_output_2, ioa_output_3 = sess.run( - [ioa_empty_1, ioa_empty_2, ioa_empty_3]) - self.assertAllEqual(ioa_output_1.shape, (2, 0)) - self.assertAllEqual(ioa_output_2.shape, (0, 3)) - self.assertAllEqual(ioa_output_3.shape, (0, 0)) - - def test_pairwise_distances(self): - corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0], - [1.0, 1.0, 0.0, 2.0]]) - corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0], - [-4.0, 0.0, 0.0, 3.0], - [0.0, 0.0, 0.0, 0.0]]) - exp_output = [[26, 25, 0], [18, 27, 6]] - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - dist_matrix = box_list_ops.sq_dist(boxes1, boxes2) - with self.test_session() as sess: - dist_output = sess.run(dist_matrix) - self.assertAllClose(dist_output, exp_output) - - def test_boolean_mask(self): - corners = tf.constant( - [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]]) - indicator = tf.constant([True, False, True, False, True], tf.bool) - expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]] - boxes = box_list.BoxList(corners) - 
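-    # Keep only rows 0, 2 and 4, where the indicator is True.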
subset = box_list_ops.boolean_mask(boxes, indicator) - with self.test_session() as sess: - subset_output = sess.run(subset.get()) - self.assertAllClose(subset_output, expected_subset) - - def test_boolean_mask_with_field(self): - corners = tf.constant( - [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]]) - indicator = tf.constant([True, False, True, False, True], tf.bool) - weights = tf.constant([[.1], [.3], [.5], [.7], [.9]], tf.float32) - expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]] - expected_weights = [[.1], [.5], [.9]] - - boxes = box_list.BoxList(corners) - boxes.add_field('weights', weights) - subset = box_list_ops.boolean_mask(boxes, indicator, ['weights']) - with self.test_session() as sess: - subset_output, weights_output = sess.run( - [subset.get(), subset.get_field('weights')]) - self.assertAllClose(subset_output, expected_subset) - self.assertAllClose(weights_output, expected_weights) - - def test_gather(self): - corners = tf.constant( - [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]]) - indices = tf.constant([0, 2, 4], tf.int32) - expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]] - boxes = box_list.BoxList(corners) - subset = box_list_ops.gather(boxes, indices) - with self.test_session() as sess: - subset_output = sess.run(subset.get()) - self.assertAllClose(subset_output, expected_subset) - - def test_gather_with_field(self): - corners = tf.constant([4*[0.0], 4*[1.0], 4*[2.0], 4*[3.0], 4*[4.0]]) - indices = tf.constant([0, 2, 4], tf.int32) - weights = tf.constant([[.1], [.3], [.5], [.7], [.9]], tf.float32) - expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]] - expected_weights = [[.1], [.5], [.9]] - - boxes = box_list.BoxList(corners) - boxes.add_field('weights', weights) - subset = box_list_ops.gather(boxes, indices, ['weights']) - with self.test_session() as sess: - subset_output, weights_output = sess.run( - [subset.get(), subset.get_field('weights')]) - self.assertAllClose(subset_output, expected_subset) - self.assertAllClose(weights_output, expected_weights) - - def test_gather_with_invalid_field(self): - corners = tf.constant([4 * [0.0], 4 * [1.0]]) - indices = tf.constant([0, 1], tf.int32) - weights = tf.constant([[.1], [.3]], tf.float32) - - boxes = box_list.BoxList(corners) - boxes.add_field('weights', weights) - with self.assertRaises(ValueError): - box_list_ops.gather(boxes, indices, ['foo', 'bar']) - - def test_gather_with_invalid_inputs(self): - corners = tf.constant( - [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]]) - indices_float32 = tf.constant([0, 2, 4], tf.float32) - boxes = box_list.BoxList(corners) - with self.assertRaises(ValueError): - _ = box_list_ops.gather(boxes, indices_float32) - indices_2d = tf.constant([[0, 2, 4]], tf.int32) - boxes = box_list.BoxList(corners) - with self.assertRaises(ValueError): - _ = box_list_ops.gather(boxes, indices_2d) - - def test_gather_with_dynamic_indexing(self): - corners = tf.constant([4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0] - ]) - weights = tf.constant([.5, .3, .7, .1, .9], tf.float32) - indices = tf.reshape(tf.where(tf.greater(weights, 0.4)), [-1]) - expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]] - expected_weights = [.5, .7, .9] - - boxes = box_list.BoxList(corners) - boxes.add_field('weights', weights) - subset = box_list_ops.gather(boxes, indices, ['weights']) - with self.test_session() as sess: - subset_output, weights_output = sess.run([subset.get(), subset.get_field( - 'weights')]) - self.assertAllClose(subset_output, expected_subset) - 
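-      # The 'weights' field is masked with the same indicator as the boxes.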
self.assertAllClose(weights_output, expected_weights) - - def test_sort_by_field_ascending_order(self): - exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], - [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]] - exp_scores = [.95, .9, .75, .6, .5, .3] - exp_weights = [.2, .45, .6, .75, .8, .92] - shuffle = [2, 4, 0, 5, 1, 3] - corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('scores', tf.constant( - [exp_scores[i] for i in shuffle], tf.float32)) - boxes.add_field('weights', tf.constant( - [exp_weights[i] for i in shuffle], tf.float32)) - sort_by_weight = box_list_ops.sort_by_field( - boxes, - 'weights', - order=box_list_ops.SortOrder.ascend) - with self.test_session() as sess: - corners_out, scores_out, weights_out = sess.run([ - sort_by_weight.get(), - sort_by_weight.get_field('scores'), - sort_by_weight.get_field('weights')]) - self.assertAllClose(corners_out, exp_corners) - self.assertAllClose(scores_out, exp_scores) - self.assertAllClose(weights_out, exp_weights) - - def test_sort_by_field_descending_order(self): - exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], - [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]] - exp_scores = [.95, .9, .75, .6, .5, .3] - exp_weights = [.2, .45, .6, .75, .8, .92] - shuffle = [2, 4, 0, 5, 1, 3] - - corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('scores', tf.constant( - [exp_scores[i] for i in shuffle], tf.float32)) - boxes.add_field('weights', tf.constant( - [exp_weights[i] for i in shuffle], tf.float32)) - - sort_by_score = box_list_ops.sort_by_field(boxes, 'scores') - with self.test_session() as sess: - corners_out, scores_out, weights_out = sess.run([sort_by_score.get( - ), sort_by_score.get_field('scores'), sort_by_score.get_field('weights')]) - self.assertAllClose(corners_out, exp_corners) - self.assertAllClose(scores_out, exp_scores) - self.assertAllClose(weights_out, exp_weights) - - def test_sort_by_field_invalid_inputs(self): - corners = tf.constant([4 * [0.0], 4 * [0.5], 4 * [1.0], 4 * [2.0], 4 * - [3.0], 4 * [4.0]]) - misc = tf.constant([[.95, .9], [.5, .3]], tf.float32) - weights = tf.constant([.1, .2], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('misc', misc) - boxes.add_field('weights', weights) - - with self.test_session() as sess: - with self.assertRaises(ValueError): - box_list_ops.sort_by_field(boxes, 'area') - - with self.assertRaises(ValueError): - box_list_ops.sort_by_field(boxes, 'misc') - - if ops._USE_C_API: - with self.assertRaises(ValueError): - box_list_ops.sort_by_field(boxes, 'weights') - else: - with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError, - 'Incorrect field size'): - sess.run(box_list_ops.sort_by_field(boxes, 'weights').get()) - - def test_visualize_boxes_in_image(self): - image = tf.zeros((6, 4, 3)) - corners = tf.constant([[0, 0, 5, 3], - [0, 0, 3, 2]], tf.float32) - boxes = box_list.BoxList(corners) - image_and_boxes = box_list_ops.visualize_boxes_in_image(image, boxes) - image_and_boxes_bw = tf.to_float( - tf.greater(tf.reduce_sum(image_and_boxes, 2), 0.0)) - exp_result = [[1, 1, 1, 0], - [1, 1, 1, 0], - [1, 1, 1, 0], - [1, 0, 1, 0], - [1, 1, 1, 0], - [0, 0, 0, 0]] - with self.test_session() as sess: - output = sess.run(image_and_boxes_bw) - self.assertAllEqual(output.astype(int), exp_result) - - def test_filter_field_value_equals(self): - corners = tf.constant([[0, 0, 1, 1], - [0, 
0.1, 1, 1.1], - [0, -0.1, 1, 0.9], - [0, 10, 1, 11], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101]], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('classes', tf.constant([1, 2, 1, 2, 2, 1])) - exp_output1 = [[0, 0, 1, 1], [0, -0.1, 1, 0.9], [0, 100, 1, 101]] - exp_output2 = [[0, 0.1, 1, 1.1], [0, 10, 1, 11], [0, 10.1, 1, 11.1]] - - filtered_boxes1 = box_list_ops.filter_field_value_equals( - boxes, 'classes', 1) - filtered_boxes2 = box_list_ops.filter_field_value_equals( - boxes, 'classes', 2) - with self.test_session() as sess: - filtered_output1, filtered_output2 = sess.run([filtered_boxes1.get(), - filtered_boxes2.get()]) - self.assertAllClose(filtered_output1, exp_output1) - self.assertAllClose(filtered_output2, exp_output2) - - def test_filter_greater_than(self): - corners = tf.constant([[0, 0, 1, 1], - [0, 0.1, 1, 1.1], - [0, -0.1, 1, 0.9], - [0, 10, 1, 11], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101]], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('scores', tf.constant([.1, .75, .9, .5, .5, .8])) - thresh = .6 - exp_output = [[0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], [0, 100, 1, 101]] - - filtered_boxes = box_list_ops.filter_greater_than(boxes, thresh) - with self.test_session() as sess: - filtered_output = sess.run(filtered_boxes.get()) - self.assertAllClose(filtered_output, exp_output) - - def test_clip_box_list(self): - boxlist = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5], - [0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]], tf.float32)) - boxlist.add_field('classes', tf.constant([0, 0, 1, 1])) - boxlist.add_field('scores', tf.constant([0.75, 0.65, 0.3, 0.2])) - num_boxes = 2 - clipped_boxlist = box_list_ops.pad_or_clip_box_list(boxlist, num_boxes) - - expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]] - expected_classes = [0, 0] - expected_scores = [0.75, 0.65] - with self.test_session() as sess: - boxes_out, classes_out, scores_out = sess.run( - [clipped_boxlist.get(), clipped_boxlist.get_field('classes'), - clipped_boxlist.get_field('scores')]) - - self.assertAllClose(expected_boxes, boxes_out) - self.assertAllEqual(expected_classes, classes_out) - self.assertAllClose(expected_scores, scores_out) - - def test_pad_box_list(self): - boxlist = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32)) - boxlist.add_field('classes', tf.constant([0, 1])) - boxlist.add_field('scores', tf.constant([0.75, 0.2])) - num_boxes = 4 - padded_boxlist = box_list_ops.pad_or_clip_box_list(boxlist, num_boxes) - - expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5], - [0, 0, 0, 0], [0, 0, 0, 0]] - expected_classes = [0, 1, 0, 0] - expected_scores = [0.75, 0.2, 0, 0] - with self.test_session() as sess: - boxes_out, classes_out, scores_out = sess.run( - [padded_boxlist.get(), padded_boxlist.get_field('classes'), - padded_boxlist.get_field('scores')]) - - self.assertAllClose(expected_boxes, boxes_out) - self.assertAllEqual(expected_classes, classes_out) - self.assertAllClose(expected_scores, scores_out) - - def test_select_random_box(self): - boxes = [[0., 0., 1., 1.], - [0., 1., 2., 3.], - [0., 2., 3., 4.]] - - corners = tf.constant(boxes, dtype=tf.float32) - boxlist = box_list.BoxList(corners) - random_bbox, valid = box_list_ops.select_random_box(boxlist) - with self.test_session() as sess: - random_bbox_out, valid_out = sess.run([random_bbox, valid]) - - norm_small = any( - [np.linalg.norm(random_bbox_out - box) < 1e-6 for box in boxes]) - - self.assertTrue(norm_small) - 
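-    # valid is True here because the boxlist is non-empty, so a real box
-    # (rather than the default box) was selected.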
self.assertTrue(valid_out) - - def test_select_random_box_with_empty_boxlist(self): - corners = tf.constant([], shape=[0, 4], dtype=tf.float32) - boxlist = box_list.BoxList(corners) - random_bbox, valid = box_list_ops.select_random_box(boxlist) - with self.test_session() as sess: - random_bbox_out, valid_out = sess.run([random_bbox, valid]) - - expected_bbox_out = np.array([[-1., -1., -1., -1.]], dtype=np.float32) - self.assertAllEqual(expected_bbox_out, random_bbox_out) - self.assertFalse(valid_out) - - def test_get_minimal_coverage_box(self): - boxes = [[0., 0., 1., 1.], - [-1., 1., 2., 3.], - [0., 2., 3., 4.]] - - expected_coverage_box = [[-1., 0., 3., 4.]] - - corners = tf.constant(boxes, dtype=tf.float32) - boxlist = box_list.BoxList(corners) - coverage_box = box_list_ops.get_minimal_coverage_box(boxlist) - with self.test_session() as sess: - coverage_box_out = sess.run(coverage_box) - - self.assertAllClose(expected_coverage_box, coverage_box_out) - - def test_get_minimal_coverage_box_with_empty_boxlist(self): - corners = tf.constant([], shape=[0, 4], dtype=tf.float32) - boxlist = box_list.BoxList(corners) - coverage_box = box_list_ops.get_minimal_coverage_box(boxlist) - with self.test_session() as sess: - coverage_box_out = sess.run(coverage_box) - - self.assertAllClose([[0.0, 0.0, 1.0, 1.0]], coverage_box_out) - - -class ConcatenateTest(tf.test.TestCase): - - def test_invalid_input_box_list_list(self): - with self.assertRaises(ValueError): - box_list_ops.concatenate(None) - with self.assertRaises(ValueError): - box_list_ops.concatenate([]) - with self.assertRaises(ValueError): - corners = tf.constant([[0, 0, 0, 0]], tf.float32) - boxlist = box_list.BoxList(corners) - box_list_ops.concatenate([boxlist, 2]) - - def test_concatenate_with_missing_fields(self): - corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32) - scores1 = tf.constant([1.0, 2.1]) - corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32) - boxlist1 = box_list.BoxList(corners1) - boxlist1.add_field('scores', scores1) - boxlist2 = box_list.BoxList(corners2) - with self.assertRaises(ValueError): - box_list_ops.concatenate([boxlist1, boxlist2]) - - def test_concatenate_with_incompatible_field_shapes(self): - corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32) - scores1 = tf.constant([1.0, 2.1]) - corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32) - scores2 = tf.constant([[1.0, 1.0], [2.1, 3.2]]) - boxlist1 = box_list.BoxList(corners1) - boxlist1.add_field('scores', scores1) - boxlist2 = box_list.BoxList(corners2) - boxlist2.add_field('scores', scores2) - with self.assertRaises(ValueError): - box_list_ops.concatenate([boxlist1, boxlist2]) - - def test_concatenate_is_correct(self): - corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32) - scores1 = tf.constant([1.0, 2.1]) - corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8], [1, 0, 5, 10]], - tf.float32) - scores2 = tf.constant([1.0, 2.1, 5.6]) - - exp_corners = [[0, 0, 0, 0], - [1, 2, 3, 4], - [0, 3, 1, 6], - [2, 4, 3, 8], - [1, 0, 5, 10]] - exp_scores = [1.0, 2.1, 1.0, 2.1, 5.6] - - boxlist1 = box_list.BoxList(corners1) - boxlist1.add_field('scores', scores1) - boxlist2 = box_list.BoxList(corners2) - boxlist2.add_field('scores', scores2) - result = box_list_ops.concatenate([boxlist1, boxlist2]) - with self.test_session() as sess: - corners_output, scores_output = sess.run( - [result.get(), result.get_field('scores')]) - self.assertAllClose(corners_output, exp_corners) - self.assertAllClose(scores_output, 
exp_scores) - - -class NonMaxSuppressionTest(tf.test.TestCase): - - def test_select_from_three_clusters(self): - corners = tf.constant([[0, 0, 1, 1], - [0, 0.1, 1, 1.1], - [0, -0.1, 1, 0.9], - [0, 10, 1, 11], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101]], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3])) - iou_thresh = .5 - max_output_size = 3 - - exp_nms = [[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 100, 1, 101]] - nms = box_list_ops.non_max_suppression( - boxes, iou_thresh, max_output_size) - with self.test_session() as sess: - nms_output = sess.run(nms.get()) - self.assertAllClose(nms_output, exp_nms) - - def test_select_at_most_two_boxes_from_three_clusters(self): - corners = tf.constant([[0, 0, 1, 1], - [0, 0.1, 1, 1.1], - [0, -0.1, 1, 0.9], - [0, 10, 1, 11], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101]], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3])) - iou_thresh = .5 - max_output_size = 2 - - exp_nms = [[0, 10, 1, 11], - [0, 0, 1, 1]] - nms = box_list_ops.non_max_suppression( - boxes, iou_thresh, max_output_size) - with self.test_session() as sess: - nms_output = sess.run(nms.get()) - self.assertAllClose(nms_output, exp_nms) - - def test_select_at_most_thirty_boxes_from_three_clusters(self): - corners = tf.constant([[0, 0, 1, 1], - [0, 0.1, 1, 1.1], - [0, -0.1, 1, 0.9], - [0, 10, 1, 11], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101]], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3])) - iou_thresh = .5 - max_output_size = 30 - - exp_nms = [[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 100, 1, 101]] - nms = box_list_ops.non_max_suppression( - boxes, iou_thresh, max_output_size) - with self.test_session() as sess: - nms_output = sess.run(nms.get()) - self.assertAllClose(nms_output, exp_nms) - - def test_select_single_box(self): - corners = tf.constant([[0, 0, 1, 1]], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('scores', tf.constant([.9])) - iou_thresh = .5 - max_output_size = 3 - - exp_nms = [[0, 0, 1, 1]] - nms = box_list_ops.non_max_suppression( - boxes, iou_thresh, max_output_size) - with self.test_session() as sess: - nms_output = sess.run(nms.get()) - self.assertAllClose(nms_output, exp_nms) - - def test_select_from_ten_identical_boxes(self): - corners = tf.constant(10 * [[0, 0, 1, 1]], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('scores', tf.constant(10 * [.9])) - iou_thresh = .5 - max_output_size = 3 - - exp_nms = [[0, 0, 1, 1]] - nms = box_list_ops.non_max_suppression( - boxes, iou_thresh, max_output_size) - with self.test_session() as sess: - nms_output = sess.run(nms.get()) - self.assertAllClose(nms_output, exp_nms) - - def test_copy_extra_fields(self): - corners = tf.constant([[0, 0, 1, 1], - [0, 0.1, 1, 1.1]], tf.float32) - boxes = box_list.BoxList(corners) - tensor1 = np.array([[1], [4]]) - tensor2 = np.array([[1, 1], [2, 2]]) - boxes.add_field('tensor1', tf.constant(tensor1)) - boxes.add_field('tensor2', tf.constant(tensor2)) - new_boxes = box_list.BoxList(tf.constant([[0, 0, 10, 10], - [1, 3, 5, 5]], tf.float32)) - new_boxes = box_list_ops._copy_extra_fields(new_boxes, boxes) - with self.test_session() as sess: - self.assertAllClose(tensor1, sess.run(new_boxes.get_field('tensor1'))) - self.assertAllClose(tensor2, sess.run(new_boxes.get_field('tensor2'))) - - -class CoordinatesConversionTest(tf.test.TestCase): - - def 
test_to_normalized_coordinates(self):
-    coordinates = tf.constant([[0, 0, 100, 100],
-                               [25, 25, 75, 75]], tf.float32)
-    img = tf.ones((128, 100, 100, 3))
-    boxlist = box_list.BoxList(coordinates)
-    normalized_boxlist = box_list_ops.to_normalized_coordinates(
-        boxlist, tf.shape(img)[1], tf.shape(img)[2])
-    expected_boxes = [[0, 0, 1, 1],
-                      [0.25, 0.25, 0.75, 0.75]]
-
-    with self.test_session() as sess:
-      normalized_boxes = sess.run(normalized_boxlist.get())
-      self.assertAllClose(normalized_boxes, expected_boxes)
-
-  def test_to_normalized_coordinates_already_normalized(self):
-    coordinates = tf.constant([[0, 0, 1, 1],
-                               [0.25, 0.25, 0.75, 0.75]], tf.float32)
-    img = tf.ones((128, 100, 100, 3))
-    boxlist = box_list.BoxList(coordinates)
-    normalized_boxlist = box_list_ops.to_normalized_coordinates(
-        boxlist, tf.shape(img)[1], tf.shape(img)[2])
-
-    with self.test_session() as sess:
-      with self.assertRaisesOpError('assertion failed'):
-        sess.run(normalized_boxlist.get())
-
-  def test_to_absolute_coordinates(self):
-    coordinates = tf.constant([[0, 0, 1, 1],
-                               [0.25, 0.25, 0.75, 0.75]], tf.float32)
-    img = tf.ones((128, 100, 100, 3))
-    boxlist = box_list.BoxList(coordinates)
-    absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
-                                                            tf.shape(img)[1],
-                                                            tf.shape(img)[2])
-    expected_boxes = [[0, 0, 100, 100],
-                      [25, 25, 75, 75]]
-
-    with self.test_session() as sess:
-      absolute_boxes = sess.run(absolute_boxlist.get())
-      self.assertAllClose(absolute_boxes, expected_boxes)
-
-  def test_to_absolute_coordinates_already_absolute(self):
-    coordinates = tf.constant([[0, 0, 100, 100],
-                               [25, 25, 75, 75]], tf.float32)
-    img = tf.ones((128, 100, 100, 3))
-    boxlist = box_list.BoxList(coordinates)
-    absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
-                                                            tf.shape(img)[1],
-                                                            tf.shape(img)[2])
-
-    with self.test_session() as sess:
-      with self.assertRaisesOpError('assertion failed'):
-        sess.run(absolute_boxlist.get())
-
-  def test_convert_to_normalized_and_back(self):
-    coordinates = np.random.uniform(size=(100, 4))
-    coordinates = np.round(np.sort(coordinates) * 200)
-    coordinates[:, 2:4] += 1
-    coordinates[99, :] = [0, 0, 201, 201]
-    img = tf.ones((128, 202, 202, 3))
-
-    boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
-    boxlist = box_list_ops.to_normalized_coordinates(boxlist,
-                                                     tf.shape(img)[1],
-                                                     tf.shape(img)[2])
-    boxlist = box_list_ops.to_absolute_coordinates(boxlist,
-                                                   tf.shape(img)[1],
-                                                   tf.shape(img)[2])
-
-    with self.test_session() as sess:
-      out = sess.run(boxlist.get())
-      self.assertAllClose(out, coordinates)
-
-  def test_convert_to_absolute_and_back(self):
-    coordinates = np.random.uniform(size=(100, 4))
-    coordinates = np.sort(coordinates)
-    coordinates[99, :] = [0, 0, 1, 1]
-    img = tf.ones((128, 202, 202, 3))
-
-    boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
-    boxlist = box_list_ops.to_absolute_coordinates(boxlist,
-                                                   tf.shape(img)[1],
-                                                   tf.shape(img)[2])
-    boxlist = box_list_ops.to_normalized_coordinates(boxlist,
-                                                     tf.shape(img)[1],
-                                                     tf.shape(img)[2])
-
-    with self.test_session() as sess:
-      out = sess.run(boxlist.get())
-      self.assertAllClose(out, coordinates)
-
-  def test_to_absolute_coordinates_maximum_coordinate_check(self):
-    coordinates = tf.constant([[0, 0, 1.2, 1.2],
-                               [0.25, 0.25, 0.75, 0.75]], tf.float32)
-    img = tf.ones((128, 100, 100, 3))
-    boxlist = box_list.BoxList(coordinates)
-    absolute_boxlist = box_list_ops.to_absolute_coordinates(
-        boxlist,
-        tf.shape(img)[1],
-        tf.shape(img)[2],
-        maximum_normalized_coordinate=1.1)
-
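-    # The ymax/xmax value of 1.2 above exceeds maximum_normalized_coordinate
-    # (1.1), so the conversion's internal sanity check is expected to raise
-    # below instead of scaling the boxes by the image size (100).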
with self.test_session() as sess: - with self.assertRaisesOpError('assertion failed'): - sess.run(absolute_boxlist.get()) - - -class BoxRefinementTest(tf.test.TestCase): - - def test_box_voting(self): - candidates = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4], [0.6, 0.6, 0.8, 0.8]], tf.float32)) - candidates.add_field('ExtraField', tf.constant([1, 2])) - pool = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5], - [0.6, 0.6, 0.8, 0.8]], tf.float32)) - pool.add_field('scores', tf.constant([0.75, 0.25, 0.3])) - averaged_boxes = box_list_ops.box_voting(candidates, pool) - expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]] - expected_scores = [0.5, 0.3] - with self.test_session() as sess: - boxes_out, scores_out, extra_field_out = sess.run( - [averaged_boxes.get(), averaged_boxes.get_field('scores'), - averaged_boxes.get_field('ExtraField')]) - - self.assertAllClose(expected_boxes, boxes_out) - self.assertAllClose(expected_scores, scores_out) - self.assertAllEqual(extra_field_out, [1, 2]) - - def test_box_voting_fails_with_negative_scores(self): - candidates = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32)) - pool = box_list.BoxList(tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32)) - pool.add_field('scores', tf.constant([-0.2])) - averaged_boxes = box_list_ops.box_voting(candidates, pool) - - with self.test_session() as sess: - with self.assertRaisesOpError('Scores must be non negative'): - sess.run([averaged_boxes.get()]) - - def test_box_voting_fails_when_unmatched(self): - candidates = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32)) - pool = box_list.BoxList(tf.constant([[0.6, 0.6, 0.8, 0.8]], tf.float32)) - pool.add_field('scores', tf.constant([0.2])) - averaged_boxes = box_list_ops.box_voting(candidates, pool) - - with self.test_session() as sess: - with self.assertRaisesOpError('Each box in selected_boxes must match ' - 'with at least one box in pool_boxes.'): - sess.run([averaged_boxes.get()]) - - def test_refine_boxes(self): - pool = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5], - [0.6, 0.6, 0.8, 0.8]], tf.float32)) - pool.add_field('ExtraField', tf.constant([1, 2, 3])) - pool.add_field('scores', tf.constant([0.75, 0.25, 0.3])) - refined_boxes = box_list_ops.refine_boxes(pool, 0.5, 10) - - expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]] - expected_scores = [0.5, 0.3] - with self.test_session() as sess: - boxes_out, scores_out, extra_field_out = sess.run( - [refined_boxes.get(), refined_boxes.get_field('scores'), - refined_boxes.get_field('ExtraField')]) - - self.assertAllClose(expected_boxes, boxes_out) - self.assertAllClose(expected_scores, scores_out) - self.assertAllEqual(extra_field_out, [1, 3]) - - def test_refine_boxes_multi_class(self): - pool = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5], - [0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]], tf.float32)) - pool.add_field('classes', tf.constant([0, 0, 1, 1])) - pool.add_field('scores', tf.constant([0.75, 0.25, 0.3, 0.2])) - refined_boxes = box_list_ops.refine_boxes_multi_class(pool, 3, 0.5, 10) - - expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8], - [0.2, 0.2, 0.3, 0.3]] - expected_scores = [0.5, 0.3, 0.2] - with self.test_session() as sess: - boxes_out, scores_out, extra_field_out = sess.run( - [refined_boxes.get(), refined_boxes.get_field('scores'), - refined_boxes.get_field('classes')]) - - self.assertAllClose(expected_boxes, boxes_out) - 
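-    # The first expected box is the score-weighted vote of the two matched
-    # class-0 pool boxes, e.g. ymax = 0.75 * 0.4 + 0.25 * 0.5 = 0.425, and its
-    # expected score is their plain average, (0.75 + 0.25) / 2 = 0.5.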
self.assertAllClose(expected_scores, scores_out) - self.assertAllEqual(extra_field_out, [0, 1, 1]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list_test.py deleted file mode 100644 index edc00ebbc40227713739e2583fe9fc067e9449e2..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_list_test.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.core.box_list.""" - -import tensorflow as tf - -from object_detection.core import box_list - - -class BoxListTest(tf.test.TestCase): - """Tests for BoxList class.""" - - def test_num_boxes(self): - data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32) - expected_num_boxes = 3 - - boxes = box_list.BoxList(data) - with self.test_session() as sess: - num_boxes_output = sess.run(boxes.num_boxes()) - self.assertEquals(num_boxes_output, expected_num_boxes) - - def test_get_correct_center_coordinates_and_sizes(self): - boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] - boxes = box_list.BoxList(tf.constant(boxes)) - centers_sizes = boxes.get_center_coordinates_and_sizes() - expected_centers_sizes = [[15, 0.35], [12.5, 0.25], [10, 0.3], [5, 0.3]] - with self.test_session() as sess: - centers_sizes_out = sess.run(centers_sizes) - self.assertAllClose(centers_sizes_out, expected_centers_sizes) - - def test_create_box_list_with_dynamic_shape(self): - data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32) - indices = tf.reshape(tf.where(tf.greater([1, 0, 1], 0)), [-1]) - data = tf.gather(data, indices) - assert data.get_shape().as_list() == [None, 4] - expected_num_boxes = 2 - - boxes = box_list.BoxList(data) - with self.test_session() as sess: - num_boxes_output = sess.run(boxes.num_boxes()) - self.assertEquals(num_boxes_output, expected_num_boxes) - - def test_transpose_coordinates(self): - boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] - boxes = box_list.BoxList(tf.constant(boxes)) - boxes.transpose_coordinates() - expected_corners = [[10.0, 10.0, 15.0, 20.0], [0.1, 0.2, 0.4, 0.5]] - with self.test_session() as sess: - corners_out = sess.run(boxes.get()) - self.assertAllClose(corners_out, expected_corners) - - def test_box_list_invalid_inputs(self): - data0 = tf.constant([[[0, 0, 1, 1], [3, 4, 5, 5]]], tf.float32) - data1 = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.float32) - data2 = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.int32) - - with self.assertRaises(ValueError): - _ = box_list.BoxList(data0) - with self.assertRaises(ValueError): - _ = box_list.BoxList(data1) - with self.assertRaises(ValueError): - _ = box_list.BoxList(data2) - - def 
test_num_boxes_static(self): - box_corners = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] - boxes = box_list.BoxList(tf.constant(box_corners)) - self.assertEquals(boxes.num_boxes_static(), 2) - self.assertEquals(type(boxes.num_boxes_static()), int) - - def test_num_boxes_static_for_uninferrable_shape(self): - placeholder = tf.placeholder(tf.float32, shape=[None, 4]) - boxes = box_list.BoxList(placeholder) - self.assertEquals(boxes.num_boxes_static(), None) - - def test_as_tensor_dict(self): - boxlist = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32)) - boxlist.add_field('classes', tf.constant([0, 1])) - boxlist.add_field('scores', tf.constant([0.75, 0.2])) - tensor_dict = boxlist.as_tensor_dict() - - expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]] - expected_classes = [0, 1] - expected_scores = [0.75, 0.2] - - with self.test_session() as sess: - tensor_dict_out = sess.run(tensor_dict) - self.assertAllEqual(3, len(tensor_dict_out)) - self.assertAllClose(expected_boxes, tensor_dict_out['boxes']) - self.assertAllEqual(expected_classes, tensor_dict_out['classes']) - self.assertAllClose(expected_scores, tensor_dict_out['scores']) - - def test_as_tensor_dict_with_features(self): - boxlist = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32)) - boxlist.add_field('classes', tf.constant([0, 1])) - boxlist.add_field('scores', tf.constant([0.75, 0.2])) - tensor_dict = boxlist.as_tensor_dict(['boxes', 'classes', 'scores']) - - expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]] - expected_classes = [0, 1] - expected_scores = [0.75, 0.2] - - with self.test_session() as sess: - tensor_dict_out = sess.run(tensor_dict) - self.assertAllEqual(3, len(tensor_dict_out)) - self.assertAllClose(expected_boxes, tensor_dict_out['boxes']) - self.assertAllEqual(expected_classes, tensor_dict_out['classes']) - self.assertAllClose(expected_scores, tensor_dict_out['scores']) - - def test_as_tensor_dict_missing_field(self): - boxlist = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32)) - boxlist.add_field('classes', tf.constant([0, 1])) - boxlist.add_field('scores', tf.constant([0.75, 0.2])) - with self.assertRaises(ValueError): - boxlist.as_tensor_dict(['foo', 'bar']) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_predictor.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_predictor.py deleted file mode 100644 index 78d8242372549fecbdd5442fda2a520850308972..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_predictor.py +++ /dev/null @@ -1,963 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Box predictor for object detectors. 
- -Box predictors are classes that take a high level -image feature map as input and produce two predictions, -(1) a tensor encoding box locations, and -(2) a tensor encoding classes for each box. - -These components are passed directly to loss functions -in our detection models. - -These modules are separated from the main model since the same -few box predictor architectures are shared across many models. -""" -from abc import abstractmethod -import math -import tensorflow as tf -from object_detection.utils import ops -from object_detection.utils import shape_utils -from object_detection.utils import static_shape - -slim = tf.contrib.slim - -BOX_ENCODINGS = 'box_encodings' -CLASS_PREDICTIONS_WITH_BACKGROUND = 'class_predictions_with_background' -MASK_PREDICTIONS = 'mask_predictions' - - -class BoxPredictor(object): - """BoxPredictor.""" - - def __init__(self, is_training, num_classes): - """Constructor. - - Args: - is_training: Indicates whether the BoxPredictor is in training mode. - num_classes: number of classes. Note that num_classes *does not* - include the background category, so if groundtruth labels take values - in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the - assigned classification targets can range from {0,... K}). - """ - self._is_training = is_training - self._num_classes = num_classes - - @property - def num_classes(self): - return self._num_classes - - def predict(self, image_features, num_predictions_per_location, - scope=None, **params): - """Computes encoded object locations and corresponding confidences. - - Takes a list of high level image feature maps as input and produces a list - of box encodings and a list of class scores where each element in the output - lists correspond to the feature maps in the input list. - - Args: - image_features: A list of float tensors of shape [batch_size, height_i, - width_i, channels_i] containing features for a batch of images. - num_predictions_per_location: A list of integers representing the number - of box predictions to be made per spatial location for each feature map. - scope: Variable and Op scope name. - **params: Additional keyword arguments for specific implementations of - BoxPredictor. - - Returns: - A dictionary containing at least the following tensors. - box_encodings: A list of float tensors. Each entry in the list - corresponds to a feature map in the input `image_features` list. All - tensors in the list have one of the two following shapes: - a. [batch_size, num_anchors_i, q, code_size] representing the location - of the objects, where q is 1 or the number of classes. - b. [batch_size, num_anchors_i, code_size]. - class_predictions_with_background: A list of float tensors of shape - [batch_size, num_anchors_i, num_classes + 1] representing the class - predictions for the proposals. Each entry in the list corresponds to a - feature map in the input `image_features` list. - - Raises: - ValueError: If length of `image_features` is not equal to length of - `num_predictions_per_location`. - """ - if len(image_features) != len(num_predictions_per_location): - raise ValueError('image_feature and num_predictions_per_location must ' - 'be of same length, found: {} vs {}'. 
- format(len(image_features), - len(num_predictions_per_location))) - if scope is not None: - with tf.variable_scope(scope): - return self._predict(image_features, num_predictions_per_location, - **params) - return self._predict(image_features, num_predictions_per_location, - **params) - - # TODO(rathodv): num_predictions_per_location could be moved to constructor. - # This is currently only used by ConvolutionalBoxPredictor. - @abstractmethod - def _predict(self, image_features, num_predictions_per_location, **params): - """Implementations must override this method. - - Args: - image_features: A list of float tensors of shape [batch_size, height_i, - width_i, channels_i] containing features for a batch of images. - num_predictions_per_location: A list of integers representing the number - of box predictions to be made per spatial location for each feature map. - **params: Additional keyword arguments for specific implementations of - BoxPredictor. - - Returns: - A dictionary containing at least the following tensors. - box_encodings: A list of float tensors. Each entry in the list - corresponds to a feature map in the input `image_features` list. All - tensors in the list have one of the two following shapes: - a. [batch_size, num_anchors_i, q, code_size] representing the location - of the objects, where q is 1 or the number of classes. - b. [batch_size, num_anchors_i, code_size]. - class_predictions_with_background: A list of float tensors of shape - [batch_size, num_anchors_i, num_classes + 1] representing the class - predictions for the proposals. Each entry in the list corresponds to a - feature map in the input `image_features` list. - """ - pass - - -class RfcnBoxPredictor(BoxPredictor): - """RFCN Box Predictor. - - Applies a position sensitive ROI pooling on position sensitive feature maps to - predict classes and refined locations. See https://arxiv.org/abs/1605.06409 - for details. - - This is used for the second stage of the RFCN meta architecture. Notice that - locations are *not* shared across classes, thus for each anchor, a separate - prediction is made for each class. - """ - - def __init__(self, - is_training, - num_classes, - conv_hyperparams_fn, - num_spatial_bins, - depth, - crop_size, - box_code_size): - """Constructor. - - Args: - is_training: Indicates whether the BoxPredictor is in training mode. - num_classes: number of classes. Note that num_classes *does not* - include the background category, so if groundtruth labels take values - in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the - assigned classification targets can range from {0,... K}). - conv_hyperparams_fn: A function to construct tf-slim arg_scope with - hyperparameters for convolutional layers. - num_spatial_bins: A list of two integers `[spatial_bins_y, - spatial_bins_x]`. - depth: Target depth to reduce the input feature maps to. - crop_size: A list of two integers `[crop_height, crop_width]`. - box_code_size: Size of encoding for each box. - """ - super(RfcnBoxPredictor, self).__init__(is_training, num_classes) - self._conv_hyperparams_fn = conv_hyperparams_fn - self._num_spatial_bins = num_spatial_bins - self._depth = depth - self._crop_size = crop_size - self._box_code_size = box_code_size - - @property - def num_classes(self): - return self._num_classes - - def _predict(self, image_features, num_predictions_per_location, - proposal_boxes): - """Computes encoded object locations and corresponding confidences. 
- - Args: - image_features: A list of float tensors of shape [batch_size, height_i, - width_i, channels_i] containing features for a batch of images. - num_predictions_per_location: A list of integers representing the number - of box predictions to be made per spatial location for each feature map. - Currently, this must be set to [1], or an error will be raised. - proposal_boxes: A float tensor of shape [batch_size, num_proposals, - box_code_size]. - - Returns: - box_encodings: A list of float tensors of shape - [batch_size, num_anchors_i, q, code_size] representing the location of - the objects, where q is 1 or the number of classes. Each entry in the - list corresponds to a feature map in the input `image_features` list. - class_predictions_with_background: A list of float tensors of shape - [batch_size, num_anchors_i, num_classes + 1] representing the class - predictions for the proposals. Each entry in the list corresponds to a - feature map in the input `image_features` list. - - Raises: - ValueError: if num_predictions_per_location is not 1 or if - len(image_features) is not 1. - """ - if (len(num_predictions_per_location) != 1 or - num_predictions_per_location[0] != 1): - raise ValueError('Currently RfcnBoxPredictor only supports ' - 'predicting a single box per class per location.') - if len(image_features) != 1: - raise ValueError('length of `image_features` must be 1. Found {}'. - format(len(image_features))) - image_feature = image_features[0] - num_predictions_per_location = num_predictions_per_location[0] - batch_size = tf.shape(proposal_boxes)[0] - num_boxes = tf.shape(proposal_boxes)[1] - def get_box_indices(proposals): - proposals_shape = proposals.get_shape().as_list() - if any(dim is None for dim in proposals_shape): - proposals_shape = tf.shape(proposals) - ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32) - multiplier = tf.expand_dims( - tf.range(start=0, limit=proposals_shape[0]), 1) - return tf.reshape(ones_mat * multiplier, [-1]) - - net = image_feature - with slim.arg_scope(self._conv_hyperparams_fn()): - net = slim.conv2d(net, self._depth, [1, 1], scope='reduce_depth') - # Location predictions. - location_feature_map_depth = (self._num_spatial_bins[0] * - self._num_spatial_bins[1] * - self.num_classes * - self._box_code_size) - location_feature_map = slim.conv2d(net, location_feature_map_depth, - [1, 1], activation_fn=None, - scope='refined_locations') - box_encodings = ops.position_sensitive_crop_regions( - location_feature_map, - boxes=tf.reshape(proposal_boxes, [-1, self._box_code_size]), - box_ind=get_box_indices(proposal_boxes), - crop_size=self._crop_size, - num_spatial_bins=self._num_spatial_bins, - global_pool=True) - box_encodings = tf.squeeze(box_encodings, squeeze_dims=[1, 2]) - box_encodings = tf.reshape(box_encodings, - [batch_size * num_boxes, 1, self.num_classes, - self._box_code_size]) - - # Class predictions. - total_classes = self.num_classes + 1 # Account for background class. 
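-      # Purely illustrative sizing: with num_spatial_bins=[3, 3], 90 classes
-      # and box_code_size=4, the location map above has 3 * 3 * 90 * 4 = 3240
-      # channels, while the class map below needs 3 * 3 * (90 + 1) = 819
-      # channels, one score map per spatial bin per class, background
-      # included.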
- class_feature_map_depth = (self._num_spatial_bins[0] * - self._num_spatial_bins[1] * - total_classes) - class_feature_map = slim.conv2d(net, class_feature_map_depth, [1, 1], - activation_fn=None, - scope='class_predictions') - class_predictions_with_background = ops.position_sensitive_crop_regions( - class_feature_map, - boxes=tf.reshape(proposal_boxes, [-1, self._box_code_size]), - box_ind=get_box_indices(proposal_boxes), - crop_size=self._crop_size, - num_spatial_bins=self._num_spatial_bins, - global_pool=True) - class_predictions_with_background = tf.squeeze( - class_predictions_with_background, squeeze_dims=[1, 2]) - class_predictions_with_background = tf.reshape( - class_predictions_with_background, - [batch_size * num_boxes, 1, total_classes]) - - return {BOX_ENCODINGS: [box_encodings], - CLASS_PREDICTIONS_WITH_BACKGROUND: - [class_predictions_with_background]} - - -# TODO(rathodv): Change the implementation to return lists of predictions. -class MaskRCNNBoxPredictor(BoxPredictor): - """Mask R-CNN Box Predictor. - - See Mask R-CNN: He, K., Gkioxari, G., Dollar, P., & Girshick, R. (2017). - Mask R-CNN. arXiv preprint arXiv:1703.06870. - - This is used for the second stage of the Mask R-CNN detector where proposals - cropped from an image are arranged along the batch dimension of the input - image_features tensor. Notice that locations are *not* shared across classes, - thus for each anchor, a separate prediction is made for each class. - - In addition to predicting boxes and classes, optionally this class allows - predicting masks and/or keypoints inside detection boxes. - - Currently this box predictor makes per-class predictions; that is, each - anchor makes a separate box prediction for each class. - """ - - def __init__(self, - is_training, - num_classes, - fc_hyperparams_fn, - use_dropout, - dropout_keep_prob, - box_code_size, - conv_hyperparams_fn=None, - predict_instance_masks=False, - mask_height=14, - mask_width=14, - mask_prediction_num_conv_layers=2, - mask_prediction_conv_depth=256, - masks_are_class_agnostic=False, - predict_keypoints=False, - share_box_across_classes=False): - """Constructor. - - Args: - is_training: Indicates whether the BoxPredictor is in training mode. - num_classes: number of classes. Note that num_classes *does not* - include the background category, so if groundtruth labels take values - in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the - assigned classification targets can range from {0,... K}). - fc_hyperparams_fn: A function to generate tf-slim arg_scope with - hyperparameters for fully connected ops. - use_dropout: Option to use dropout or not. Note that a single dropout - op is applied here prior to both box and class predictions, which stands - in contrast to the ConvolutionalBoxPredictor below. - dropout_keep_prob: Keep probability for dropout. - This is only used if use_dropout is True. - box_code_size: Size of encoding for each box. - conv_hyperparams_fn: A function to generate tf-slim arg_scope with - hyperparameters for convolution ops. - predict_instance_masks: Whether to predict object masks inside detection - boxes. - mask_height: Desired output mask height. The default value is 14. - mask_width: Desired output mask width. The default value is 14. - mask_prediction_num_conv_layers: Number of convolution layers applied to - the image_features in mask prediction branch. - mask_prediction_conv_depth: The depth for the first conv2d_transpose op - applied to the image_features in the mask prediction branch. 
If set to 0, the depth of the convolution layers will be
-        automatically chosen based on the number of object classes and the
-        number of channels in the image features.
-      masks_are_class_agnostic: Boolean determining if the mask-head is
-        class-agnostic or not.
-      predict_keypoints: Whether to predict keypoints inside detection boxes.
-      share_box_across_classes: Whether to share boxes across classes rather
-        than use a different box for each class.
-
-    Raises:
-      ValueError: If predict_instance_masks is true but conv_hyperparams is
-        not set.
-      ValueError: If predict_keypoints is true since it is not implemented
-        yet.
-      ValueError: If mask_prediction_num_conv_layers is smaller than two.
-    """
-    super(MaskRCNNBoxPredictor, self).__init__(is_training, num_classes)
-    self._fc_hyperparams_fn = fc_hyperparams_fn
-    self._use_dropout = use_dropout
-    self._box_code_size = box_code_size
-    self._dropout_keep_prob = dropout_keep_prob
-    self._conv_hyperparams_fn = conv_hyperparams_fn
-    self._predict_instance_masks = predict_instance_masks
-    self._mask_height = mask_height
-    self._mask_width = mask_width
-    self._mask_prediction_num_conv_layers = mask_prediction_num_conv_layers
-    self._mask_prediction_conv_depth = mask_prediction_conv_depth
-    self._masks_are_class_agnostic = masks_are_class_agnostic
-    self._predict_keypoints = predict_keypoints
-    self._share_box_across_classes = share_box_across_classes
-    if self._predict_keypoints:
-      raise ValueError('Keypoint prediction is unimplemented.')
-    if ((self._predict_instance_masks or self._predict_keypoints) and
-        self._conv_hyperparams_fn is None):
-      raise ValueError('`conv_hyperparams` must be provided when predicting '
-                       'masks.')
-    if self._mask_prediction_num_conv_layers < 2:
-      raise ValueError(
-          'Mask prediction should consist of at least 2 conv layers')
-
-  @property
-  def num_classes(self):
-    return self._num_classes
-
-  @property
-  def predicts_instance_masks(self):
-    return self._predict_instance_masks
-
-  def _predict_boxes_and_classes(self, image_features):
-    """Predicts boxes and class scores.
-
-    Args:
-      image_features: A float tensor of shape [batch_size, height, width,
-        channels] containing features for a batch of images.
-
-    Returns:
-      box_encodings: A float tensor of shape
-        [batch_size, 1, num_classes, code_size] representing the location of
-        the objects.
-      class_predictions_with_background: A float tensor of shape
-        [batch_size, 1, num_classes + 1] representing the class predictions
-        for the proposals.
- """ - spatial_averaged_image_features = tf.reduce_mean(image_features, [1, 2], - keep_dims=True, - name='AvgPool') - flattened_image_features = slim.flatten(spatial_averaged_image_features) - if self._use_dropout: - flattened_image_features = slim.dropout(flattened_image_features, - keep_prob=self._dropout_keep_prob, - is_training=self._is_training) - number_of_boxes = 1 - if not self._share_box_across_classes: - number_of_boxes = self._num_classes - - with slim.arg_scope(self._fc_hyperparams_fn()): - box_encodings = slim.fully_connected( - flattened_image_features, - number_of_boxes * self._box_code_size, - activation_fn=None, - scope='BoxEncodingPredictor') - class_predictions_with_background = slim.fully_connected( - flattened_image_features, - self._num_classes + 1, - activation_fn=None, - scope='ClassPredictor') - box_encodings = tf.reshape( - box_encodings, [-1, 1, number_of_boxes, self._box_code_size]) - class_predictions_with_background = tf.reshape( - class_predictions_with_background, [-1, 1, self._num_classes + 1]) - return box_encodings, class_predictions_with_background - - def _get_mask_predictor_conv_depth(self, num_feature_channels, num_classes, - class_weight=3.0, feature_weight=2.0): - """Computes the depth of the mask predictor convolutions. - - Computes the depth of the mask predictor convolutions given feature channels - and number of classes by performing a weighted average of the two in - log space to compute the number of convolution channels. The weights that - are used for computing the weighted average do not need to sum to 1. - - Args: - num_feature_channels: An integer containing the number of feature - channels. - num_classes: An integer containing the number of classes. - class_weight: Class weight used in computing the weighted average. - feature_weight: Feature weight used in computing the weighted average. - - Returns: - An integer containing the number of convolution channels used by mask - predictor. - """ - num_feature_channels_log = math.log(float(num_feature_channels), 2.0) - num_classes_log = math.log(float(num_classes), 2.0) - weighted_num_feature_channels_log = ( - num_feature_channels_log * feature_weight) - weighted_num_classes_log = num_classes_log * class_weight - total_weight = feature_weight + class_weight - num_conv_channels_log = round( - (weighted_num_feature_channels_log + weighted_num_classes_log) / - total_weight) - return int(math.pow(2.0, num_conv_channels_log)) - - def _predict_masks(self, image_features): - """Performs mask prediction. - - Args: - image_features: A float tensor of shape [batch_size, height, width, - channels] containing features for a batch of images. - - Returns: - instance_masks: A float tensor of shape - [batch_size, 1, num_classes, image_height, image_width]. 
- """ - num_conv_channels = self._mask_prediction_conv_depth - if num_conv_channels == 0: - num_feature_channels = image_features.get_shape().as_list()[3] - num_conv_channels = self._get_mask_predictor_conv_depth( - num_feature_channels, self.num_classes) - with slim.arg_scope(self._conv_hyperparams_fn()): - upsampled_features = tf.image.resize_bilinear( - image_features, - [self._mask_height, self._mask_width], - align_corners=True) - for _ in range(self._mask_prediction_num_conv_layers - 1): - upsampled_features = slim.conv2d( - upsampled_features, - num_outputs=num_conv_channels, - kernel_size=[3, 3]) - num_masks = 1 if self._masks_are_class_agnostic else self.num_classes - mask_predictions = slim.conv2d(upsampled_features, - num_outputs=num_masks, - activation_fn=None, - kernel_size=[3, 3]) - return tf.expand_dims( - tf.transpose(mask_predictions, perm=[0, 3, 1, 2]), - axis=1, - name='MaskPredictor') - - def _predict(self, image_features, num_predictions_per_location, - predict_boxes_and_classes=True, predict_auxiliary_outputs=False): - """Optionally computes encoded object locations, confidences, and masks. - - Flattens image_features and applies fully connected ops (with no - non-linearity) to predict box encodings and class predictions. In this - setting, anchors are not spatially arranged in any way and are assumed to - have been folded into the batch dimension. Thus we output 1 for the - anchors dimension. - - Also optionally predicts instance masks. - The mask prediction head is based on the Mask RCNN paper with the following - modifications: We replace the deconvolution layer with a bilinear resize - and a convolution. - - Args: - image_features: A list of float tensors of shape [batch_size, height_i, - width_i, channels_i] containing features for a batch of images. - num_predictions_per_location: A list of integers representing the number - of box predictions to be made per spatial location for each feature map. - Currently, this must be set to [1], or an error will be raised. - predict_boxes_and_classes: If true, the function will perform box - refinement and classification. - predict_auxiliary_outputs: If true, the function will perform other - predictions such as mask, keypoint, boundaries, etc. if any. - - Returns: - A dictionary containing the following tensors. - box_encodings: A float tensor of shape - [batch_size, 1, num_classes, code_size] representing the - location of the objects. - class_predictions_with_background: A float tensor of shape - [batch_size, 1, num_classes + 1] representing the class - predictions for the proposals. - If predict_masks is True the dictionary also contains: - instance_masks: A float tensor of shape - [batch_size, 1, num_classes, image_height, image_width] - If predict_keypoints is True the dictionary also contains: - keypoints: [batch_size, 1, num_keypoints, 2] - - Raises: - ValueError: If num_predictions_per_location is not 1 or if both - predict_boxes_and_classes and predict_auxiliary_outputs are false or if - len(image_features) is not 1. - """ - if (len(num_predictions_per_location) != 1 or - num_predictions_per_location[0] != 1): - raise ValueError('Currently FullyConnectedBoxPredictor only supports ' - 'predicting a single box per class per location.') - if not predict_boxes_and_classes and not predict_auxiliary_outputs: - raise ValueError('Should perform at least one prediction.') - if len(image_features) != 1: - raise ValueError('length of `image_features` must be 1. Found {}'. 
- format(len(image_features))) - image_feature = image_features[0] - num_predictions_per_location = num_predictions_per_location[0] - predictions_dict = {} - - if predict_boxes_and_classes: - (box_encodings, class_predictions_with_background - ) = self._predict_boxes_and_classes(image_feature) - predictions_dict[BOX_ENCODINGS] = box_encodings - predictions_dict[ - CLASS_PREDICTIONS_WITH_BACKGROUND] = class_predictions_with_background - - if self._predict_instance_masks and predict_auxiliary_outputs: - predictions_dict[MASK_PREDICTIONS] = self._predict_masks(image_feature) - - return predictions_dict - - -class _NoopVariableScope(object): - """A dummy class that does not push any scope.""" - - def __enter__(self): - return None - - def __exit__(self, exc_type, exc_value, traceback): - return False - - -class ConvolutionalBoxPredictor(BoxPredictor): - """Convolutional Box Predictor. - - Optionally add an intermediate 1x1 convolutional layer after features and - predict in parallel branches box_encodings and - class_predictions_with_background. - - Currently this box predictor assumes that predictions are "shared" across - classes --- that is each anchor makes box predictions which do not depend - on class. - """ - - def __init__(self, - is_training, - num_classes, - conv_hyperparams_fn, - min_depth, - max_depth, - num_layers_before_predictor, - use_dropout, - dropout_keep_prob, - kernel_size, - box_code_size, - apply_sigmoid_to_scores=False, - class_prediction_bias_init=0.0, - use_depthwise=False): - """Constructor. - - Args: - is_training: Indicates whether the BoxPredictor is in training mode. - num_classes: number of classes. Note that num_classes *does not* - include the background category, so if groundtruth labels take values - in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the - assigned classification targets can range from {0,... K}). - conv_hyperparams_fn: A function to generate tf-slim arg_scope with - hyperparameters for convolution ops. - min_depth: Minimum feature depth prior to predicting box encodings - and class predictions. - max_depth: Maximum feature depth prior to predicting box encodings - and class predictions. If max_depth is set to 0, no additional - feature map will be inserted before location and class predictions. - num_layers_before_predictor: Number of the additional conv layers before - the predictor. - use_dropout: Option to use dropout for class prediction or not. - dropout_keep_prob: Keep probability for dropout. - This is only used if use_dropout is True. - kernel_size: Size of final convolution kernel. If the - spatial resolution of the feature map is smaller than the kernel size, - then the kernel size is automatically set to be - min(feature_width, feature_height). - box_code_size: Size of encoding for each box. - apply_sigmoid_to_scores: if True, apply the sigmoid on the output - class_predictions. - class_prediction_bias_init: constant value to initialize bias of the last - conv2d layer before class prediction. - use_depthwise: Whether to use depthwise convolutions for prediction - steps. Default is False. - - Raises: - ValueError: if min_depth > max_depth. 
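-
-    Note: at prediction time the depth of the additional conv layers is
-    derived as max(min(features_depth, max_depth), min_depth), so e.g. a
-    256-channel feature map with max_depth=64 and min_depth=16 is first
-    reduced to 64 channels (numbers here are purely illustrative).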
- """ - super(ConvolutionalBoxPredictor, self).__init__(is_training, num_classes) - if min_depth > max_depth: - raise ValueError('min_depth should be less than or equal to max_depth') - self._conv_hyperparams_fn = conv_hyperparams_fn - self._min_depth = min_depth - self._max_depth = max_depth - self._num_layers_before_predictor = num_layers_before_predictor - self._use_dropout = use_dropout - self._kernel_size = kernel_size - self._box_code_size = box_code_size - self._dropout_keep_prob = dropout_keep_prob - self._apply_sigmoid_to_scores = apply_sigmoid_to_scores - self._class_prediction_bias_init = class_prediction_bias_init - self._use_depthwise = use_depthwise - - def _predict(self, image_features, num_predictions_per_location_list): - """Computes encoded object locations and corresponding confidences. - - Args: - image_features: A list of float tensors of shape [batch_size, height_i, - width_i, channels_i] containing features for a batch of images. - num_predictions_per_location_list: A list of integers representing the - number of box predictions to be made per spatial location for each - feature map. - - Returns: - box_encodings: A list of float tensors of shape - [batch_size, num_anchors_i, q, code_size] representing the location of - the objects, where q is 1 or the number of classes. Each entry in the - list corresponds to a feature map in the input `image_features` list. - class_predictions_with_background: A list of float tensors of shape - [batch_size, num_anchors_i, num_classes + 1] representing the class - predictions for the proposals. Each entry in the list corresponds to a - feature map in the input `image_features` list. - """ - box_encodings_list = [] - class_predictions_list = [] - # TODO(rathodv): Come up with a better way to generate scope names - # in box predictor once we have time to retrain all models in the zoo. - # The following lines create scope names to be backwards compatible with the - # existing checkpoints. - box_predictor_scopes = [_NoopVariableScope()] - if len(image_features) > 1: - box_predictor_scopes = [ - tf.variable_scope('BoxPredictor_{}'.format(i)) - for i in range(len(image_features)) - ] - - for (image_feature, - num_predictions_per_location, box_predictor_scope) in zip( - image_features, num_predictions_per_location_list, - box_predictor_scopes): - with box_predictor_scope: - # Add a slot for the background class. - num_class_slots = self.num_classes + 1 - net = image_feature - with slim.arg_scope(self._conv_hyperparams_fn()), \ - slim.arg_scope([slim.dropout], is_training=self._is_training): - # Add additional conv layers before the class predictor. - features_depth = static_shape.get_depth(image_feature.get_shape()) - depth = max(min(features_depth, self._max_depth), self._min_depth) - tf.logging.info('depth of additional conv before box predictor: {}'. 
- format(depth)) - if depth > 0 and self._num_layers_before_predictor > 0: - for i in range(self._num_layers_before_predictor): - net = slim.conv2d( - net, depth, [1, 1], scope='Conv2d_%d_1x1_%d' % (i, depth)) - with slim.arg_scope([slim.conv2d], activation_fn=None, - normalizer_fn=None, normalizer_params=None): - if self._use_depthwise: - box_encodings = slim.separable_conv2d( - net, None, [self._kernel_size, self._kernel_size], - padding='SAME', depth_multiplier=1, stride=1, - rate=1, scope='BoxEncodingPredictor_depthwise') - box_encodings = slim.conv2d( - box_encodings, - num_predictions_per_location * self._box_code_size, [1, 1], - scope='BoxEncodingPredictor') - else: - box_encodings = slim.conv2d( - net, num_predictions_per_location * self._box_code_size, - [self._kernel_size, self._kernel_size], - scope='BoxEncodingPredictor') - if self._use_dropout: - net = slim.dropout(net, keep_prob=self._dropout_keep_prob) - if self._use_depthwise: - class_predictions_with_background = slim.separable_conv2d( - net, None, [self._kernel_size, self._kernel_size], - padding='SAME', depth_multiplier=1, stride=1, - rate=1, scope='ClassPredictor_depthwise') - class_predictions_with_background = slim.conv2d( - class_predictions_with_background, - num_predictions_per_location * num_class_slots, - [1, 1], scope='ClassPredictor') - else: - class_predictions_with_background = slim.conv2d( - net, num_predictions_per_location * num_class_slots, - [self._kernel_size, self._kernel_size], - scope='ClassPredictor', - biases_initializer=tf.constant_initializer( - self._class_prediction_bias_init)) - if self._apply_sigmoid_to_scores: - class_predictions_with_background = tf.sigmoid( - class_predictions_with_background) - - combined_feature_map_shape = (shape_utils. - combined_static_and_dynamic_shape( - image_feature)) - box_encodings = tf.reshape( - box_encodings, tf.stack([combined_feature_map_shape[0], - combined_feature_map_shape[1] * - combined_feature_map_shape[2] * - num_predictions_per_location, - 1, self._box_code_size])) - box_encodings_list.append(box_encodings) - class_predictions_with_background = tf.reshape( - class_predictions_with_background, - tf.stack([combined_feature_map_shape[0], - combined_feature_map_shape[1] * - combined_feature_map_shape[2] * - num_predictions_per_location, - num_class_slots])) - class_predictions_list.append(class_predictions_with_background) - return { - BOX_ENCODINGS: box_encodings_list, - CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_list - } - - -# TODO(rathodv): Replace with slim.arg_scope_func_key once its available -# externally. -def _arg_scope_func_key(op): - """Returns a key that can be used to index arg_scope dictionary.""" - return getattr(op, '_key_op', str(op)) - - -# TODO(rathodv): Merge the implementation with ConvolutionalBoxPredictor above -# since they are very similar. -class WeightSharedConvolutionalBoxPredictor(BoxPredictor): - """Convolutional Box Predictor with weight sharing. - - Defines the box predictor as defined in - https://arxiv.org/abs/1708.02002. This class differs from - ConvolutionalBoxPredictor in that it shares weights and biases while - predicting from different feature maps. However, batch_norm parameters are not - shared because the statistics of the activations vary among the different - feature maps. - - Also note that separate multi-layer towers are constructed for the box - encoding and class predictors respectively. 
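-
-  In sketch form, every feature map is run through the same
-  `BoxPredictionTower`/`ClassPredictionTower` convolution weights (via
-  tf.AUTO_REUSE), while each batch norm op inside a tower gets a
-  per-feature-map scope (e.g. .../BatchNorm/feature_0) so its statistics
-  stay unshared.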
- """ - - def __init__(self, - is_training, - num_classes, - conv_hyperparams_fn, - depth, - num_layers_before_predictor, - box_code_size, - kernel_size=3, - class_prediction_bias_init=0.0, - use_dropout=False, - dropout_keep_prob=0.8): - """Constructor. - - Args: - is_training: Indicates whether the BoxPredictor is in training mode. - num_classes: number of classes. Note that num_classes *does not* - include the background category, so if groundtruth labels take values - in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the - assigned classification targets can range from {0,... K}). - conv_hyperparams_fn: A function to generate tf-slim arg_scope with - hyperparameters for convolution ops. - depth: depth of conv layers. - num_layers_before_predictor: Number of the additional conv layers before - the predictor. - box_code_size: Size of encoding for each box. - kernel_size: Size of final convolution kernel. - class_prediction_bias_init: constant value to initialize bias of the last - conv2d layer before class prediction. - use_dropout: Whether to apply dropout to class prediction head. - dropout_keep_prob: Probability of keeping activiations. - """ - super(WeightSharedConvolutionalBoxPredictor, self).__init__(is_training, - num_classes) - self._conv_hyperparams_fn = conv_hyperparams_fn - self._depth = depth - self._num_layers_before_predictor = num_layers_before_predictor - self._box_code_size = box_code_size - self._kernel_size = kernel_size - self._class_prediction_bias_init = class_prediction_bias_init - self._use_dropout = use_dropout - self._dropout_keep_prob = dropout_keep_prob - - def _predict(self, image_features, num_predictions_per_location_list): - """Computes encoded object locations and corresponding confidences. - - Args: - image_features: A list of float tensors of shape [batch_size, height_i, - width_i, channels] containing features for a batch of images. Note that - all tensors in the list must have the same number of channels. - num_predictions_per_location_list: A list of integers representing the - number of box predictions to be made per spatial location for each - feature map. Note that all values must be the same since the weights are - shared. - - Returns: - box_encodings: A list of float tensors of shape - [batch_size, num_anchors_i, code_size] representing the location of - the objects. Each entry in the list corresponds to a feature map in the - input `image_features` list. - class_predictions_with_background: A list of float tensors of shape - [batch_size, num_anchors_i, num_classes + 1] representing the class - predictions for the proposals. Each entry in the list corresponds to a - feature map in the input `image_features` list. - - - Raises: - ValueError: If the image feature maps do not have the same number of - channels or if the num predictions per locations is differs between the - feature maps. 
- """ - if len(set(num_predictions_per_location_list)) > 1: - raise ValueError('num predictions per location must be same for all' - 'feature maps, found: {}'.format( - num_predictions_per_location_list)) - feature_channels = [ - image_feature.shape[3].value for image_feature in image_features - ] - if len(set(feature_channels)) > 1: - raise ValueError('all feature maps must have the same number of ' - 'channels, found: {}'.format(feature_channels)) - box_encodings_list = [] - class_predictions_list = [] - for feature_index, (image_feature, - num_predictions_per_location) in enumerate( - zip(image_features, - num_predictions_per_location_list)): - # Add a slot for the background class. - with tf.variable_scope('WeightSharedConvolutionalBoxPredictor', - reuse=tf.AUTO_REUSE): - num_class_slots = self.num_classes + 1 - box_encodings_net = image_feature - class_predictions_net = image_feature - with slim.arg_scope(self._conv_hyperparams_fn()) as sc: - apply_batch_norm = _arg_scope_func_key(slim.batch_norm) in sc - for i in range(self._num_layers_before_predictor): - box_encodings_net = slim.conv2d( - box_encodings_net, - self._depth, - [self._kernel_size, self._kernel_size], - stride=1, - padding='SAME', - activation_fn=None, - normalizer_fn=(tf.identity if apply_batch_norm else None), - scope='BoxPredictionTower/conv2d_{}'.format(i)) - if apply_batch_norm: - box_encodings_net = slim.batch_norm( - box_encodings_net, - scope='BoxPredictionTower/conv2d_{}/BatchNorm/feature_{}'. - format(i, feature_index)) - box_encodings_net = tf.nn.relu6(box_encodings_net) - box_encodings = slim.conv2d( - box_encodings_net, - num_predictions_per_location * self._box_code_size, - [self._kernel_size, self._kernel_size], - activation_fn=None, stride=1, padding='SAME', - normalizer_fn=None, - scope='BoxPredictor') - - for i in range(self._num_layers_before_predictor): - class_predictions_net = slim.conv2d( - class_predictions_net, - self._depth, - [self._kernel_size, self._kernel_size], - stride=1, - padding='SAME', - activation_fn=None, - normalizer_fn=(tf.identity if apply_batch_norm else None), - scope='ClassPredictionTower/conv2d_{}'.format(i)) - if apply_batch_norm: - class_predictions_net = slim.batch_norm( - class_predictions_net, - scope='ClassPredictionTower/conv2d_{}/BatchNorm/feature_{}' - .format(i, feature_index)) - class_predictions_net = tf.nn.relu6(class_predictions_net) - if self._use_dropout: - class_predictions_net = slim.dropout( - class_predictions_net, keep_prob=self._dropout_keep_prob) - class_predictions_with_background = slim.conv2d( - class_predictions_net, - num_predictions_per_location * num_class_slots, - [self._kernel_size, self._kernel_size], - activation_fn=None, stride=1, padding='SAME', - normalizer_fn=None, - biases_initializer=tf.constant_initializer( - self._class_prediction_bias_init), - scope='ClassPredictor') - - combined_feature_map_shape = (shape_utils. 
- combined_static_and_dynamic_shape( - image_feature)) - box_encodings = tf.reshape( - box_encodings, tf.stack([combined_feature_map_shape[0], - combined_feature_map_shape[1] * - combined_feature_map_shape[2] * - num_predictions_per_location, - self._box_code_size])) - box_encodings_list.append(box_encodings) - class_predictions_with_background = tf.reshape( - class_predictions_with_background, - tf.stack([combined_feature_map_shape[0], - combined_feature_map_shape[1] * - combined_feature_map_shape[2] * - num_predictions_per_location, - num_class_slots])) - class_predictions_list.append(class_predictions_with_background) - return { - BOX_ENCODINGS: box_encodings_list, - CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_list - } diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_predictor_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_predictor_test.py deleted file mode 100644 index 49680596f8d607ba52c0791a49df67a5d5ebe293..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/box_predictor_test.py +++ /dev/null @@ -1,724 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
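# --- Editorial aside (not part of the original diff) -------------------------
# A minimal numpy sketch of the shape bookkeeping done by
# WeightSharedConvolutionalBoxPredictor._predict above: the box head conv
# emits [batch, H, W, P * code_size], which is then reshaped to
# [batch, H * W * P, code_size]. The sizes below are illustrative choices
# (they match the unit tests further down), not values from the deleted code.
import numpy as np

batch, height, width = 4, 8, 8
num_predictions_per_location = 5   # P: box predictions per spatial location
box_code_size = 4                  # e.g. [ty, tx, th, tw]

head_output = np.zeros(
    (batch, height, width, num_predictions_per_location * box_code_size))
box_encodings = head_output.reshape(
    batch, height * width * num_predictions_per_location, box_code_size)
assert box_encodings.shape == (4, 8 * 8 * 5, 4)  # 320 anchors per image
# ------------------------------------------------------------------------------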
-# ============================================================================== - -"""Tests for object_detection.core.box_predictor.""" -import numpy as np -import tensorflow as tf - -from google.protobuf import text_format -from object_detection.builders import hyperparams_builder -from object_detection.core import box_predictor -from object_detection.protos import hyperparams_pb2 -from object_detection.utils import test_case - - -class MaskRCNNBoxPredictorTest(tf.test.TestCase): - - def _build_arg_scope_with_hyperparams(self, - op_type=hyperparams_pb2.Hyperparams.FC): - hyperparams = hyperparams_pb2.Hyperparams() - hyperparams_text_proto = """ - activation: NONE - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - text_format.Merge(hyperparams_text_proto, hyperparams) - hyperparams.op = op_type - return hyperparams_builder.build(hyperparams, is_training=True) - - def test_get_boxes_with_five_classes(self): - image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32) - mask_box_predictor = box_predictor.MaskRCNNBoxPredictor( - is_training=False, - num_classes=5, - fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(), - use_dropout=False, - dropout_keep_prob=0.5, - box_code_size=4, - ) - box_predictions = mask_box_predictor.predict( - [image_features], num_predictions_per_location=[1], - scope='BoxPredictor') - box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] - class_predictions_with_background = box_predictions[ - box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - (box_encodings_shape, - class_predictions_with_background_shape) = sess.run( - [tf.shape(box_encodings), - tf.shape(class_predictions_with_background)]) - self.assertAllEqual(box_encodings_shape, [2, 1, 5, 4]) - self.assertAllEqual(class_predictions_with_background_shape, [2, 1, 6]) - - def test_get_boxes_with_five_classes_share_box_across_classes(self): - image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32) - mask_box_predictor = box_predictor.MaskRCNNBoxPredictor( - is_training=False, - num_classes=5, - fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(), - use_dropout=False, - dropout_keep_prob=0.5, - box_code_size=4, - share_box_across_classes=True - ) - box_predictions = mask_box_predictor.predict( - [image_features], num_predictions_per_location=[1], - scope='BoxPredictor') - box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] - class_predictions_with_background = box_predictions[ - box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - (box_encodings_shape, - class_predictions_with_background_shape) = sess.run( - [tf.shape(box_encodings), - tf.shape(class_predictions_with_background)]) - self.assertAllEqual(box_encodings_shape, [2, 1, 1, 4]) - self.assertAllEqual(class_predictions_with_background_shape, [2, 1, 6]) - - def test_value_error_on_predict_instance_masks_with_no_conv_hyperparms(self): - with self.assertRaises(ValueError): - box_predictor.MaskRCNNBoxPredictor( - is_training=False, - num_classes=5, - fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(), - use_dropout=False, - dropout_keep_prob=0.5, - box_code_size=4, - predict_instance_masks=True) - - def test_get_instance_masks(self): - image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32) - mask_box_predictor = 
box_predictor.MaskRCNNBoxPredictor( - is_training=False, - num_classes=5, - fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(), - use_dropout=False, - dropout_keep_prob=0.5, - box_code_size=4, - conv_hyperparams_fn=self._build_arg_scope_with_hyperparams( - op_type=hyperparams_pb2.Hyperparams.CONV), - predict_instance_masks=True) - box_predictions = mask_box_predictor.predict( - [image_features], - num_predictions_per_location=[1], - scope='BoxPredictor', - predict_boxes_and_classes=True, - predict_auxiliary_outputs=True) - mask_predictions = box_predictions[box_predictor.MASK_PREDICTIONS] - self.assertListEqual([2, 1, 5, 14, 14], - mask_predictions.get_shape().as_list()) - - def test_do_not_return_instance_masks_without_request(self): - image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32) - mask_box_predictor = box_predictor.MaskRCNNBoxPredictor( - is_training=False, - num_classes=5, - fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(), - use_dropout=False, - dropout_keep_prob=0.5, - box_code_size=4) - box_predictions = mask_box_predictor.predict( - [image_features], num_predictions_per_location=[1], - scope='BoxPredictor') - self.assertEqual(len(box_predictions), 2) - self.assertTrue(box_predictor.BOX_ENCODINGS in box_predictions) - self.assertTrue(box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND - in box_predictions) - - def test_value_error_on_predict_keypoints(self): - with self.assertRaises(ValueError): - box_predictor.MaskRCNNBoxPredictor( - is_training=False, - num_classes=5, - fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(), - use_dropout=False, - dropout_keep_prob=0.5, - box_code_size=4, - predict_keypoints=True) - - -class RfcnBoxPredictorTest(tf.test.TestCase): - - def _build_arg_scope_with_conv_hyperparams(self): - conv_hyperparams = hyperparams_pb2.Hyperparams() - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) - return hyperparams_builder.build(conv_hyperparams, is_training=True) - - def test_get_correct_box_encoding_and_class_prediction_shapes(self): - image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32) - proposal_boxes = tf.random_normal([4, 2, 4], dtype=tf.float32) - rfcn_box_predictor = box_predictor.RfcnBoxPredictor( - is_training=False, - num_classes=2, - conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), - num_spatial_bins=[3, 3], - depth=4, - crop_size=[12, 12], - box_code_size=4 - ) - box_predictions = rfcn_box_predictor.predict( - [image_features], num_predictions_per_location=[1], - scope='BoxPredictor', - proposal_boxes=proposal_boxes) - box_encodings = tf.concat( - box_predictions[box_predictor.BOX_ENCODINGS], axis=1) - class_predictions_with_background = tf.concat( - box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], - axis=1) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - (box_encodings_shape, - class_predictions_shape) = sess.run( - [tf.shape(box_encodings), - tf.shape(class_predictions_with_background)]) - self.assertAllEqual(box_encodings_shape, [8, 1, 2, 4]) - self.assertAllEqual(class_predictions_shape, [8, 1, 3]) - - -class ConvolutionalBoxPredictorTest(test_case.TestCase): - - def _build_arg_scope_with_conv_hyperparams(self): - conv_hyperparams = hyperparams_pb2.Hyperparams() - conv_hyperparams_text_proto = """ - activation: RELU_6 - regularizer { - 
l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) - return hyperparams_builder.build(conv_hyperparams, is_training=True) - - def test_get_boxes_for_five_aspect_ratios_per_location(self): - def graph_fn(image_features): - conv_box_predictor = box_predictor.ConvolutionalBoxPredictor( - is_training=False, - num_classes=0, - conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), - min_depth=0, - max_depth=32, - num_layers_before_predictor=1, - use_dropout=True, - dropout_keep_prob=0.8, - kernel_size=1, - box_code_size=4 - ) - box_predictions = conv_box_predictor.predict( - [image_features], num_predictions_per_location=[5], - scope='BoxPredictor') - box_encodings = tf.concat( - box_predictions[box_predictor.BOX_ENCODINGS], axis=1) - objectness_predictions = tf.concat( - box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], - axis=1) - return (box_encodings, objectness_predictions) - image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) - (box_encodings, objectness_predictions) = self.execute(graph_fn, - [image_features]) - self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4]) - self.assertAllEqual(objectness_predictions.shape, [4, 320, 1]) - - def test_get_boxes_for_one_aspect_ratio_per_location(self): - def graph_fn(image_features): - conv_box_predictor = box_predictor.ConvolutionalBoxPredictor( - is_training=False, - num_classes=0, - conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), - min_depth=0, - max_depth=32, - num_layers_before_predictor=1, - use_dropout=True, - dropout_keep_prob=0.8, - kernel_size=1, - box_code_size=4 - ) - box_predictions = conv_box_predictor.predict( - [image_features], num_predictions_per_location=[1], - scope='BoxPredictor') - box_encodings = tf.concat( - box_predictions[box_predictor.BOX_ENCODINGS], axis=1) - objectness_predictions = tf.concat(box_predictions[ - box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) - return (box_encodings, objectness_predictions) - image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) - (box_encodings, objectness_predictions) = self.execute(graph_fn, - [image_features]) - self.assertAllEqual(box_encodings.shape, [4, 64, 1, 4]) - self.assertAllEqual(objectness_predictions.shape, [4, 64, 1]) - - def test_get_multi_class_predictions_for_five_aspect_ratios_per_location( - self): - num_classes_without_background = 6 - image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) - def graph_fn(image_features): - conv_box_predictor = box_predictor.ConvolutionalBoxPredictor( - is_training=False, - num_classes=num_classes_without_background, - conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), - min_depth=0, - max_depth=32, - num_layers_before_predictor=1, - use_dropout=True, - dropout_keep_prob=0.8, - kernel_size=1, - box_code_size=4 - ) - box_predictions = conv_box_predictor.predict( - [image_features], - num_predictions_per_location=[5], - scope='BoxPredictor') - box_encodings = tf.concat( - box_predictions[box_predictor.BOX_ENCODINGS], axis=1) - class_predictions_with_background = tf.concat( - box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], - axis=1) - return (box_encodings, class_predictions_with_background) - (box_encodings, - class_predictions_with_background) = self.execute(graph_fn, - [image_features]) - self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4]) - self.assertAllEqual(class_predictions_with_background.shape, - 
[4, 320, num_classes_without_background+1]) - - def test_get_predictions_with_feature_maps_of_dynamic_shape( - self): - image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) - conv_box_predictor = box_predictor.ConvolutionalBoxPredictor( - is_training=False, - num_classes=0, - conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), - min_depth=0, - max_depth=32, - num_layers_before_predictor=1, - use_dropout=True, - dropout_keep_prob=0.8, - kernel_size=1, - box_code_size=4 - ) - box_predictions = conv_box_predictor.predict( - [image_features], num_predictions_per_location=[5], - scope='BoxPredictor') - box_encodings = tf.concat( - box_predictions[box_predictor.BOX_ENCODINGS], axis=1) - objectness_predictions = tf.concat( - box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], - axis=1) - init_op = tf.global_variables_initializer() - - resolution = 32 - expected_num_anchors = resolution*resolution*5 - with self.test_session() as sess: - sess.run(init_op) - (box_encodings_shape, - objectness_predictions_shape) = sess.run( - [tf.shape(box_encodings), tf.shape(objectness_predictions)], - feed_dict={image_features: - np.random.rand(4, resolution, resolution, 64)}) - actual_variable_set = set( - [var.op.name for var in tf.trainable_variables()]) - self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4]) - self.assertAllEqual(objectness_predictions_shape, - [4, expected_num_anchors, 1]) - expected_variable_set = set([ - 'BoxPredictor/Conv2d_0_1x1_32/biases', - 'BoxPredictor/Conv2d_0_1x1_32/weights', - 'BoxPredictor/BoxEncodingPredictor/biases', - 'BoxPredictor/BoxEncodingPredictor/weights', - 'BoxPredictor/ClassPredictor/biases', - 'BoxPredictor/ClassPredictor/weights']) - self.assertEqual(expected_variable_set, actual_variable_set) - - def test_use_depthwise_convolution(self): - image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) - conv_box_predictor = box_predictor.ConvolutionalBoxPredictor( - is_training=False, - num_classes=0, - conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), - min_depth=0, - max_depth=32, - num_layers_before_predictor=1, - dropout_keep_prob=0.8, - kernel_size=1, - box_code_size=4, - use_dropout=True, - use_depthwise=True - ) - box_predictions = conv_box_predictor.predict( - [image_features], num_predictions_per_location=[5], - scope='BoxPredictor') - box_encodings = tf.concat( - box_predictions[box_predictor.BOX_ENCODINGS], axis=1) - objectness_predictions = tf.concat( - box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], - axis=1) - init_op = tf.global_variables_initializer() - - resolution = 32 - expected_num_anchors = resolution*resolution*5 - with self.test_session() as sess: - sess.run(init_op) - (box_encodings_shape, - objectness_predictions_shape) = sess.run( - [tf.shape(box_encodings), tf.shape(objectness_predictions)], - feed_dict={image_features: - np.random.rand(4, resolution, resolution, 64)}) - actual_variable_set = set( - [var.op.name for var in tf.trainable_variables()]) - self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4]) - self.assertAllEqual(objectness_predictions_shape, - [4, expected_num_anchors, 1]) - expected_variable_set = set([ - 'BoxPredictor/Conv2d_0_1x1_32/biases', - 'BoxPredictor/Conv2d_0_1x1_32/weights', - 'BoxPredictor/BoxEncodingPredictor_depthwise/biases', - 'BoxPredictor/BoxEncodingPredictor_depthwise/depthwise_weights', - 'BoxPredictor/BoxEncodingPredictor/biases', - 
'BoxPredictor/BoxEncodingPredictor/weights', - 'BoxPredictor/ClassPredictor_depthwise/biases', - 'BoxPredictor/ClassPredictor_depthwise/depthwise_weights', - 'BoxPredictor/ClassPredictor/biases', - 'BoxPredictor/ClassPredictor/weights']) - self.assertEqual(expected_variable_set, actual_variable_set) - - -class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase): - - def _build_arg_scope_with_conv_hyperparams(self): - conv_hyperparams = hyperparams_pb2.Hyperparams() - conv_hyperparams_text_proto = """ - activation: RELU_6 - regularizer { - l2_regularizer { - } - } - initializer { - random_normal_initializer { - stddev: 0.01 - mean: 0.0 - } - } - batch_norm { - train: true, - } - """ - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) - return hyperparams_builder.build(conv_hyperparams, is_training=True) - - def _build_conv_arg_scope_no_batch_norm(self): - conv_hyperparams = hyperparams_pb2.Hyperparams() - conv_hyperparams_text_proto = """ - activation: RELU_6 - regularizer { - l2_regularizer { - } - } - initializer { - random_normal_initializer { - stddev: 0.01 - mean: 0.0 - } - } - """ - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) - return hyperparams_builder.build(conv_hyperparams, is_training=True) - - def test_get_boxes_for_five_aspect_ratios_per_location(self): - - def graph_fn(image_features): - conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor( - is_training=False, - num_classes=0, - conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), - depth=32, - num_layers_before_predictor=1, - box_code_size=4) - box_predictions = conv_box_predictor.predict( - [image_features], num_predictions_per_location=[5], - scope='BoxPredictor') - box_encodings = tf.concat( - box_predictions[box_predictor.BOX_ENCODINGS], axis=1) - objectness_predictions = tf.concat(box_predictions[ - box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) - return (box_encodings, objectness_predictions) - image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) - (box_encodings, objectness_predictions) = self.execute( - graph_fn, [image_features]) - self.assertAllEqual(box_encodings.shape, [4, 320, 4]) - self.assertAllEqual(objectness_predictions.shape, [4, 320, 1]) - - def test_bias_predictions_to_background_with_sigmoid_score_conversion(self): - - def graph_fn(image_features): - conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor( - is_training=True, - num_classes=2, - conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), - depth=32, - num_layers_before_predictor=1, - class_prediction_bias_init=-4.6, - box_code_size=4) - box_predictions = conv_box_predictor.predict( - [image_features], num_predictions_per_location=[5], - scope='BoxPredictor') - class_predictions = tf.concat(box_predictions[ - box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) - return (tf.nn.sigmoid(class_predictions),) - image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) - class_predictions = self.execute(graph_fn, [image_features]) - self.assertAlmostEqual(np.mean(class_predictions), 0.01, places=3) - - def test_get_multi_class_predictions_for_five_aspect_ratios_per_location( - self): - - num_classes_without_background = 6 - def graph_fn(image_features): - conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor( - is_training=False, - num_classes=num_classes_without_background, - conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), - depth=32, - num_layers_before_predictor=1, - 
box_code_size=4) - box_predictions = conv_box_predictor.predict( - [image_features], - num_predictions_per_location=[5], - scope='BoxPredictor') - box_encodings = tf.concat( - box_predictions[box_predictor.BOX_ENCODINGS], axis=1) - class_predictions_with_background = tf.concat(box_predictions[ - box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) - return (box_encodings, class_predictions_with_background) - - image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) - (box_encodings, class_predictions_with_background) = self.execute( - graph_fn, [image_features]) - self.assertAllEqual(box_encodings.shape, [4, 320, 4]) - self.assertAllEqual(class_predictions_with_background.shape, - [4, 320, num_classes_without_background+1]) - - def test_get_multi_class_predictions_from_two_feature_maps( - self): - - num_classes_without_background = 6 - def graph_fn(image_features1, image_features2): - conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor( - is_training=False, - num_classes=num_classes_without_background, - conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), - depth=32, - num_layers_before_predictor=1, - box_code_size=4) - box_predictions = conv_box_predictor.predict( - [image_features1, image_features2], - num_predictions_per_location=[5, 5], - scope='BoxPredictor') - box_encodings = tf.concat( - box_predictions[box_predictor.BOX_ENCODINGS], axis=1) - class_predictions_with_background = tf.concat( - box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], - axis=1) - return (box_encodings, class_predictions_with_background) - - image_features1 = np.random.rand(4, 8, 8, 64).astype(np.float32) - image_features2 = np.random.rand(4, 8, 8, 64).astype(np.float32) - (box_encodings, class_predictions_with_background) = self.execute( - graph_fn, [image_features1, image_features2]) - self.assertAllEqual(box_encodings.shape, [4, 640, 4]) - self.assertAllEqual(class_predictions_with_background.shape, - [4, 640, num_classes_without_background+1]) - - def test_predictions_from_multiple_feature_maps_share_weights_not_batchnorm( - self): - num_classes_without_background = 6 - def graph_fn(image_features1, image_features2): - conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor( - is_training=False, - num_classes=num_classes_without_background, - conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), - depth=32, - num_layers_before_predictor=2, - box_code_size=4) - box_predictions = conv_box_predictor.predict( - [image_features1, image_features2], - num_predictions_per_location=[5, 5], - scope='BoxPredictor') - box_encodings = tf.concat( - box_predictions[box_predictor.BOX_ENCODINGS], axis=1) - class_predictions_with_background = tf.concat( - box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], - axis=1) - return (box_encodings, class_predictions_with_background) - - with self.test_session(graph=tf.Graph()): - graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), - tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) - actual_variable_set = set( - [var.op.name for var in tf.trainable_variables()]) - expected_variable_set = set([ - # Box prediction tower - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'BoxPredictionTower/conv2d_0/weights'), - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'BoxPredictionTower/conv2d_0/BatchNorm/feature_0/beta'), - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'BoxPredictionTower/conv2d_0/BatchNorm/feature_1/beta'), - 
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'BoxPredictionTower/conv2d_1/weights'), - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'BoxPredictionTower/conv2d_1/BatchNorm/feature_0/beta'), - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'BoxPredictionTower/conv2d_1/BatchNorm/feature_1/beta'), - # Box prediction head - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'BoxPredictor/weights'), - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'BoxPredictor/biases'), - # Class prediction tower - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'ClassPredictionTower/conv2d_0/weights'), - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'ClassPredictionTower/conv2d_0/BatchNorm/feature_0/beta'), - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'ClassPredictionTower/conv2d_0/BatchNorm/feature_1/beta'), - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'ClassPredictionTower/conv2d_1/weights'), - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'ClassPredictionTower/conv2d_1/BatchNorm/feature_0/beta'), - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'ClassPredictionTower/conv2d_1/BatchNorm/feature_1/beta'), - # Class prediction head - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'ClassPredictor/weights'), - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'ClassPredictor/biases')]) - self.assertEqual(expected_variable_set, actual_variable_set) - - def test_no_batchnorm_params_when_batchnorm_is_not_configured(self): - num_classes_without_background = 6 - def graph_fn(image_features1, image_features2): - conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor( - is_training=False, - num_classes=num_classes_without_background, - conv_hyperparams_fn=self._build_conv_arg_scope_no_batch_norm(), - depth=32, - num_layers_before_predictor=2, - box_code_size=4) - box_predictions = conv_box_predictor.predict( - [image_features1, image_features2], - num_predictions_per_location=[5, 5], - scope='BoxPredictor') - box_encodings = tf.concat( - box_predictions[box_predictor.BOX_ENCODINGS], axis=1) - class_predictions_with_background = tf.concat( - box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], - axis=1) - return (box_encodings, class_predictions_with_background) - - with self.test_session(graph=tf.Graph()): - graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), - tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) - actual_variable_set = set( - [var.op.name for var in tf.trainable_variables()]) - expected_variable_set = set([ - # Box prediction tower - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'BoxPredictionTower/conv2d_0/weights'), - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'BoxPredictionTower/conv2d_0/biases'), - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'BoxPredictionTower/conv2d_1/weights'), - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'BoxPredictionTower/conv2d_1/biases'), - # Box prediction head - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'BoxPredictor/weights'), - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'BoxPredictor/biases'), - # Class prediction tower - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'ClassPredictionTower/conv2d_0/weights'), - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'ClassPredictionTower/conv2d_0/biases'), - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'ClassPredictionTower/conv2d_1/weights'), 
- ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'ClassPredictionTower/conv2d_1/biases'), - # Class prediction head - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'ClassPredictor/weights'), - ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' - 'ClassPredictor/biases')]) - self.assertEqual(expected_variable_set, actual_variable_set) - - def test_get_predictions_with_feature_maps_of_dynamic_shape( - self): - image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) - conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor( - is_training=False, - num_classes=0, - conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), - depth=32, - num_layers_before_predictor=1, - box_code_size=4) - box_predictions = conv_box_predictor.predict( - [image_features], num_predictions_per_location=[5], - scope='BoxPredictor') - box_encodings = tf.concat(box_predictions[box_predictor.BOX_ENCODINGS], - axis=1) - objectness_predictions = tf.concat(box_predictions[ - box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) - init_op = tf.global_variables_initializer() - - resolution = 32 - expected_num_anchors = resolution*resolution*5 - with self.test_session() as sess: - sess.run(init_op) - (box_encodings_shape, - objectness_predictions_shape) = sess.run( - [tf.shape(box_encodings), tf.shape(objectness_predictions)], - feed_dict={image_features: - np.random.rand(4, resolution, resolution, 64)}) - self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 4]) - self.assertAllEqual(objectness_predictions_shape, - [4, expected_num_anchors, 1]) - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/data_decoder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/data_decoder.py deleted file mode 100644 index 9ae18c1f957ea69432b08740451abb2af2548910..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/data_decoder.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Interface for data decoders. - -Data decoders decode the input data and return a dictionary of tensors keyed by -the entries in core.reader.Fields. -""" -from abc import ABCMeta -from abc import abstractmethod - - -class DataDecoder(object): - """Interface for data decoders.""" - __metaclass__ = ABCMeta - - @abstractmethod - def decode(self, data): - """Return a single image and associated labels. - - Args: - data: a string tensor holding a serialized protocol buffer corresponding - to data for a single image. - - Returns: - tensor_dict: a dictionary containing tensors. Possible keys are defined in - reader.Fields. 
- """ - pass diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/data_parser.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/data_parser.py deleted file mode 100644 index 3dac4de28ec52da5697e0b2fee81a56ebb72e35c..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/data_parser.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Interface for data parsers. - -Data parser parses input data and returns a dictionary of numpy arrays -keyed by the entries in standard_fields.py. Since the parser parses records -to numpy arrays (materialized tensors) directly, it is used to read data for -evaluation/visualization; to parse the data during training, DataDecoder should -be used. -""" -from abc import ABCMeta -from abc import abstractmethod - - -class DataToNumpyParser(object): - __metaclass__ = ABCMeta - - @abstractmethod - def parse(self, input_data): - """Parses input and returns a numpy array or a dictionary of numpy arrays. - - Args: - input_data: an input data - - Returns: - A numpy array or a dictionary of numpy arrays or None, if input - cannot be parsed. - """ - pass diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/keypoint_ops.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/keypoint_ops.py deleted file mode 100644 index e520845f92f10faf39c419c321c696e871f4558c..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/keypoint_ops.py +++ /dev/null @@ -1,282 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Keypoint operations. - -Keypoints are represented as tensors of shape [num_instances, num_keypoints, 2], -where the last dimension holds rank 2 tensors of the form [y, x] representing -the coordinates of the keypoint. -""" -import numpy as np -import tensorflow as tf - - -def scale(keypoints, y_scale, x_scale, scope=None): - """Scales keypoint coordinates in x and y dimensions. - - Args: - keypoints: a tensor of shape [num_instances, num_keypoints, 2] - y_scale: (float) scalar tensor - x_scale: (float) scalar tensor - scope: name scope. 
- - Returns: - new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] - """ - with tf.name_scope(scope, 'Scale'): - y_scale = tf.cast(y_scale, tf.float32) - x_scale = tf.cast(x_scale, tf.float32) - new_keypoints = keypoints * [[[y_scale, x_scale]]] - return new_keypoints - - -def clip_to_window(keypoints, window, scope=None): - """Clips keypoints to a window. - - This op clips any input keypoints to a window. - - Args: - keypoints: a tensor of shape [num_instances, num_keypoints, 2] - window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max] - window to which the op should clip the keypoints. - scope: name scope. - - Returns: - new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] - """ - with tf.name_scope(scope, 'ClipToWindow'): - y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2) - win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) - y = tf.maximum(tf.minimum(y, win_y_max), win_y_min) - x = tf.maximum(tf.minimum(x, win_x_max), win_x_min) - new_keypoints = tf.concat([y, x], 2) - return new_keypoints - - -def prune_outside_window(keypoints, window, scope=None): - """Prunes keypoints that fall outside a given window. - - This function replaces keypoints that fall outside the given window with nan. - See also clip_to_window which clips any keypoints that fall outside the given - window. - - Args: - keypoints: a tensor of shape [num_instances, num_keypoints, 2] - window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max] - window outside of which the op should prune the keypoints. - scope: name scope. - - Returns: - new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] - """ - with tf.name_scope(scope, 'PruneOutsideWindow'): - y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2) - win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) - - valid_indices = tf.logical_and( - tf.logical_and(y >= win_y_min, y <= win_y_max), - tf.logical_and(x >= win_x_min, x <= win_x_max)) - - new_y = tf.where(valid_indices, y, np.nan * tf.ones_like(y)) - new_x = tf.where(valid_indices, x, np.nan * tf.ones_like(x)) - new_keypoints = tf.concat([new_y, new_x], 2) - - return new_keypoints - - -def change_coordinate_frame(keypoints, window, scope=None): - """Changes coordinate frame of the keypoints to be relative to window's frame. - - Given a window of the form [y_min, x_min, y_max, x_max], changes keypoint - coordinates from keypoints of shape [num_instances, num_keypoints, 2] - to be relative to this window. - - An example use case is data augmentation: where we are given groundtruth - keypoints and would like to randomly crop the image to some window. In this - case we need to change the coordinate frame of each groundtruth keypoint to be - relative to this new window. - - Args: - keypoints: a tensor of shape [num_instances, num_keypoints, 2] - window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max] - window we should change the coordinate frame to. - scope: name scope. - - Returns: - new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] - """ - with tf.name_scope(scope, 'ChangeCoordinateFrame'): - win_height = window[2] - window[0] - win_width = window[3] - window[1] - new_keypoints = scale(keypoints - [window[0], window[1]], 1.0 / win_height, - 1.0 / win_width) - return new_keypoints - - -def to_normalized_coordinates(keypoints, height, width, - check_range=True, scope=None): - """Converts absolute keypoint coordinates to normalized coordinates in [0, 1]. 
- - Usually one uses the dynamic shape of the image or conv-layer tensor: - keypoints = keypoint_ops.to_normalized_coordinates(keypoints, - tf.shape(images)[1], - tf.shape(images)[2]), - - This function raises an assertion failed error at graph execution time when - the maximum coordinate is smaller than 1.01 (which means that coordinates are - already normalized). The value 1.01 is to deal with small rounding errors. - - Args: - keypoints: A tensor of shape [num_instances, num_keypoints, 2]. - height: Maximum value for y coordinate of absolute keypoint coordinates. - width: Maximum value for x coordinate of absolute keypoint coordinates. - check_range: If True, checks if the coordinates are normalized. - scope: name scope. - - Returns: - tensor of shape [num_instances, num_keypoints, 2] with normalized - coordinates in [0, 1]. - """ - with tf.name_scope(scope, 'ToNormalizedCoordinates'): - height = tf.cast(height, tf.float32) - width = tf.cast(width, tf.float32) - - if check_range: - max_val = tf.reduce_max(keypoints) - max_assert = tf.Assert(tf.greater(max_val, 1.01), - ['max value is lower than 1.01: ', max_val]) - with tf.control_dependencies([max_assert]): - width = tf.identity(width) - - return scale(keypoints, 1.0 / height, 1.0 / width) - - -def to_absolute_coordinates(keypoints, height, width, - check_range=True, scope=None): - """Converts normalized keypoint coordinates to absolute pixel coordinates. - - This function raises an assertion failed error when the maximum keypoint - coordinate value is larger than 1.01 (in which case coordinates are already - absolute). - - Args: - keypoints: A tensor of shape [num_instances, num_keypoints, 2] - height: Maximum value for y coordinate of absolute keypoint coordinates. - width: Maximum value for x coordinate of absolute keypoint coordinates. - check_range: If True, checks if the coordinates are normalized or not. - scope: name scope. - - Returns: - tensor of shape [num_instances, num_keypoints, 2] with absolute coordinates - in terms of the image size. - - """ - with tf.name_scope(scope, 'ToAbsoluteCoordinates'): - height = tf.cast(height, tf.float32) - width = tf.cast(width, tf.float32) - - # Ensure range of input keypoints is correct. - if check_range: - max_val = tf.reduce_max(keypoints) - max_assert = tf.Assert(tf.greater_equal(1.01, max_val), - ['maximum keypoint coordinate value is larger ' - 'than 1.01: ', max_val]) - with tf.control_dependencies([max_assert]): - width = tf.identity(width) - - return scale(keypoints, height, width) - - -def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None): - """Flips the keypoints horizontally around the flip_point. - - This operation flips the x coordinate for each keypoint around the flip_point - and also permutes the keypoints in a manner specified by flip_permutation. - - Args: - keypoints: a tensor of shape [num_instances, num_keypoints, 2] - flip_point: (float) scalar tensor representing the x coordinate to flip the - keypoints around. - flip_permutation: rank 1 int32 tensor containing the keypoint flip - permutation. This specifies the mapping from original keypoint indices - to the flipped keypoint indices. This is used primarily for keypoints - that are not reflection invariant. E.g. Suppose there are 3 keypoints - representing ['head', 'right_eye', 'left_eye'], then a logical choice for - flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye' - and 'right_eye' after a horizontal flip. - scope: name scope. 
- - Returns: - new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] - """ - with tf.name_scope(scope, 'FlipHorizontal'): - keypoints = tf.transpose(keypoints, [1, 0, 2]) - keypoints = tf.gather(keypoints, flip_permutation) - v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2) - u = flip_point * 2.0 - u - new_keypoints = tf.concat([v, u], 2) - new_keypoints = tf.transpose(new_keypoints, [1, 0, 2]) - return new_keypoints - - -def flip_vertical(keypoints, flip_point, flip_permutation, scope=None): - """Flips the keypoints vertically around the flip_point. - - This operation flips the y coordinate for each keypoint around the flip_point - and also permutes the keypoints in a manner specified by flip_permutation. - - Args: - keypoints: a tensor of shape [num_instances, num_keypoints, 2] - flip_point: (float) scalar tensor representing the y coordinate to flip the - keypoints around. - flip_permutation: rank 1 int32 tensor containing the keypoint flip - permutation. This specifies the mapping from original keypoint indices - to the flipped keypoint indices. This is used primarily for keypoints - that are not reflection invariant. E.g. Suppose there are 3 keypoints - representing ['head', 'right_eye', 'left_eye'], then a logical choice for - flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye' - and 'right_eye' after a horizontal flip. - scope: name scope. - - Returns: - new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] - """ - with tf.name_scope(scope, 'FlipVertical'): - keypoints = tf.transpose(keypoints, [1, 0, 2]) - keypoints = tf.gather(keypoints, flip_permutation) - v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2) - v = flip_point * 2.0 - v - new_keypoints = tf.concat([v, u], 2) - new_keypoints = tf.transpose(new_keypoints, [1, 0, 2]) - return new_keypoints - - -def rot90(keypoints, scope=None): - """Rotates the keypoints counter-clockwise by 90 degrees. - - Args: - keypoints: a tensor of shape [num_instances, num_keypoints, 2] - scope: name scope. - - Returns: - new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] - """ - with tf.name_scope(scope, 'Rot90'): - keypoints = tf.transpose(keypoints, [1, 0, 2]) - v, u = tf.split(value=keypoints[:, :, ::-1], num_or_size_splits=2, axis=2) - v = 1.0 - v - new_keypoints = tf.concat([v, u], 2) - new_keypoints = tf.transpose(new_keypoints, [1, 0, 2]) - return new_keypoints diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/keypoint_ops_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/keypoint_ops_test.py deleted file mode 100644 index 1c09c55aa2c834e566dd8d6cd57b9a254bf26efe..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/keypoint_ops_test.py +++ /dev/null @@ -1,200 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
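# --- Editorial aside (not part of the original diff) -------------------------
# The coordinate arithmetic behind keypoint_ops.flip_horizontal and
# keypoint_ops.rot90 above, restated in plain numpy as a sketch (the real ops
# run inside the TF graph). Keypoints are [num_instances, num_keypoints, 2]
# arrays of [y, x] pairs in normalized coordinates; the values here mirror
# the unit tests that follow.
import numpy as np

keypoints = np.array([[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]]])

# flip_horizontal: reflect x around flip_point, then permute keypoints so
# left/right parts swap (permutation [0, 2, 1] swaps keypoints 1 and 2).
flip_point, perm = 0.5, [0, 2, 1]
flipped = keypoints[:, perm, :].copy()
flipped[..., 1] = 2.0 * flip_point - flipped[..., 1]
assert np.allclose(flipped, [[[0.1, 0.9], [0.3, 0.7], [0.2, 0.8]]])

# rot90: a 90-degree counter-clockwise rotation maps (y, x) -> (1 - x, y).
rotated = np.stack([1.0 - keypoints[..., 1], keypoints[..., 0]], axis=-1)
assert np.allclose(rotated, [[[0.9, 0.1], [0.8, 0.2], [0.7, 0.3]]])
# ------------------------------------------------------------------------------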
-# ============================================================================== - -"""Tests for object_detection.core.keypoint_ops.""" -import numpy as np -import tensorflow as tf - -from object_detection.core import keypoint_ops - - -class KeypointOpsTest(tf.test.TestCase): - """Tests for common keypoint operations.""" - - def test_scale(self): - keypoints = tf.constant([ - [[0.0, 0.0], [100.0, 200.0]], - [[50.0, 120.0], [100.0, 140.0]] - ]) - y_scale = tf.constant(1.0 / 100) - x_scale = tf.constant(1.0 / 200) - - expected_keypoints = tf.constant([ - [[0., 0.], [1.0, 1.0]], - [[0.5, 0.6], [1.0, 0.7]] - ]) - output = keypoint_ops.scale(keypoints, y_scale, x_scale) - - with self.test_session() as sess: - output_, expected_keypoints_ = sess.run([output, expected_keypoints]) - self.assertAllClose(output_, expected_keypoints_) - - def test_clip_to_window(self): - keypoints = tf.constant([ - [[0.25, 0.5], [0.75, 0.75]], - [[0.5, 0.0], [1.0, 1.0]] - ]) - window = tf.constant([0.25, 0.25, 0.75, 0.75]) - - expected_keypoints = tf.constant([ - [[0.25, 0.5], [0.75, 0.75]], - [[0.5, 0.25], [0.75, 0.75]] - ]) - output = keypoint_ops.clip_to_window(keypoints, window) - - with self.test_session() as sess: - output_, expected_keypoints_ = sess.run([output, expected_keypoints]) - self.assertAllClose(output_, expected_keypoints_) - - def test_prune_outside_window(self): - keypoints = tf.constant([ - [[0.25, 0.5], [0.75, 0.75]], - [[0.5, 0.0], [1.0, 1.0]] - ]) - window = tf.constant([0.25, 0.25, 0.75, 0.75]) - - expected_keypoints = tf.constant([[[0.25, 0.5], [0.75, 0.75]], - [[np.nan, np.nan], [np.nan, np.nan]]]) - output = keypoint_ops.prune_outside_window(keypoints, window) - - with self.test_session() as sess: - output_, expected_keypoints_ = sess.run([output, expected_keypoints]) - self.assertAllClose(output_, expected_keypoints_) - - def test_change_coordinate_frame(self): - keypoints = tf.constant([ - [[0.25, 0.5], [0.75, 0.75]], - [[0.5, 0.0], [1.0, 1.0]] - ]) - window = tf.constant([0.25, 0.25, 0.75, 0.75]) - - expected_keypoints = tf.constant([ - [[0, 0.5], [1.0, 1.0]], - [[0.5, -0.5], [1.5, 1.5]] - ]) - output = keypoint_ops.change_coordinate_frame(keypoints, window) - - with self.test_session() as sess: - output_, expected_keypoints_ = sess.run([output, expected_keypoints]) - self.assertAllClose(output_, expected_keypoints_) - - def test_to_normalized_coordinates(self): - keypoints = tf.constant([ - [[10., 30.], [30., 45.]], - [[20., 0.], [40., 60.]] - ]) - output = keypoint_ops.to_normalized_coordinates( - keypoints, 40, 60) - expected_keypoints = tf.constant([ - [[0.25, 0.5], [0.75, 0.75]], - [[0.5, 0.0], [1.0, 1.0]] - ]) - - with self.test_session() as sess: - output_, expected_keypoints_ = sess.run([output, expected_keypoints]) - self.assertAllClose(output_, expected_keypoints_) - - def test_to_normalized_coordinates_already_normalized(self): - keypoints = tf.constant([ - [[0.25, 0.5], [0.75, 0.75]], - [[0.5, 0.0], [1.0, 1.0]] - ]) - output = keypoint_ops.to_normalized_coordinates( - keypoints, 40, 60) - - with self.test_session() as sess: - with self.assertRaisesOpError('assertion failed'): - sess.run(output) - - def test_to_absolute_coordinates(self): - keypoints = tf.constant([ - [[0.25, 0.5], [0.75, 0.75]], - [[0.5, 0.0], [1.0, 1.0]] - ]) - output = keypoint_ops.to_absolute_coordinates( - keypoints, 40, 60) - expected_keypoints = tf.constant([ - [[10., 30.], [30., 45.]], - [[20., 0.], [40., 60.]] - ]) - - with self.test_session() as sess: - output_, expected_keypoints_ = 
sess.run([output, expected_keypoints]) - self.assertAllClose(output_, expected_keypoints_) - - def test_to_absolute_coordinates_already_absolute(self): - keypoints = tf.constant([ - [[10., 30.], [30., 45.]], - [[20., 0.], [40., 60.]] - ]) - output = keypoint_ops.to_absolute_coordinates( - keypoints, 40, 60) - - with self.test_session() as sess: - with self.assertRaisesOpError('assertion failed'): - sess.run(output) - - def test_flip_horizontal(self): - keypoints = tf.constant([ - [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], - [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]] - ]) - flip_permutation = [0, 2, 1] - - expected_keypoints = tf.constant([ - [[0.1, 0.9], [0.3, 0.7], [0.2, 0.8]], - [[0.4, 0.6], [0.6, 0.4], [0.5, 0.5]], - ]) - output = keypoint_ops.flip_horizontal(keypoints, 0.5, flip_permutation) - - with self.test_session() as sess: - output_, expected_keypoints_ = sess.run([output, expected_keypoints]) - self.assertAllClose(output_, expected_keypoints_) - - def test_flip_vertical(self): - keypoints = tf.constant([ - [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], - [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]] - ]) - flip_permutation = [0, 2, 1] - - expected_keypoints = tf.constant([ - [[0.9, 0.1], [0.7, 0.3], [0.8, 0.2]], - [[0.6, 0.4], [0.4, 0.6], [0.5, 0.5]], - ]) - output = keypoint_ops.flip_vertical(keypoints, 0.5, flip_permutation) - - with self.test_session() as sess: - output_, expected_keypoints_ = sess.run([output, expected_keypoints]) - self.assertAllClose(output_, expected_keypoints_) - - def test_rot90(self): - keypoints = tf.constant([ - [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], - [[0.4, 0.6], [0.5, 0.6], [0.6, 0.7]] - ]) - expected_keypoints = tf.constant([ - [[0.9, 0.1], [0.8, 0.2], [0.7, 0.3]], - [[0.4, 0.4], [0.4, 0.5], [0.3, 0.6]], - ]) - output = keypoint_ops.rot90(keypoints) - - with self.test_session() as sess: - output_, expected_keypoints_ = sess.run([output, expected_keypoints]) - self.assertAllClose(output_, expected_keypoints_) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/losses.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/losses.py deleted file mode 100644 index 5471c955fdcef7530c04557dba8b8cbb54936cef..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/losses.py +++ /dev/null @@ -1,641 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Classification and regression loss functions for object detection. 
-
-Localization losses:
- * WeightedL2LocalizationLoss
- * WeightedSmoothL1LocalizationLoss
- * WeightedIOULocalizationLoss
-
-Classification losses:
- * WeightedSigmoidClassificationLoss
- * WeightedSoftmaxClassificationLoss
- * WeightedSoftmaxClassificationAgainstLogitsLoss
- * BootstrappedSigmoidClassificationLoss
-"""
-from abc import ABCMeta
-from abc import abstractmethod
-
-import tensorflow as tf
-
-from object_detection.core import box_list
-from object_detection.core import box_list_ops
-from object_detection.utils import ops
-
-slim = tf.contrib.slim
-
-
-class Loss(object):
-  """Abstract base class for loss functions."""
-  __metaclass__ = ABCMeta
-
-  def __call__(self,
-               prediction_tensor,
-               target_tensor,
-               ignore_nan_targets=False,
-               scope=None,
-               **params):
-    """Call the loss function.
-
-    Args:
-      prediction_tensor: an N-d tensor of shape [batch, anchors, ...]
-        representing predicted quantities.
-      target_tensor: an N-d tensor of shape [batch, anchors, ...] representing
-        regression or classification targets.
-      ignore_nan_targets: whether to ignore nan targets in the loss computation.
-        E.g. can be used if the target tensor is missing groundtruth data that
-        shouldn't be factored into the loss.
-      scope: Op scope name. Defaults to 'Loss' if None.
-      **params: Additional keyword arguments for specific implementations of
-        the Loss.
-
-    Returns:
-      loss: a tensor representing the value of the loss function.
-    """
-    with tf.name_scope(scope, 'Loss',
-                       [prediction_tensor, target_tensor, params]) as scope:
-      if ignore_nan_targets:
-        target_tensor = tf.where(tf.is_nan(target_tensor),
-                                 prediction_tensor,
-                                 target_tensor)
-      return self._compute_loss(prediction_tensor, target_tensor, **params)
-
-  @abstractmethod
-  def _compute_loss(self, prediction_tensor, target_tensor, **params):
-    """Method to be overridden by implementations.
-
-    Args:
-      prediction_tensor: a tensor representing predicted quantities
-      target_tensor: a tensor representing regression or classification targets
-      **params: Additional keyword arguments for specific implementations of
-        the Loss.
-
-    Returns:
-      loss: an N-d tensor of shape [batch, anchors, ...] containing the loss per
-        anchor
-    """
-    pass
-
-
-class WeightedL2LocalizationLoss(Loss):
-  """L2 localization loss function with anchorwise output support.
-
-  Loss[b,a] = .5 * ||weights[b,a] * (prediction[b,a,:] - target[b,a,:])||^2
-  """
-
-  def _compute_loss(self, prediction_tensor, target_tensor, weights):
-    """Compute loss function.
-
-    Args:
-      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
-        code_size] representing the (encoded) predicted locations of objects.
-      target_tensor: A float tensor of shape [batch_size, num_anchors,
-        code_size] representing the regression targets
-      weights: a float tensor of shape [batch_size, num_anchors]
-
-    Returns:
-      loss: a float tensor of shape [batch_size, num_anchors]
-        representing the value of the loss function.
-    """
-    weighted_diff = (prediction_tensor - target_tensor) * tf.expand_dims(
-        weights, 2)
-    square_diff = 0.5 * tf.square(weighted_diff)
-    return tf.reduce_sum(square_diff, 2)
-
-
-class WeightedSmoothL1LocalizationLoss(Loss):
-  """Smooth L1 localization loss function aka Huber Loss.
-
-  The smooth L1 loss is defined elementwise as 0.5 x^2 if |x| <= delta and
-  0.5 delta^2 + delta * (|x| - delta) otherwise, where x is the difference
-  between predictions and target.
- - See also Equation (3) in the Fast R-CNN paper by Ross Girshick (ICCV 2015) - """ - - def __init__(self, delta=1.0): - """Constructor. - - Args: - delta: delta for smooth L1 loss. - """ - self._delta = delta - - def _compute_loss(self, prediction_tensor, target_tensor, weights): - """Compute loss function. - - Args: - prediction_tensor: A float tensor of shape [batch_size, num_anchors, - code_size] representing the (encoded) predicted locations of objects. - target_tensor: A float tensor of shape [batch_size, num_anchors, - code_size] representing the regression targets - weights: a float tensor of shape [batch_size, num_anchors] - - Returns: - loss: a float tensor of shape [batch_size, num_anchors] tensor - representing the value of the loss function. - """ - return tf.reduce_sum(tf.losses.huber_loss( - target_tensor, - prediction_tensor, - delta=self._delta, - weights=tf.expand_dims(weights, axis=2), - loss_collection=None, - reduction=tf.losses.Reduction.NONE - ), axis=2) - - -class WeightedIOULocalizationLoss(Loss): - """IOU localization loss function. - - Sums the IOU for corresponding pairs of predicted/groundtruth boxes - and for each pair assign a loss of 1 - IOU. We then compute a weighted - sum over all pairs which is returned as the total loss. - """ - - def _compute_loss(self, prediction_tensor, target_tensor, weights): - """Compute loss function. - - Args: - prediction_tensor: A float tensor of shape [batch_size, num_anchors, 4] - representing the decoded predicted boxes - target_tensor: A float tensor of shape [batch_size, num_anchors, 4] - representing the decoded target boxes - weights: a float tensor of shape [batch_size, num_anchors] - - Returns: - loss: a float tensor of shape [batch_size, num_anchors] tensor - representing the value of the loss function. - """ - predicted_boxes = box_list.BoxList(tf.reshape(prediction_tensor, [-1, 4])) - target_boxes = box_list.BoxList(tf.reshape(target_tensor, [-1, 4])) - per_anchor_iou_loss = 1.0 - box_list_ops.matched_iou(predicted_boxes, - target_boxes) - return tf.reshape(weights, [-1]) * per_anchor_iou_loss - - -class WeightedSigmoidClassificationLoss(Loss): - """Sigmoid cross entropy classification loss function.""" - - def _compute_loss(self, - prediction_tensor, - target_tensor, - weights, - class_indices=None): - """Compute loss function. - - Args: - prediction_tensor: A float tensor of shape [batch_size, num_anchors, - num_classes] representing the predicted logits for each class - target_tensor: A float tensor of shape [batch_size, num_anchors, - num_classes] representing one-hot encoded classification targets - weights: a float tensor of shape [batch_size, num_anchors] - class_indices: (Optional) A 1-D integer tensor of class indices. - If provided, computes loss only for the specified class indices. - - Returns: - loss: a float tensor of shape [batch_size, num_anchors, num_classes] - representing the value of the loss function. - """ - weights = tf.expand_dims(weights, 2) - if class_indices is not None: - weights *= tf.reshape( - ops.indices_to_dense_vector(class_indices, - tf.shape(prediction_tensor)[2]), - [1, 1, -1]) - per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits( - labels=target_tensor, logits=prediction_tensor)) - return per_entry_cross_ent * weights - - -class SigmoidFocalClassificationLoss(Loss): - """Sigmoid focal cross entropy loss. - - Focal loss down-weights well classified examples and focusses on the hard - examples. See https://arxiv.org/pdf/1708.02002.pdf for the loss definition. 
- """ - - def __init__(self, gamma=2.0, alpha=0.25): - """Constructor. - - Args: - gamma: exponent of the modulating factor (1 - p_t) ^ gamma. - alpha: optional alpha weighting factor to balance positives vs negatives. - """ - self._alpha = alpha - self._gamma = gamma - - def _compute_loss(self, - prediction_tensor, - target_tensor, - weights, - class_indices=None): - """Compute loss function. - - Args: - prediction_tensor: A float tensor of shape [batch_size, num_anchors, - num_classes] representing the predicted logits for each class - target_tensor: A float tensor of shape [batch_size, num_anchors, - num_classes] representing one-hot encoded classification targets - weights: a float tensor of shape [batch_size, num_anchors] - class_indices: (Optional) A 1-D integer tensor of class indices. - If provided, computes loss only for the specified class indices. - - Returns: - loss: a float tensor of shape [batch_size, num_anchors, num_classes] - representing the value of the loss function. - """ - weights = tf.expand_dims(weights, 2) - if class_indices is not None: - weights *= tf.reshape( - ops.indices_to_dense_vector(class_indices, - tf.shape(prediction_tensor)[2]), - [1, 1, -1]) - per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits( - labels=target_tensor, logits=prediction_tensor)) - prediction_probabilities = tf.sigmoid(prediction_tensor) - p_t = ((target_tensor * prediction_probabilities) + - ((1 - target_tensor) * (1 - prediction_probabilities))) - modulating_factor = 1.0 - if self._gamma: - modulating_factor = tf.pow(1.0 - p_t, self._gamma) - alpha_weight_factor = 1.0 - if self._alpha is not None: - alpha_weight_factor = (target_tensor * self._alpha + - (1 - target_tensor) * (1 - self._alpha)) - focal_cross_entropy_loss = (modulating_factor * alpha_weight_factor * - per_entry_cross_ent) - return focal_cross_entropy_loss * weights - - -class WeightedSoftmaxClassificationLoss(Loss): - """Softmax loss function.""" - - def __init__(self, logit_scale=1.0): - """Constructor. - - Args: - logit_scale: When this value is high, the prediction is "diffused" and - when this value is low, the prediction is made peakier. - (default 1.0) - - """ - self._logit_scale = logit_scale - - def _compute_loss(self, prediction_tensor, target_tensor, weights): - """Compute loss function. - - Args: - prediction_tensor: A float tensor of shape [batch_size, num_anchors, - num_classes] representing the predicted logits for each class - target_tensor: A float tensor of shape [batch_size, num_anchors, - num_classes] representing one-hot encoded classification targets - weights: a float tensor of shape [batch_size, num_anchors] - - Returns: - loss: a float tensor of shape [batch_size, num_anchors] - representing the value of the loss function. - """ - num_classes = prediction_tensor.get_shape().as_list()[-1] - prediction_tensor = tf.divide( - prediction_tensor, self._logit_scale, name='scale_logit') - per_row_cross_ent = (tf.nn.softmax_cross_entropy_with_logits( - labels=tf.reshape(target_tensor, [-1, num_classes]), - logits=tf.reshape(prediction_tensor, [-1, num_classes]))) - return tf.reshape(per_row_cross_ent, tf.shape(weights)) * weights - - -class WeightedSoftmaxClassificationAgainstLogitsLoss(Loss): - """Softmax loss function against logits. - - Targets are expected to be provided in logits space instead of "one hot" or - "probability distribution" space. - """ - - def __init__(self, logit_scale=1.0): - """Constructor. 
- - Args: - logit_scale: When this value is high, the target is "diffused" and - when this value is low, the target is made peakier. - (default 1.0) - - """ - self._logit_scale = logit_scale - - def _scale_and_softmax_logits(self, logits): - """Scale logits then apply softmax.""" - scaled_logits = tf.divide(logits, self._logit_scale, name='scale_logits') - return tf.nn.softmax(scaled_logits, name='convert_scores') - - def _compute_loss(self, prediction_tensor, target_tensor, weights): - """Compute loss function. - - Args: - prediction_tensor: A float tensor of shape [batch_size, num_anchors, - num_classes] representing the predicted logits for each class - target_tensor: A float tensor of shape [batch_size, num_anchors, - num_classes] representing logit classification targets - weights: a float tensor of shape [batch_size, num_anchors] - - Returns: - loss: a float tensor of shape [batch_size, num_anchors] - representing the value of the loss function. - """ - num_classes = prediction_tensor.get_shape().as_list()[-1] - target_tensor = self._scale_and_softmax_logits(target_tensor) - prediction_tensor = tf.divide(prediction_tensor, self._logit_scale, - name='scale_logits') - - per_row_cross_ent = (tf.nn.softmax_cross_entropy_with_logits( - labels=tf.reshape(target_tensor, [-1, num_classes]), - logits=tf.reshape(prediction_tensor, [-1, num_classes]))) - return tf.reshape(per_row_cross_ent, tf.shape(weights)) * weights - - -class BootstrappedSigmoidClassificationLoss(Loss): - """Bootstrapped sigmoid cross entropy classification loss function. - - This loss uses a convex combination of training labels and the current model's - predictions as training targets in the classification loss. The idea is that - as the model improves over time, its predictions can be trusted more and we - can use these predictions to mitigate the damage of noisy/incorrect labels, - because incorrect labels are likely to be eventually highly inconsistent with - other stimuli predicted to have the same label by the model. - - In "soft" bootstrapping, we use all predicted class probabilities, whereas in - "hard" bootstrapping, we use the single class favored by the model. - - See also Training Deep Neural Networks On Noisy Labels with Bootstrapping by - Reed et al. (ICLR 2015). - """ - - def __init__(self, alpha, bootstrap_type='soft'): - """Constructor. - - Args: - alpha: a float32 scalar tensor between 0 and 1 representing interpolation - weight - bootstrap_type: set to either 'hard' or 'soft' (default) - - Raises: - ValueError: if bootstrap_type is not either 'hard' or 'soft' - """ - if bootstrap_type != 'hard' and bootstrap_type != 'soft': - raise ValueError('Unrecognized bootstrap_type: must be one of ' - '\'hard\' or \'soft.\'') - self._alpha = alpha - self._bootstrap_type = bootstrap_type - - def _compute_loss(self, prediction_tensor, target_tensor, weights): - """Compute loss function. - - Args: - prediction_tensor: A float tensor of shape [batch_size, num_anchors, - num_classes] representing the predicted logits for each class - target_tensor: A float tensor of shape [batch_size, num_anchors, - num_classes] representing one-hot encoded classification targets - weights: a float tensor of shape [batch_size, num_anchors] - - Returns: - loss: a float tensor of shape [batch_size, num_anchors, num_classes] - representing the value of the loss function. 
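The soft/hard target construction implemented just below is easy to see in isolation. A NumPy sketch, with made-up values:

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def bootstrap_targets(labels, logits, alpha, bootstrap_type='soft'):
    # Convex combination of the (possibly noisy) labels and the model's
    # current beliefs; 'hard' snaps the beliefs to 0/1 first.
    probs = sigmoid(logits)
    if bootstrap_type == 'soft':
        model_term = probs
    else:
        model_term = (probs > 0.5).astype(np.float32)
    return alpha * labels + (1.0 - alpha) * model_term

labels = np.array([1.0, 0.0, 1.0])
logits = np.array([3.0, 2.5, -0.5])  # the model disputes the second label
print(bootstrap_targets(labels, logits, alpha=0.8, bootstrap_type='soft'))
print(bootstrap_targets(labels, logits, alpha=0.8, bootstrap_type='hard'))
```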
- """ - if self._bootstrap_type == 'soft': - bootstrap_target_tensor = self._alpha * target_tensor + ( - 1.0 - self._alpha) * tf.sigmoid(prediction_tensor) - else: - bootstrap_target_tensor = self._alpha * target_tensor + ( - 1.0 - self._alpha) * tf.cast( - tf.sigmoid(prediction_tensor) > 0.5, tf.float32) - per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits( - labels=bootstrap_target_tensor, logits=prediction_tensor)) - return per_entry_cross_ent * tf.expand_dims(weights, 2) - - -class HardExampleMiner(object): - """Hard example mining for regions in a list of images. - - Implements hard example mining to select a subset of regions to be - back-propagated. For each image, selects the regions with highest losses, - subject to the condition that a newly selected region cannot have - an IOU > iou_threshold with any of the previously selected regions. - This can be achieved by re-using a greedy non-maximum suppression algorithm. - A constraint on the number of negatives mined per positive region can also be - enforced. - - Reference papers: "Training Region-based Object Detectors with Online - Hard Example Mining" (CVPR 2016) by Srivastava et al., and - "SSD: Single Shot MultiBox Detector" (ECCV 2016) by Liu et al. - """ - - def __init__(self, - num_hard_examples=64, - iou_threshold=0.7, - loss_type='both', - cls_loss_weight=0.05, - loc_loss_weight=0.06, - max_negatives_per_positive=None, - min_negatives_per_image=0): - """Constructor. - - The hard example mining implemented by this class can replicate the behavior - in the two aforementioned papers (Srivastava et al., and Liu et al). - To replicate the A2 paper (Srivastava et al), num_hard_examples is set - to a fixed parameter (64 by default) and iou_threshold is set to .7 for - running non-max-suppression the predicted boxes prior to hard mining. - In order to replicate the SSD paper (Liu et al), num_hard_examples should - be set to None, max_negatives_per_positive should be 3 and iou_threshold - should be 1.0 (in order to effectively turn off NMS). - - Args: - num_hard_examples: maximum number of hard examples to be - selected per image (prior to enforcing max negative to positive ratio - constraint). If set to None, all examples obtained after NMS are - considered. - iou_threshold: minimum intersection over union for an example - to be discarded during NMS. - loss_type: use only classification losses ('cls', default), - localization losses ('loc') or both losses ('both'). - In the last case, cls_loss_weight and loc_loss_weight are used to - compute weighted sum of the two losses. - cls_loss_weight: weight for classification loss. - loc_loss_weight: weight for location loss. - max_negatives_per_positive: maximum number of negatives to retain for - each positive anchor. By default, num_negatives_per_positive is None, - which means that we do not enforce a prespecified negative:positive - ratio. Note also that num_negatives_per_positives can be a float - (and will be converted to be a float even if it is passed in otherwise). - min_negatives_per_image: minimum number of negative anchors to sample for - a given image. Setting this to a positive number allows sampling - negatives in an image without any positive anchors and thus not biased - towards at least one detection per image. 
- """ - self._num_hard_examples = num_hard_examples - self._iou_threshold = iou_threshold - self._loss_type = loss_type - self._cls_loss_weight = cls_loss_weight - self._loc_loss_weight = loc_loss_weight - self._max_negatives_per_positive = max_negatives_per_positive - self._min_negatives_per_image = min_negatives_per_image - if self._max_negatives_per_positive is not None: - self._max_negatives_per_positive = float(self._max_negatives_per_positive) - self._num_positives_list = None - self._num_negatives_list = None - - def __call__(self, - location_losses, - cls_losses, - decoded_boxlist_list, - match_list=None): - """Computes localization and classification losses after hard mining. - - Args: - location_losses: a float tensor of shape [num_images, num_anchors] - representing anchorwise localization losses. - cls_losses: a float tensor of shape [num_images, num_anchors] - representing anchorwise classification losses. - decoded_boxlist_list: a list of decoded BoxList representing location - predictions for each image. - match_list: an optional list of matcher.Match objects encoding the match - between anchors and groundtruth boxes for each image of the batch, - with rows of the Match objects corresponding to groundtruth boxes - and columns corresponding to anchors. Match objects in match_list are - used to reference which anchors are positive, negative or ignored. If - self._max_negatives_per_positive exists, these are then used to enforce - a prespecified negative to positive ratio. - - Returns: - mined_location_loss: a float scalar with sum of localization losses from - selected hard examples. - mined_cls_loss: a float scalar with sum of classification losses from - selected hard examples. - Raises: - ValueError: if location_losses, cls_losses and decoded_boxlist_list do - not have compatible shapes (i.e., they must correspond to the same - number of images). - ValueError: if match_list is specified but its length does not match - len(decoded_boxlist_list). 
- """ - mined_location_losses = [] - mined_cls_losses = [] - location_losses = tf.unstack(location_losses) - cls_losses = tf.unstack(cls_losses) - num_images = len(decoded_boxlist_list) - if not match_list: - match_list = num_images * [None] - if not len(location_losses) == len(decoded_boxlist_list) == len(cls_losses): - raise ValueError('location_losses, cls_losses and decoded_boxlist_list ' - 'do not have compatible shapes.') - if not isinstance(match_list, list): - raise ValueError('match_list must be a list.') - if len(match_list) != len(decoded_boxlist_list): - raise ValueError('match_list must either be None or have ' - 'length=len(decoded_boxlist_list).') - num_positives_list = [] - num_negatives_list = [] - for ind, detection_boxlist in enumerate(decoded_boxlist_list): - box_locations = detection_boxlist.get() - match = match_list[ind] - image_losses = cls_losses[ind] - if self._loss_type == 'loc': - image_losses = location_losses[ind] - elif self._loss_type == 'both': - image_losses *= self._cls_loss_weight - image_losses += location_losses[ind] * self._loc_loss_weight - if self._num_hard_examples is not None: - num_hard_examples = self._num_hard_examples - else: - num_hard_examples = detection_boxlist.num_boxes() - selected_indices = tf.image.non_max_suppression( - box_locations, image_losses, num_hard_examples, self._iou_threshold) - if self._max_negatives_per_positive is not None and match: - (selected_indices, num_positives, - num_negatives) = self._subsample_selection_to_desired_neg_pos_ratio( - selected_indices, match, self._max_negatives_per_positive, - self._min_negatives_per_image) - num_positives_list.append(num_positives) - num_negatives_list.append(num_negatives) - mined_location_losses.append( - tf.reduce_sum(tf.gather(location_losses[ind], selected_indices))) - mined_cls_losses.append( - tf.reduce_sum(tf.gather(cls_losses[ind], selected_indices))) - location_loss = tf.reduce_sum(tf.stack(mined_location_losses)) - cls_loss = tf.reduce_sum(tf.stack(mined_cls_losses)) - if match and self._max_negatives_per_positive: - self._num_positives_list = num_positives_list - self._num_negatives_list = num_negatives_list - return (location_loss, cls_loss) - - def summarize(self): - """Summarize the number of positives and negatives after mining.""" - if self._num_positives_list and self._num_negatives_list: - avg_num_positives = tf.reduce_mean(tf.to_float(self._num_positives_list)) - avg_num_negatives = tf.reduce_mean(tf.to_float(self._num_negatives_list)) - tf.summary.scalar('HardExampleMiner/NumPositives', avg_num_positives) - tf.summary.scalar('HardExampleMiner/NumNegatives', avg_num_negatives) - - def _subsample_selection_to_desired_neg_pos_ratio(self, - indices, - match, - max_negatives_per_positive, - min_negatives_per_image=0): - """Subsample a collection of selected indices to a desired neg:pos ratio. - - This function takes a subset of M indices (indexing into a large anchor - collection of N anchors where M=0, - meaning that column i is matched with row match_results[i]. - (2) match_results[i]=-1, meaning that column i is not matched. - (3) match_results[i]=-2, meaning that column i is ignored. - use_matmul_gather: Use matrix multiplication based gather instead of - standard tf.gather. (Default: False). 
- - Raises: - ValueError: if match_results does not have rank 1 or is not an - integer int32 scalar tensor - """ - if match_results.shape.ndims != 1: - raise ValueError('match_results should have rank 1') - if match_results.dtype != tf.int32: - raise ValueError('match_results should be an int32 or int64 scalar ' - 'tensor') - self._match_results = match_results - self._gather_op = tf.gather - if use_matmul_gather: - self._gather_op = ops.matmul_gather_on_zeroth_axis - - @property - def match_results(self): - """The accessor for match results. - - Returns: - the tensor which encodes the match results. - """ - return self._match_results - - def matched_column_indices(self): - """Returns column indices that match to some row. - - The indices returned by this op are always sorted in increasing order. - - Returns: - column_indices: int32 tensor of shape [K] with column indices. - """ - return self._reshape_and_cast(tf.where(tf.greater(self._match_results, -1))) - - def matched_column_indicator(self): - """Returns column indices that are matched. - - Returns: - column_indices: int32 tensor of shape [K] with column indices. - """ - return tf.greater_equal(self._match_results, 0) - - def num_matched_columns(self): - """Returns number (int32 scalar tensor) of matched columns.""" - return tf.size(self.matched_column_indices()) - - def unmatched_column_indices(self): - """Returns column indices that do not match any row. - - The indices returned by this op are always sorted in increasing order. - - Returns: - column_indices: int32 tensor of shape [K] with column indices. - """ - return self._reshape_and_cast(tf.where(tf.equal(self._match_results, -1))) - - def unmatched_column_indicator(self): - """Returns column indices that are unmatched. - - Returns: - column_indices: int32 tensor of shape [K] with column indices. - """ - return tf.equal(self._match_results, -1) - - def num_unmatched_columns(self): - """Returns number (int32 scalar tensor) of unmatched columns.""" - return tf.size(self.unmatched_column_indices()) - - def ignored_column_indices(self): - """Returns column indices that are ignored (neither Matched nor Unmatched). - - The indices returned by this op are always sorted in increasing order. - - Returns: - column_indices: int32 tensor of shape [K] with column indices. - """ - return self._reshape_and_cast(tf.where(self.ignored_column_indicator())) - - def ignored_column_indicator(self): - """Returns boolean column indicator where True means the colum is ignored. - - Returns: - column_indicator: boolean vector which is True for all ignored column - indices. - """ - return tf.equal(self._match_results, -2) - - def num_ignored_columns(self): - """Returns number (int32 scalar tensor) of matched columns.""" - return tf.size(self.ignored_column_indices()) - - def unmatched_or_ignored_column_indices(self): - """Returns column indices that are unmatched or ignored. - - The indices returned by this op are always sorted in increasing order. - - Returns: - column_indices: int32 tensor of shape [K] with column indices. - """ - return self._reshape_and_cast(tf.where(tf.greater(0, self._match_results))) - - def matched_row_indices(self): - """Returns row indices that match some column. - - The indices returned by this op are ordered so as to be in correspondence - with the output of matched_column_indicator(). For example if - self.matched_column_indicator() is [0,2], and self.matched_row_indices() is - [7, 3], then we know that column 0 was matched to row 7 and column 2 was - matched to row 3. 
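Since the match encoding above does all the work ({>=0, -1, -2}), the query methods reduce to boolean masks. A NumPy rendering, using the same example vector the unit tests later use:

```python
import numpy as np

# Same encoding as the Match class above:
# >= 0 -> matched to that row, -1 -> unmatched, -2 -> ignored.
match_results = np.array([3, 1, -1, 0, -1, 5, -2])

matched_cols = np.where(match_results > -1)[0]     # [0, 1, 3, 5]
unmatched_cols = np.where(match_results == -1)[0]  # [2, 4]
ignored_cols = np.where(match_results == -2)[0]    # [6]
matched_rows = match_results[matched_cols]         # [3, 1, 0, 5]

print(matched_cols, unmatched_cols, ignored_cols, matched_rows)
```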
- - Returns: - row_indices: int32 tensor of shape [K] with row indices. - """ - return self._reshape_and_cast( - self._gather_op(self._match_results, self.matched_column_indices())) - - def _reshape_and_cast(self, t): - return tf.cast(tf.reshape(t, [-1]), tf.int32) - - def gather_based_on_match(self, input_tensor, unmatched_value, - ignored_value): - """Gathers elements from `input_tensor` based on match results. - - For columns that are matched to a row, gathered_tensor[col] is set to - input_tensor[match_results[col]]. For columns that are unmatched, - gathered_tensor[col] is set to unmatched_value. Finally, for columns that - are ignored gathered_tensor[col] is set to ignored_value. - - Note that the input_tensor.shape[1:] must match with unmatched_value.shape - and ignored_value.shape - - Args: - input_tensor: Tensor to gather values from. - unmatched_value: Constant tensor value for unmatched columns. - ignored_value: Constant tensor value for ignored columns. - - Returns: - gathered_tensor: A tensor containing values gathered from input_tensor. - The shape of the gathered tensor is [match_results.shape[0]] + - input_tensor.shape[1:]. - """ - input_tensor = tf.concat([tf.stack([ignored_value, unmatched_value]), - input_tensor], axis=0) - gather_indices = tf.maximum(self.match_results + 2, 0) - gathered_tensor = self._gather_op(input_tensor, gather_indices) - return gathered_tensor - - -class Matcher(object): - """Abstract base class for matcher. - """ - __metaclass__ = ABCMeta - - def __init__(self, use_matmul_gather=False): - """Constructs a Matcher. - - Args: - use_matmul_gather: Force constructed match objects to use matrix - multiplication based gather instead of standard tf.gather. - (Default: False). - """ - self._use_matmul_gather = use_matmul_gather - - def match(self, similarity_matrix, scope=None, **params): - """Computes matches among row and column indices and returns the result. - - Computes matches among the row and column indices based on the similarity - matrix and optional arguments. - - Args: - similarity_matrix: Float tensor of shape [N, M] with pairwise similarity - where higher value means more similar. - scope: Op scope name. Defaults to 'Match' if None. - **params: Additional keyword arguments for specific implementations of - the Matcher. - - Returns: - A Match object with the results of matching. - """ - with tf.name_scope(scope, 'Match', [similarity_matrix, params]) as scope: - return Match(self._match(similarity_matrix, **params), - self._use_matmul_gather) - - @abstractmethod - def _match(self, similarity_matrix, **params): - """Method to be overridden by implementations. - - Args: - similarity_matrix: Float tensor of shape [N, M] with pairwise similarity - where higher value means more similar. - **params: Additional keyword arguments for specific implementations of - the Matcher. - - Returns: - match_results: Integer tensor of shape [M]: match_results[i]>=0 means - that column i is matched to row match_results[i], match_results[i]=-1 - means that the column is not matched. match_results[i]=-2 means that - the column is ignored (usually this happens when there is a very weak - match which one neither wants as positive nor negative example). 
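The pad-and-shift trick in `gather_based_on_match` above is worth seeing concretely. The following NumPy sketch reproduces the multidimensional example from this class's tests: prepend `[ignored_value, unmatched_value]` to the input, then shift the match results by +2 so that -2 maps to index 0, -1 to index 1, and row r to index r + 2.

```python
import numpy as np

match_results = np.array([1, -1, -2])
input_tensor = np.array([[0, 0.5, 0, 0.5],
                         [0, 0, 0.5, 0.5]])
unmatched_value = np.zeros(4)
ignored_value = np.zeros(4)

# Rows 0 and 1 of the padded tensor hold the ignored/unmatched fill values;
# real input rows start at index 2.
padded = np.concatenate([[ignored_value, unmatched_value], input_tensor])
gather_indices = np.maximum(match_results + 2, 0)
print(padded[gather_indices])
# [[0, 0, 0.5, 0.5], [0, 0, 0, 0], [0, 0, 0, 0]]
```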
- """ - pass diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/matcher_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/matcher_test.py deleted file mode 100644 index 05607834a1dd116e2e0beeb79a508d6196fad235..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/matcher_test.py +++ /dev/null @@ -1,192 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.core.matcher.""" -import numpy as np -import tensorflow as tf - -from object_detection.core import matcher - - -class MatchTest(tf.test.TestCase): - - def test_get_correct_matched_columnIndices(self): - match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) - match = matcher.Match(match_results) - expected_column_indices = [0, 1, 3, 5] - matched_column_indices = match.matched_column_indices() - self.assertEquals(matched_column_indices.dtype, tf.int32) - with self.test_session() as sess: - matched_column_indices = sess.run(matched_column_indices) - self.assertAllEqual(matched_column_indices, expected_column_indices) - - def test_get_correct_counts(self): - match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) - match = matcher.Match(match_results) - exp_num_matched_columns = 4 - exp_num_unmatched_columns = 2 - exp_num_ignored_columns = 1 - num_matched_columns = match.num_matched_columns() - num_unmatched_columns = match.num_unmatched_columns() - num_ignored_columns = match.num_ignored_columns() - self.assertEquals(num_matched_columns.dtype, tf.int32) - self.assertEquals(num_unmatched_columns.dtype, tf.int32) - self.assertEquals(num_ignored_columns.dtype, tf.int32) - with self.test_session() as sess: - (num_matched_columns_out, num_unmatched_columns_out, - num_ignored_columns_out) = sess.run( - [num_matched_columns, num_unmatched_columns, num_ignored_columns]) - self.assertAllEqual(num_matched_columns_out, exp_num_matched_columns) - self.assertAllEqual(num_unmatched_columns_out, exp_num_unmatched_columns) - self.assertAllEqual(num_ignored_columns_out, exp_num_ignored_columns) - - def testGetCorrectUnmatchedColumnIndices(self): - match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) - match = matcher.Match(match_results) - expected_column_indices = [2, 4] - unmatched_column_indices = match.unmatched_column_indices() - self.assertEquals(unmatched_column_indices.dtype, tf.int32) - with self.test_session() as sess: - unmatched_column_indices = sess.run(unmatched_column_indices) - self.assertAllEqual(unmatched_column_indices, expected_column_indices) - - def testGetCorrectMatchedRowIndices(self): - match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) - match = matcher.Match(match_results) - expected_row_indices = [3, 1, 0, 5] - matched_row_indices = match.matched_row_indices() - self.assertEquals(matched_row_indices.dtype, tf.int32) - with self.test_session() as 
sess: - matched_row_inds = sess.run(matched_row_indices) - self.assertAllEqual(matched_row_inds, expected_row_indices) - - def test_get_correct_ignored_column_indices(self): - match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) - match = matcher.Match(match_results) - expected_column_indices = [6] - ignored_column_indices = match.ignored_column_indices() - self.assertEquals(ignored_column_indices.dtype, tf.int32) - with self.test_session() as sess: - ignored_column_indices = sess.run(ignored_column_indices) - self.assertAllEqual(ignored_column_indices, expected_column_indices) - - def test_get_correct_matched_column_indicator(self): - match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) - match = matcher.Match(match_results) - expected_column_indicator = [True, True, False, True, False, True, False] - matched_column_indicator = match.matched_column_indicator() - self.assertEquals(matched_column_indicator.dtype, tf.bool) - with self.test_session() as sess: - matched_column_indicator = sess.run(matched_column_indicator) - self.assertAllEqual(matched_column_indicator, expected_column_indicator) - - def test_get_correct_unmatched_column_indicator(self): - match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) - match = matcher.Match(match_results) - expected_column_indicator = [False, False, True, False, True, False, False] - unmatched_column_indicator = match.unmatched_column_indicator() - self.assertEquals(unmatched_column_indicator.dtype, tf.bool) - with self.test_session() as sess: - unmatched_column_indicator = sess.run(unmatched_column_indicator) - self.assertAllEqual(unmatched_column_indicator, expected_column_indicator) - - def test_get_correct_ignored_column_indicator(self): - match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) - match = matcher.Match(match_results) - expected_column_indicator = [False, False, False, False, False, False, True] - ignored_column_indicator = match.ignored_column_indicator() - self.assertEquals(ignored_column_indicator.dtype, tf.bool) - with self.test_session() as sess: - ignored_column_indicator = sess.run(ignored_column_indicator) - self.assertAllEqual(ignored_column_indicator, expected_column_indicator) - - def test_get_correct_unmatched_ignored_column_indices(self): - match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) - match = matcher.Match(match_results) - expected_column_indices = [2, 4, 6] - unmatched_ignored_column_indices = (match. 
-                                        unmatched_or_ignored_column_indices())
-    self.assertEquals(unmatched_ignored_column_indices.dtype, tf.int32)
-    with self.test_session() as sess:
-      unmatched_ignored_column_indices = sess.run(
-          unmatched_ignored_column_indices)
-      self.assertAllEqual(unmatched_ignored_column_indices,
-                          expected_column_indices)
-
-  def test_all_columns_accounted_for(self):
-    # Note: deliberately setting to small number so not always
-    # all possibilities appear (matched, unmatched, ignored)
-    num_matches = 10
-    match_results = tf.random_uniform(
-        [num_matches], minval=-2, maxval=5, dtype=tf.int32)
-    match = matcher.Match(match_results)
-    matched_column_indices = match.matched_column_indices()
-    unmatched_column_indices = match.unmatched_column_indices()
-    ignored_column_indices = match.ignored_column_indices()
-    with self.test_session() as sess:
-      matched, unmatched, ignored = sess.run([
-          matched_column_indices, unmatched_column_indices,
-          ignored_column_indices
-      ])
-      all_indices = np.hstack((matched, unmatched, ignored))
-      all_indices_sorted = np.sort(all_indices)
-      self.assertAllEqual(all_indices_sorted,
-                          np.arange(num_matches, dtype=np.int32))
-
-  def test_scalar_gather_based_on_match(self):
-    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
-    input_tensor = tf.constant([0, 1, 2, 3, 4, 5, 6, 7], dtype=tf.float32)
-    expected_gathered_tensor = [3, 1, 100, 0, 100, 5, 200]
-    match = matcher.Match(match_results)
-    gathered_tensor = match.gather_based_on_match(input_tensor,
-                                                  unmatched_value=100.,
-                                                  ignored_value=200.)
-    self.assertEquals(gathered_tensor.dtype, tf.float32)
-    with self.test_session():
-      gathered_tensor_out = gathered_tensor.eval()
-      self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
-
-  def test_multidimensional_gather_based_on_match(self):
-    match_results = tf.constant([1, -1, -2])
-    input_tensor = tf.constant([[0, 0.5, 0, 0.5], [0, 0, 0.5, 0.5]],
-                               dtype=tf.float32)
-    expected_gathered_tensor = [[0, 0, 0.5, 0.5], [0, 0, 0, 0], [0, 0, 0, 0]]
-    match = matcher.Match(match_results)
-    gathered_tensor = match.gather_based_on_match(input_tensor,
-                                                  unmatched_value=tf.zeros(4),
-                                                  ignored_value=tf.zeros(4))
-    self.assertEquals(gathered_tensor.dtype, tf.float32)
-    with self.test_session():
-      gathered_tensor_out = gathered_tensor.eval()
-      self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
-
-  def test_multidimensional_gather_based_on_match_with_matmul_gather_op(self):
-    match_results = tf.constant([1, -1, -2])
-    input_tensor = tf.constant([[0, 0.5, 0, 0.5], [0, 0, 0.5, 0.5]],
-                               dtype=tf.float32)
-    expected_gathered_tensor = [[0, 0, 0.5, 0.5], [0, 0, 0, 0], [0, 0, 0, 0]]
-    match = matcher.Match(match_results, use_matmul_gather=True)
-    gathered_tensor = match.gather_based_on_match(input_tensor,
-                                                  unmatched_value=tf.zeros(4),
-                                                  ignored_value=tf.zeros(4))
-    self.assertEquals(gathered_tensor.dtype, tf.float32)
-    with self.test_session() as sess:
-      self.assertTrue(
-          all([op.name != 'Gather' for op in sess.graph.get_operations()]))
-      gathered_tensor_out = gathered_tensor.eval()
-      self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
-
-if __name__ == '__main__':
-  tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/minibatch_sampler.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/minibatch_sampler.py
deleted file mode 100644
index dc622221ae526360d0a5f85f914bc2c53365911c..0000000000000000000000000000000000000000
---
a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/minibatch_sampler.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Base minibatch sampler module. - -The job of the minibatch_sampler is to subsample a minibatch based on some -criterion. - -The main function call is: - subsample(indicator, batch_size, **params). -Indicator is a 1d boolean tensor where True denotes which examples can be -sampled. It returns a boolean indicator where True denotes an example has been -sampled.. - -Subclasses should implement the Subsample function and can make use of the -@staticmethod SubsampleIndicator. -""" - -from abc import ABCMeta -from abc import abstractmethod - -import tensorflow as tf - -from object_detection.utils import ops - - -class MinibatchSampler(object): - """Abstract base class for subsampling minibatches.""" - __metaclass__ = ABCMeta - - def __init__(self): - """Constructs a minibatch sampler.""" - pass - - @abstractmethod - def subsample(self, indicator, batch_size, **params): - """Returns subsample of entries in indicator. - - Args: - indicator: boolean tensor of shape [N] whose True entries can be sampled. - batch_size: desired batch size. - **params: additional keyword arguments for specific implementations of - the MinibatchSampler. - - Returns: - sample_indicator: boolean tensor of shape [N] whose True entries have been - sampled. If sum(indicator) >= batch_size, sum(is_sampled) = batch_size - """ - pass - - @staticmethod - def subsample_indicator(indicator, num_samples): - """Subsample indicator vector. - - Given a boolean indicator vector with M elements set to `True`, the function - assigns all but `num_samples` of these previously `True` elements to - `False`. If `num_samples` is greater than M, the original indicator vector - is returned. - - Args: - indicator: a 1-dimensional boolean tensor indicating which elements - are allowed to be sampled and which are not. 
-      num_samples: int32 scalar tensor
-
-    Returns:
-      a boolean tensor with the same shape as input (indicator) tensor
-    """
-    indices = tf.where(indicator)
-    indices = tf.random_shuffle(indices)
-    indices = tf.reshape(indices, [-1])
-
-    num_samples = tf.minimum(tf.size(indices), num_samples)
-    selected_indices = tf.slice(indices, [0], tf.reshape(num_samples, [1]))
-
-    selected_indicator = ops.indices_to_dense_vector(selected_indices,
-                                                     tf.shape(indicator)[0])
-
-    return tf.equal(selected_indicator, 1)
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/minibatch_sampler_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/minibatch_sampler_test.py
deleted file mode 100644
index 7420ae5d03ca5318d2fd5df4dd4a5cee400189b1..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/minibatch_sampler_test.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.minibatch_sampler."""
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.core import minibatch_sampler
-
-
-class MinibatchSamplerTest(tf.test.TestCase):
-
-  def test_subsample_indicator_when_more_true_elements_than_num_samples(self):
-    np_indicator = [True, False, True, False, True, True, False]
-    indicator = tf.constant(np_indicator)
-    samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
-        indicator, 3)
-    with self.test_session() as sess:
-      samples_out = sess.run(samples)
-      self.assertEqual(np.sum(samples_out), 3)
-      self.assertAllEqual(samples_out,
-                          np.logical_and(samples_out, np_indicator))
-
-  def test_subsample_when_more_true_elements_than_num_samples_no_shape(self):
-    np_indicator = [True, False, True, False, True, True, False]
-    indicator = tf.placeholder(tf.bool)
-    feed_dict = {indicator: np_indicator}
-
-    samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
-        indicator, 3)
-    with self.test_session() as sess:
-      samples_out = sess.run(samples, feed_dict=feed_dict)
-      self.assertEqual(np.sum(samples_out), 3)
-      self.assertAllEqual(samples_out,
-                          np.logical_and(samples_out, np_indicator))
-
-  def test_subsample_indicator_when_less_true_elements_than_num_samples(self):
-    np_indicator = [True, False, True, False, True, True, False]
-    indicator = tf.constant(np_indicator)
-    samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
-        indicator, 5)
-    with self.test_session() as sess:
-      samples_out = sess.run(samples)
-      self.assertEqual(np.sum(samples_out), 4)
-      self.assertAllEqual(samples_out,
-                          np.logical_and(samples_out, np_indicator))
-
-  def test_subsample_indicator_when_num_samples_is_zero(self):
-    np_indicator = [True, False, True, False, True, True, False]
-    indicator = tf.constant(np_indicator)
-    samples_none = minibatch_sampler.MinibatchSampler.subsample_indicator(
indicator, 0) - with self.test_session() as sess: - samples_none_out = sess.run(samples_none) - self.assertAllEqual( - np.zeros_like(samples_none_out, dtype=bool), - samples_none_out) - - def test_subsample_indicator_when_indicator_all_false(self): - indicator_empty = tf.zeros([0], dtype=tf.bool) - samples_empty = minibatch_sampler.MinibatchSampler.subsample_indicator( - indicator_empty, 4) - with self.test_session() as sess: - samples_empty_out = sess.run(samples_empty) - self.assertEqual(0, samples_empty_out.size) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/model.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/model.py deleted file mode 100644 index 081136f9c6a64ca8b56b2a98b9113a81bdc791f8..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/model.py +++ /dev/null @@ -1,305 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Abstract detection model. - -This file defines a generic base class for detection models. Programs that are -designed to work with arbitrary detection models should only depend on this -class. We intend for the functions in this class to follow tensor-in/tensor-out -design, thus all functions have tensors or lists/dictionaries holding tensors as -inputs and outputs. - -Abstractly, detection models predict output tensors given input images -which can be passed to a loss function at training time or passed to a -postprocessing function at eval time. The computation graphs at a high level -consequently look as follows: - -Training time: -inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor) - -Evaluation time: -inputs (images tensor) -> preprocess -> predict -> postprocess - -> outputs (boxes tensor, scores tensor, classes tensor, num_detections tensor) - -DetectionModels must thus implement four functions (1) preprocess, (2) predict, -(3) postprocess and (4) loss. DetectionModels should make no assumptions about -the input size or aspect ratio --- they are responsible for doing any -resize/reshaping necessary (see docstring for the preprocess function). -Output classes are always integers in the range [0, num_classes). Any mapping -of these integers to semantic labels is to be handled outside of this class. - -Images are resized in the `preprocess` method. All of `preprocess`, `predict`, -and `postprocess` should be reentrant. - -The `preprocess` method runs `image_resizer_fn` that returns resized_images and -`true_image_shapes`. Since `image_resizer_fn` can pad the images with zeros, -true_image_shapes indicate the slices that contain the image without padding. -This is useful for padding images to be a fixed size for batching. - -The `postprocess` method uses the true image shapes to clip predictions that lie -outside of images. 
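The tests just above pin down the contract of `subsample_indicator`: never sample a False entry, and never return more than `num_samples` True entries. Before continuing with the DetectionModel overview, a framework-free sketch of that contract (illustrative only, not the module's TensorFlow implementation):

```python
import numpy as np

def subsample_indicator(indicator, num_samples, rng=np.random):
    # Keep at most num_samples True entries, chosen uniformly at random.
    indices = np.flatnonzero(indicator)
    rng.shuffle(indices)
    selected = indices[:num_samples]
    out = np.zeros_like(indicator, dtype=bool)
    out[selected] = True
    return out

indicator = np.array([True, False, True, False, True, True, False])
sampled = subsample_indicator(indicator, 3)
assert sampled.sum() == 3
assert np.all(indicator | ~sampled)  # sampled entries were sampleable
```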
- -By default, DetectionModels produce bounding box detections; However, we support -a handful of auxiliary annotations associated with each bounding box, namely, -instance masks and keypoints. -""" -from abc import ABCMeta -from abc import abstractmethod - -from object_detection.core import standard_fields as fields - - -class DetectionModel(object): - """Abstract base class for detection models.""" - __metaclass__ = ABCMeta - - def __init__(self, num_classes): - """Constructor. - - Args: - num_classes: number of classes. Note that num_classes *does not* include - background categories that might be implicitly predicted in various - implementations. - """ - self._num_classes = num_classes - self._groundtruth_lists = {} - - @property - def num_classes(self): - return self._num_classes - - def groundtruth_lists(self, field): - """Access list of groundtruth tensors. - - Args: - field: a string key, options are - fields.BoxListFields.{boxes,classes,masks,keypoints} - - Returns: - a list of tensors holding groundtruth information (see also - provide_groundtruth function below), with one entry for each image in the - batch. - Raises: - RuntimeError: if the field has not been provided via provide_groundtruth. - """ - if field not in self._groundtruth_lists: - raise RuntimeError('Groundtruth tensor %s has not been provided', field) - return self._groundtruth_lists[field] - - def groundtruth_has_field(self, field): - """Determines whether the groundtruth includes the given field. - - Args: - field: a string key, options are - fields.BoxListFields.{boxes,classes,masks,keypoints} - - Returns: - True if the groundtruth includes the given field, False otherwise. - """ - return field in self._groundtruth_lists - - @abstractmethod - def preprocess(self, inputs): - """Input preprocessing. - - To be overridden by implementations. - - This function is responsible for any scaling/shifting of input values that - is necessary prior to running the detector on an input image. - It is also responsible for any resizing, padding that might be necessary - as images are assumed to arrive in arbitrary sizes. While this function - could conceivably be part of the predict method (below), it is often - convenient to keep these separate --- for example, we may want to preprocess - on one device, place onto a queue, and let another device (e.g., the GPU) - handle prediction. - - A few important notes about the preprocess function: - + We assume that this operation does not have any trainable variables nor - does it affect the groundtruth annotations in any way (thus data - augmentation operations such as random cropping should be performed - externally). - + There is no assumption that the batchsize in this function is the same as - the batch size in the predict function. In fact, we recommend calling the - preprocess function prior to calling any batching operations (which should - happen outside of the model) and thus assuming that batch sizes are equal - to 1 in the preprocess function. - + There is also no explicit assumption that the output resolutions - must be fixed across inputs --- this is to support "fully convolutional" - settings in which input images can have different shapes/resolutions. - - Args: - inputs: a [batch, height_in, width_in, channels] float32 tensor - representing a batch of images with values between 0 and 255.0. - - Returns: - preprocessed_inputs: a [batch, height_out, width_out, channels] float32 - tensor representing a batch of images. 
- true_image_shapes: int32 tensor of shape [batch, 3] where each row is - of the form [height, width, channels] indicating the shapes - of true images in the resized images, as resized images can be padded - with zeros. - """ - pass - - @abstractmethod - def predict(self, preprocessed_inputs, true_image_shapes): - """Predict prediction tensors from inputs tensor. - - Outputs of this function can be passed to loss or postprocess functions. - - Args: - preprocessed_inputs: a [batch, height, width, channels] float32 tensor - representing a batch of images. - true_image_shapes: int32 tensor of shape [batch, 3] where each row is - of the form [height, width, channels] indicating the shapes - of true images in the resized images, as resized images can be padded - with zeros. - - Returns: - prediction_dict: a dictionary holding prediction tensors to be - passed to the Loss or Postprocess functions. - """ - pass - - @abstractmethod - def postprocess(self, prediction_dict, true_image_shapes, **params): - """Convert predicted output tensors to final detections. - - Outputs adhere to the following conventions: - * Classes are integers in [0, num_classes); background classes are removed - and the first non-background class is mapped to 0. If the model produces - class-agnostic detections, then no output is produced for classes. - * Boxes are to be interpreted as being in [y_min, x_min, y_max, x_max] - format and normalized relative to the image window. - * `num_detections` is provided for settings where detections are padded to a - fixed number of boxes. - * We do not specifically assume any kind of probabilistic interpretation - of the scores --- the only important thing is their relative ordering. - Thus implementations of the postprocess function are free to output - logits, probabilities, calibrated probabilities, or anything else. - - Args: - prediction_dict: a dictionary holding prediction tensors. - true_image_shapes: int32 tensor of shape [batch, 3] where each row is - of the form [height, width, channels] indicating the shapes - of true images in the resized images, as resized images can be padded - with zeros. - **params: Additional keyword arguments for specific implementations of - DetectionModel. - - Returns: - detections: a dictionary containing the following fields - detection_boxes: [batch, max_detections, 4] - detection_scores: [batch, max_detections] - detection_classes: [batch, max_detections] - (If a model is producing class-agnostic detections, this field may be - missing) - instance_masks: [batch, max_detections, image_height, image_width] - (optional) - keypoints: [batch, max_detections, num_keypoints, 2] (optional) - num_detections: [batch] - """ - pass - - @abstractmethod - def loss(self, prediction_dict, true_image_shapes): - """Compute scalar loss tensors with respect to provided groundtruth. - - Calling this function requires that groundtruth tensors have been - provided via the provide_groundtruth function. - - Args: - prediction_dict: a dictionary holding predicted tensors - true_image_shapes: int32 tensor of shape [batch, 3] where each row is - of the form [height, width, channels] indicating the shapes - of true images in the resized images, as resized images can be padded - with zeros. - - Returns: - a dictionary mapping strings (loss names) to scalar tensors representing - loss values. 
- """ - pass - - def provide_groundtruth(self, - groundtruth_boxes_list, - groundtruth_classes_list, - groundtruth_masks_list=None, - groundtruth_keypoints_list=None, - groundtruth_weights_list=None, - groundtruth_is_crowd_list=None): - """Provide groundtruth tensors. - - Args: - groundtruth_boxes_list: a list of 2-D tf.float32 tensors of shape - [num_boxes, 4] containing coordinates of the groundtruth boxes. - Groundtruth boxes are provided in [y_min, x_min, y_max, x_max] - format and assumed to be normalized and clipped - relative to the image window with y_min <= y_max and x_min <= x_max. - groundtruth_classes_list: a list of 2-D tf.float32 one-hot (or k-hot) - tensors of shape [num_boxes, num_classes] containing the class targets - with the 0th index assumed to map to the first non-background class. - groundtruth_masks_list: a list of 3-D tf.float32 tensors of - shape [num_boxes, height_in, width_in] containing instance - masks with values in {0, 1}. If None, no masks are provided. - Mask resolution `height_in`x`width_in` must agree with the resolution - of the input image tensor provided to the `preprocess` function. - groundtruth_keypoints_list: a list of 3-D tf.float32 tensors of - shape [num_boxes, num_keypoints, 2] containing keypoints. - Keypoints are assumed to be provided in normalized coordinates and - missing keypoints should be encoded as NaN. - groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape - [num_boxes] containing weights for groundtruth boxes. - groundtruth_is_crowd_list: A list of 1-D tf.bool tensors of shape - [num_boxes] containing is_crowd annotations - """ - self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list - self._groundtruth_lists[ - fields.BoxListFields.classes] = groundtruth_classes_list - if groundtruth_weights_list: - self._groundtruth_lists[fields.BoxListFields. - weights] = groundtruth_weights_list - if groundtruth_masks_list: - self._groundtruth_lists[ - fields.BoxListFields.masks] = groundtruth_masks_list - if groundtruth_keypoints_list: - self._groundtruth_lists[ - fields.BoxListFields.keypoints] = groundtruth_keypoints_list - if groundtruth_is_crowd_list: - self._groundtruth_lists[ - fields.BoxListFields.is_crowd] = groundtruth_is_crowd_list - - @abstractmethod - def restore_map(self, fine_tune_checkpoint_type='detection'): - """Returns a map of variables to load from a foreign checkpoint. - - Returns a map of variable names to load from a checkpoint to variables in - the model graph. This enables the model to initialize based on weights from - another task. For example, the feature extractor variables from a - classification model can be used to bootstrap training of an object - detector. When loading from an object detection model, the checkpoint model - should have the same parameters as this detection model with exception of - the num_classes parameter. - - Args: - fine_tune_checkpoint_type: whether to restore from a full detection - checkpoint (with compatible variable names) or to restore from a - classification checkpoint for initialization prior to training. - Valid values: `detection`, `classification`. Default 'detection'. - - Returns: - A dict mapping variable names (to load from a checkpoint) to variables in - the model graph. 
- """ - pass diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/post_processing.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/post_processing.py deleted file mode 100644 index bbc61f66fe7e61b2a3d243fa3285a204374af0c1..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/post_processing.py +++ /dev/null @@ -1,425 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Post-processing operations on detected boxes.""" - -import tensorflow as tf - -from object_detection.core import box_list -from object_detection.core import box_list_ops -from object_detection.core import standard_fields as fields -from object_detection.utils import shape_utils - - -def multiclass_non_max_suppression(boxes, - scores, - score_thresh, - iou_thresh, - max_size_per_class, - max_total_size=0, - clip_window=None, - change_coordinate_frame=False, - masks=None, - boundaries=None, - additional_fields=None, - scope=None): - """Multi-class version of non maximum suppression. - - This op greedily selects a subset of detection bounding boxes, pruning - away boxes that have high IOU (intersection over union) overlap (> thresh) - with already selected boxes. It operates independently for each class for - which scores are provided (via the scores field of the input box_list), - pruning boxes with score less than a provided threshold prior to - applying NMS. - - Please note that this operation is performed on *all* classes, therefore any - background classes should be removed prior to calling this function. - - Args: - boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either - number of classes or 1 depending on whether a separate box is predicted - per class. - scores: A [k, num_classes] float32 tensor containing the scores for each of - the k detections. - score_thresh: scalar threshold for score (low scoring boxes are removed). - iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap - with previously selected boxes are removed). - max_size_per_class: maximum number of retained boxes per class. - max_total_size: maximum number of boxes retained over all classes. By - default returns all boxes retained after capping boxes per class. - clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max] - representing the window to clip and normalize boxes to before performing - non-max suppression. - change_coordinate_frame: Whether to normalize coordinates after clipping - relative to clip_window (this can only be set to True if a clip_window - is provided) - masks: (optional) a [k, q, mask_height, mask_width] float32 tensor - containing box masks. `q` can be either number of classes or 1 depending - on whether a separate mask is predicted per class. 
- boundaries: (optional) a [k, q, boundary_height, boundary_width] float32 - tensor containing box boundaries. `q` can be either number of classes or 1 - depending on whether a separate boundary is predicted per class. - additional_fields: (optional) If not None, a dictionary that maps keys to - tensors whose first dimensions are all of size `k`. After non-maximum - suppression, all tensors corresponding to the selected boxes will be - added to resulting BoxList. - scope: name scope. - - Returns: - a BoxList holding M boxes with a rank-1 scores field representing - corresponding scores for each box with scores sorted in decreasing order - and a rank-1 classes field representing a class label for each box. - - Raises: - ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have - a valid scores field. - """ - if not 0 <= iou_thresh <= 1.0: - raise ValueError('iou_thresh must be between 0 and 1') - if scores.shape.ndims != 2: - raise ValueError('scores field must be of rank 2') - if scores.shape[1].value is None: - raise ValueError('scores must have statically defined second ' - 'dimension') - if boxes.shape.ndims != 3: - raise ValueError('boxes must be of rank 3.') - if not (boxes.shape[1].value == scores.shape[1].value or - boxes.shape[1].value == 1): - raise ValueError('second dimension of boxes must be either 1 or equal ' - 'to the second dimension of scores') - if boxes.shape[2].value != 4: - raise ValueError('last dimension of boxes must be of size 4.') - if change_coordinate_frame and clip_window is None: - raise ValueError('if change_coordinate_frame is True, then a clip_window' - 'must be specified.') - - with tf.name_scope(scope, 'MultiClassNonMaxSuppression'): - num_boxes = tf.shape(boxes)[0] - num_scores = tf.shape(scores)[0] - num_classes = scores.get_shape()[1] - - length_assert = tf.Assert( - tf.equal(num_boxes, num_scores), - ['Incorrect scores field length: actual vs expected.', - num_scores, num_boxes]) - - selected_boxes_list = [] - per_class_boxes_list = tf.unstack(boxes, axis=1) - if masks is not None: - per_class_masks_list = tf.unstack(masks, axis=1) - if boundaries is not None: - per_class_boundaries_list = tf.unstack(boundaries, axis=1) - boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1 - else [0] * num_classes.value) - for class_idx, boxes_idx in zip(range(num_classes), boxes_ids): - per_class_boxes = per_class_boxes_list[boxes_idx] - boxlist_and_class_scores = box_list.BoxList(per_class_boxes) - with tf.control_dependencies([length_assert]): - class_scores = tf.reshape( - tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])), [-1]) - boxlist_and_class_scores.add_field(fields.BoxListFields.scores, - class_scores) - if masks is not None: - per_class_masks = per_class_masks_list[boxes_idx] - boxlist_and_class_scores.add_field(fields.BoxListFields.masks, - per_class_masks) - if boundaries is not None: - per_class_boundaries = per_class_boundaries_list[boxes_idx] - boxlist_and_class_scores.add_field(fields.BoxListFields.boundaries, - per_class_boundaries) - if additional_fields is not None: - for key, tensor in additional_fields.items(): - boxlist_and_class_scores.add_field(key, tensor) - boxlist_filtered = box_list_ops.filter_greater_than( - boxlist_and_class_scores, score_thresh) - if clip_window is not None: - boxlist_filtered = box_list_ops.clip_to_window( - boxlist_filtered, clip_window) - if change_coordinate_frame: - boxlist_filtered = box_list_ops.change_coordinate_frame( - boxlist_filtered, clip_window) - 
max_selection_size = tf.minimum(max_size_per_class, - boxlist_filtered.num_boxes()) - selected_indices = tf.image.non_max_suppression( - boxlist_filtered.get(), - boxlist_filtered.get_field(fields.BoxListFields.scores), - max_selection_size, - iou_threshold=iou_thresh) - nms_result = box_list_ops.gather(boxlist_filtered, selected_indices) - nms_result.add_field( - fields.BoxListFields.classes, (tf.zeros_like( - nms_result.get_field(fields.BoxListFields.scores)) + class_idx)) - selected_boxes_list.append(nms_result) - selected_boxes = box_list_ops.concatenate(selected_boxes_list) - sorted_boxes = box_list_ops.sort_by_field(selected_boxes, - fields.BoxListFields.scores) - if max_total_size: - max_total_size = tf.minimum(max_total_size, - sorted_boxes.num_boxes()) - sorted_boxes = box_list_ops.gather(sorted_boxes, - tf.range(max_total_size)) - return sorted_boxes - - -def batch_multiclass_non_max_suppression(boxes, - scores, - score_thresh, - iou_thresh, - max_size_per_class, - max_total_size=0, - clip_window=None, - change_coordinate_frame=False, - num_valid_boxes=None, - masks=None, - additional_fields=None, - scope=None, - parallel_iterations=32): - """Multi-class version of non maximum suppression that operates on a batch. - - This op is similar to `multiclass_non_max_suppression` but operates on a batch - of boxes and scores. See documentation for `multiclass_non_max_suppression` - for details. - - Args: - boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing - detections. If `q` is 1 then same boxes are used for all classes - otherwise, if `q` is equal to number of classes, class-specific boxes - are used. - scores: A [batch_size, num_anchors, num_classes] float32 tensor containing - the scores for each of the `num_anchors` detections. - score_thresh: scalar threshold for score (low scoring boxes are removed). - iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap - with previously selected boxes are removed). - max_size_per_class: maximum number of retained boxes per class. - max_total_size: maximum number of boxes retained over all classes. By - default returns all boxes retained after capping boxes per class. - clip_window: A float32 tensor of shape [batch_size, 4] where each entry is - of the form [y_min, x_min, y_max, x_max] representing the window to clip - boxes to before performing non-max suppression. This argument can also be - a tensor of shape [4] in which case, the same clip window is applied to - all images in the batch. If clip_widow is None, all boxes are used to - perform non-max suppression. - change_coordinate_frame: Whether to normalize coordinates after clipping - relative to clip_window (this can only be set to True if a clip_window - is provided) - num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape - [batch_size] representing the number of valid boxes to be considered - for each image in the batch. This parameter allows for ignoring zero - paddings. - masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width] - float32 tensor containing box masks. `q` can be either number of classes - or 1 depending on whether a separate mask is predicted per class. - additional_fields: (optional) If not None, a dictionary that maps keys to - tensors whose dimensions are [batch_size, num_anchors, ...]. - scope: tf scope name. - parallel_iterations: (optional) number of batch items to process in - parallel. 
- - Returns: - 'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor - containing the non-max suppressed boxes. - 'nmsed_scores': A [batch_size, max_detections] float32 tensor containing - the scores for the boxes. - 'nmsed_classes': A [batch_size, max_detections] float32 tensor - containing the class for boxes. - 'nmsed_masks': (optional) a - [batch_size, max_detections, mask_height, mask_width] float32 tensor - containing masks for each selected box. This is set to None if input - `masks` is None. - 'nmsed_additional_fields': (optional) a dictionary of - [batch_size, max_detections, ...] float32 tensors corresponding to the - tensors specified in the input `additional_fields`. This is not returned - if input `additional_fields` is None. - 'num_detections': A [batch_size] int32 tensor indicating the number of - valid detections per batch item. Only the top num_detections[i] entries in - nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the - entries are zero paddings. - - Raises: - ValueError: if `q` in boxes.shape is not 1 or not equal to number of - classes as inferred from scores.shape. - """ - q = boxes.shape[2].value - num_classes = scores.shape[2].value - if q != 1 and q != num_classes: - raise ValueError('third dimension of boxes must be either 1 or equal ' - 'to the third dimension of scores') - if change_coordinate_frame and clip_window is None: - raise ValueError('if change_coordinate_frame is True, then a clip_window' - 'must be specified.') - original_masks = masks - original_additional_fields = additional_fields - with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'): - boxes_shape = boxes.shape - batch_size = boxes_shape[0].value - num_anchors = boxes_shape[1].value - - if batch_size is None: - batch_size = tf.shape(boxes)[0] - if num_anchors is None: - num_anchors = tf.shape(boxes)[1] - - # If num valid boxes aren't provided, create one and mark all boxes as - # valid. - if num_valid_boxes is None: - num_valid_boxes = tf.ones([batch_size], dtype=tf.int32) * num_anchors - - # If masks aren't provided, create dummy masks so we can only have one copy - # of _single_image_nms_fn and discard the dummy masks after map_fn. - if masks is None: - masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0]) - masks = tf.zeros(masks_shape) - - if clip_window is None: - clip_window = tf.stack([ - tf.reduce_min(boxes[:, :, :, 0]), - tf.reduce_min(boxes[:, :, :, 1]), - tf.reduce_max(boxes[:, :, :, 2]), - tf.reduce_max(boxes[:, :, :, 3]) - ]) - if clip_window.shape.ndims == 1: - clip_window = tf.tile(tf.expand_dims(clip_window, 0), [batch_size, 1]) - - if additional_fields is None: - additional_fields = {} - - def _single_image_nms_fn(args): - """Runs NMS on a single image and returns padded output. - - Args: - args: A list of tensors consisting of the following: - per_image_boxes - A [num_anchors, q, 4] float32 tensor containing - detections. If `q` is 1 then same boxes are used for all classes - otherwise, if `q` is equal to number of classes, class-specific - boxes are used. - per_image_scores - A [num_anchors, num_classes] float32 tensor - containing the scores for each of the `num_anchors` detections. - per_image_masks - A [num_anchors, q, mask_height, mask_width] float32 - tensor containing box masks. `q` can be either number of classes - or 1 depending on whether a separate mask is predicted per class. - per_image_clip_window - A 1D float32 tensor of the form - [ymin, xmin, ymax, xmax] representing the window to clip the boxes - to. 
- per_image_additional_fields - (optional) A variable number of float32 - tensors each with size [num_anchors, ...]. - per_image_num_valid_boxes - A tensor of type `int32`. A 1-D tensor of - shape [batch_size] representing the number of valid boxes to be - considered for each image in the batch. This parameter allows for - ignoring zero paddings. - - Returns: - 'nmsed_boxes': A [max_detections, 4] float32 tensor containing the - non-max suppressed boxes. - 'nmsed_scores': A [max_detections] float32 tensor containing the scores - for the boxes. - 'nmsed_classes': A [max_detections] float32 tensor containing the class - for boxes. - 'nmsed_masks': (optional) a [max_detections, mask_height, mask_width] - float32 tensor containing masks for each selected box. This is set to - None if input `masks` is None. - 'nmsed_additional_fields': (optional) A variable number of float32 - tensors each with size [max_detections, ...] corresponding to the - input `per_image_additional_fields`. - 'num_detections': A [batch_size] int32 tensor indicating the number of - valid detections per batch item. Only the top num_detections[i] - entries in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The - rest of the entries are zero paddings. - """ - per_image_boxes = args[0] - per_image_scores = args[1] - per_image_masks = args[2] - per_image_clip_window = args[3] - per_image_additional_fields = { - key: value - for key, value in zip(additional_fields, args[4:-1]) - } - per_image_num_valid_boxes = args[-1] - per_image_boxes = tf.reshape( - tf.slice(per_image_boxes, 3 * [0], - tf.stack([per_image_num_valid_boxes, -1, -1])), [-1, q, 4]) - per_image_scores = tf.reshape( - tf.slice(per_image_scores, [0, 0], - tf.stack([per_image_num_valid_boxes, -1])), - [-1, num_classes]) - per_image_masks = tf.reshape( - tf.slice(per_image_masks, 4 * [0], - tf.stack([per_image_num_valid_boxes, -1, -1, -1])), - [-1, q, per_image_masks.shape[2].value, - per_image_masks.shape[3].value]) - if per_image_additional_fields is not None: - for key, tensor in per_image_additional_fields.items(): - additional_field_shape = tensor.get_shape() - additional_field_dim = len(additional_field_shape) - per_image_additional_fields[key] = tf.reshape( - tf.slice(per_image_additional_fields[key], - additional_field_dim * [0], - tf.stack([per_image_num_valid_boxes] + - (additional_field_dim - 1) * [-1])), - [-1] + [dim.value for dim in additional_field_shape[1:]]) - nmsed_boxlist = multiclass_non_max_suppression( - per_image_boxes, - per_image_scores, - score_thresh, - iou_thresh, - max_size_per_class, - max_total_size, - clip_window=per_image_clip_window, - change_coordinate_frame=change_coordinate_frame, - masks=per_image_masks, - additional_fields=per_image_additional_fields) - padded_boxlist = box_list_ops.pad_or_clip_box_list(nmsed_boxlist, - max_total_size) - num_detections = nmsed_boxlist.num_boxes() - nmsed_boxes = padded_boxlist.get() - nmsed_scores = padded_boxlist.get_field(fields.BoxListFields.scores) - nmsed_classes = padded_boxlist.get_field(fields.BoxListFields.classes) - nmsed_masks = padded_boxlist.get_field(fields.BoxListFields.masks) - nmsed_additional_fields = [ - padded_boxlist.get_field(key) for key in per_image_additional_fields - ] - return ([nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks] + - nmsed_additional_fields + [num_detections]) - - num_additional_fields = 0 - if additional_fields is not None: - num_additional_fields = len(additional_fields) - num_nmsed_outputs = 4 + num_additional_fields - - batch_outputs = 
shape_utils.static_or_dynamic_map_fn( - _single_image_nms_fn, - elems=([boxes, scores, masks, clip_window] + - list(additional_fields.values()) + [num_valid_boxes]), - dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]), - parallel_iterations=parallel_iterations) - - batch_nmsed_boxes = batch_outputs[0] - batch_nmsed_scores = batch_outputs[1] - batch_nmsed_classes = batch_outputs[2] - batch_nmsed_masks = batch_outputs[3] - batch_nmsed_additional_fields = { - key: value - for key, value in zip(additional_fields, batch_outputs[4:-1]) - } - batch_num_detections = batch_outputs[-1] - - if original_masks is None: - batch_nmsed_masks = None - - if original_additional_fields is None: - batch_nmsed_additional_fields = None - - return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes, - batch_nmsed_masks, batch_nmsed_additional_fields, - batch_num_detections) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/post_processing_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/post_processing_test.py deleted file mode 100644 index 9674139967f933192026c2245a82bf0026a732fe..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/post_processing_test.py +++ /dev/null @@ -1,1078 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for tensorflow_models.object_detection.core.post_processing.""" -import numpy as np -import tensorflow as tf -from object_detection.core import post_processing -from object_detection.core import standard_fields as fields - - -class MulticlassNonMaxSuppressionTest(tf.test.TestCase): - - def test_with_invalid_scores_size(self): - boxes = tf.constant([[[0, 0, 1, 1]], - [[0, 0.1, 1, 1.1]], - [[0, -0.1, 1, 0.9]], - [[0, 10, 1, 11]], - [[0, 10.1, 1, 11.1]], - [[0, 100, 1, 101]]], tf.float32) - scores = tf.constant([[.9], [.75], [.6], [.95], [.5]]) - iou_thresh = .5 - score_thresh = 0.6 - max_output_size = 3 - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_output_size) - with self.test_session() as sess: - with self.assertRaisesWithPredicateMatch( - tf.errors.InvalidArgumentError, 'Incorrect scores field length'): - sess.run(nms.get()) - - def test_multiclass_nms_select_with_shared_boxes(self): - boxes = tf.constant([[[0, 0, 1, 1]], - [[0, 0.1, 1, 1.1]], - [[0, -0.1, 1, 0.9]], - [[0, 10, 1, 11]], - [[0, 10.1, 1, 11.1]], - [[0, 100, 1, 101]], - [[0, 1000, 1, 1002]], - [[0, 1000, 1, 1002.1]]], tf.float32) - scores = tf.constant([[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0], - [.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = [[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 1000, 1, 1002], - [0, 100, 1, 101]] - exp_nms_scores = [.95, .9, .85, .3] - exp_nms_classes = [0, 0, 1, 0] - - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_output_size) - with self.test_session() as sess: - nms_corners_output, nms_scores_output, nms_classes_output = sess.run( - [nms.get(), nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes)]) - self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - - def test_multiclass_nms_select_with_shared_boxes_given_keypoints(self): - boxes = tf.constant([[[0, 0, 1, 1]], - [[0, 0.1, 1, 1.1]], - [[0, -0.1, 1, 0.9]], - [[0, 10, 1, 11]], - [[0, 10.1, 1, 11.1]], - [[0, 100, 1, 101]], - [[0, 1000, 1, 1002]], - [[0, 1000, 1, 1002.1]]], tf.float32) - scores = tf.constant([[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0], - [.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]) - num_keypoints = 6 - keypoints = tf.tile( - tf.reshape(tf.range(8), [8, 1, 1]), - [1, num_keypoints, 2]) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = [[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 1000, 1, 1002], - [0, 100, 1, 101]] - exp_nms_scores = [.95, .9, .85, .3] - exp_nms_classes = [0, 0, 1, 0] - exp_nms_keypoints_tensor = tf.tile( - tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]), - [1, num_keypoints, 2]) - - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_output_size, - additional_fields={ - fields.BoxListFields.keypoints: keypoints}) - - with self.test_session() as sess: - (nms_corners_output, - nms_scores_output, - nms_classes_output, - nms_keypoints, - exp_nms_keypoints) = sess.run([ - nms.get(), - nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes), - nms.get_field(fields.BoxListFields.keypoints), - exp_nms_keypoints_tensor - ]) - 
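# Non-max suppression gathers every additional field with the same row
# indices it selected for the boxes, so the expected keypoints are simply
# rows [3, 0, 6, 5] of the input keypoints tensor. A rough NumPy mirror of
# that gather (illustration only):
import numpy as np
keypoints = np.arange(8).reshape(8, 1, 1) * np.ones((1, 6, 2))  # row i holds value i
selected_rows = [3, 0, 6, 5]  # rows NMS keeps, ordered by descending score
assert keypoints[selected_rows].shape == (4, 6, 2)
assert (keypoints[selected_rows][:, 0, 0] == [3., 0., 6., 5.]).all()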
self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - self.assertAllEqual(nms_keypoints, exp_nms_keypoints) - - def test_multiclass_nms_with_shared_boxes_given_keypoint_heatmaps(self): - boxes = tf.constant([[[0, 0, 1, 1]], - [[0, 0.1, 1, 1.1]], - [[0, -0.1, 1, 0.9]], - [[0, 10, 1, 11]], - [[0, 10.1, 1, 11.1]], - [[0, 100, 1, 101]], - [[0, 1000, 1, 1002]], - [[0, 1000, 1, 1002.1]]], tf.float32) - - scores = tf.constant([[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0], - [.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]) - - num_boxes = tf.shape(boxes)[0] - heatmap_height = 5 - heatmap_width = 5 - num_keypoints = 17 - keypoint_heatmaps = tf.ones( - [num_boxes, heatmap_height, heatmap_width, num_keypoints], - dtype=tf.float32) - - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - exp_nms_corners = [[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 1000, 1, 1002], - [0, 100, 1, 101]] - - exp_nms_scores = [.95, .9, .85, .3] - exp_nms_classes = [0, 0, 1, 0] - exp_nms_keypoint_heatmaps = np.ones( - (4, heatmap_height, heatmap_width, num_keypoints), dtype=np.float32) - - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_output_size, - additional_fields={ - fields.BoxListFields.keypoint_heatmaps: keypoint_heatmaps}) - - with self.test_session() as sess: - (nms_corners_output, - nms_scores_output, - nms_classes_output, - nms_keypoint_heatmaps) = sess.run( - [nms.get(), - nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes), - nms.get_field(fields.BoxListFields.keypoint_heatmaps)]) - - self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - self.assertAllEqual(nms_keypoint_heatmaps, exp_nms_keypoint_heatmaps) - - def test_multiclass_nms_with_additional_fields(self): - boxes = tf.constant([[[0, 0, 1, 1]], - [[0, 0.1, 1, 1.1]], - [[0, -0.1, 1, 0.9]], - [[0, 10, 1, 11]], - [[0, 10.1, 1, 11.1]], - [[0, 100, 1, 101]], - [[0, 1000, 1, 1002]], - [[0, 1000, 1, 1002.1]]], tf.float32) - - scores = tf.constant([[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0], - [.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]) - - coarse_boxes_key = 'coarse_boxes' - coarse_boxes = tf.constant([[0.1, 0.1, 1.1, 1.1], - [0.1, 0.2, 1.1, 1.2], - [0.1, -0.2, 1.1, 1.0], - [0.1, 10.1, 1.1, 11.1], - [0.1, 10.2, 1.1, 11.2], - [0.1, 100.1, 1.1, 101.1], - [0.1, 1000.1, 1.1, 1002.1], - [0.1, 1000.1, 1.1, 1002.2]], tf.float32) - - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = np.array([[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 1000, 1, 1002], - [0, 100, 1, 101]], dtype=np.float32) - - exp_nms_coarse_corners = np.array([[0.1, 10.1, 1.1, 11.1], - [0.1, 0.1, 1.1, 1.1], - [0.1, 1000.1, 1.1, 1002.1], - [0.1, 100.1, 1.1, 101.1]], - dtype=np.float32) - - exp_nms_scores = [.95, .9, .85, .3] - exp_nms_classes = [0, 0, 1, 0] - - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_output_size, - additional_fields={coarse_boxes_key: coarse_boxes}) - - with self.test_session() as sess: - (nms_corners_output, - nms_scores_output, - nms_classes_output, - nms_coarse_corners) = sess.run( - [nms.get(), - nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes), - nms.get_field(coarse_boxes_key)]) - - 
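# The coarse boxes behave the same way as the keypoints above: each expected
# coarse corner is the input coarse box at the row NMS selected for the
# corresponding final box (rows 3, 0, 6, and 5 again).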
self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - self.assertAllEqual(nms_coarse_corners, exp_nms_coarse_corners) - - def test_multiclass_nms_select_with_shared_boxes_given_masks(self): - boxes = tf.constant([[[0, 0, 1, 1]], - [[0, 0.1, 1, 1.1]], - [[0, -0.1, 1, 0.9]], - [[0, 10, 1, 11]], - [[0, 10.1, 1, 11.1]], - [[0, 100, 1, 101]], - [[0, 1000, 1, 1002]], - [[0, 1000, 1, 1002.1]]], tf.float32) - scores = tf.constant([[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0], - [.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]) - num_classes = 2 - mask_height = 3 - mask_width = 3 - masks = tf.tile( - tf.reshape(tf.range(8), [8, 1, 1, 1]), - [1, num_classes, mask_height, mask_width]) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = [[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 1000, 1, 1002], - [0, 100, 1, 101]] - exp_nms_scores = [.95, .9, .85, .3] - exp_nms_classes = [0, 0, 1, 0] - exp_nms_masks_tensor = tf.tile( - tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]), - [1, mask_height, mask_width]) - - nms = post_processing.multiclass_non_max_suppression(boxes, scores, - score_thresh, - iou_thresh, - max_output_size, - masks=masks) - with self.test_session() as sess: - (nms_corners_output, - nms_scores_output, - nms_classes_output, - nms_masks, - exp_nms_masks) = sess.run([nms.get(), - nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes), - nms.get_field(fields.BoxListFields.masks), - exp_nms_masks_tensor]) - self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - self.assertAllEqual(nms_masks, exp_nms_masks) - - def test_multiclass_nms_select_with_clip_window(self): - boxes = tf.constant([[[0, 0, 10, 10]], - [[1, 1, 11, 11]]], tf.float32) - scores = tf.constant([[.9], [.75]]) - clip_window = tf.constant([5, 4, 8, 7], tf.float32) - score_thresh = 0.0 - iou_thresh = 0.5 - max_output_size = 100 - - exp_nms_corners = [[5, 4, 8, 7]] - exp_nms_scores = [.9] - exp_nms_classes = [0] - - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_output_size, - clip_window=clip_window) - with self.test_session() as sess: - nms_corners_output, nms_scores_output, nms_classes_output = sess.run( - [nms.get(), nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes)]) - self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - - def test_multiclass_nms_select_with_clip_window_change_coordinate_frame(self): - boxes = tf.constant([[[0, 0, 10, 10]], - [[1, 1, 11, 11]]], tf.float32) - scores = tf.constant([[.9], [.75]]) - clip_window = tf.constant([5, 4, 8, 7], tf.float32) - score_thresh = 0.0 - iou_thresh = 0.5 - max_output_size = 100 - - exp_nms_corners = [[0, 0, 1, 1]] - exp_nms_scores = [.9] - exp_nms_classes = [0] - - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_output_size, - clip_window=clip_window, change_coordinate_frame=True) - with self.test_session() as sess: - nms_corners_output, nms_scores_output, nms_classes_output = sess.run( - [nms.get(), nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes)]) - 
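# Both input boxes collapse onto the clip window [5, 4, 8, 7] once clipped,
# so NMS keeps only the higher-scoring one; with change_coordinate_frame=True
# the survivor is then re-expressed relative to the window as [0, 0, 1, 1].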
self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - - def test_multiclass_nms_select_with_per_class_cap(self): - boxes = tf.constant([[[0, 0, 1, 1]], - [[0, 0.1, 1, 1.1]], - [[0, -0.1, 1, 0.9]], - [[0, 10, 1, 11]], - [[0, 10.1, 1, 11.1]], - [[0, 100, 1, 101]], - [[0, 1000, 1, 1002]], - [[0, 1000, 1, 1002.1]]], tf.float32) - scores = tf.constant([[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0], - [.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]) - score_thresh = 0.1 - iou_thresh = .5 - max_size_per_class = 2 - - exp_nms_corners = [[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 1000, 1, 1002]] - exp_nms_scores = [.95, .9, .85] - exp_nms_classes = [0, 0, 1] - - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_size_per_class) - with self.test_session() as sess: - nms_corners_output, nms_scores_output, nms_classes_output = sess.run( - [nms.get(), nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes)]) - self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - - def test_multiclass_nms_select_with_total_cap(self): - boxes = tf.constant([[[0, 0, 1, 1]], - [[0, 0.1, 1, 1.1]], - [[0, -0.1, 1, 0.9]], - [[0, 10, 1, 11]], - [[0, 10.1, 1, 11.1]], - [[0, 100, 1, 101]], - [[0, 1000, 1, 1002]], - [[0, 1000, 1, 1002.1]]], tf.float32) - scores = tf.constant([[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0], - [.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]) - score_thresh = 0.1 - iou_thresh = .5 - max_size_per_class = 4 - max_total_size = 2 - - exp_nms_corners = [[0, 10, 1, 11], - [0, 0, 1, 1]] - exp_nms_scores = [.95, .9] - exp_nms_classes = [0, 0] - - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_size_per_class, - max_total_size) - with self.test_session() as sess: - nms_corners_output, nms_scores_output, nms_classes_output = sess.run( - [nms.get(), nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes)]) - self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - - def test_multiclass_nms_threshold_then_select_with_shared_boxes(self): - boxes = tf.constant([[[0, 0, 1, 1]], - [[0, 0.1, 1, 1.1]], - [[0, -0.1, 1, 0.9]], - [[0, 10, 1, 11]], - [[0, 10.1, 1, 11.1]], - [[0, 100, 1, 101]], - [[0, 1000, 1, 1002]], - [[0, 1000, 1, 1002.1]]], tf.float32) - scores = tf.constant([[.9], [.75], [.6], [.95], [.5], [.3], [.01], [.01]]) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 3 - - exp_nms = [[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 100, 1, 101]] - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_output_size) - with self.test_session() as sess: - nms_output = sess.run(nms.get()) - self.assertAllClose(nms_output, exp_nms) - - def test_multiclass_nms_select_with_separate_boxes(self): - boxes = tf.constant([[[0, 0, 1, 1], [0, 0, 4, 5]], - [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], - [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], - [[0, 10, 1, 11], [0, 10, 1, 11]], - [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], - [[0, 100, 1, 101], [0, 100, 1, 101]], - [[0, 1000, 1, 1002], [0, 999, 2, 1004]], - [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]], - tf.float32) - 
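# Here q == num_classes == 2, so every detection carries a separate box per
# class and class c's NMS runs on column c. A quick NumPy view of the class-1
# candidates contributed by the last two rows (illustration only):
import numpy as np
per_class = np.array([[[0, 1000, 1, 1002], [0, 999, 2, 1004]],
                      [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]])
print(per_class[:, 1])  # class-1 boxes: [[0, 999, 2, 1004], [0, 999, 2, 1002.7]]
# which is why the winning class-1 detection below is [0, 999, 2, 1004]
# rather than the class-0 variant [0, 1000, 1, 1002].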
scores = tf.constant([[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0], - [.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = [[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 999, 2, 1004], - [0, 100, 1, 101]] - exp_nms_scores = [.95, .9, .85, .3] - exp_nms_classes = [0, 0, 1, 0] - - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_output_size) - with self.test_session() as sess: - nms_corners_output, nms_scores_output, nms_classes_output = sess.run( - [nms.get(), nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes)]) - self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - - def test_batch_multiclass_nms_with_batch_size_1(self): - boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], - [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], - [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], - [[0, 10, 1, 11], [0, 10, 1, 11]], - [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], - [[0, 100, 1, 101], [0, 100, 1, 101]], - [[0, 1000, 1, 1002], [0, 999, 2, 1004]], - [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], - tf.float32) - scores = tf.constant([[[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0], - [.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]]) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = [[[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 999, 2, 1004], - [0, 100, 1, 101]]] - exp_nms_scores = [[.95, .9, .85, .3]] - exp_nms_classes = [[0, 0, 1, 0]] - - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - nmsed_additional_fields, num_detections - ) = post_processing.batch_multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, - max_size_per_class=max_output_size, max_total_size=max_output_size) - - self.assertIsNone(nmsed_masks) - self.assertIsNone(nmsed_additional_fields) - - with self.test_session() as sess: - (nmsed_boxes, nmsed_scores, nmsed_classes, - num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, - num_detections]) - self.assertAllClose(nmsed_boxes, exp_nms_corners) - self.assertAllClose(nmsed_scores, exp_nms_scores) - self.assertAllClose(nmsed_classes, exp_nms_classes) - self.assertEqual(num_detections, [4]) - - def test_batch_multiclass_nms_with_batch_size_2(self): - boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], - [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], - [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], - [[0, 10, 1, 11], [0, 10, 1, 11]]], - [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], - [[0, 100, 1, 101], [0, 100, 1, 101]], - [[0, 1000, 1, 1002], [0, 999, 2, 1004]], - [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], - tf.float32) - scores = tf.constant([[[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0]], - [[.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]]) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = np.array([[[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 0, 0, 0], - [0, 0, 0, 0]], - [[0, 999, 2, 1004], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101], - [0, 0, 0, 0]]]) - exp_nms_scores = np.array([[.95, .9, 0, 0], - [.85, .5, .3, 0]]) - exp_nms_classes = np.array([[0, 0, 0, 0], - [1, 0, 0, 0]]) - - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - nmsed_additional_fields, num_detections - ) = post_processing.batch_multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, - max_size_per_class=max_output_size, 
max_total_size=max_output_size) - - self.assertIsNone(nmsed_masks) - self.assertIsNone(nmsed_additional_fields) - # Check static shapes - self.assertAllEqual(nmsed_boxes.shape.as_list(), - exp_nms_corners.shape) - self.assertAllEqual(nmsed_scores.shape.as_list(), - exp_nms_scores.shape) - self.assertAllEqual(nmsed_classes.shape.as_list(), - exp_nms_classes.shape) - self.assertEqual(num_detections.shape.as_list(), [2]) - - with self.test_session() as sess: - (nmsed_boxes, nmsed_scores, nmsed_classes, - num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, - num_detections]) - self.assertAllClose(nmsed_boxes, exp_nms_corners) - self.assertAllClose(nmsed_scores, exp_nms_scores) - self.assertAllClose(nmsed_classes, exp_nms_classes) - self.assertAllClose(num_detections, [2, 3]) - - def test_batch_multiclass_nms_with_per_batch_clip_window(self): - boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], - [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], - [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], - [[0, 10, 1, 11], [0, 10, 1, 11]]], - [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], - [[0, 100, 1, 101], [0, 100, 1, 101]], - [[0, 1000, 1, 1002], [0, 999, 2, 1004]], - [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], - tf.float32) - scores = tf.constant([[[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0]], - [[.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]]) - clip_window = tf.constant([0., 0., 200., 200.]) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = np.array([[[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 0, 0, 0], - [0, 0, 0, 0]], - [[0, 10.1, 1, 11.1], - [0, 100, 1, 101], - [0, 0, 0, 0], - [0, 0, 0, 0]]]) - exp_nms_scores = np.array([[.95, .9, 0, 0], - [.5, .3, 0, 0]]) - exp_nms_classes = np.array([[0, 0, 0, 0], - [0, 0, 0, 0]]) - - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - nmsed_additional_fields, num_detections - ) = post_processing.batch_multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, - max_size_per_class=max_output_size, max_total_size=max_output_size, - clip_window=clip_window) - - self.assertIsNone(nmsed_masks) - self.assertIsNone(nmsed_additional_fields) - # Check static shapes - self.assertAllEqual(nmsed_boxes.shape.as_list(), - exp_nms_corners.shape) - self.assertAllEqual(nmsed_scores.shape.as_list(), - exp_nms_scores.shape) - self.assertAllEqual(nmsed_classes.shape.as_list(), - exp_nms_classes.shape) - self.assertEqual(num_detections.shape.as_list(), [2]) - - with self.test_session() as sess: - (nmsed_boxes, nmsed_scores, nmsed_classes, - num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, - num_detections]) - self.assertAllClose(nmsed_boxes, exp_nms_corners) - self.assertAllClose(nmsed_scores, exp_nms_scores) - self.assertAllClose(nmsed_classes, exp_nms_classes) - self.assertAllClose(num_detections, [2, 2]) - - def test_batch_multiclass_nms_with_per_image_clip_window(self): - boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], - [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], - [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], - [[0, 10, 1, 11], [0, 10, 1, 11]]], - [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], - [[0, 100, 1, 101], [0, 100, 1, 101]], - [[0, 1000, 1, 1002], [0, 999, 2, 1004]], - [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], - tf.float32) - scores = tf.constant([[[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0]], - [[.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]]) - clip_window = tf.constant([[0., 0., 5., 5.], - [0., 0., 200., 200.]]) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - 
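# Each image gets its own clip window here, and [0, 0, 5, 5] for the first
# image discards everything except the box near the origin: boxes that clip
# down to zero area are pruned before NMS runs. A quick check of that pruning
# (plain NumPy, illustration only):
import numpy as np
window = np.array([0., 0., 5., 5.])
box = np.array([0., 10., 1., 11.])  # lies entirely to the right of the window
clipped = np.concatenate([np.clip(box[:2], window[:2], window[2:]),
                          np.clip(box[2:], window[:2], window[2:])])
area = max(clipped[2] - clipped[0], 0.) * max(clipped[3] - clipped[1], 0.)
assert area == 0.  # the detection disappears, hence num_detections of [1, 2]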
exp_nms_corners = np.array([[[0, 0, 1, 1], - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]], - [[0, 10.1, 1, 11.1], - [0, 100, 1, 101], - [0, 0, 0, 0], - [0, 0, 0, 0]]]) - exp_nms_scores = np.array([[.9, 0., 0., 0.], - [.5, .3, 0, 0]]) - exp_nms_classes = np.array([[0, 0, 0, 0], - [0, 0, 0, 0]]) - - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - nmsed_additional_fields, num_detections - ) = post_processing.batch_multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, - max_size_per_class=max_output_size, max_total_size=max_output_size, - clip_window=clip_window) - - self.assertIsNone(nmsed_masks) - self.assertIsNone(nmsed_additional_fields) - # Check static shapes - self.assertAllEqual(nmsed_boxes.shape.as_list(), - exp_nms_corners.shape) - self.assertAllEqual(nmsed_scores.shape.as_list(), - exp_nms_scores.shape) - self.assertAllEqual(nmsed_classes.shape.as_list(), - exp_nms_classes.shape) - self.assertEqual(num_detections.shape.as_list(), [2]) - - with self.test_session() as sess: - (nmsed_boxes, nmsed_scores, nmsed_classes, - num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, - num_detections]) - self.assertAllClose(nmsed_boxes, exp_nms_corners) - self.assertAllClose(nmsed_scores, exp_nms_scores) - self.assertAllClose(nmsed_classes, exp_nms_classes) - self.assertAllClose(num_detections, [1, 2]) - - def test_batch_multiclass_nms_with_masks(self): - boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], - [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], - [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], - [[0, 10, 1, 11], [0, 10, 1, 11]]], - [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], - [[0, 100, 1, 101], [0, 100, 1, 101]], - [[0, 1000, 1, 1002], [0, 999, 2, 1004]], - [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], - tf.float32) - scores = tf.constant([[[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0]], - [[.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]]) - masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]], - [[[2, 3], [4, 5]], [[3, 4], [5, 6]]], - [[[4, 5], [6, 7]], [[5, 6], [7, 8]]], - [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]], - [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]], - [[[10, 11], [12, 13]], [[11, 12], [13, 14]]], - [[[12, 13], [14, 15]], [[13, 14], [15, 16]]], - [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]], - tf.float32) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = np.array([[[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 0, 0, 0], - [0, 0, 0, 0]], - [[0, 999, 2, 1004], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101], - [0, 0, 0, 0]]]) - exp_nms_scores = np.array([[.95, .9, 0, 0], - [.85, .5, .3, 0]]) - exp_nms_classes = np.array([[0, 0, 0, 0], - [1, 0, 0, 0]]) - exp_nms_masks = np.array([[[[6, 7], [8, 9]], - [[0, 1], [2, 3]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]]], - [[[13, 14], [15, 16]], - [[8, 9], [10, 11]], - [[10, 11], [12, 13]], - [[0, 0], [0, 0]]]]) - - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - nmsed_additional_fields, num_detections - ) = post_processing.batch_multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, - max_size_per_class=max_output_size, max_total_size=max_output_size, - masks=masks) - - self.assertIsNone(nmsed_additional_fields) - # Check static shapes - self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape) - self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape) - self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape) - self.assertAllEqual(nmsed_masks.shape.as_list(), exp_nms_masks.shape) - 
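# Masks travel with their (anchor, class) pair: the first expected mask
# [[6, 7], [8, 9]] is the class-0 mask of anchor 3 in image 0, i.e. the mask
# of the top-scoring detection. A small NumPy spot-check (illustration only):
import numpy as np
image0_masks = np.array([[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
                         [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
                         [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
                         [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]])
assert (image0_masks[3, 0] == np.array([[6, 7], [8, 9]])).all()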
self.assertEqual(num_detections.shape.as_list(), [2]) - - with self.test_session() as sess: - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, - nmsed_masks, num_detections]) - - self.assertAllClose(nmsed_boxes, exp_nms_corners) - self.assertAllClose(nmsed_scores, exp_nms_scores) - self.assertAllClose(nmsed_classes, exp_nms_classes) - self.assertAllClose(num_detections, [2, 3]) - self.assertAllClose(nmsed_masks, exp_nms_masks) - - def test_batch_multiclass_nms_with_additional_fields(self): - boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], - [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], - [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], - [[0, 10, 1, 11], [0, 10, 1, 11]]], - [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], - [[0, 100, 1, 101], [0, 100, 1, 101]], - [[0, 1000, 1, 1002], [0, 999, 2, 1004]], - [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], - tf.float32) - scores = tf.constant([[[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0]], - [[.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]]) - additional_fields = { - 'keypoints': tf.constant( - [[[[6, 7], [8, 9]], - [[0, 1], [2, 3]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]]], - [[[13, 14], [15, 16]], - [[8, 9], [10, 11]], - [[10, 11], [12, 13]], - [[0, 0], [0, 0]]]], - tf.float32) - } - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = np.array([[[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 0, 0, 0], - [0, 0, 0, 0]], - [[0, 999, 2, 1004], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101], - [0, 0, 0, 0]]]) - exp_nms_scores = np.array([[.95, .9, 0, 0], - [.85, .5, .3, 0]]) - exp_nms_classes = np.array([[0, 0, 0, 0], - [1, 0, 0, 0]]) - exp_nms_additional_fields = { - 'keypoints': np.array([[[[0, 0], [0, 0]], - [[6, 7], [8, 9]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]]], - [[[10, 11], [12, 13]], - [[13, 14], [15, 16]], - [[8, 9], [10, 11]], - [[0, 0], [0, 0]]]]) - } - - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - nmsed_additional_fields, num_detections - ) = post_processing.batch_multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, - max_size_per_class=max_output_size, max_total_size=max_output_size, - additional_fields=additional_fields) - - self.assertIsNone(nmsed_masks) - # Check static shapes - self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape) - self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape) - self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape) - self.assertEqual(len(nmsed_additional_fields), - len(exp_nms_additional_fields)) - for key in exp_nms_additional_fields: - self.assertAllEqual(nmsed_additional_fields[key].shape.as_list(), - exp_nms_additional_fields[key].shape) - self.assertEqual(num_detections.shape.as_list(), [2]) - - with self.test_session() as sess: - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields, - num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, - nmsed_additional_fields, num_detections]) - - self.assertAllClose(nmsed_boxes, exp_nms_corners) - self.assertAllClose(nmsed_scores, exp_nms_scores) - self.assertAllClose(nmsed_classes, exp_nms_classes) - for key in exp_nms_additional_fields: - self.assertAllClose(nmsed_additional_fields[key], - exp_nms_additional_fields[key]) - self.assertAllClose(num_detections, [2, 3]) - - def test_batch_multiclass_nms_with_dynamic_batch_size(self): - boxes_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2, 4)) - scores_placeholder = tf.placeholder(tf.float32, 
shape=(None, None, 2)) - masks_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2, 2, 2)) - - boxes = np.array([[[[0, 0, 1, 1], [0, 0, 4, 5]], - [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], - [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], - [[0, 10, 1, 11], [0, 10, 1, 11]]], - [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], - [[0, 100, 1, 101], [0, 100, 1, 101]], - [[0, 1000, 1, 1002], [0, 999, 2, 1004]], - [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]]) - scores = np.array([[[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0]], - [[.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]]) - masks = np.array([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]], - [[[2, 3], [4, 5]], [[3, 4], [5, 6]]], - [[[4, 5], [6, 7]], [[5, 6], [7, 8]]], - [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]], - [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]], - [[[10, 11], [12, 13]], [[11, 12], [13, 14]]], - [[[12, 13], [14, 15]], [[13, 14], [15, 16]]], - [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]]) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = np.array([[[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 0, 0, 0], - [0, 0, 0, 0]], - [[0, 999, 2, 1004], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101], - [0, 0, 0, 0]]]) - exp_nms_scores = np.array([[.95, .9, 0, 0], - [.85, .5, .3, 0]]) - exp_nms_classes = np.array([[0, 0, 0, 0], - [1, 0, 0, 0]]) - exp_nms_masks = np.array([[[[6, 7], [8, 9]], - [[0, 1], [2, 3]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]]], - [[[13, 14], [15, 16]], - [[8, 9], [10, 11]], - [[10, 11], [12, 13]], - [[0, 0], [0, 0]]]]) - - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - nmsed_additional_fields, num_detections - ) = post_processing.batch_multiclass_non_max_suppression( - boxes_placeholder, scores_placeholder, score_thresh, iou_thresh, - max_size_per_class=max_output_size, max_total_size=max_output_size, - masks=masks_placeholder) - - self.assertIsNone(nmsed_additional_fields) - # Check static shapes - self.assertAllEqual(nmsed_boxes.shape.as_list(), [None, 4, 4]) - self.assertAllEqual(nmsed_scores.shape.as_list(), [None, 4]) - self.assertAllEqual(nmsed_classes.shape.as_list(), [None, 4]) - self.assertAllEqual(nmsed_masks.shape.as_list(), [None, 4, 2, 2]) - self.assertEqual(num_detections.shape.as_list(), [None]) - - with self.test_session() as sess: - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, - nmsed_masks, num_detections], - feed_dict={boxes_placeholder: boxes, - scores_placeholder: scores, - masks_placeholder: masks}) - self.assertAllClose(nmsed_boxes, exp_nms_corners) - self.assertAllClose(nmsed_scores, exp_nms_scores) - self.assertAllClose(nmsed_classes, exp_nms_classes) - self.assertAllClose(num_detections, [2, 3]) - self.assertAllClose(nmsed_masks, exp_nms_masks) - - def test_batch_multiclass_nms_with_masks_and_num_valid_boxes(self): - boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], - [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], - [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], - [[0, 10, 1, 11], [0, 10, 1, 11]]], - [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], - [[0, 100, 1, 101], [0, 100, 1, 101]], - [[0, 1000, 1, 1002], [0, 999, 2, 1004]], - [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], - tf.float32) - scores = tf.constant([[[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0]], - [[.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]]) - masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]], - [[[2, 3], [4, 5]], [[3, 4], [5, 6]]], - [[[4, 5], [6, 7]], [[5, 6], [7, 8]]], - [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]], - [[[[8, 9], [10, 
11]], [[9, 10], [11, 12]]], - [[[10, 11], [12, 13]], [[11, 12], [13, 14]]], - [[[12, 13], [14, 15]], [[13, 14], [15, 16]]], - [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]], - tf.float32) - num_valid_boxes = tf.constant([1, 1], tf.int32) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = [[[0, 0, 1, 1], - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]], - [[0, 10.1, 1, 11.1], - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]]] - exp_nms_scores = [[.9, 0, 0, 0], - [.5, 0, 0, 0]] - exp_nms_classes = [[0, 0, 0, 0], - [0, 0, 0, 0]] - exp_nms_masks = [[[[0, 1], [2, 3]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]]], - [[[8, 9], [10, 11]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]]]] - - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - nmsed_additional_fields, num_detections - ) = post_processing.batch_multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, - max_size_per_class=max_output_size, max_total_size=max_output_size, - num_valid_boxes=num_valid_boxes, masks=masks) - - self.assertIsNone(nmsed_additional_fields) - - with self.test_session() as sess: - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, - nmsed_masks, num_detections]) - self.assertAllClose(nmsed_boxes, exp_nms_corners) - self.assertAllClose(nmsed_scores, exp_nms_scores) - self.assertAllClose(nmsed_classes, exp_nms_classes) - self.assertAllClose(num_detections, [1, 1]) - self.assertAllClose(nmsed_masks, exp_nms_masks) - - def test_batch_multiclass_nms_with_additional_fields_and_num_valid_boxes( - self): - boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], - [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], - [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], - [[0, 10, 1, 11], [0, 10, 1, 11]]], - [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], - [[0, 100, 1, 101], [0, 100, 1, 101]], - [[0, 1000, 1, 1002], [0, 999, 2, 1004]], - [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], - tf.float32) - scores = tf.constant([[[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0]], - [[.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]]) - additional_fields = { - 'keypoints': tf.constant( - [[[[6, 7], [8, 9]], - [[0, 1], [2, 3]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]]], - [[[13, 14], [15, 16]], - [[8, 9], [10, 11]], - [[10, 11], [12, 13]], - [[0, 0], [0, 0]]]], - tf.float32) - } - num_valid_boxes = tf.constant([1, 1], tf.int32) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = [[[0, 0, 1, 1], - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]], - [[0, 10.1, 1, 11.1], - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]]] - exp_nms_scores = [[.9, 0, 0, 0], - [.5, 0, 0, 0]] - exp_nms_classes = [[0, 0, 0, 0], - [0, 0, 0, 0]] - exp_nms_additional_fields = { - 'keypoints': np.array([[[[6, 7], [8, 9]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]]], - [[[13, 14], [15, 16]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]]]]) - } - - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - nmsed_additional_fields, num_detections - ) = post_processing.batch_multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, - max_size_per_class=max_output_size, max_total_size=max_output_size, - num_valid_boxes=num_valid_boxes, - additional_fields=additional_fields) - - self.assertIsNone(nmsed_masks) - - with self.test_session() as sess: - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields, - num_detections) = sess.run([nmsed_boxes, nmsed_scores, 
nmsed_classes, - nmsed_additional_fields, num_detections]) - - self.assertAllClose(nmsed_boxes, exp_nms_corners) - self.assertAllClose(nmsed_scores, exp_nms_scores) - self.assertAllClose(nmsed_classes, exp_nms_classes) - for key in exp_nms_additional_fields: - self.assertAllClose(nmsed_additional_fields[key], - exp_nms_additional_fields[key]) - self.assertAllClose(num_detections, [1, 1]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/prefetcher.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/prefetcher.py deleted file mode 100644 index e690c599fa74e024d9b7ec857628cdbfb0e3ee81..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/prefetcher.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Provides functions to prefetch tensors to feed into models.""" -import tensorflow as tf - - -def prefetch(tensor_dict, capacity): - """Creates a prefetch queue for tensors. - - Creates a FIFO queue to asynchronously enqueue tensor_dicts and returns a - dequeue op that evaluates to a tensor_dict. This function is useful in - prefetching preprocessed tensors so that the data is readily available for - consumers. - - Example input pipeline when you don't need batching: - ---------------------------------------------------- - key, string_tensor = slim.parallel_reader.parallel_read(...) - tensor_dict = decoder.decode(string_tensor) - tensor_dict = preprocessor.preprocess(tensor_dict, ...) - prefetch_queue = prefetcher.prefetch(tensor_dict, capacity=20) - tensor_dict = prefetch_queue.dequeue() - outputs = Model(tensor_dict) - ... - ---------------------------------------------------- - - For input pipelines with batching, refer to core/batcher.py - - Args: - tensor_dict: a dictionary of tensors to prefetch. - capacity: the size of the prefetch queue. - - Returns: - a FIFO prefetcher queue - """ - names = list(tensor_dict.keys()) - dtypes = [t.dtype for t in tensor_dict.values()] - shapes = [t.get_shape() for t in tensor_dict.values()] - prefetch_queue = tf.PaddingFIFOQueue(capacity, dtypes=dtypes, - shapes=shapes, - names=names, - name='prefetch_queue') - enqueue_op = prefetch_queue.enqueue(tensor_dict) - tf.train.queue_runner.add_queue_runner(tf.train.queue_runner.QueueRunner( - prefetch_queue, [enqueue_op])) - tf.summary.scalar('queue/%s/fraction_of_%d_full' % (prefetch_queue.name, - capacity), - tf.to_float(prefetch_queue.size()) * (1. 
/ capacity)) - return prefetch_queue diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/prefetcher_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/prefetcher_test.py deleted file mode 100644 index 63f557e3318c25d02434bc1dd0763f1df35b18ac..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/prefetcher_test.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.core.prefetcher.""" -import tensorflow as tf - -from object_detection.core import prefetcher - -slim = tf.contrib.slim - - -class PrefetcherTest(tf.test.TestCase): - - def test_prefetch_tensors_with_fully_defined_shapes(self): - with self.test_session() as sess: - batch_size = 10 - image_size = 32 - num_batches = 5 - examples = tf.Variable(tf.constant(0, dtype=tf.int64)) - counter = examples.count_up_to(num_batches) - image = tf.random_normal([batch_size, image_size, - image_size, 3], - dtype=tf.float32, - name='images') - label = tf.random_uniform([batch_size, 1], 0, 10, - dtype=tf.int32, name='labels') - - prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter, - 'image': image, - 'label': label}, - capacity=100) - tensor_dict = prefetch_queue.dequeue() - - self.assertAllEqual(tensor_dict['image'].get_shape().as_list(), - [batch_size, image_size, image_size, 3]) - self.assertAllEqual(tensor_dict['label'].get_shape().as_list(), - [batch_size, 1]) - - tf.initialize_all_variables().run() - with slim.queues.QueueRunners(sess): - for _ in range(num_batches): - results = sess.run(tensor_dict) - self.assertEquals(results['image'].shape, - (batch_size, image_size, image_size, 3)) - self.assertEquals(results['label'].shape, (batch_size, 1)) - with self.assertRaises(tf.errors.OutOfRangeError): - sess.run(tensor_dict) - - def test_prefetch_tensors_with_partially_defined_shapes(self): - with self.test_session() as sess: - batch_size = 10 - image_size = 32 - num_batches = 5 - examples = tf.Variable(tf.constant(0, dtype=tf.int64)) - counter = examples.count_up_to(num_batches) - image = tf.random_normal([batch_size, - tf.Variable(image_size), - tf.Variable(image_size), 3], - dtype=tf.float32, - name='image') - image.set_shape([batch_size, None, None, 3]) - label = tf.random_uniform([batch_size, tf.Variable(1)], 0, - 10, dtype=tf.int32, name='label') - label.set_shape([batch_size, None]) - - prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter, - 'image': image, - 'label': label}, - capacity=100) - tensor_dict = prefetch_queue.dequeue() - - self.assertAllEqual(tensor_dict['image'].get_shape().as_list(), - [batch_size, None, None, 3]) - self.assertAllEqual(tensor_dict['label'].get_shape().as_list(), - [batch_size, None]) - - tf.initialize_all_variables().run() - with slim.queues.QueueRunners(sess): - for _ in 
range(num_batches): - results = sess.run(tensor_dict) - self.assertEquals(results['image'].shape, - (batch_size, image_size, image_size, 3)) - self.assertEquals(results['label'].shape, (batch_size, 1)) - with self.assertRaises(tf.errors.OutOfRangeError): - sess.run(tensor_dict) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/preprocessor.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/preprocessor.py deleted file mode 100644 index 0fcdfcc69c273c634a9c7183e159f912e099c6c1..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/preprocessor.py +++ /dev/null @@ -1,3176 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Preprocess images and bounding boxes for detection. - -We perform two sets of operations in preprocessing stage: -(a) operations that are applied to both training and testing data, -(b) operations that are applied only to training data for the purpose of - data augmentation. - -A preprocessing function receives a set of inputs, -e.g. an image and bounding boxes, -performs an operation on them, and returns them. -Some examples are: randomly cropping the image, randomly mirroring the image, - randomly changing the brightness, contrast, hue and - randomly jittering the bounding boxes. - -The preprocess function receives a tensor_dict which is a dictionary that maps -different field names to their tensors. For example, -tensor_dict[fields.InputDataFields.image] holds the image tensor. -The image is a rank 4 tensor: [1, height, width, channels] with -dtype=tf.float32. The groundtruth_boxes is a rank 2 tensor: [N, 4] where -in each row there is a box with [ymin xmin ymax xmax]. -Boxes are in normalized coordinates meaning -their coordinate values range in [0, 1] - -To preprocess multiple images with the same operations in cases where -nondeterministic operations are used, a preprocessor_cache.PreprocessorCache -object can be passed into the preprocess function or individual operations. -All nondeterministic operations except random_jitter_boxes support caching. -E.g. -Let tensor_dict{1,2,3,4,5} be copies of the same inputs. -Let preprocess_options contain nondeterministic operation(s) excluding -random_jitter_boxes. 
- -cache1 = preprocessor_cache.PreprocessorCache() -cache2 = preprocessor_cache.PreprocessorCache() -a = preprocess(tensor_dict1, preprocess_options, preprocess_vars_cache=cache1) -b = preprocess(tensor_dict2, preprocess_options, preprocess_vars_cache=cache1) -c = preprocess(tensor_dict3, preprocess_options, preprocess_vars_cache=cache2) -d = preprocess(tensor_dict4, preprocess_options, preprocess_vars_cache=cache2) -e = preprocess(tensor_dict5, preprocess_options) - -Then the corresponding tensors of object pairs (a,b) and (c,d) -are guaranteed to be equal element-wise, but the equality of any other object -pair cannot be determined. - -Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing -functions receive a rank 3 tensor for processing the image. Thus, inside the -preprocess function we squeeze the image to become a rank 3 tensor and then -we pass it to the functions. At the end of the preprocess we expand the image -back to rank 4. -""" - -import functools -import inspect -import sys -import tensorflow as tf - -from tensorflow.python.ops import control_flow_ops - -from object_detection.core import box_list -from object_detection.core import box_list_ops -from object_detection.core import keypoint_ops -from object_detection.core import preprocessor_cache -from object_detection.core import standard_fields as fields -from object_detection.utils import shape_utils - - -def _apply_with_random_selector(x, - func, - num_cases, - preprocess_vars_cache=None, - key=''): - """Computes func(x, sel), with sel sampled from [0...num_cases-1]. - - If both preprocess_vars_cache AND key are the same between two calls, sel will - be the same value in both calls. - - Args: - x: input Tensor. - func: Python function to apply. - num_cases: Python int32, number of cases to sample sel from. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - key: variable identifier for preprocess_vars_cache. - - Returns: - The result of func(x, sel), where func receives the value of the - selector as a python integer, but sel is sampled dynamically. - """ - generator_func = functools.partial( - tf.random_uniform, [], maxval=num_cases, dtype=tf.int32) - rand_sel = _get_or_create_preprocess_rand_vars( - generator_func, preprocessor_cache.PreprocessorCache.SELECTOR, - preprocess_vars_cache, key) - - # Pass the real x only to one of the func calls. - return control_flow_ops.merge([func( - control_flow_ops.switch(x, tf.equal(rand_sel, case))[1], case) - for case in range(num_cases)])[0] - - -def _apply_with_random_selector_tuples(x, - func, - num_cases, - preprocess_vars_cache=None, - key=''): - """Computes func(x, sel), with sel sampled from [0...num_cases-1]. - - If both preprocess_vars_cache AND key are the same between two calls, sel will - be the same value in both calls. - - Args: - x: A tuple of input tensors. - func: Python function to apply. - num_cases: Python int32, number of cases to sample sel from. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - key: variable identifier for preprocess_vars_cache. - - Returns: - The result of func(x, sel), where func receives the value of the - selector as a python integer, but sel is sampled dynamically.
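# Editor's note: a minimal, hypothetical sketch (TensorFlow 1.x graph mode
# assumed) of the switch/merge selector pattern used by
# _apply_with_random_selector above. switch() forwards `x` only along the
# branch whose predicate is true, and merge() returns the single live result,
# so exactly one stand-in for func(x, case) executes per session run.
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

def demo_apply_one_of(x, num_cases=4):
  sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
  # Stand-in for func(x, sel): shift the tensor by the selected case index.
  outputs = [control_flow_ops.switch(x, tf.equal(sel, case))[1] + float(case)
             for case in range(num_cases)]
  return control_flow_ops.merge(outputs)[0]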
- """ - num_inputs = len(x) - generator_func = functools.partial( - tf.random_uniform, [], maxval=num_cases, dtype=tf.int32) - rand_sel = _get_or_create_preprocess_rand_vars( - generator_func, preprocessor_cache.PreprocessorCache.SELECTOR_TUPLES, - preprocess_vars_cache, key) - - # Pass the real x only to one of the func calls. - tuples = [list() for t in x] - for case in range(num_cases): - new_x = [control_flow_ops.switch(t, tf.equal(rand_sel, case))[1] for t in x] - output = func(tuple(new_x), case) - for j in range(num_inputs): - tuples[j].append(output[j]) - - for i in range(num_inputs): - tuples[i] = control_flow_ops.merge(tuples[i])[0] - return tuple(tuples) - - -def _get_or_create_preprocess_rand_vars(generator_func, - function_id, - preprocess_vars_cache, - key=''): - """Returns a tensor stored in preprocess_vars_cache or using generator_func. - - If the tensor was previously generated and appears in the PreprocessorCache, - the previously generated tensor will be returned. Otherwise, a new tensor - is generated using generator_func and stored in the cache. - - Args: - generator_func: A 0-argument function that generates a tensor. - function_id: identifier for the preprocessing function used. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - key: identifier for the variable stored. - Returns: - The generated tensor. - """ - if preprocess_vars_cache is not None: - var = preprocess_vars_cache.get(function_id, key) - if var is None: - var = generator_func() - preprocess_vars_cache.update(function_id, key, var) - else: - var = generator_func() - return var - - -def _random_integer(minval, maxval, seed): - """Returns a random 0-D tensor between minval and maxval. - - Args: - minval: minimum value of the random tensor. - maxval: maximum value of the random tensor. - seed: random seed. - - Returns: - A random 0-D tensor between minval and maxval. - """ - return tf.random_uniform( - [], minval=minval, maxval=maxval, dtype=tf.int32, seed=seed) - - -# TODO(mttang): This method is needed because the current -# tf.image.rgb_to_grayscale method does not support quantization. Replace with -# tf.image.rgb_to_grayscale after quantization support is added. -def _rgb_to_grayscale(images, name=None): - """Converts one or more images from RGB to Grayscale. - - Outputs a tensor of the same `DType` and rank as `images`. The size of the - last dimension of the output is 1, containing the Grayscale value of the - pixels. - - Args: - images: The RGB tensor to convert. Last dimension must have size 3 and - should contain RGB values. - name: A name for the operation (optional). - - Returns: - The converted grayscale image(s). - """ - with tf.name_scope(name, 'rgb_to_grayscale', [images]) as name: - images = tf.convert_to_tensor(images, name='images') - # Remember original dtype to so we can convert back if needed - orig_dtype = images.dtype - flt_image = tf.image.convert_image_dtype(images, tf.float32) - - # Reference for converting between RGB and grayscale. 
- # https://en.wikipedia.org/wiki/Luma_%28video%29 - rgb_weights = [0.2989, 0.5870, 0.1140] - rank_1 = tf.expand_dims(tf.rank(images) - 1, 0) - gray_float = tf.reduce_sum( - flt_image * rgb_weights, rank_1, keep_dims=True) - gray_float.set_shape(images.get_shape()[:-1].concatenate([1])) - return tf.image.convert_image_dtype(gray_float, orig_dtype, name=name) - - -def normalize_image(image, original_minval, original_maxval, target_minval, - target_maxval): - """Normalizes pixel values in the image. - - Moves the pixel values from the current [original_minval, original_maxval] - range to the [target_minval, target_maxval] range. - - Args: - image: rank 3 float32 tensor containing 1 - image -> [height, width, channels]. - original_minval: current image minimum value. - original_maxval: current image maximum value. - target_minval: target image minimum value. - target_maxval: target image maximum value. - - Returns: - image: image which is the same shape as input image. - """ - with tf.name_scope('NormalizeImage', values=[image]): - original_minval = float(original_minval) - original_maxval = float(original_maxval) - target_minval = float(target_minval) - target_maxval = float(target_maxval) - image = tf.to_float(image) - image = tf.subtract(image, original_minval) - image = tf.multiply(image, (target_maxval - target_minval) / - (original_maxval - original_minval)) - image = tf.add(image, target_minval) - return image - - -def retain_boxes_above_threshold(boxes, - labels, - label_scores, - multiclass_scores=None, - masks=None, - keypoints=None, - threshold=0.0): - """Retains boxes whose label score is above a given threshold. - - If the label score for a box is missing (represented by NaN), the box is - retained. The boxes that don't pass the threshold will not appear in the - returned tensor. - - Args: - boxes: float32 tensor of shape [num_instance, 4] representing boxes - location in normalized coordinates. - labels: rank 1 int32 tensor of shape [num_instance] containing the object - classes. - label_scores: float32 tensor of shape [num_instance] representing the - score for each box. - multiclass_scores: (optional) float32 tensor of shape - [num_instances, num_classes] representing the score for each box for each - class. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks are of - the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized - coordinates. - threshold: scalar python float.
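# Editor's note: an editorial usage sketch for normalize_image above (TF 1.x
# assumed). The op is the affine map
#   out = (in - original_minval) * (target_maxval - target_minval)
#         / (original_maxval - original_minval) + target_minval,
# e.g. mapping [0, 255] to [-1, 1]:
import tensorflow as tf

image = tf.constant([[[0.0, 128.0, 255.0]]])  # toy 1x1x3 image
normalized = normalize_image(image, original_minval=0, original_maxval=255,
                             target_minval=-1, target_maxval=1)
with tf.Session() as sess:
  print(sess.run(normalized))  # ~[[[-1.0, 0.0039, 1.0]]]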
- - Returns: - retained_boxes: [num_retained_instance, 4] - retained_labels: [num_retained_instance] - retained_label_scores: [num_retained_instance] - - If multiclass_scores, masks, or keypoints are not None, the function also - returns: - - retained_multiclass_scores: [num_retained_instance, num_classes] - retained_masks: [num_retained_instance, height, width] - retained_keypoints: [num_retained_instance, num_keypoints, 2] - """ - with tf.name_scope('RetainBoxesAboveThreshold', - values=[boxes, labels, label_scores]): - indices = tf.where( - tf.logical_or(label_scores > threshold, tf.is_nan(label_scores))) - indices = tf.squeeze(indices, axis=1) - retained_boxes = tf.gather(boxes, indices) - retained_labels = tf.gather(labels, indices) - retained_label_scores = tf.gather(label_scores, indices) - result = [retained_boxes, retained_labels, retained_label_scores] - - if multiclass_scores is not None: - retained_multiclass_scores = tf.gather(multiclass_scores, indices) - result.append(retained_multiclass_scores) - - if masks is not None: - retained_masks = tf.gather(masks, indices) - result.append(retained_masks) - - if keypoints is not None: - retained_keypoints = tf.gather(keypoints, indices) - result.append(retained_keypoints) - - return result - - -def _flip_boxes_left_right(boxes): - """Left-right flip the boxes. - - Args: - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - - Returns: - Flipped boxes. - """ - ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1) - flipped_xmin = tf.subtract(1.0, xmax) - flipped_xmax = tf.subtract(1.0, xmin) - flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], 1) - return flipped_boxes - - -def _flip_boxes_up_down(boxes): - """Up-down flip the boxes. - - Args: - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - - Returns: - Flipped boxes. - """ - ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1) - flipped_ymin = tf.subtract(1.0, ymax) - flipped_ymax = tf.subtract(1.0, ymin) - flipped_boxes = tf.concat([flipped_ymin, xmin, flipped_ymax, xmax], 1) - return flipped_boxes - - -def _rot90_boxes(boxes): - """Rotate boxes counter-clockwise by 90 degrees. - - Args: - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - - Returns: - Rotated boxes. - """ - ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1) - rotated_ymin = tf.subtract(1.0, xmax) - rotated_ymax = tf.subtract(1.0, xmin) - rotated_xmin = ymin - rotated_xmax = ymax - rotated_boxes = tf.concat( - [rotated_ymin, rotated_xmin, rotated_ymax, rotated_xmax], 1) - return rotated_boxes - - -def _flip_masks_left_right(masks): - """Left-right flip masks. - - Args: - masks: rank 3 float32 tensor with shape - [num_instances, height, width] representing instance masks. - - Returns: - flipped masks: rank 3 float32 tensor with shape - [num_instances, height, width] representing instance masks. - """ - return masks[:, :, ::-1] - - -def _flip_masks_up_down(masks): - """Up-down flip masks.
- - Args: - masks: rank 3 float32 tensor with shape - [num_instances, height, width] representing instance masks. - - Returns: - flipped masks: rank 3 float32 tensor with shape - [num_instances, height, width] representing instance masks. - """ - return masks[:, ::-1, :] - - -def _rot90_masks(masks): - """Rotate masks counter-clockwise by 90 degrees. - - Args: - masks: rank 3 float32 tensor with shape - [num_instances, height, width] representing instance masks. - - Returns: - rotated masks: rank 3 float32 tensor with shape - [num_instances, height, width] representing instance masks. - """ - masks = tf.transpose(masks, [0, 2, 1]) - return masks[:, ::-1, :] - - -def random_horizontal_flip(image, - boxes=None, - masks=None, - keypoints=None, - keypoint_flip_permutation=None, - seed=None, - preprocess_vars_cache=None): - """Randomly flips the image and detections horizontally. - - The probability of flipping the image is 50%. - - Args: - image: rank 3 float32 tensor with shape [height, width, channels]. - boxes: (optional) rank 2 float32 tensor with shape [N, 4] - containing the bounding boxes. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. - keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip - permutation. - seed: random seed - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - image: image which is the same shape as input image. - - If boxes, masks, keypoints, and keypoint_flip_permutation are not None, - the function also returns the following tensors. - - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - masks: rank 3 float32 tensor with shape [num_instances, height, width] - containing instance masks. - keypoints: rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2] - - Raises: - ValueError: if keypoints are provided but keypoint_flip_permutation is not. 
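# Editor's note: an editorial check (TF 1.x assumed) of the box geometry
# behind random_horizontal_flip: _flip_boxes_left_right above maps
# xmin -> 1 - xmax and xmax -> 1 - xmin while leaving ymin/ymax unchanged.
import tensorflow as tf

boxes = tf.constant([[0.1, 0.2, 0.5, 0.6]])  # [ymin, xmin, ymax, xmax]
flipped = _flip_boxes_left_right(boxes)
with tf.Session() as sess:
  print(sess.run(flipped))  # [[0.1, 0.4, 0.5, 0.8]]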
- """ - - def _flip_image(image): - # flip image - image_flipped = tf.image.flip_left_right(image) - return image_flipped - - if keypoints is not None and keypoint_flip_permutation is None: - raise ValueError( - 'keypoints are provided but keypoints_flip_permutation is not provided') - - with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]): - result = [] - # random variable defining whether to do flip or not - generator_func = functools.partial(tf.random_uniform, [], seed=seed) - do_a_flip_random = _get_or_create_preprocess_rand_vars( - generator_func, - preprocessor_cache.PreprocessorCache.HORIZONTAL_FLIP, - preprocess_vars_cache) - do_a_flip_random = tf.greater(do_a_flip_random, 0.5) - - # flip image - image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image) - result.append(image) - - # flip boxes - if boxes is not None: - boxes = tf.cond(do_a_flip_random, lambda: _flip_boxes_left_right(boxes), - lambda: boxes) - result.append(boxes) - - # flip masks - if masks is not None: - masks = tf.cond(do_a_flip_random, lambda: _flip_masks_left_right(masks), - lambda: masks) - result.append(masks) - - # flip keypoints - if keypoints is not None and keypoint_flip_permutation is not None: - permutation = keypoint_flip_permutation - keypoints = tf.cond( - do_a_flip_random, - lambda: keypoint_ops.flip_horizontal(keypoints, 0.5, permutation), - lambda: keypoints) - result.append(keypoints) - - return tuple(result) - - -def random_vertical_flip(image, - boxes=None, - masks=None, - keypoints=None, - keypoint_flip_permutation=None, - seed=None, - preprocess_vars_cache=None): - """Randomly flips the image and detections vertically. - - The probability of flipping the image is 50%. - - Args: - image: rank 3 float32 tensor with shape [height, width, channels]. - boxes: (optional) rank 2 float32 tensor with shape [N, 4] - containing the bounding boxes. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. - keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip - permutation. - seed: random seed - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - image: image which is the same shape as input image. - - If boxes, masks, keypoints, and keypoint_flip_permutation are not None, - the function also returns the following tensors. - - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - masks: rank 3 float32 tensor with shape [num_instances, height, width] - containing instance masks. - keypoints: rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2] - - Raises: - ValueError: if keypoints are provided but keypoint_flip_permutation is not. 
- """ - - def _flip_image(image): - # flip image - image_flipped = tf.image.flip_up_down(image) - return image_flipped - - if keypoints is not None and keypoint_flip_permutation is None: - raise ValueError( - 'keypoints are provided but keypoints_flip_permutation is not provided') - - with tf.name_scope('RandomVerticalFlip', values=[image, boxes]): - result = [] - # random variable defining whether to do flip or not - generator_func = functools.partial(tf.random_uniform, [], seed=seed) - do_a_flip_random = _get_or_create_preprocess_rand_vars( - generator_func, preprocessor_cache.PreprocessorCache.VERTICAL_FLIP, - preprocess_vars_cache) - do_a_flip_random = tf.greater(do_a_flip_random, 0.5) - - # flip image - image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image) - result.append(image) - - # flip boxes - if boxes is not None: - boxes = tf.cond(do_a_flip_random, lambda: _flip_boxes_up_down(boxes), - lambda: boxes) - result.append(boxes) - - # flip masks - if masks is not None: - masks = tf.cond(do_a_flip_random, lambda: _flip_masks_up_down(masks), - lambda: masks) - result.append(masks) - - # flip keypoints - if keypoints is not None and keypoint_flip_permutation is not None: - permutation = keypoint_flip_permutation - keypoints = tf.cond( - do_a_flip_random, - lambda: keypoint_ops.flip_vertical(keypoints, 0.5, permutation), - lambda: keypoints) - result.append(keypoints) - - return tuple(result) - - -def random_rotation90(image, - boxes=None, - masks=None, - keypoints=None, - seed=None, - preprocess_vars_cache=None): - """Randomly rotates the image and detections 90 degrees counter-clockwise. - - The probability of rotating the image is 50%. This can be combined with - random_horizontal_flip and random_vertical_flip to produce an output with a - uniform distribution of the eight possible 90 degree rotation / reflection - combinations. - - Args: - image: rank 3 float32 tensor with shape [height, width, channels]. - boxes: (optional) rank 2 float32 tensor with shape [N, 4] - containing the bounding boxes. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. - seed: random seed - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - image: image which is the same shape as input image. - - If boxes, masks, and keypoints, are not None, - the function also returns the following tensors. - - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - masks: rank 3 float32 tensor with shape [num_instances, height, width] - containing instance masks. 
- keypoints: rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2] - """ - - def _rot90_image(image): - # flip image - image_rotated = tf.image.rot90(image) - return image_rotated - - with tf.name_scope('RandomRotation90', values=[image, boxes]): - result = [] - - # random variable defining whether to rotate by 90 degrees or not - generator_func = functools.partial(tf.random_uniform, [], seed=seed) - do_a_rot90_random = _get_or_create_preprocess_rand_vars( - generator_func, preprocessor_cache.PreprocessorCache.ROTATION90, - preprocess_vars_cache) - do_a_rot90_random = tf.greater(do_a_rot90_random, 0.5) - - # flip image - image = tf.cond(do_a_rot90_random, lambda: _rot90_image(image), - lambda: image) - result.append(image) - - # flip boxes - if boxes is not None: - boxes = tf.cond(do_a_rot90_random, lambda: _rot90_boxes(boxes), - lambda: boxes) - result.append(boxes) - - # flip masks - if masks is not None: - masks = tf.cond(do_a_rot90_random, lambda: _rot90_masks(masks), - lambda: masks) - result.append(masks) - - # flip keypoints - if keypoints is not None: - keypoints = tf.cond( - do_a_rot90_random, - lambda: keypoint_ops.rot90(keypoints), - lambda: keypoints) - result.append(keypoints) - - return tuple(result) - - -def random_pixel_value_scale(image, - minval=0.9, - maxval=1.1, - seed=None, - preprocess_vars_cache=None): - """Scales each value in the pixels of the image. - - This function scales each pixel independently of the others. - For each value in the image tensor, it draws a random number between - minval and maxval and multiplies the value by it. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 255]. - minval: lower ratio of scaling pixel values. - maxval: upper ratio of scaling pixel values. - seed: random seed. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - image: image which is the same shape as input image. - """ - with tf.name_scope('RandomPixelValueScale', values=[image]): - generator_func = functools.partial( - tf.random_uniform, tf.shape(image), - minval=minval, maxval=maxval, - dtype=tf.float32, seed=seed) - color_coef = _get_or_create_preprocess_rand_vars( - generator_func, - preprocessor_cache.PreprocessorCache.PIXEL_VALUE_SCALE, - preprocess_vars_cache) - - image = tf.multiply(image, color_coef) - image = tf.clip_by_value(image, 0.0, 255.0) - - return image - - -def random_image_scale(image, - masks=None, - min_scale_ratio=0.5, - max_scale_ratio=2.0, - seed=None, - preprocess_vars_cache=None): - """Scales the image size. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels]. - masks: (optional) rank 3 float32 tensor containing masks with - size [height, width, num_masks]. The value is set to None if there are no - masks. - min_scale_ratio: minimum scaling ratio. - max_scale_ratio: maximum scaling ratio. - seed: random seed. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - image: image which is the same rank as input image. - masks: If masks is not None, resized masks which are the same rank as input - masks will be returned.
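# Editor's note: an illustrative reduction (not part of the original file,
# TF 1.x assumed) of what random_pixel_value_scale above computes once its
# random multiplier is drawn: every pixel gets its own coefficient from
# [minval, maxval], and the product is clipped back into [0, 255].
import tensorflow as tf

image = tf.random_uniform([4, 4, 3], maxval=255.0)
color_coef = tf.random_uniform(tf.shape(image), minval=0.9, maxval=1.1)
scaled = tf.clip_by_value(image * color_coef, 0.0, 255.0)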
- """ - with tf.name_scope('RandomImageScale', values=[image]): - result = [] - image_shape = tf.shape(image) - image_height = image_shape[0] - image_width = image_shape[1] - generator_func = functools.partial( - tf.random_uniform, [], - minval=min_scale_ratio, maxval=max_scale_ratio, - dtype=tf.float32, seed=seed) - size_coef = _get_or_create_preprocess_rand_vars( - generator_func, preprocessor_cache.PreprocessorCache.IMAGE_SCALE, - preprocess_vars_cache) - - image_newysize = tf.to_int32( - tf.multiply(tf.to_float(image_height), size_coef)) - image_newxsize = tf.to_int32( - tf.multiply(tf.to_float(image_width), size_coef)) - image = tf.image.resize_images( - image, [image_newysize, image_newxsize], align_corners=True) - result.append(image) - if masks: - masks = tf.image.resize_nearest_neighbor( - masks, [image_newysize, image_newxsize], align_corners=True) - result.append(masks) - return tuple(result) - - -def random_rgb_to_gray(image, - probability=0.1, - seed=None, - preprocess_vars_cache=None): - """Changes the image from RGB to Grayscale with the given probability. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 255]. - probability: the probability of returning a grayscale image. - The probability should be a number between [0, 1]. - seed: random seed. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - image: image which is the same shape as input image. - """ - def _image_to_gray(image): - image_gray1 = _rgb_to_grayscale(image) - image_gray3 = tf.image.grayscale_to_rgb(image_gray1) - return image_gray3 - - with tf.name_scope('RandomRGBtoGray', values=[image]): - # random variable defining whether to change to grayscale or not - generator_func = functools.partial(tf.random_uniform, [], seed=seed) - do_gray_random = _get_or_create_preprocess_rand_vars( - generator_func, preprocessor_cache.PreprocessorCache.RGB_TO_GRAY, - preprocess_vars_cache) - - image = tf.cond( - tf.greater(do_gray_random, probability), lambda: image, - lambda: _image_to_gray(image)) - - return image - - -def random_adjust_brightness(image, - max_delta=0.2, - seed=None, - preprocess_vars_cache=None): - """Randomly adjusts brightness. - - Makes sure the output image is still between 0 and 255. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 255]. - max_delta: how much to change the brightness. A value between [0, 1). - seed: random seed. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - image: image which is the same shape as input image. - boxes: boxes which is the same shape as input boxes. 
- """ - with tf.name_scope('RandomAdjustBrightness', values=[image]): - generator_func = functools.partial(tf.random_uniform, [], - -max_delta, max_delta, seed=seed) - delta = _get_or_create_preprocess_rand_vars( - generator_func, - preprocessor_cache.PreprocessorCache.ADJUST_BRIGHTNESS, - preprocess_vars_cache) - - image = tf.image.adjust_brightness(image / 255, delta) * 255 - image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0) - return image - - -def random_adjust_contrast(image, - min_delta=0.8, - max_delta=1.25, - seed=None, - preprocess_vars_cache=None): - """Randomly adjusts contrast. - - Makes sure the output image is still between 0 and 255. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 255]. - min_delta: see max_delta. - max_delta: how much to change the contrast. Contrast will change with a - value between min_delta and max_delta. This value will be - multiplied to the current contrast of the image. - seed: random seed. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - image: image which is the same shape as input image. - """ - with tf.name_scope('RandomAdjustContrast', values=[image]): - generator_func = functools.partial(tf.random_uniform, [], - min_delta, max_delta, seed=seed) - contrast_factor = _get_or_create_preprocess_rand_vars( - generator_func, - preprocessor_cache.PreprocessorCache.ADJUST_CONTRAST, - preprocess_vars_cache) - image = tf.image.adjust_contrast(image / 255, contrast_factor) * 255 - image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0) - return image - - -def random_adjust_hue(image, - max_delta=0.02, - seed=None, - preprocess_vars_cache=None): - """Randomly adjusts hue. - - Makes sure the output image is still between 0 and 255. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 255]. - max_delta: change hue randomly with a value between 0 and max_delta. - seed: random seed. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - image: image which is the same shape as input image. - """ - with tf.name_scope('RandomAdjustHue', values=[image]): - generator_func = functools.partial(tf.random_uniform, [], - -max_delta, max_delta, seed=seed) - delta = _get_or_create_preprocess_rand_vars( - generator_func, preprocessor_cache.PreprocessorCache.ADJUST_HUE, - preprocess_vars_cache) - image = tf.image.adjust_hue(image / 255, delta) * 255 - image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0) - return image - - -def random_adjust_saturation(image, - min_delta=0.8, - max_delta=1.25, - seed=None, - preprocess_vars_cache=None): - """Randomly adjusts saturation. - - Makes sure the output image is still between 0 and 255. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 255]. - min_delta: see max_delta. - max_delta: how much to change the saturation. Saturation will change with a - value between min_delta and max_delta. This value will be - multiplied to the current saturation of the image. 
- seed: random seed. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - image: image which is the same shape as input image. - """ - with tf.name_scope('RandomAdjustSaturation', values=[image]): - generator_func = functools.partial(tf.random_uniform, [], - min_delta, max_delta, seed=seed) - saturation_factor = _get_or_create_preprocess_rand_vars( - generator_func, - preprocessor_cache.PreprocessorCache.ADJUST_SATURATION, - preprocess_vars_cache) - image = tf.image.adjust_saturation(image / 255, saturation_factor) * 255 - image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0) - return image - - -def random_distort_color(image, color_ordering=0, preprocess_vars_cache=None): - """Randomly distorts color. - - Randomly distorts color using a combination of brightness, hue, contrast and - saturation changes. Makes sure the output image is still between 0 and 255. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 255]. - color_ordering: Python int, a type of distortion (valid values: 0, 1). - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - image: image which is the same shape as input image. - - Raises: - ValueError: if color_ordering is not in {0, 1}. - """ - with tf.name_scope('RandomDistortColor', values=[image]): - if color_ordering == 0: - image = random_adjust_brightness( - image, max_delta=32. / 255., - preprocess_vars_cache=preprocess_vars_cache) - image = random_adjust_saturation( - image, min_delta=0.5, max_delta=1.5, - preprocess_vars_cache=preprocess_vars_cache) - image = random_adjust_hue( - image, max_delta=0.2, - preprocess_vars_cache=preprocess_vars_cache) - image = random_adjust_contrast( - image, min_delta=0.5, max_delta=1.5, - preprocess_vars_cache=preprocess_vars_cache) - - elif color_ordering == 1: - image = random_adjust_brightness( - image, max_delta=32. / 255., - preprocess_vars_cache=preprocess_vars_cache) - image = random_adjust_contrast( - image, min_delta=0.5, max_delta=1.5, - preprocess_vars_cache=preprocess_vars_cache) - image = random_adjust_saturation( - image, min_delta=0.5, max_delta=1.5, - preprocess_vars_cache=preprocess_vars_cache) - image = random_adjust_hue( - image, max_delta=0.2, - preprocess_vars_cache=preprocess_vars_cache) - else: - raise ValueError('color_ordering must be in {0, 1}') - return image - - -def random_jitter_boxes(boxes, ratio=0.05, seed=None): - """Randomly jitter boxes in image. - - Args: - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - ratio: The ratio of the box width and height that the corners can jitter. - For example if the width is 100 pixels and ratio is 0.05, - the corners can jitter up to 5 pixels in the x direction. - seed: random seed. - - Returns: - boxes: boxes which is the same shape as input boxes. - """ - def random_jitter_box(box, ratio, seed): - """Randomly jitter box. - - Args: - box: bounding box [1, 1, 4]. 
- ratio: max ratio between jittered box and original box, - a number between [0, 0.5]. - seed: random seed. - - Returns: - jittered_box: jittered box. - """ - rand_numbers = tf.random_uniform( - [1, 1, 4], minval=-ratio, maxval=ratio, dtype=tf.float32, seed=seed) - box_width = tf.subtract(box[0, 0, 3], box[0, 0, 1]) - box_height = tf.subtract(box[0, 0, 2], box[0, 0, 0]) - hw_coefs = tf.stack([box_height, box_width, box_height, box_width]) - hw_rand_coefs = tf.multiply(hw_coefs, rand_numbers) - jittered_box = tf.add(box, hw_rand_coefs) - jittered_box = tf.clip_by_value(jittered_box, 0.0, 1.0) - return jittered_box - - with tf.name_scope('RandomJitterBoxes', values=[boxes]): - # boxes are [N, 4]. Lets first make them [N, 1, 1, 4] - boxes_shape = tf.shape(boxes) - boxes = tf.expand_dims(boxes, 1) - boxes = tf.expand_dims(boxes, 2) - - distorted_boxes = tf.map_fn( - lambda x: random_jitter_box(x, ratio, seed), boxes, dtype=tf.float32) - - distorted_boxes = tf.reshape(distorted_boxes, boxes_shape) - - return distorted_boxes - - -def _strict_random_crop_image(image, - boxes, - labels, - label_scores=None, - multiclass_scores=None, - masks=None, - keypoints=None, - min_object_covered=1.0, - aspect_ratio_range=(0.75, 1.33), - area_range=(0.1, 1.0), - overlap_thresh=0.3, - preprocess_vars_cache=None): - """Performs random crop. - - Note: boxes will be clipped to the crop. Keypoint coordinates that are - outside the crop will be set to NaN, which is consistent with the original - keypoint encoding for non-existing keypoints. This function always crops - the image and is supposed to be used by `random_crop_image` function which - sometimes returns image unchanged. - - Args: - image: rank 3 float32 tensor containing 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - boxes: rank 2 float32 tensor containing the bounding boxes with shape - [num_instances, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - labels: rank 1 int32 tensor containing the object classes. - label_scores: (optional) float32 tensor of shape [num_instances] - representing the score for each box. - multiclass_scores: (optional) float32 tensor of shape - [num_instances, num_classes] representing the score for each box for each - class. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. - min_object_covered: the cropped image must cover at least this fraction of - at least one of the input bounding boxes. - aspect_ratio_range: allowed range for aspect ratio of cropped image. - area_range: allowed range for area ratio between cropped image and the - original image. - overlap_thresh: minimum overlap thresh with new cropped - image to keep the box. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - image: image which is the same rank as input image. - boxes: boxes which is the same rank as input boxes. - Boxes are in normalized form. - labels: new labels. 
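# Editor's note: a worked example (editorial) of the jitter bound in
# random_jitter_boxes above: each corner offset is drawn from [-ratio, ratio]
# scaled by the box's own height/width, then the result is clipped to [0, 1].
import tensorflow as tf

boxes = tf.constant([[0.2, 0.2, 0.8, 0.6]])  # height 0.6, width 0.4
jittered = random_jitter_boxes(boxes, ratio=0.05)
# ymin/ymax each move by at most 0.05 * 0.6 = 0.03;
# xmin/xmax each move by at most 0.05 * 0.4 = 0.02.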
- - If label_scores, multiclass_scores, masks, or keypoints is not None, the - function also returns: - label_scores: rank 1 float32 tensor with shape [num_instances]. - multiclass_scores: rank 2 float32 tensor with shape - [num_instances, num_classes] - masks: rank 3 float32 tensor with shape [num_instances, height, width] - containing instance masks. - keypoints: rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2] - """ - with tf.name_scope('RandomCropImage', values=[image, boxes]): - image_shape = tf.shape(image) - - # boxes are [N, 4]. Lets first make them [N, 1, 4]. - boxes_expanded = tf.expand_dims( - tf.clip_by_value( - boxes, clip_value_min=0.0, clip_value_max=1.0), 1) - - generator_func = functools.partial( - tf.image.sample_distorted_bounding_box, - image_shape, - bounding_boxes=boxes_expanded, - min_object_covered=min_object_covered, - aspect_ratio_range=aspect_ratio_range, - area_range=area_range, - max_attempts=100, - use_image_if_no_bounding_boxes=True) - - # for ssd cropping, each value of min_object_covered has its own - # cached random variable - sample_distorted_bounding_box = _get_or_create_preprocess_rand_vars( - generator_func, - preprocessor_cache.PreprocessorCache.STRICT_CROP_IMAGE, - preprocess_vars_cache, key=min_object_covered) - - im_box_begin, im_box_size, im_box = sample_distorted_bounding_box - - new_image = tf.slice(image, im_box_begin, im_box_size) - new_image.set_shape([None, None, image.get_shape()[2]]) - - # [1, 4] - im_box_rank2 = tf.squeeze(im_box, squeeze_dims=[0]) - # [4] - im_box_rank1 = tf.squeeze(im_box) - - boxlist = box_list.BoxList(boxes) - boxlist.add_field('labels', labels) - - if label_scores is not None: - boxlist.add_field('label_scores', label_scores) - - if multiclass_scores is not None: - boxlist.add_field('multiclass_scores', multiclass_scores) - - im_boxlist = box_list.BoxList(im_box_rank2) - - # remove boxes that are outside cropped image - boxlist, inside_window_ids = box_list_ops.prune_completely_outside_window( - boxlist, im_box_rank1) - - # remove boxes that are outside image - overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes( - boxlist, im_boxlist, overlap_thresh) - - # change the coordinate of the remaining boxes - new_labels = overlapping_boxlist.get_field('labels') - new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist, - im_box_rank1) - new_boxes = new_boxlist.get() - new_boxes = tf.clip_by_value( - new_boxes, clip_value_min=0.0, clip_value_max=1.0) - - result = [new_image, new_boxes, new_labels] - - if label_scores is not None: - new_label_scores = overlapping_boxlist.get_field('label_scores') - result.append(new_label_scores) - - if multiclass_scores is not None: - new_multiclass_scores = overlapping_boxlist.get_field('multiclass_scores') - result.append(new_multiclass_scores) - - if masks is not None: - masks_of_boxes_inside_window = tf.gather(masks, inside_window_ids) - masks_of_boxes_completely_inside_window = tf.gather( - masks_of_boxes_inside_window, keep_ids) - masks_box_begin = [0, im_box_begin[0], im_box_begin[1]] - masks_box_size = [-1, im_box_size[0], im_box_size[1]] - new_masks = tf.slice( - masks_of_boxes_completely_inside_window, - masks_box_begin, masks_box_size) - result.append(new_masks) - - if keypoints is not None: - keypoints_of_boxes_inside_window = tf.gather(keypoints, inside_window_ids) - keypoints_of_boxes_completely_inside_window = tf.gather( - keypoints_of_boxes_inside_window, keep_ids) - new_keypoints = keypoint_ops.change_coordinate_frame( 
- keypoints_of_boxes_completely_inside_window, im_box_rank1) - new_keypoints = keypoint_ops.prune_outside_window(new_keypoints, - [0.0, 0.0, 1.0, 1.0]) - result.append(new_keypoints) - - return tuple(result) - - -def random_crop_image(image, - boxes, - labels, - label_scores=None, - multiclass_scores=None, - masks=None, - keypoints=None, - min_object_covered=1.0, - aspect_ratio_range=(0.75, 1.33), - area_range=(0.1, 1.0), - overlap_thresh=0.3, - random_coef=0.0, - seed=None, - preprocess_vars_cache=None): - """Randomly crops the image. - - Given the input image and its bounding boxes, this op randomly - crops a subimage. Given a user-provided set of input constraints, - the crop window is resampled until it satisfies these constraints. - If within 100 trials it is unable to find a valid crop, the original - image is returned. See the Args section for a description of the input - constraints. Both input boxes and returned Boxes are in normalized - form (e.g., lie in the unit square [0, 1]). - This function will return the original image with probability random_coef. - - Note: boxes will be clipped to the crop. Keypoint coordinates that are - outside the crop will be set to NaN, which is consistent with the original - keypoint encoding for non-existing keypoints. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - boxes: rank 2 float32 tensor containing the bounding boxes with shape - [num_instances, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - labels: rank 1 int32 tensor containing the object classes. - label_scores: (optional) float32 tensor of shape [num_instances]. - representing the score for each box. - multiclass_scores: (optional) float32 tensor of shape - [num_instances, num_classes] representing the score for each box for each - class. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. - min_object_covered: the cropped image must cover at least this fraction of - at least one of the input bounding boxes. - aspect_ratio_range: allowed range for aspect ratio of cropped image. - area_range: allowed range for area ratio between cropped image and the - original image. - overlap_thresh: minimum overlap thresh with new cropped - image to keep the box. - random_coef: a random coefficient that defines the chance of getting the - original image. If random_coef is 0, we will always get the - cropped image, and if it is 1.0, we will always get the - original image. - seed: random seed. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - image: Image shape will be [new_height, new_width, channels]. - boxes: boxes which is the same rank as input boxes. Boxes are in normalized - form. - labels: new labels. - - If label_scores, multiclass_scores, masks, or keypoints is not None, the - function also returns: - label_scores: rank 1 float32 tensor with shape [num_instances]. 
- multiclass_scores: rank 2 float32 tensor with shape - [num_instances, num_classes] - masks: rank 3 float32 tensor with shape [num_instances, height, width] - containing instance masks. - keypoints: rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2] - """ - - def strict_random_crop_image_fn(): - return _strict_random_crop_image( - image, - boxes, - labels, - label_scores=label_scores, - multiclass_scores=multiclass_scores, - masks=masks, - keypoints=keypoints, - min_object_covered=min_object_covered, - aspect_ratio_range=aspect_ratio_range, - area_range=area_range, - overlap_thresh=overlap_thresh, - preprocess_vars_cache=preprocess_vars_cache) - - # avoids tf.cond to make faster RCNN training on borg. See b/140057645. - if random_coef < sys.float_info.min: - result = strict_random_crop_image_fn() - else: - generator_func = functools.partial(tf.random_uniform, [], seed=seed) - do_a_crop_random = _get_or_create_preprocess_rand_vars( - generator_func, preprocessor_cache.PreprocessorCache.CROP_IMAGE, - preprocess_vars_cache) - do_a_crop_random = tf.greater(do_a_crop_random, random_coef) - - outputs = [image, boxes, labels] - - if label_scores is not None: - outputs.append(label_scores) - if multiclass_scores is not None: - outputs.append(multiclass_scores) - if masks is not None: - outputs.append(masks) - if keypoints is not None: - outputs.append(keypoints) - - result = tf.cond(do_a_crop_random, strict_random_crop_image_fn, - lambda: tuple(outputs)) - return result - - -def random_pad_image(image, - boxes, - min_image_size=None, - max_image_size=None, - pad_color=None, - seed=None, - preprocess_vars_cache=None): - """Randomly pads the image. - - This function randomly pads the image with zeros. The final size of the - padded image will be between min_image_size and max_image_size. - if min_image_size is smaller than the input image size, min_image_size will - be set to the input image size. The same for max_image_size. The input image - will be located at a uniformly random location inside the padded image. - The relative location of the boxes to the original image will remain the same. - - Args: - image: rank 3 float32 tensor containing 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - min_image_size: a tensor of size [min_height, min_width], type tf.int32. - If passed as None, will be set to image size - [height, width]. - max_image_size: a tensor of size [max_height, max_width], type tf.int32. - If passed as None, will be set to twice the - image [height * 2, width * 2]. - pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32. - if set as None, it will be set to average color of the input - image. - seed: random seed. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - image: Image shape will be [new_height, new_width, channels]. - boxes: boxes which is the same rank as input boxes. Boxes are in normalized - form. 
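# Editor's note: an editorial sketch (TF 1.x assumed) of the crop-window
# mechanics at the heart of _strict_random_crop_image above:
# tf.image.sample_distorted_bounding_box proposes a window covering at least
# min_object_covered of one groundtruth box, and tf.slice applies it; box
# pruning and coordinate re-normalization then follow as in the function body.
import tensorflow as tf

image = tf.zeros([480, 640, 3])
boxes = tf.constant([[0.2, 0.2, 0.8, 0.8]])  # normalized [ymin,xmin,ymax,xmax]
begin, size, window = tf.image.sample_distorted_bounding_box(
    tf.shape(image), bounding_boxes=tf.expand_dims(boxes, 0),
    min_object_covered=1.0, aspect_ratio_range=(0.75, 1.33),
    area_range=(0.1, 1.0), max_attempts=100,
    use_image_if_no_bounding_boxes=True)
cropped = tf.slice(image, begin, size)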
- """ - if pad_color is None: - pad_color = tf.reduce_mean(image, axis=[0, 1]) - - image_shape = tf.shape(image) - image_height = image_shape[0] - image_width = image_shape[1] - - if max_image_size is None: - max_image_size = tf.stack([image_height * 2, image_width * 2]) - max_image_size = tf.maximum(max_image_size, - tf.stack([image_height, image_width])) - - if min_image_size is None: - min_image_size = tf.stack([image_height, image_width]) - min_image_size = tf.maximum(min_image_size, - tf.stack([image_height, image_width])) - - target_height = tf.cond( - max_image_size[0] > min_image_size[0], - lambda: _random_integer(min_image_size[0], max_image_size[0], seed), - lambda: max_image_size[0]) - - target_width = tf.cond( - max_image_size[1] > min_image_size[1], - lambda: _random_integer(min_image_size[1], max_image_size[1], seed), - lambda: max_image_size[1]) - - offset_height = tf.cond( - target_height > image_height, - lambda: _random_integer(0, target_height - image_height, seed), - lambda: tf.constant(0, dtype=tf.int32)) - - offset_width = tf.cond( - target_width > image_width, - lambda: _random_integer(0, target_width - image_width, seed), - lambda: tf.constant(0, dtype=tf.int32)) - - gen_func = lambda: (target_height, target_width, offset_height, offset_width) - params = _get_or_create_preprocess_rand_vars( - gen_func, preprocessor_cache.PreprocessorCache.PAD_IMAGE, - preprocess_vars_cache) - target_height, target_width, offset_height, offset_width = params - - new_image = tf.image.pad_to_bounding_box( - image, - offset_height=offset_height, - offset_width=offset_width, - target_height=target_height, - target_width=target_width) - - # Setting color of the padded pixels - image_ones = tf.ones_like(image) - image_ones_padded = tf.image.pad_to_bounding_box( - image_ones, - offset_height=offset_height, - offset_width=offset_width, - target_height=target_height, - target_width=target_width) - image_color_padded = (1.0 - image_ones_padded) * pad_color - new_image += image_color_padded - - # setting boxes - new_window = tf.to_float( - tf.stack([ - -offset_height, -offset_width, target_height - offset_height, - target_width - offset_width - ])) - new_window /= tf.to_float( - tf.stack([image_height, image_width, image_height, image_width])) - boxlist = box_list.BoxList(boxes) - new_boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window) - new_boxes = new_boxlist.get() - - return new_image, new_boxes - - -def random_crop_pad_image(image, - boxes, - labels, - label_scores=None, - multiclass_scores=None, - min_object_covered=1.0, - aspect_ratio_range=(0.75, 1.33), - area_range=(0.1, 1.0), - overlap_thresh=0.3, - random_coef=0.0, - min_padded_size_ratio=(1.0, 1.0), - max_padded_size_ratio=(2.0, 2.0), - pad_color=None, - seed=None, - preprocess_vars_cache=None): - """Randomly crops and pads the image. - - Given an input image and its bounding boxes, this op first randomly crops - the image and then randomly pads the image with background values. Parameters - min_padded_size_ratio and max_padded_size_ratio, determine the range of the - final output image size. Specifically, the final image size will have a size - in the range of min_padded_size_ratio * tf.shape(image) and - max_padded_size_ratio * tf.shape(image). Note that these ratios are with - respect to the size of the original image, so we can't capture the same - effect easily by independently applying RandomCropImage - followed by RandomPadImage. 
- - Args: - image: rank 3 float32 tensor containing 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - labels: rank 1 int32 tensor containing the object classes. - label_scores: rank 1 float32 containing the label scores. - multiclass_scores: (optional) float32 tensor of shape - [num_instances, num_classes] representing the score for each box for each - class. - min_object_covered: the cropped image must cover at least this fraction of - at least one of the input bounding boxes. - aspect_ratio_range: allowed range for aspect ratio of cropped image. - area_range: allowed range for area ratio between cropped image and the - original image. - overlap_thresh: minimum overlap thresh with new cropped - image to keep the box. - random_coef: a random coefficient that defines the chance of getting the - original image. If random_coef is 0, we will always get the - cropped image, and if it is 1.0, we will always get the - original image. - min_padded_size_ratio: min ratio of padded image height and width to the - input image's height and width. - max_padded_size_ratio: max ratio of padded image height and width to the - input image's height and width. - pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32. - if set as None, it will be set to average color of the randomly - cropped image. - seed: random seed. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - padded_image: padded image. - padded_boxes: boxes which is the same rank as input boxes. Boxes are in - normalized form. - cropped_labels: cropped labels. - if label_scores is not None also returns: - cropped_label_scores: cropped label scores. - if multiclass_scores is not None also returns: - cropped_multiclass_scores: cropped_multiclass_scores. 
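# Editor's note: a worked example (editorial) of the output-size bounds in
# random_crop_pad_image: the padding ratios apply to the ORIGINAL image size,
# not to the intermediate crop, mirroring the min/max computation in the body:
import tensorflow as tf

min_image_size = tf.to_int32(tf.to_float(tf.stack([600, 800])) * (1.0, 1.0))
max_image_size = tf.to_int32(tf.to_float(tf.stack([600, 800])) * (2.0, 2.0))
# A 600x800 input is padded to between 600x800 and 1200x1600, whatever the
# intermediate crop size was.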
- - """ - image_size = tf.shape(image) - image_height = image_size[0] - image_width = image_size[1] - result = random_crop_image( - image=image, - boxes=boxes, - labels=labels, - label_scores=label_scores, - multiclass_scores=multiclass_scores, - min_object_covered=min_object_covered, - aspect_ratio_range=aspect_ratio_range, - area_range=area_range, - overlap_thresh=overlap_thresh, - random_coef=random_coef, - seed=seed, - preprocess_vars_cache=preprocess_vars_cache) - - cropped_image, cropped_boxes, cropped_labels = result[:3] - - min_image_size = tf.to_int32( - tf.to_float(tf.stack([image_height, image_width])) * - min_padded_size_ratio) - max_image_size = tf.to_int32( - tf.to_float(tf.stack([image_height, image_width])) * - max_padded_size_ratio) - - padded_image, padded_boxes = random_pad_image( - cropped_image, - cropped_boxes, - min_image_size=min_image_size, - max_image_size=max_image_size, - pad_color=pad_color, - seed=seed, - preprocess_vars_cache=preprocess_vars_cache) - - cropped_padded_output = (padded_image, padded_boxes, cropped_labels) - - index = 3 - if label_scores is not None: - cropped_label_scores = result[index] - cropped_padded_output += (cropped_label_scores,) - index += 1 - - if multiclass_scores is not None: - cropped_multiclass_scores = result[index] - cropped_padded_output += (cropped_multiclass_scores,) - - return cropped_padded_output - - -def random_crop_to_aspect_ratio(image, - boxes, - labels, - label_scores=None, - multiclass_scores=None, - masks=None, - keypoints=None, - aspect_ratio=1.0, - overlap_thresh=0.3, - seed=None, - preprocess_vars_cache=None): - """Randomly crops an image to the specified aspect ratio. - - Randomly crops the a portion of the image such that the crop is of the - specified aspect ratio, and the crop is as large as possible. If the specified - aspect ratio is larger than the aspect ratio of the image, this op will - randomly remove rows from the top and bottom of the image. If the specified - aspect ratio is less than the aspect ratio of the image, this op will randomly - remove cols from the left and right of the image. If the specified aspect - ratio is the same as the aspect ratio of the image, this op will return the - image. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - labels: rank 1 int32 tensor containing the object classes. - label_scores: (optional) float32 tensor of shape [num_instances] - representing the score for each box. - multiclass_scores: (optional) float32 tensor of shape - [num_instances, num_classes] representing the score for each box for each - class. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. - aspect_ratio: the aspect ratio of cropped image. - overlap_thresh: minimum overlap thresh with new cropped - image to keep the box. - seed: random seed. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. 
-                           function is called multiple times with the same
-                           non-null cache, it will perform deterministically.
-
-  Returns:
-    image: image which is the same rank as input image.
-    boxes: boxes which is the same rank as input boxes.
-           Boxes are in normalized form.
-    labels: new labels.
-
-    If label_scores, masks, keypoints, or multiclass_scores is not None, the
-    function also returns:
-    label_scores: rank 1 float32 tensor with shape [num_instances].
-    masks: rank 3 float32 tensor with shape [num_instances, height, width]
-           containing instance masks.
-    keypoints: rank 3 float32 tensor with shape
-               [num_instances, num_keypoints, 2]
-    multiclass_scores: rank 2 float32 tensor with shape
-                       [num_instances, num_classes]
-
-  Raises:
-    ValueError: If image is not a 3D tensor.
-  """
-  if len(image.get_shape()) != 3:
-    raise ValueError('Image should be 3D tensor')
-
-  with tf.name_scope('RandomCropToAspectRatio', values=[image]):
-    image_shape = tf.shape(image)
-    orig_height = image_shape[0]
-    orig_width = image_shape[1]
-    orig_aspect_ratio = tf.to_float(orig_width) / tf.to_float(orig_height)
-    new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32)
-    def target_height_fn():
-      return tf.to_int32(tf.round(tf.to_float(orig_width) / new_aspect_ratio))
-
-    target_height = tf.cond(orig_aspect_ratio >= new_aspect_ratio,
-                            lambda: orig_height, target_height_fn)
-
-    def target_width_fn():
-      return tf.to_int32(tf.round(tf.to_float(orig_height) * new_aspect_ratio))
-
-    target_width = tf.cond(orig_aspect_ratio <= new_aspect_ratio,
-                           lambda: orig_width, target_width_fn)
-
-    # Either offset_height = 0 and offset_width is randomly chosen from
-    # [0, orig_width - target_width), or else offset_width = 0 and
-    # offset_height is randomly chosen from [0, orig_height - target_height).
-    offset_height = _random_integer(0, orig_height - target_height + 1, seed)
-    offset_width = _random_integer(0, orig_width - target_width + 1, seed)
-
-    generator_func = lambda: (offset_height, offset_width)
-    offset_height, offset_width = _get_or_create_preprocess_rand_vars(
-        generator_func,
-        preprocessor_cache.PreprocessorCache.CROP_TO_ASPECT_RATIO,
-        preprocess_vars_cache)
-
-    new_image = tf.image.crop_to_bounding_box(
-        image, offset_height, offset_width, target_height, target_width)
-
-    im_box = tf.stack([
-        tf.to_float(offset_height) / tf.to_float(orig_height),
-        tf.to_float(offset_width) / tf.to_float(orig_width),
-        tf.to_float(offset_height + target_height) / tf.to_float(orig_height),
-        tf.to_float(offset_width + target_width) / tf.to_float(orig_width)
-    ])
-
-    boxlist = box_list.BoxList(boxes)
-    boxlist.add_field('labels', labels)
-
-    if label_scores is not None:
-      boxlist.add_field('label_scores', label_scores)
-
-    if multiclass_scores is not None:
-      boxlist.add_field('multiclass_scores', multiclass_scores)
-
-    im_boxlist = box_list.BoxList(tf.expand_dims(im_box, 0))
-
-    # remove boxes whose overlap with the crop window is less than
-    # overlap_thresh
-    overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes(
-        boxlist, im_boxlist, overlap_thresh)
-
-    # change the coordinate frame of the remaining boxes to the crop window
-    new_labels = overlapping_boxlist.get_field('labels')
-    new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
-                                                       im_box)
-    new_boxlist = box_list_ops.clip_to_window(new_boxlist,
-                                              tf.constant([0.0, 0.0, 1.0, 1.0],
-                                                          tf.float32))
-    new_boxes = new_boxlist.get()
-
-    result = [new_image, new_boxes, new_labels]
-
-    if label_scores is not None:
-      new_label_scores =
overlapping_boxlist.get_field('label_scores')
-      result.append(new_label_scores)
-
-    if multiclass_scores is not None:
-      new_multiclass_scores = overlapping_boxlist.get_field('multiclass_scores')
-      result.append(new_multiclass_scores)
-
-    if masks is not None:
-      masks_inside_window = tf.gather(masks, keep_ids)
-      masks_box_begin = tf.stack([0, offset_height, offset_width])
-      masks_box_size = tf.stack([-1, target_height, target_width])
-      new_masks = tf.slice(masks_inside_window, masks_box_begin, masks_box_size)
-      result.append(new_masks)
-
-    if keypoints is not None:
-      keypoints_inside_window = tf.gather(keypoints, keep_ids)
-      new_keypoints = keypoint_ops.change_coordinate_frame(
-          keypoints_inside_window, im_box)
-      new_keypoints = keypoint_ops.prune_outside_window(new_keypoints,
-                                                        [0.0, 0.0, 1.0, 1.0])
-      result.append(new_keypoints)
-
-    return tuple(result)
-
-
-def random_pad_to_aspect_ratio(image,
-                               boxes,
-                               masks=None,
-                               keypoints=None,
-                               aspect_ratio=1.0,
-                               min_padded_size_ratio=(1.0, 1.0),
-                               max_padded_size_ratio=(2.0, 2.0),
-                               seed=None,
-                               preprocess_vars_cache=None):
-  """Randomly zero pads an image to the specified aspect ratio.
-
-  Pads the image so that the resulting image will have the specified aspect
-  ratio without scaling less than the min_padded_size_ratio or more than the
-  max_padded_size_ratio. If the min_padded_size_ratio or max_padded_size_ratio
-  is lower than what is possible to maintain the aspect ratio, then this method
-  will use the least padding to achieve the specified aspect ratio.
-
-  Args:
-    image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
-      with pixel values varying between [0, 1].
-    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
-           Boxes are in normalized form meaning their coordinates vary
-           between [0, 1].
-           Each row is in the form of [ymin, xmin, ymax, xmax].
-    masks: (optional) rank 3 float32 tensor with shape
-           [num_instances, height, width] containing instance masks. The masks
-           are of the same height, width as the input `image`.
-    keypoints: (optional) rank 3 float32 tensor with shape
-               [num_instances, num_keypoints, 2]. The keypoints are in y-x
-               normalized coordinates.
-    aspect_ratio: aspect ratio of the final image.
-    min_padded_size_ratio: min ratio of padded image height and width to the
-                           input image's height and width.
-    max_padded_size_ratio: max ratio of padded image height and width to the
-                           input image's height and width.
-    seed: random seed.
-    preprocess_vars_cache: PreprocessorCache object that records previously
-                           performed augmentations. Updated in-place. If this
-                           function is called multiple times with the same
-                           non-null cache, it will perform deterministically.
-
-  Returns:
-    image: image which is the same rank as input image.
-    boxes: boxes which is the same rank as input boxes.
-           Boxes are in normalized form.
-
-    If masks or keypoints is not None, the function also returns:
-    masks: rank 3 float32 tensor with shape [num_instances, height, width]
-           containing instance masks.
-    keypoints: rank 3 float32 tensor with shape
-               [num_instances, num_keypoints, 2]
-
-  Raises:
-    ValueError: If image is not a 3D tensor.
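-
-  Example usage (an illustrative sketch; shapes and values are made up):
-    image = tf.random_uniform([300, 400, 3])
-    boxes = tf.constant([[0.25, 0.25, 0.75, 0.75]], dtype=tf.float32)
-    new_image, new_boxes = random_pad_to_aspect_ratio(
-        image, boxes, aspect_ratio=1.0)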
- """ - if len(image.get_shape()) != 3: - raise ValueError('Image should be 3D tensor') - - with tf.name_scope('RandomPadToAspectRatio', values=[image]): - image_shape = tf.shape(image) - image_height = tf.to_float(image_shape[0]) - image_width = tf.to_float(image_shape[1]) - image_aspect_ratio = image_width / image_height - new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32) - target_height = tf.cond( - image_aspect_ratio <= new_aspect_ratio, - lambda: image_height, - lambda: image_width / new_aspect_ratio) - target_width = tf.cond( - image_aspect_ratio >= new_aspect_ratio, - lambda: image_width, - lambda: image_height * new_aspect_ratio) - - min_height = tf.maximum( - min_padded_size_ratio[0] * image_height, target_height) - min_width = tf.maximum( - min_padded_size_ratio[1] * image_width, target_width) - max_height = tf.maximum( - max_padded_size_ratio[0] * image_height, target_height) - max_width = tf.maximum( - max_padded_size_ratio[1] * image_width, target_width) - - max_scale = tf.minimum(max_height / target_height, max_width / target_width) - min_scale = tf.minimum( - max_scale, - tf.maximum(min_height / target_height, min_width / target_width)) - - generator_func = functools.partial(tf.random_uniform, [], - min_scale, max_scale, seed=seed) - scale = _get_or_create_preprocess_rand_vars( - generator_func, - preprocessor_cache.PreprocessorCache.PAD_TO_ASPECT_RATIO, - preprocess_vars_cache) - - target_height = tf.round(scale * target_height) - target_width = tf.round(scale * target_width) - - new_image = tf.image.pad_to_bounding_box( - image, 0, 0, tf.to_int32(target_height), tf.to_int32(target_width)) - - im_box = tf.stack([ - 0.0, - 0.0, - target_height / image_height, - target_width / image_width - ]) - boxlist = box_list.BoxList(boxes) - new_boxlist = box_list_ops.change_coordinate_frame(boxlist, im_box) - new_boxes = new_boxlist.get() - - result = [new_image, new_boxes] - - if masks is not None: - new_masks = tf.expand_dims(masks, -1) - new_masks = tf.image.pad_to_bounding_box(new_masks, 0, 0, - tf.to_int32(target_height), - tf.to_int32(target_width)) - new_masks = tf.squeeze(new_masks, [-1]) - result.append(new_masks) - - if keypoints is not None: - new_keypoints = keypoint_ops.change_coordinate_frame(keypoints, im_box) - result.append(new_keypoints) - - return tuple(result) - - -def random_black_patches(image, - max_black_patches=10, - probability=0.5, - size_to_image_ratio=0.1, - random_seed=None, - preprocess_vars_cache=None): - """Randomly adds some black patches to the image. - - This op adds up to max_black_patches square black patches of a fixed size - to the image where size is specified via the size_to_image_ratio parameter. - - Args: - image: rank 3 float32 tensor containing 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - max_black_patches: number of times that the function tries to add a - black box to the image. - probability: at each try, what is the chance of adding a box. - size_to_image_ratio: Determines the ratio of the size of the black patches - to the size of the image. - box_size = size_to_image_ratio * - min(image_width, image_height) - random_seed: random seed. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - image - """ - def add_black_patch_to_image(image, idx): - """Function for adding one patch to the image. 
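-
-    The patch location is drawn via _get_or_create_preprocess_rand_vars, so
-    calls that share the same preprocess_vars_cache will place the patch at
-    the same position.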
-
-    Args:
-      image: the input image.
-      idx: counter for number of patches that could have been added.
-
-    Returns:
-      image with a randomly added black box
-    """
-    image_shape = tf.shape(image)
-    image_height = image_shape[0]
-    image_width = image_shape[1]
-    box_size = tf.to_int32(
-        tf.multiply(
-            tf.minimum(tf.to_float(image_height), tf.to_float(image_width)),
-            size_to_image_ratio))
-
-    generator_func = functools.partial(tf.random_uniform, [], minval=0.0,
-                                       maxval=(1.0 - size_to_image_ratio),
-                                       seed=random_seed)
-    normalized_y_min = _get_or_create_preprocess_rand_vars(
-        generator_func,
-        preprocessor_cache.PreprocessorCache.ADD_BLACK_PATCH,
-        preprocess_vars_cache, key=str(idx) + 'y')
-    normalized_x_min = _get_or_create_preprocess_rand_vars(
-        generator_func,
-        preprocessor_cache.PreprocessorCache.ADD_BLACK_PATCH,
-        preprocess_vars_cache, key=str(idx) + 'x')
-
-    y_min = tf.to_int32(normalized_y_min * tf.to_float(image_height))
-    x_min = tf.to_int32(normalized_x_min * tf.to_float(image_width))
-    black_box = tf.ones([box_size, box_size, 3], dtype=tf.float32)
-    mask = 1.0 - tf.image.pad_to_bounding_box(black_box, y_min, x_min,
-                                              image_height, image_width)
-    image = tf.multiply(image, mask)
-    return image
-
-  with tf.name_scope('RandomBlackPatchInImage', values=[image]):
-    for idx in range(max_black_patches):
-      generator_func = functools.partial(tf.random_uniform, [],
-                                         minval=0.0, maxval=1.0,
-                                         dtype=tf.float32, seed=random_seed)
-      random_prob = _get_or_create_preprocess_rand_vars(
-          generator_func,
-          preprocessor_cache.PreprocessorCache.BLACK_PATCHES,
-          preprocess_vars_cache, key=idx)
-      image = tf.cond(
-          tf.greater(random_prob, probability), lambda: image,
-          functools.partial(add_black_patch_to_image, image=image, idx=idx))
-    return image
-
-
-def image_to_float(image):
-  """Used in Faster R-CNN. Casts image pixel values to float.
-
-  Args:
-    image: input image, which might be in tf.uint8 or some other format.
-
-  Returns:
-    image: image in tf.float32 format.
-  """
-  with tf.name_scope('ImageToFloat', values=[image]):
-    image = tf.to_float(image)
-    return image
-
-
-def random_resize_method(image, target_size, preprocess_vars_cache=None):
-  """Uses a random resize method to resize the image to target size.
-
-  Args:
-    image: a rank 3 tensor.
-    target_size: a list of [target_height, target_width]
-    preprocess_vars_cache: PreprocessorCache object that records previously
-                           performed augmentations. Updated in-place. If this
-                           function is called multiple times with the same
-                           non-null cache, it will perform deterministically.
-
-  Returns:
-    resized image.
-  """
-
-  resized_image = _apply_with_random_selector(
-      image,
-      lambda x, method: tf.image.resize_images(x, target_size, method),
-      num_cases=4,
-      preprocess_vars_cache=preprocess_vars_cache,
-      key=preprocessor_cache.PreprocessorCache.RESIZE_METHOD)
-
-  return resized_image
-
-
-def _compute_new_static_size(image, min_dimension, max_dimension):
-  """Compute new static shape for resize_to_range method."""
-  image_shape = image.get_shape().as_list()
-  orig_height = image_shape[0]
-  orig_width = image_shape[1]
-  num_channels = image_shape[2]
-  orig_min_dim = min(orig_height, orig_width)
-  # Calculates the larger of the possible sizes
-  large_scale_factor = min_dimension / float(orig_min_dim)
-  # Scaling orig_(height|width) by large_scale_factor will make the smaller
-  # dimension equal to min_dimension, save for floating point rounding errors.
-  # For reasonably-sized images, taking the nearest integer will reliably
-  # eliminate this error.
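-  # For example, with min_dimension=800 and a 480x640 image,
-  # large_scale_factor = 800 / 480 = 1.667, so large_size = [800, 1067].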
- large_height = int(round(orig_height * large_scale_factor)) - large_width = int(round(orig_width * large_scale_factor)) - large_size = [large_height, large_width] - if max_dimension: - # Calculates the smaller of the possible sizes, use that if the larger - # is too big. - orig_max_dim = max(orig_height, orig_width) - small_scale_factor = max_dimension / float(orig_max_dim) - # Scaling orig_(height|width) by small_scale_factor will make the larger - # dimension equal to max_dimension, save for floating point rounding - # errors. For reasonably-sized images, taking the nearest integer will - # reliably eliminate this error. - small_height = int(round(orig_height * small_scale_factor)) - small_width = int(round(orig_width * small_scale_factor)) - small_size = [small_height, small_width] - new_size = large_size - if max(large_size) > max_dimension: - new_size = small_size - else: - new_size = large_size - return tf.constant(new_size + [num_channels]) - - -def _compute_new_dynamic_size(image, min_dimension, max_dimension): - """Compute new dynamic shape for resize_to_range method.""" - image_shape = tf.shape(image) - orig_height = tf.to_float(image_shape[0]) - orig_width = tf.to_float(image_shape[1]) - num_channels = image_shape[2] - orig_min_dim = tf.minimum(orig_height, orig_width) - # Calculates the larger of the possible sizes - min_dimension = tf.constant(min_dimension, dtype=tf.float32) - large_scale_factor = min_dimension / orig_min_dim - # Scaling orig_(height|width) by large_scale_factor will make the smaller - # dimension equal to min_dimension, save for floating point rounding errors. - # For reasonably-sized images, taking the nearest integer will reliably - # eliminate this error. - large_height = tf.to_int32(tf.round(orig_height * large_scale_factor)) - large_width = tf.to_int32(tf.round(orig_width * large_scale_factor)) - large_size = tf.stack([large_height, large_width]) - if max_dimension: - # Calculates the smaller of the possible sizes, use that if the larger - # is too big. - orig_max_dim = tf.maximum(orig_height, orig_width) - max_dimension = tf.constant(max_dimension, dtype=tf.float32) - small_scale_factor = max_dimension / orig_max_dim - # Scaling orig_(height|width) by small_scale_factor will make the larger - # dimension equal to max_dimension, save for floating point rounding - # errors. For reasonably-sized images, taking the nearest integer will - # reliably eliminate this error. - small_height = tf.to_int32(tf.round(orig_height * small_scale_factor)) - small_width = tf.to_int32(tf.round(orig_width * small_scale_factor)) - small_size = tf.stack([small_height, small_width]) - new_size = tf.cond( - tf.to_float(tf.reduce_max(large_size)) > max_dimension, - lambda: small_size, lambda: large_size) - else: - new_size = large_size - return tf.stack(tf.unstack(new_size) + [num_channels]) - - -def resize_to_range(image, - masks=None, - min_dimension=None, - max_dimension=None, - method=tf.image.ResizeMethod.BILINEAR, - align_corners=False, - pad_to_max_dimension=False, - per_channel_pad_value=(0, 0, 0)): - """Resizes an image so its dimensions are within the provided value. - - The output size can be described by two cases: - 1. If the image can be rescaled so its minimum dimension is equal to the - provided value without the other dimension exceeding max_dimension, - then do so. - 2. Otherwise, resize so the largest dimension is equal to max_dimension. 
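-
-  For example, with min_dimension=800 and max_dimension=1365, a 480x640 image
-  falls under case 1 and is resized to 800x1067, while a 300x1200 image falls
-  under case 2 and is resized to 341x1365.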
-
-  Args:
-    image: A 3D tensor of shape [height, width, channels]
-    masks: (optional) rank 3 float32 tensor with shape
-           [num_instances, height, width] containing instance masks.
-    min_dimension: (optional) (scalar) desired size of the smaller image
-                   dimension.
-    max_dimension: (optional) (scalar) maximum allowed size
-                   of the larger image dimension.
-    method: (optional) interpolation method used in resizing. Defaults to
-            BILINEAR.
-    align_corners: bool. If true, exactly align all 4 corners of the input
-                   and output. Defaults to False.
-    pad_to_max_dimension: Whether to resize the image and pad it with zeros
-      so the resulting image is of the spatial size
-      [max_dimension, max_dimension]. If masks are included they are padded
-      similarly.
-    per_channel_pad_value: A tuple of per-channel scalar values to use for
-      padding. By default pads zeros.
-
-  Returns:
-    Note that the position of the resized_image_shape changes based on whether
-    masks are present.
-    resized_image: A 3D tensor of shape [new_height, new_width, channels],
-      where the image has been resized (with bilinear interpolation) so that
-      min(new_height, new_width) == min_dimension or
-      max(new_height, new_width) == max_dimension.
-    resized_masks: If masks is not None, also outputs masks. A 3D tensor of
-      shape [num_instances, new_height, new_width].
-    resized_image_shape: A 1D tensor of shape [3] containing the shape of the
-      resized image.
-
-  Raises:
-    ValueError: if the image is not a 3D tensor.
-  """
-  if len(image.get_shape()) != 3:
-    raise ValueError('Image should be 3D tensor')
-
-  with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
-    if image.get_shape().is_fully_defined():
-      new_size = _compute_new_static_size(image, min_dimension, max_dimension)
-    else:
-      new_size = _compute_new_dynamic_size(image, min_dimension, max_dimension)
-    new_image = tf.image.resize_images(
-        image, new_size[:-1], method=method, align_corners=align_corners)
-
-    if pad_to_max_dimension:
-      channels = tf.unstack(new_image, axis=2)
-      if len(channels) != len(per_channel_pad_value):
-        raise ValueError('Number of channels must be equal to the length of '
-                         'per-channel pad value.')
-      new_image = tf.stack(
-          [
-              tf.pad(
-                  channels[i], [[0, max_dimension - new_size[0]],
-                                [0, max_dimension - new_size[1]]],
-                  constant_values=per_channel_pad_value[i])
-              for i in range(len(channels))
-          ],
-          axis=2)
-      new_image.set_shape([max_dimension, max_dimension, 3])
-
-    result = [new_image]
-    if masks is not None:
-      new_masks = tf.expand_dims(masks, 3)
-      new_masks = tf.image.resize_images(
-          new_masks,
-          new_size[:-1],
-          method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
-          align_corners=align_corners)
-      new_masks = tf.squeeze(new_masks, 3)
-      if pad_to_max_dimension:
-        new_masks = tf.image.pad_to_bounding_box(
-            new_masks, 0, 0, max_dimension, max_dimension)
-      result.append(new_masks)
-
-    result.append(new_size)
-    return result
-
-
-# TODO(alirezafathi): Make sure the static shapes are preserved.
-def resize_to_min_dimension(image, masks=None, min_dimension=600):
-  """Resizes image and masks given the min size maintaining the aspect ratio.
-
-  If one of the image dimensions is smaller than min_dimension, it will scale
-  the image such that its smallest dimension is equal to min_dimension.
-  Otherwise, the image size is kept as is.
-
-  Args:
-    image: a tensor of size [height, width, channels].
-    masks: (optional) a tensor of size [num_instances, height, width].
-    min_dimension: minimum image dimension.
- - Returns: - Note that the position of the resized_image_shape changes based on whether - masks are present. - resized_image: A tensor of size [new_height, new_width, channels]. - resized_masks: If masks is not None, also outputs masks. A 3D tensor of - shape [num_instances, new_height, new_width] - resized_image_shape: A 1D tensor of shape [3] containing the shape of the - resized image. - - Raises: - ValueError: if the image is not a 3D tensor. - """ - if len(image.get_shape()) != 3: - raise ValueError('Image should be 3D tensor') - - with tf.name_scope('ResizeGivenMinDimension', values=[image, min_dimension]): - image_height = tf.shape(image)[0] - image_width = tf.shape(image)[1] - num_channels = tf.shape(image)[2] - min_image_dimension = tf.minimum(image_height, image_width) - min_target_dimension = tf.maximum(min_image_dimension, min_dimension) - target_ratio = tf.to_float(min_target_dimension) / tf.to_float( - min_image_dimension) - target_height = tf.to_int32(tf.to_float(image_height) * target_ratio) - target_width = tf.to_int32(tf.to_float(image_width) * target_ratio) - image = tf.image.resize_bilinear( - tf.expand_dims(image, axis=0), - size=[target_height, target_width], - align_corners=True) - result = [tf.squeeze(image, axis=0)] - - if masks is not None: - masks = tf.image.resize_nearest_neighbor( - tf.expand_dims(masks, axis=3), - size=[target_height, target_width], - align_corners=True) - result.append(tf.squeeze(masks, axis=3)) - - result.append(tf.stack([target_height, target_width, num_channels])) - return result - - -def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None): - """Scales boxes from normalized to pixel coordinates. - - Args: - image: A 3D float32 tensor of shape [height, width, channels]. - boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding - boxes in normalized coordinates. Each row is of the form - [ymin, xmin, ymax, xmax]. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized - coordinates. - - Returns: - image: unchanged input image. - scaled_boxes: a 2D float32 tensor of shape [num_boxes, 4] containing the - bounding boxes in pixel coordinates. - scaled_keypoints: a 3D float32 tensor with shape - [num_instances, num_keypoints, 2] containing the keypoints in pixel - coordinates. - """ - boxlist = box_list.BoxList(boxes) - image_height = tf.shape(image)[0] - image_width = tf.shape(image)[1] - scaled_boxes = box_list_ops.scale(boxlist, image_height, image_width).get() - result = [image, scaled_boxes] - if keypoints is not None: - scaled_keypoints = keypoint_ops.scale(keypoints, image_height, image_width) - result.append(scaled_keypoints) - return tuple(result) - - -# TODO(alirezafathi): Investigate if instead the function should return None if -# masks is None. -# pylint: disable=g-doc-return-or-yield -def resize_image(image, - masks=None, - new_height=600, - new_width=1024, - method=tf.image.ResizeMethod.BILINEAR, - align_corners=False): - """Resizes images to the given height and width. - - Args: - image: A 3D tensor of shape [height, width, channels] - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. - new_height: (optional) (scalar) desired height of the image. - new_width: (optional) (scalar) desired width of the image. - method: (optional) interpolation method used in resizing. Defaults to - BILINEAR. - align_corners: bool. 
If true, exactly align all 4 corners of the input
-      and output. Defaults to False.
-
-  Returns:
-    Note that the position of the resized_image_shape changes based on whether
-    masks are present.
-    resized_image: A tensor of size [new_height, new_width, channels].
-    resized_masks: If masks is not None, also outputs masks. A 3D tensor of
-      shape [num_instances, new_height, new_width]
-    resized_image_shape: A 1D tensor of shape [3] containing the shape of the
-      resized image.
-  """
-  with tf.name_scope(
-      'ResizeImage',
-      values=[image, new_height, new_width, method, align_corners]):
-    new_image = tf.image.resize_images(
-        image, tf.stack([new_height, new_width]),
-        method=method,
-        align_corners=align_corners)
-    image_shape = shape_utils.combined_static_and_dynamic_shape(image)
-    result = [new_image]
-    if masks is not None:
-      num_instances = tf.shape(masks)[0]
-      new_size = tf.stack([new_height, new_width])
-      def resize_masks_branch():
-        new_masks = tf.expand_dims(masks, 3)
-        new_masks = tf.image.resize_nearest_neighbor(
-            new_masks, new_size, align_corners=align_corners)
-        new_masks = tf.squeeze(new_masks, axis=3)
-        return new_masks
-
-      def reshape_masks_branch():
-        # The shape function will be computed for both branches of the
-        # condition, regardless of which branch is actually taken. Make sure
-        # that we don't trigger an assertion in the shape function when trying
-        # to reshape a non empty tensor into an empty one.
-        new_masks = tf.reshape(masks, [-1, new_size[0], new_size[1]])
-        return new_masks
-
-      masks = tf.cond(num_instances > 0, resize_masks_branch,
-                      reshape_masks_branch)
-      result.append(masks)
-
-    result.append(tf.stack([new_height, new_width, image_shape[2]]))
-    return result
-
-
-def subtract_channel_mean(image, means=None):
-  """Normalizes an image by subtracting a mean from each channel.
-
-  Args:
-    image: A 3D tensor of shape [height, width, channels]
-    means: float list containing a mean for each channel
-  Returns:
-    normalized_images: a tensor of shape [height, width, channels]
-  Raises:
-    ValueError: if image is not a 3D tensor or if the number of means is not
-      equal to the number of channels.
-  """
-  with tf.name_scope('SubtractChannelMean', values=[image, means]):
-    if len(image.get_shape()) != 3:
-      raise ValueError('Input must be of size [height, width, channels]')
-    if len(means) != image.get_shape()[-1]:
-      raise ValueError('len(means) must match the number of channels')
-    return image - [[means]]
-
-
-def one_hot_encoding(labels, num_classes=None):
-  """One-hot encodes the multiclass labels.
-
-  Example usage:
-    labels = tf.constant([1, 4], dtype=tf.int32)
-    one_hot = one_hot_encoding(labels, num_classes=5)
-    one_hot.eval()    # evaluates to [0, 1, 0, 0, 1]
-
-  Args:
-    labels: A tensor of shape [None] corresponding to the labels.
-    num_classes: Number of classes in the dataset.
-  Returns:
-    onehot_labels: a tensor of shape [num_classes] corresponding to the one hot
-      encoding of the labels.
-  Raises:
-    ValueError: if num_classes is not specified.
-  """
-  with tf.name_scope('OneHotEncoding', values=[labels]):
-    if num_classes is None:
-      raise ValueError('num_classes must be specified')
-
-    labels = tf.one_hot(labels, num_classes, 1, 0)
-    return tf.reduce_max(labels, 0)
-
-
-def rgb_to_gray(image):
-  """Converts a 3 channel RGB image to a 1 channel grayscale image.
-
-  Args:
-    image: Rank 3 float32 tensor containing 1 image -> [height, width, 3]
-           with pixel values varying between [0, 1].
-
-  Returns:
-    image: A single channel grayscale image -> [height, width, 1].
- """ - return _rgb_to_grayscale(image) - - -def ssd_random_crop(image, - boxes, - labels, - label_scores=None, - multiclass_scores=None, - masks=None, - keypoints=None, - min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), - aspect_ratio_range=((0.5, 2.0),) * 7, - area_range=((0.1, 1.0),) * 7, - overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), - random_coef=(0.15,) * 7, - seed=None, - preprocess_vars_cache=None): - """Random crop preprocessing with default parameters as in SSD paper. - - Liu et al., SSD: Single shot multibox detector. - For further information on random crop preprocessing refer to RandomCrop - function above. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - labels: rank 1 int32 tensor containing the object classes. - label_scores: rank 1 float32 tensor containing the scores. - multiclass_scores: (optional) float32 tensor of shape - [num_instances, num_classes] representing the score for each box for each - class. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. - min_object_covered: the cropped image must cover at least this fraction of - at least one of the input bounding boxes. - aspect_ratio_range: allowed range for aspect ratio of cropped image. - area_range: allowed range for area ratio between cropped image and the - original image. - overlap_thresh: minimum overlap thresh with new cropped - image to keep the box. - random_coef: a random coefficient that defines the chance of getting the - original image. If random_coef is 0, we will always get the - cropped image, and if it is 1.0, we will always get the - original image. - seed: random seed. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - image: image which is the same rank as input image. - boxes: boxes which is the same rank as input boxes. - Boxes are in normalized form. - labels: new labels. - - If label_scores, multiclass_scores, masks, or keypoints is not None, the - function also returns: - label_scores: rank 1 float32 tensor with shape [num_instances]. - multiclass_scores: rank 2 float32 tensor with shape - [num_instances, num_classes] - masks: rank 3 float32 tensor with shape [num_instances, height, width] - containing instance masks. - keypoints: rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2] - """ - - def random_crop_selector(selected_result, index): - """Applies random_crop_image to selected result. - - Args: - selected_result: A tuple containing image, boxes, labels, keypoints (if - not None), and masks (if not None). - index: The index that was randomly selected. - - Returns: A tuple containing image, boxes, labels, keypoints (if not None), - and masks (if not None). 
- """ - - i = 3 - image, boxes, labels = selected_result[:i] - selected_label_scores = None - selected_multiclass_scores = None - selected_masks = None - selected_keypoints = None - if label_scores is not None: - selected_label_scores = selected_result[i] - i += 1 - if multiclass_scores is not None: - selected_multiclass_scores = selected_result[i] - i += 1 - if masks is not None: - selected_masks = selected_result[i] - i += 1 - if keypoints is not None: - selected_keypoints = selected_result[i] - - return random_crop_image( - image=image, - boxes=boxes, - labels=labels, - label_scores=selected_label_scores, - multiclass_scores=selected_multiclass_scores, - masks=selected_masks, - keypoints=selected_keypoints, - min_object_covered=min_object_covered[index], - aspect_ratio_range=aspect_ratio_range[index], - area_range=area_range[index], - overlap_thresh=overlap_thresh[index], - random_coef=random_coef[index], - seed=seed, - preprocess_vars_cache=preprocess_vars_cache) - - result = _apply_with_random_selector_tuples( - tuple( - t for t in (image, boxes, labels, label_scores, multiclass_scores, - masks, keypoints) if t is not None), - random_crop_selector, - num_cases=len(min_object_covered), - preprocess_vars_cache=preprocess_vars_cache, - key=preprocessor_cache.PreprocessorCache.SSD_CROP_SELECTOR_ID) - return result - - -def ssd_random_crop_pad(image, - boxes, - labels, - label_scores=None, - multiclass_scores=None, - min_object_covered=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0), - aspect_ratio_range=((0.5, 2.0),) * 6, - area_range=((0.1, 1.0),) * 6, - overlap_thresh=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0), - random_coef=(0.15,) * 6, - min_padded_size_ratio=((1.0, 1.0),) * 6, - max_padded_size_ratio=((2.0, 2.0),) * 6, - pad_color=(None,) * 6, - seed=None, - preprocess_vars_cache=None): - """Random crop preprocessing with default parameters as in SSD paper. - - Liu et al., SSD: Single shot multibox detector. - For further information on random crop preprocessing refer to RandomCrop - function above. - - Args: - image: rank 3 float32 tensor containing 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - labels: rank 1 int32 tensor containing the object classes. - label_scores: float32 tensor of shape [num_instances] representing the - score for each box. - multiclass_scores: (optional) float32 tensor of shape - [num_instances, num_classes] representing the score for each box for each - class. - min_object_covered: the cropped image must cover at least this fraction of - at least one of the input bounding boxes. - aspect_ratio_range: allowed range for aspect ratio of cropped image. - area_range: allowed range for area ratio between cropped image and the - original image. - overlap_thresh: minimum overlap thresh with new cropped - image to keep the box. - random_coef: a random coefficient that defines the chance of getting the - original image. If random_coef is 0, we will always get the - cropped image, and if it is 1.0, we will always get the - original image. - min_padded_size_ratio: min ratio of padded image height and width to the - input image's height and width. - max_padded_size_ratio: max ratio of padded image height and width to the - input image's height and width. - pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32. 
- if set as None, it will be set to average color of the randomly - cropped image. - seed: random seed. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - image: Image shape will be [new_height, new_width, channels]. - boxes: boxes which is the same rank as input boxes. Boxes are in normalized - form. - new_labels: new labels. - new_label_scores: new label scores. - """ - - def random_crop_pad_selector(image_boxes_labels, index): - """Random crop preprocessing helper.""" - i = 3 - image, boxes, labels = image_boxes_labels[:i] - selected_label_scores = None - selected_multiclass_scores = None - if label_scores is not None: - selected_label_scores = image_boxes_labels[i] - i += 1 - if multiclass_scores is not None: - selected_multiclass_scores = image_boxes_labels[i] - - return random_crop_pad_image( - image, - boxes, - labels, - label_scores=selected_label_scores, - multiclass_scores=selected_multiclass_scores, - min_object_covered=min_object_covered[index], - aspect_ratio_range=aspect_ratio_range[index], - area_range=area_range[index], - overlap_thresh=overlap_thresh[index], - random_coef=random_coef[index], - min_padded_size_ratio=min_padded_size_ratio[index], - max_padded_size_ratio=max_padded_size_ratio[index], - pad_color=pad_color[index], - seed=seed, - preprocess_vars_cache=preprocess_vars_cache) - - return _apply_with_random_selector_tuples( - tuple(t for t in (image, boxes, labels, label_scores, multiclass_scores) - if t is not None), - random_crop_pad_selector, - num_cases=len(min_object_covered), - preprocess_vars_cache=preprocess_vars_cache, - key=preprocessor_cache.PreprocessorCache.SSD_CROP_PAD_SELECTOR_ID) - - -def ssd_random_crop_fixed_aspect_ratio( - image, - boxes, - labels, - label_scores=None, - multiclass_scores=None, - masks=None, - keypoints=None, - min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), - aspect_ratio=1.0, - area_range=((0.1, 1.0),) * 7, - overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), - random_coef=(0.15,) * 7, - seed=None, - preprocess_vars_cache=None): - """Random crop preprocessing with default parameters as in SSD paper. - - Liu et al., SSD: Single shot multibox detector. - For further information on random crop preprocessing refer to RandomCrop - function above. - - The only difference is that the aspect ratio of the crops are fixed. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - labels: rank 1 int32 tensor containing the object classes. - label_scores: (optional) float32 tensor of shape [num_instances] - representing the score for each box. - multiclass_scores: (optional) float32 tensor of shape - [num_instances, num_classes] representing the score for each box for each - class. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. 
-    min_object_covered: the cropped image must cover at least this fraction of
-                        at least one of the input bounding boxes.
-    aspect_ratio: aspect ratio of the cropped image.
-    area_range: allowed range for area ratio between cropped image and the
-                original image.
-    overlap_thresh: minimum overlap thresh with new cropped
-                    image to keep the box.
-    random_coef: a random coefficient that defines the chance of getting the
-                 original image. If random_coef is 0, we will always get the
-                 cropped image, and if it is 1.0, we will always get the
-                 original image.
-    seed: random seed.
-    preprocess_vars_cache: PreprocessorCache object that records previously
-                           performed augmentations. Updated in-place. If this
-                           function is called multiple times with the same
-                           non-null cache, it will perform deterministically.
-
-  Returns:
-    image: image which is the same rank as input image.
-    boxes: boxes which is the same rank as input boxes.
-           Boxes are in normalized form.
-    labels: new labels.
-
-    If multiclass_scores, masks, or keypoints is not None, the function also
-    returns:
-
-    multiclass_scores: rank 2 float32 tensor with shape
-                       [num_instances, num_classes]
-    masks: rank 3 float32 tensor with shape [num_instances, height, width]
-           containing instance masks.
-    keypoints: rank 3 float32 tensor with shape
-               [num_instances, num_keypoints, 2]
-  """
-  aspect_ratio_range = ((aspect_ratio, aspect_ratio),) * len(area_range)
-
-  crop_result = ssd_random_crop(
-      image,
-      boxes,
-      labels,
-      label_scores=label_scores,
-      multiclass_scores=multiclass_scores,
-      masks=masks,
-      keypoints=keypoints,
-      min_object_covered=min_object_covered,
-      aspect_ratio_range=aspect_ratio_range,
-      area_range=area_range,
-      overlap_thresh=overlap_thresh,
-      random_coef=random_coef,
-      seed=seed,
-      preprocess_vars_cache=preprocess_vars_cache)
-  i = 3
-  new_image, new_boxes, new_labels = crop_result[:i]
-  new_label_scores = None
-  new_multiclass_scores = None
-  new_masks = None
-  new_keypoints = None
-  if label_scores is not None:
-    new_label_scores = crop_result[i]
-    i += 1
-  if multiclass_scores is not None:
-    new_multiclass_scores = crop_result[i]
-    i += 1
-  if masks is not None:
-    new_masks = crop_result[i]
-    i += 1
-  if keypoints is not None:
-    new_keypoints = crop_result[i]
-
-  result = random_crop_to_aspect_ratio(
-      new_image,
-      new_boxes,
-      new_labels,
-      label_scores=new_label_scores,
-      multiclass_scores=new_multiclass_scores,
-      masks=new_masks,
-      keypoints=new_keypoints,
-      aspect_ratio=aspect_ratio,
-      seed=seed,
-      preprocess_vars_cache=preprocess_vars_cache)
-
-  return result
-
-
-def ssd_random_crop_pad_fixed_aspect_ratio(
-    image,
-    boxes,
-    labels,
-    label_scores=None,
-    multiclass_scores=None,
-    masks=None,
-    keypoints=None,
-    min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
-    aspect_ratio=1.0,
-    aspect_ratio_range=((0.5, 2.0),) * 7,
-    area_range=((0.1, 1.0),) * 7,
-    overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
-    random_coef=(0.15,) * 7,
-    min_padded_size_ratio=(1.0, 1.0),
-    max_padded_size_ratio=(2.0, 2.0),
-    seed=None,
-    preprocess_vars_cache=None):
-  """Random crop and pad preprocessing with default parameters as in SSD paper.
-
-  Liu et al., SSD: Single shot multibox detector.
-  For further information on random crop preprocessing refer to RandomCrop
-  function above.
-
-  The only difference is that after the initial crop, images are zero-padded
-  to a fixed aspect ratio instead of being resized to that aspect ratio.
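-
-  For example, with aspect_ratio=1.0 a wide crop is zero padded at the bottom
-  until it is square, rather than being squashed to a square by resizing.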
- - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - labels: rank 1 int32 tensor containing the object classes. - label_scores: (optional) float32 tensor of shape [num_instances] - representing the score for each box. - multiclass_scores: (optional) float32 tensor of shape - [num_instances, num_classes] representing the score for each box for each - class. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. - min_object_covered: the cropped image must cover at least this fraction of - at least one of the input bounding boxes. - aspect_ratio: the final aspect ratio to pad to. - aspect_ratio_range: allowed range for aspect ratio of cropped image. - area_range: allowed range for area ratio between cropped image and the - original image. - overlap_thresh: minimum overlap thresh with new cropped - image to keep the box. - random_coef: a random coefficient that defines the chance of getting the - original image. If random_coef is 0, we will always get the - cropped image, and if it is 1.0, we will always get the - original image. - min_padded_size_ratio: min ratio of padded image height and width to the - input image's height and width. - max_padded_size_ratio: max ratio of padded image height and width to the - input image's height and width. - seed: random seed. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - image: image which is the same rank as input image. - boxes: boxes which is the same rank as input boxes. - Boxes are in normalized form. - labels: new labels. - - If multiclass_scores, masks, or keypoints is not None, the function also - returns: - - multiclass_scores: rank 2 with shape [num_instances, num_classes] - masks: rank 3 float32 tensor with shape [num_instances, height, width] - containing instance masks. 
- keypoints: rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2] - """ - crop_result = ssd_random_crop( - image, - boxes, - labels, - label_scores=label_scores, - multiclass_scores=multiclass_scores, - masks=masks, - keypoints=keypoints, - min_object_covered=min_object_covered, - aspect_ratio_range=aspect_ratio_range, - area_range=area_range, - overlap_thresh=overlap_thresh, - random_coef=random_coef, - seed=seed, - preprocess_vars_cache=preprocess_vars_cache) - i = 3 - new_image, new_boxes, new_labels = crop_result[:i] - new_label_scores = None - new_multiclass_scores = None - new_masks = None - new_keypoints = None - if label_scores is not None: - new_label_scores = crop_result[i] - i += 1 - if multiclass_scores is not None: - new_multiclass_scores = crop_result[i] - i += 1 - if masks is not None: - new_masks = crop_result[i] - i += 1 - if keypoints is not None: - new_keypoints = crop_result[i] - - result = random_pad_to_aspect_ratio( - new_image, - new_boxes, - masks=new_masks, - keypoints=new_keypoints, - aspect_ratio=aspect_ratio, - min_padded_size_ratio=min_padded_size_ratio, - max_padded_size_ratio=max_padded_size_ratio, - seed=seed, - preprocess_vars_cache=preprocess_vars_cache) - - result = list(result) - i = 3 - result.insert(2, new_labels) - if new_label_scores is not None: - result.insert(i, new_label_scores) - i += 1 - if multiclass_scores is not None: - result.insert(i, new_multiclass_scores) - result = tuple(result) - - return result - - -def get_default_func_arg_map(include_label_scores=False, - include_multiclass_scores=False, - include_instance_masks=False, - include_keypoints=False): - """Returns the default mapping from a preprocessor function to its args. - - Args: - include_label_scores: If True, preprocessing functions will modify the - label scores, too. - include_multiclass_scores: If True, preprocessing functions will modify the - multiclass scores, too. - include_instance_masks: If True, preprocessing functions will modify the - instance masks, too. - include_keypoints: If True, preprocessing functions will modify the - keypoints, too. - - Returns: - A map from preprocessing functions to the arguments they receive. 
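-
-  Example usage (an illustrative sketch):
-    func_arg_map = get_default_func_arg_map()
-    # func_arg_map[normalize_image] == (fields.InputDataFields.image,)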
- """ - groundtruth_label_scores = None - if include_label_scores: - groundtruth_label_scores = (fields.InputDataFields.groundtruth_label_scores) - - multiclass_scores = None - if include_multiclass_scores: - multiclass_scores = (fields.InputDataFields.multiclass_scores) - - groundtruth_instance_masks = None - if include_instance_masks: - groundtruth_instance_masks = ( - fields.InputDataFields.groundtruth_instance_masks) - - groundtruth_keypoints = None - if include_keypoints: - groundtruth_keypoints = fields.InputDataFields.groundtruth_keypoints - - prep_func_arg_map = { - normalize_image: (fields.InputDataFields.image,), - random_horizontal_flip: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - groundtruth_instance_masks, - groundtruth_keypoints, - ), - random_vertical_flip: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - groundtruth_instance_masks, - groundtruth_keypoints, - ), - random_rotation90: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - groundtruth_instance_masks, - groundtruth_keypoints, - ), - random_pixel_value_scale: (fields.InputDataFields.image,), - random_image_scale: ( - fields.InputDataFields.image, - groundtruth_instance_masks, - ), - random_rgb_to_gray: (fields.InputDataFields.image,), - random_adjust_brightness: (fields.InputDataFields.image,), - random_adjust_contrast: (fields.InputDataFields.image,), - random_adjust_hue: (fields.InputDataFields.image,), - random_adjust_saturation: (fields.InputDataFields.image,), - random_distort_color: (fields.InputDataFields.image,), - random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,), - random_crop_image: (fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_classes, - groundtruth_label_scores, multiclass_scores, - groundtruth_instance_masks, groundtruth_keypoints), - random_pad_image: (fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes), - random_crop_pad_image: (fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_classes, - groundtruth_label_scores, - multiclass_scores), - random_crop_to_aspect_ratio: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_classes, - groundtruth_label_scores, - multiclass_scores, - groundtruth_instance_masks, - groundtruth_keypoints, - ), - random_pad_to_aspect_ratio: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - groundtruth_instance_masks, - groundtruth_keypoints, - ), - random_black_patches: (fields.InputDataFields.image,), - retain_boxes_above_threshold: ( - fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_classes, - groundtruth_label_scores, - multiclass_scores, - groundtruth_instance_masks, - groundtruth_keypoints, - ), - image_to_float: (fields.InputDataFields.image,), - random_resize_method: (fields.InputDataFields.image,), - resize_to_range: ( - fields.InputDataFields.image, - groundtruth_instance_masks, - ), - resize_to_min_dimension: ( - fields.InputDataFields.image, - groundtruth_instance_masks, - ), - scale_boxes_to_pixel_coordinates: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - groundtruth_keypoints, - ), - resize_image: ( - fields.InputDataFields.image, - groundtruth_instance_masks, - ), - subtract_channel_mean: (fields.InputDataFields.image,), - one_hot_encoding: 
(fields.InputDataFields.groundtruth_image_classes,), - rgb_to_gray: (fields.InputDataFields.image,), - ssd_random_crop: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_classes, - groundtruth_label_scores, - multiclass_scores, - groundtruth_instance_masks, - groundtruth_keypoints - ), - ssd_random_crop_pad: (fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_classes, - groundtruth_label_scores, - multiclass_scores), - ssd_random_crop_fixed_aspect_ratio: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_classes, groundtruth_label_scores, - multiclass_scores, groundtruth_instance_masks, groundtruth_keypoints), - ssd_random_crop_pad_fixed_aspect_ratio: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_classes, - groundtruth_label_scores, - multiclass_scores, - groundtruth_instance_masks, - groundtruth_keypoints, - ), - } - - return prep_func_arg_map - - -def preprocess(tensor_dict, - preprocess_options, - func_arg_map=None, - preprocess_vars_cache=None): - """Preprocess images and bounding boxes. - - Various types of preprocessing (to be implemented) based on the - preprocess_options dictionary e.g. "crop image" (affects image and possibly - boxes), "white balance image" (affects only image), etc. If self._options - is None, no preprocessing is done. - - Args: - tensor_dict: dictionary that contains images, boxes, and can contain other - things as well. - images-> rank 4 float32 tensor contains - 1 image -> [1, height, width, 3]. - with pixel values varying between [0, 1] - boxes-> rank 2 float32 tensor containing - the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning - their coordinates vary between [0, 1]. - Each row is in the form - of [ymin, xmin, ymax, xmax]. - preprocess_options: It is a list of tuples, where each tuple contains a - function and a dictionary that contains arguments and - their values. - func_arg_map: mapping from preprocessing functions to arguments that they - expect to receive and return. - preprocess_vars_cache: PreprocessorCache object that records previously - performed augmentations. Updated in-place. If this - function is called multiple times with the same - non-null cache, it will perform deterministically. - - Returns: - tensor_dict: which contains the preprocessed images, bounding boxes, etc. - - Raises: - ValueError: (a) If the functions passed to Preprocess - are not in func_arg_map. - (b) If the arguments that a function needs - do not exist in tensor_dict. 
- (c) If image in tensor_dict is not rank 4 - """ - if func_arg_map is None: - func_arg_map = get_default_func_arg_map() - - # changes the images to image (rank 4 to rank 3) since the functions - # receive rank 3 tensor for image - if fields.InputDataFields.image in tensor_dict: - images = tensor_dict[fields.InputDataFields.image] - if len(images.get_shape()) != 4: - raise ValueError('images in tensor_dict should be rank 4') - image = tf.squeeze(images, squeeze_dims=[0]) - tensor_dict[fields.InputDataFields.image] = image - - # Preprocess inputs based on preprocess_options - for option in preprocess_options: - func, params = option - if func not in func_arg_map: - raise ValueError('The function %s does not exist in func_arg_map' % - (func.__name__)) - arg_names = func_arg_map[func] - for a in arg_names: - if a is not None and a not in tensor_dict: - raise ValueError('The function %s requires argument %s' % - (func.__name__, a)) - - def get_arg(key): - return tensor_dict[key] if key is not None else None - - args = [get_arg(a) for a in arg_names] - if (preprocess_vars_cache is not None and - 'preprocess_vars_cache' in inspect.getargspec(func).args): - params['preprocess_vars_cache'] = preprocess_vars_cache - results = func(*args, **params) - if not isinstance(results, (list, tuple)): - results = (results,) - # Removes None args since the return values will not contain those. - arg_names = [arg_name for arg_name in arg_names if arg_name is not None] - for res, arg_name in zip(results, arg_names): - tensor_dict[arg_name] = res - - # changes the image to images (rank 3 to rank 4) to be compatible to what - # we received in the first place - if fields.InputDataFields.image in tensor_dict: - image = tensor_dict[fields.InputDataFields.image] - images = tf.expand_dims(image, 0) - tensor_dict[fields.InputDataFields.image] = images - - return tensor_dict diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/preprocessor_cache.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/preprocessor_cache.py deleted file mode 100644 index 2822a2bab209f37738b0c807765624114973de4d..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/preprocessor_cache.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Records previous preprocessing operations and allows them to be repeated. - -Used with object_detection.core.preprocessor. Passing a PreprocessorCache -into individual data augmentation functions or the general preprocess() function -will store all randomly generated variables in the PreprocessorCache. When -a preprocessor function is called multiple times with the same -PreprocessorCache object, that function will perform the same augmentation -on all calls. 
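-
-Example usage (an illustrative sketch; tensor names are made up):
-  cache = preprocessor_cache.PreprocessorCache()
-  # Both calls make the same flip decision because they share the cache.
-  image_a, boxes_a = preprocessor.random_horizontal_flip(
-      image_a, boxes_a, preprocess_vars_cache=cache)
-  image_b, boxes_b = preprocessor.random_horizontal_flip(
-      image_b, boxes_b, preprocess_vars_cache=cache)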
-
-"""
-
-from collections import defaultdict
-
-
-class PreprocessorCache(object):
-  """Dictionary wrapper storing random variables generated during preprocessing.
-  """
-
-  # Constant keys representing different preprocessing functions
-  ROTATION90 = 'rotation90'
-  HORIZONTAL_FLIP = 'horizontal_flip'
-  VERTICAL_FLIP = 'vertical_flip'
-  PIXEL_VALUE_SCALE = 'pixel_value_scale'
-  IMAGE_SCALE = 'image_scale'
-  RGB_TO_GRAY = 'rgb_to_gray'
-  ADJUST_BRIGHTNESS = 'adjust_brightness'
-  ADJUST_CONTRAST = 'adjust_contrast'
-  ADJUST_HUE = 'adjust_hue'
-  ADJUST_SATURATION = 'adjust_saturation'
-  DISTORT_COLOR = 'distort_color'
-  STRICT_CROP_IMAGE = 'strict_crop_image'
-  CROP_IMAGE = 'crop_image'
-  PAD_IMAGE = 'pad_image'
-  CROP_TO_ASPECT_RATIO = 'crop_to_aspect_ratio'
-  RESIZE_METHOD = 'resize_method'
-  PAD_TO_ASPECT_RATIO = 'pad_to_aspect_ratio'
-  BLACK_PATCHES = 'black_patches'
-  ADD_BLACK_PATCH = 'add_black_patch'
-  SELECTOR = 'selector'
-  SELECTOR_TUPLES = 'selector_tuples'
-  SSD_CROP_SELECTOR_ID = 'ssd_crop_selector_id'
-  SSD_CROP_PAD_SELECTOR_ID = 'ssd_crop_pad_selector_id'
-
-  # 23 permitted function ids
-  _VALID_FNS = [ROTATION90, HORIZONTAL_FLIP, VERTICAL_FLIP, PIXEL_VALUE_SCALE,
-                IMAGE_SCALE, RGB_TO_GRAY, ADJUST_BRIGHTNESS, ADJUST_CONTRAST,
-                ADJUST_HUE, ADJUST_SATURATION, DISTORT_COLOR, STRICT_CROP_IMAGE,
-                CROP_IMAGE, PAD_IMAGE, CROP_TO_ASPECT_RATIO, RESIZE_METHOD,
-                PAD_TO_ASPECT_RATIO, BLACK_PATCHES, ADD_BLACK_PATCH, SELECTOR,
-                SELECTOR_TUPLES, SSD_CROP_SELECTOR_ID, SSD_CROP_PAD_SELECTOR_ID]
-
-  def __init__(self):
-    self._history = defaultdict(dict)
-
-  def clear(self):
-    """Resets cache."""
-    self._history = defaultdict(dict)
-
-  def get(self, function_id, key):
-    """Gets stored value given a function id and key.
-
-    Args:
-      function_id: identifier for the preprocessing function used.
-      key: identifier for the variable stored.
-    Returns:
-      value: the corresponding value, expected to be a tensor or
-             nested structure of tensors.
-    Raises:
-      ValueError: if function_id is not one of the 23 valid function ids.
-    """
-    if function_id not in self._VALID_FNS:
-      raise ValueError('Function id not recognized: %s.' % str(function_id))
-    return self._history[function_id].get(key)
-
-  def update(self, function_id, key, value):
-    """Adds a value to the dictionary.
-
-    Args:
-      function_id: identifier for the preprocessing function used.
-      key: identifier for the variable stored.
-      value: the value to store, expected to be a tensor or nested structure
-             of tensors.
-    Raises:
-      ValueError: if function_id is not one of the 23 valid function ids.
-    """
-    if function_id not in self._VALID_FNS:
-      raise ValueError('Function id not recognized: %s.' % str(function_id))
-    self._history[function_id][key] = value
-
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/preprocessor_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/preprocessor_test.py
deleted file mode 100644
index 588a3f90cb1ec5aa104bd8519ddd5fb5b30dd3be..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/preprocessor_test.py
+++ /dev/null
@@ -1,2814 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.core.preprocessor.""" - -import numpy as np -import six - -import tensorflow as tf - -from object_detection.core import preprocessor -from object_detection.core import preprocessor_cache -from object_detection.core import standard_fields as fields - -if six.PY2: - import mock # pylint: disable=g-import-not-at-top -else: - from unittest import mock # pylint: disable=g-import-not-at-top - - -class PreprocessorTest(tf.test.TestCase): - - def createColorfulTestImage(self): - ch255 = tf.fill([1, 100, 200, 1], tf.constant(255, dtype=tf.uint8)) - ch128 = tf.fill([1, 100, 200, 1], tf.constant(128, dtype=tf.uint8)) - ch0 = tf.fill([1, 100, 200, 1], tf.constant(0, dtype=tf.uint8)) - imr = tf.concat([ch255, ch0, ch0], 3) - img = tf.concat([ch255, ch255, ch0], 3) - imb = tf.concat([ch255, ch0, ch255], 3) - imw = tf.concat([ch128, ch128, ch128], 3) - imu = tf.concat([imr, img], 2) - imd = tf.concat([imb, imw], 2) - im = tf.concat([imu, imd], 1) - return im - - def createTestImages(self): - images_r = tf.constant([[[128, 128, 128, 128], [0, 0, 128, 128], - [0, 128, 128, 128], [192, 192, 128, 128]]], - dtype=tf.uint8) - images_r = tf.expand_dims(images_r, 3) - images_g = tf.constant([[[0, 0, 128, 128], [0, 0, 128, 128], - [0, 128, 192, 192], [192, 192, 128, 192]]], - dtype=tf.uint8) - images_g = tf.expand_dims(images_g, 3) - images_b = tf.constant([[[128, 128, 192, 0], [0, 0, 128, 192], - [0, 128, 128, 0], [192, 192, 192, 128]]], - dtype=tf.uint8) - images_b = tf.expand_dims(images_b, 3) - images = tf.concat([images_r, images_g, images_b], 3) - return images - - def createEmptyTestBoxes(self): - boxes = tf.constant([[]], dtype=tf.float32) - return boxes - - def createTestBoxes(self): - boxes = tf.constant( - [[0.0, 0.25, 0.75, 1.0], [0.25, 0.5, 0.75, 1.0]], dtype=tf.float32) - return boxes - - def createTestLabelScores(self): - return tf.constant([1.0, 0.5], dtype=tf.float32) - - def createTestLabelScoresWithMissingScore(self): - return tf.constant([0.5, np.nan], dtype=tf.float32) - - def createTestMasks(self): - mask = np.array([ - [[255.0, 0.0, 0.0], - [255.0, 0.0, 0.0], - [255.0, 0.0, 0.0]], - [[255.0, 255.0, 0.0], - [255.0, 255.0, 0.0], - [255.0, 255.0, 0.0]]]) - return tf.constant(mask, dtype=tf.float32) - - def createTestKeypoints(self): - keypoints = np.array([ - [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], - [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]], - ]) - return tf.constant(keypoints, dtype=tf.float32) - - def createTestKeypointsInsideCrop(self): - keypoints = np.array([ - [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]], - [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]], - ]) - return tf.constant(keypoints, dtype=tf.float32) - - def createTestKeypointsOutsideCrop(self): - keypoints = np.array([ - [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], - [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], - ]) - return tf.constant(keypoints, dtype=tf.float32) - - def createKeypointFlipPermutation(self): - return np.array([0, 2, 1], dtype=np.int32) - - def createTestLabels(self): - labels = tf.constant([1, 2], dtype=tf.int32) - return 
labels - - def createTestBoxesOutOfImage(self): - boxes = tf.constant( - [[-0.1, 0.25, 0.75, 1], [0.25, 0.5, 0.75, 1.1]], dtype=tf.float32) - return boxes - - def createTestMultiClassScores(self): - return tf.constant([[1.0, 0.0], [0.5, 0.5]], dtype=tf.float32) - - def expectedImagesAfterNormalization(self): - images_r = tf.constant([[[0, 0, 0, 0], [-1, -1, 0, 0], - [-1, 0, 0, 0], [0.5, 0.5, 0, 0]]], - dtype=tf.float32) - images_r = tf.expand_dims(images_r, 3) - images_g = tf.constant([[[-1, -1, 0, 0], [-1, -1, 0, 0], - [-1, 0, 0.5, 0.5], [0.5, 0.5, 0, 0.5]]], - dtype=tf.float32) - images_g = tf.expand_dims(images_g, 3) - images_b = tf.constant([[[0, 0, 0.5, -1], [-1, -1, 0, 0.5], - [-1, 0, 0, -1], [0.5, 0.5, 0.5, 0]]], - dtype=tf.float32) - images_b = tf.expand_dims(images_b, 3) - images = tf.concat([images_r, images_g, images_b], 3) - return images - - def expectedMaxImageAfterColorScale(self): - images_r = tf.constant([[[0.1, 0.1, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1], - [-0.9, 0.1, 0.1, 0.1], [0.6, 0.6, 0.1, 0.1]]], - dtype=tf.float32) - images_r = tf.expand_dims(images_r, 3) - images_g = tf.constant([[[-0.9, -0.9, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1], - [-0.9, 0.1, 0.6, 0.6], [0.6, 0.6, 0.1, 0.6]]], - dtype=tf.float32) - images_g = tf.expand_dims(images_g, 3) - images_b = tf.constant([[[0.1, 0.1, 0.6, -0.9], [-0.9, -0.9, 0.1, 0.6], - [-0.9, 0.1, 0.1, -0.9], [0.6, 0.6, 0.6, 0.1]]], - dtype=tf.float32) - images_b = tf.expand_dims(images_b, 3) - images = tf.concat([images_r, images_g, images_b], 3) - return images - - def expectedMinImageAfterColorScale(self): - images_r = tf.constant([[[-0.1, -0.1, -0.1, -0.1], [-1, -1, -0.1, -0.1], - [-1, -0.1, -0.1, -0.1], [0.4, 0.4, -0.1, -0.1]]], - dtype=tf.float32) - images_r = tf.expand_dims(images_r, 3) - images_g = tf.constant([[[-1, -1, -0.1, -0.1], [-1, -1, -0.1, -0.1], - [-1, -0.1, 0.4, 0.4], [0.4, 0.4, -0.1, 0.4]]], - dtype=tf.float32) - images_g = tf.expand_dims(images_g, 3) - images_b = tf.constant([[[-0.1, -0.1, 0.4, -1], [-1, -1, -0.1, 0.4], - [-1, -0.1, -0.1, -1], [0.4, 0.4, 0.4, -0.1]]], - dtype=tf.float32) - images_b = tf.expand_dims(images_b, 3) - images = tf.concat([images_r, images_g, images_b], 3) - return images - - def expectedImagesAfterLeftRightFlip(self): - images_r = tf.constant([[[0, 0, 0, 0], [0, 0, -1, -1], - [0, 0, 0, -1], [0, 0, 0.5, 0.5]]], - dtype=tf.float32) - images_r = tf.expand_dims(images_r, 3) - images_g = tf.constant([[[0, 0, -1, -1], [0, 0, -1, -1], - [0.5, 0.5, 0, -1], [0.5, 0, 0.5, 0.5]]], - dtype=tf.float32) - images_g = tf.expand_dims(images_g, 3) - images_b = tf.constant([[[-1, 0.5, 0, 0], [0.5, 0, -1, -1], - [-1, 0, 0, -1], [0, 0.5, 0.5, 0.5]]], - dtype=tf.float32) - images_b = tf.expand_dims(images_b, 3) - images = tf.concat([images_r, images_g, images_b], 3) - return images - - def expectedImagesAfterUpDownFlip(self): - images_r = tf.constant([[[0.5, 0.5, 0, 0], [-1, 0, 0, 0], - [-1, -1, 0, 0], [0, 0, 0, 0]]], - dtype=tf.float32) - images_r = tf.expand_dims(images_r, 3) - images_g = tf.constant([[[0.5, 0.5, 0, 0.5], [-1, 0, 0.5, 0.5], - [-1, -1, 0, 0], [-1, -1, 0, 0]]], - dtype=tf.float32) - images_g = tf.expand_dims(images_g, 3) - images_b = tf.constant([[[0.5, 0.5, 0.5, 0], [-1, 0, 0, -1], - [-1, -1, 0, 0.5], [0, 0, 0.5, -1]]], - dtype=tf.float32) - images_b = tf.expand_dims(images_b, 3) - images = tf.concat([images_r, images_g, images_b], 3) - return images - - def expectedImagesAfterRot90(self): - images_r = tf.constant([[[0, 0, 0, 0], [0, 0, 0, 0], - [0, -1, 0, 0.5], [0, -1, -1, 0.5]]], - 
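-                            # Editor's note: tf.image.rot90 is counter-clockwise,
-                            # i.e. output[i, j] = input[j, width - 1 - i]; each
-                            # expected row is an input column read top to bottom,
-                            # starting from the rightmost column.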
dtype=tf.float32) - images_r = tf.expand_dims(images_r, 3) - images_g = tf.constant([[[0, 0, 0.5, 0.5], [0, 0, 0.5, 0], - [-1, -1, 0, 0.5], [-1, -1, -1, 0.5]]], - dtype=tf.float32) - images_g = tf.expand_dims(images_g, 3) - images_b = tf.constant([[[-1, 0.5, -1, 0], [0.5, 0, 0, 0.5], - [0, -1, 0, 0.5], [0, -1, -1, 0.5]]], - dtype=tf.float32) - images_b = tf.expand_dims(images_b, 3) - images = tf.concat([images_r, images_g, images_b], 3) - return images - - def expectedBoxesAfterLeftRightFlip(self): - boxes = tf.constant([[0.0, 0.0, 0.75, 0.75], [0.25, 0.0, 0.75, 0.5]], - dtype=tf.float32) - return boxes - - def expectedBoxesAfterUpDownFlip(self): - boxes = tf.constant([[0.25, 0.25, 1.0, 1.0], [0.25, 0.5, 0.75, 1.0]], - dtype=tf.float32) - return boxes - - def expectedBoxesAfterRot90(self): - boxes = tf.constant( - [[0.0, 0.0, 0.75, 0.75], [0.0, 0.25, 0.5, 0.75]], dtype=tf.float32) - return boxes - - def expectedMasksAfterLeftRightFlip(self): - mask = np.array([ - [[0.0, 0.0, 255.0], - [0.0, 0.0, 255.0], - [0.0, 0.0, 255.0]], - [[0.0, 255.0, 255.0], - [0.0, 255.0, 255.0], - [0.0, 255.0, 255.0]]]) - return tf.constant(mask, dtype=tf.float32) - - def expectedMasksAfterUpDownFlip(self): - mask = np.array([ - [[255.0, 0.0, 0.0], - [255.0, 0.0, 0.0], - [255.0, 0.0, 0.0]], - [[255.0, 255.0, 0.0], - [255.0, 255.0, 0.0], - [255.0, 255.0, 0.0]]]) - return tf.constant(mask, dtype=tf.float32) - - def expectedMasksAfterRot90(self): - mask = np.array([ - [[0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [255.0, 255.0, 255.0]], - [[0.0, 0.0, 0.0], - [255.0, 255.0, 255.0], - [255.0, 255.0, 255.0]]]) - return tf.constant(mask, dtype=tf.float32) - - def expectedLabelScoresAfterThresholding(self): - return tf.constant([1.0], dtype=tf.float32) - - def expectedBoxesAfterThresholding(self): - return tf.constant([[0.0, 0.25, 0.75, 1.0]], dtype=tf.float32) - - def expectedLabelsAfterThresholding(self): - return tf.constant([1], dtype=tf.float32) - - def expectedMultiClassScoresAfterThresholding(self): - return tf.constant([[1.0, 0.0]], dtype=tf.float32) - - def expectedMasksAfterThresholding(self): - mask = np.array([ - [[255.0, 0.0, 0.0], - [255.0, 0.0, 0.0], - [255.0, 0.0, 0.0]]]) - return tf.constant(mask, dtype=tf.float32) - - def expectedKeypointsAfterThresholding(self): - keypoints = np.array([ - [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]] - ]) - return tf.constant(keypoints, dtype=tf.float32) - - def expectedLabelScoresAfterThresholdingWithMissingScore(self): - return tf.constant([np.nan], dtype=tf.float32) - - def expectedBoxesAfterThresholdingWithMissingScore(self): - return tf.constant([[0.25, 0.5, 0.75, 1]], dtype=tf.float32) - - def expectedLabelsAfterThresholdingWithMissingScore(self): - return tf.constant([2], dtype=tf.float32) - - def testRgbToGrayscale(self): - images = self.createTestImages() - grayscale_images = preprocessor._rgb_to_grayscale(images) - expected_images = tf.image.rgb_to_grayscale(images) - with self.test_session() as sess: - (grayscale_images, expected_images) = sess.run( - [grayscale_images, expected_images]) - self.assertAllEqual(expected_images, grayscale_images) - - def testNormalizeImage(self): - preprocess_options = [(preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 256, - 'target_minval': -1, - 'target_maxval': 1 - })] - images = self.createTestImages() - tensor_dict = {fields.InputDataFields.image: images} - tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) - images = tensor_dict[fields.InputDataFields.image] - images_expected = 
self.expectedImagesAfterNormalization() - - with self.test_session() as sess: - (images_, images_expected_) = sess.run( - [images, images_expected]) - images_shape_ = images_.shape - images_expected_shape_ = images_expected_.shape - expected_shape = [1, 4, 4, 3] - self.assertAllEqual(images_expected_shape_, images_shape_) - self.assertAllEqual(images_shape_, expected_shape) - self.assertAllClose(images_, images_expected_) - - def testRetainBoxesAboveThreshold(self): - boxes = self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScores() - (retained_boxes, retained_labels, - retained_label_scores) = preprocessor.retain_boxes_above_threshold( - boxes, labels, label_scores, threshold=0.6) - with self.test_session() as sess: - (retained_boxes_, retained_labels_, retained_label_scores_, - expected_retained_boxes_, expected_retained_labels_, - expected_retained_label_scores_) = sess.run([ - retained_boxes, retained_labels, retained_label_scores, - self.expectedBoxesAfterThresholding(), - self.expectedLabelsAfterThresholding(), - self.expectedLabelScoresAfterThresholding()]) - self.assertAllClose( - retained_boxes_, expected_retained_boxes_) - self.assertAllClose( - retained_labels_, expected_retained_labels_) - self.assertAllClose( - retained_label_scores_, expected_retained_label_scores_) - - def testRetainBoxesAboveThresholdWithMultiClassScores(self): - boxes = self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScores() - multiclass_scores = self.createTestMultiClassScores() - (_, _, _, - retained_multiclass_scores) = preprocessor.retain_boxes_above_threshold( - boxes, - labels, - label_scores, - multiclass_scores=multiclass_scores, - threshold=0.6) - with self.test_session() as sess: - (retained_multiclass_scores_, - expected_retained_multiclass_scores_) = sess.run([ - retained_multiclass_scores, - self.expectedMultiClassScoresAfterThresholding() - ]) - - self.assertAllClose(retained_multiclass_scores_, - expected_retained_multiclass_scores_) - - def testRetainBoxesAboveThresholdWithMasks(self): - boxes = self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScores() - masks = self.createTestMasks() - _, _, _, retained_masks = preprocessor.retain_boxes_above_threshold( - boxes, labels, label_scores, masks, threshold=0.6) - with self.test_session() as sess: - retained_masks_, expected_retained_masks_ = sess.run([ - retained_masks, self.expectedMasksAfterThresholding()]) - - self.assertAllClose( - retained_masks_, expected_retained_masks_) - - def testRetainBoxesAboveThresholdWithKeypoints(self): - boxes = self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScores() - keypoints = self.createTestKeypoints() - (_, _, _, retained_keypoints) = preprocessor.retain_boxes_above_threshold( - boxes, labels, label_scores, keypoints=keypoints, threshold=0.6) - with self.test_session() as sess: - (retained_keypoints_, - expected_retained_keypoints_) = sess.run([ - retained_keypoints, - self.expectedKeypointsAfterThresholding()]) - - self.assertAllClose( - retained_keypoints_, expected_retained_keypoints_) - - def testRetainBoxesAboveThresholdWithMissingScore(self): - boxes = self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScoresWithMissingScore() - (retained_boxes, retained_labels, - retained_label_scores) = preprocessor.retain_boxes_above_threshold( - boxes, labels, label_scores, 
threshold=0.6) - with self.test_session() as sess: - (retained_boxes_, retained_labels_, retained_label_scores_, - expected_retained_boxes_, expected_retained_labels_, - expected_retained_label_scores_) = sess.run([ - retained_boxes, retained_labels, retained_label_scores, - self.expectedBoxesAfterThresholdingWithMissingScore(), - self.expectedLabelsAfterThresholdingWithMissingScore(), - self.expectedLabelScoresAfterThresholdingWithMissingScore()]) - self.assertAllClose( - retained_boxes_, expected_retained_boxes_) - self.assertAllClose( - retained_labels_, expected_retained_labels_) - self.assertAllClose( - retained_label_scores_, expected_retained_label_scores_) - - def testFlipBoxesLeftRight(self): - boxes = self.createTestBoxes() - flipped_boxes = preprocessor._flip_boxes_left_right(boxes) - expected_boxes = self.expectedBoxesAfterLeftRightFlip() - with self.test_session() as sess: - flipped_boxes, expected_boxes = sess.run([flipped_boxes, expected_boxes]) - self.assertAllEqual(flipped_boxes.flatten(), expected_boxes.flatten()) - - def testFlipBoxesUpDown(self): - boxes = self.createTestBoxes() - flipped_boxes = preprocessor._flip_boxes_up_down(boxes) - expected_boxes = self.expectedBoxesAfterUpDownFlip() - with self.test_session() as sess: - flipped_boxes, expected_boxes = sess.run([flipped_boxes, expected_boxes]) - self.assertAllEqual(flipped_boxes.flatten(), expected_boxes.flatten()) - - def testRot90Boxes(self): - boxes = self.createTestBoxes() - rotated_boxes = preprocessor._rot90_boxes(boxes) - expected_boxes = self.expectedBoxesAfterRot90() - with self.test_session() as sess: - rotated_boxes, expected_boxes = sess.run([rotated_boxes, expected_boxes]) - self.assertAllEqual(rotated_boxes.flatten(), expected_boxes.flatten()) - - def testFlipMasksLeftRight(self): - test_mask = self.createTestMasks() - flipped_mask = preprocessor._flip_masks_left_right(test_mask) - expected_mask = self.expectedMasksAfterLeftRightFlip() - with self.test_session() as sess: - flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask]) - self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten()) - - def testFlipMasksUpDown(self): - test_mask = self.createTestMasks() - flipped_mask = preprocessor._flip_masks_up_down(test_mask) - expected_mask = self.expectedMasksAfterUpDownFlip() - with self.test_session() as sess: - flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask]) - self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten()) - - def testRot90Masks(self): - test_mask = self.createTestMasks() - rotated_mask = preprocessor._rot90_masks(test_mask) - expected_mask = self.expectedMasksAfterRot90() - with self.test_session() as sess: - rotated_mask, expected_mask = sess.run([rotated_mask, expected_mask]) - self.assertAllEqual(rotated_mask.flatten(), expected_mask.flatten()) - - def _testPreprocessorCache(self, - preprocess_options, - test_boxes=False, - test_masks=False, - test_keypoints=False, - num_runs=4): - cache = preprocessor_cache.PreprocessorCache() - images = self.createTestImages() - boxes = self.createTestBoxes() - classes = self.createTestLabels() - masks = self.createTestMasks() - keypoints = self.createTestKeypoints() - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_instance_masks=test_masks, include_keypoints=test_keypoints) - out = [] - for i in range(num_runs): - tensor_dict = { - fields.InputDataFields.image: images, - } - num_outputs = 1 - if test_boxes: - tensor_dict[fields.InputDataFields.groundtruth_boxes] = boxes - 
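-        # Classes ride along whenever boxes are tested (the default arg map
-        # feeds boxes and classes together), but only the boxes output is
-        # compared across runs below.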
tensor_dict[fields.InputDataFields.groundtruth_classes] = classes - num_outputs += 1 - if test_masks: - tensor_dict[fields.InputDataFields.groundtruth_instance_masks] = masks - num_outputs += 1 - if test_keypoints: - tensor_dict[fields.InputDataFields.groundtruth_keypoints] = keypoints - num_outputs += 1 - out.append(preprocessor.preprocess( - tensor_dict, preprocess_options, preprocessor_arg_map, cache)) - - with self.test_session() as sess: - to_run = [] - for i in range(num_runs): - to_run.append(out[i][fields.InputDataFields.image]) - if test_boxes: - to_run.append(out[i][fields.InputDataFields.groundtruth_boxes]) - if test_masks: - to_run.append( - out[i][fields.InputDataFields.groundtruth_instance_masks]) - if test_keypoints: - to_run.append(out[i][fields.InputDataFields.groundtruth_keypoints]) - - out_array = sess.run(to_run) - for i in range(num_outputs, len(out_array)): - self.assertAllClose(out_array[i], out_array[i - num_outputs]) - - def testRandomHorizontalFlip(self): - preprocess_options = [(preprocessor.random_horizontal_flip, {})] - images = self.expectedImagesAfterNormalization() - boxes = self.createTestBoxes() - tensor_dict = {fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes} - images_expected1 = self.expectedImagesAfterLeftRightFlip() - boxes_expected1 = self.expectedBoxesAfterLeftRightFlip() - images_expected2 = images - boxes_expected2 = boxes - tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) - images = tensor_dict[fields.InputDataFields.image] - boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - - boxes_diff1 = tf.squared_difference(boxes, boxes_expected1) - boxes_diff2 = tf.squared_difference(boxes, boxes_expected2) - boxes_diff = tf.multiply(boxes_diff1, boxes_diff2) - boxes_diff_expected = tf.zeros_like(boxes_diff) - - images_diff1 = tf.squared_difference(images, images_expected1) - images_diff2 = tf.squared_difference(images, images_expected2) - images_diff = tf.multiply(images_diff1, images_diff2) - images_diff_expected = tf.zeros_like(images_diff) - - with self.test_session() as sess: - (images_diff_, images_diff_expected_, boxes_diff_, - boxes_diff_expected_) = sess.run([images_diff, images_diff_expected, - boxes_diff, boxes_diff_expected]) - self.assertAllClose(boxes_diff_, boxes_diff_expected_) - self.assertAllClose(images_diff_, images_diff_expected_) - - def testRandomHorizontalFlipWithEmptyBoxes(self): - preprocess_options = [(preprocessor.random_horizontal_flip, {})] - images = self.expectedImagesAfterNormalization() - boxes = self.createEmptyTestBoxes() - tensor_dict = {fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes} - images_expected1 = self.expectedImagesAfterLeftRightFlip() - boxes_expected = self.createEmptyTestBoxes() - images_expected2 = images - tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) - images = tensor_dict[fields.InputDataFields.image] - boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - - images_diff1 = tf.squared_difference(images, images_expected1) - images_diff2 = tf.squared_difference(images, images_expected2) - images_diff = tf.multiply(images_diff1, images_diff2) - images_diff_expected = tf.zeros_like(images_diff) - - with self.test_session() as sess: - (images_diff_, images_diff_expected_, boxes_, - boxes_expected_) = sess.run([images_diff, images_diff_expected, boxes, - boxes_expected]) - self.assertAllClose(boxes_, boxes_expected_) - self.assertAllClose(images_diff_, 
images_diff_expected_) - - def testRandomHorizontalFlipWithCache(self): - keypoint_flip_permutation = self.createKeypointFlipPermutation() - preprocess_options = [ - (preprocessor.random_horizontal_flip, - {'keypoint_flip_permutation': keypoint_flip_permutation})] - self._testPreprocessorCache(preprocess_options, - test_boxes=True, - test_masks=True, - test_keypoints=True) - - def testRunRandomHorizontalFlipWithMaskAndKeypoints(self): - preprocess_options = [(preprocessor.random_horizontal_flip, {})] - image_height = 3 - image_width = 3 - images = tf.random_uniform([1, image_height, image_width, 3]) - boxes = self.createTestBoxes() - masks = self.createTestMasks() - keypoints = self.createTestKeypoints() - keypoint_flip_permutation = self.createKeypointFlipPermutation() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_instance_masks: masks, - fields.InputDataFields.groundtruth_keypoints: keypoints - } - preprocess_options = [ - (preprocessor.random_horizontal_flip, - {'keypoint_flip_permutation': keypoint_flip_permutation})] - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_instance_masks=True, include_keypoints=True) - tensor_dict = preprocessor.preprocess( - tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map) - boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks] - keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints] - with self.test_session() as sess: - boxes, masks, keypoints = sess.run([boxes, masks, keypoints]) - self.assertTrue(boxes is not None) - self.assertTrue(masks is not None) - self.assertTrue(keypoints is not None) - - def testRandomVerticalFlip(self): - preprocess_options = [(preprocessor.random_vertical_flip, {})] - images = self.expectedImagesAfterNormalization() - boxes = self.createTestBoxes() - tensor_dict = {fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes} - images_expected1 = self.expectedImagesAfterUpDownFlip() - boxes_expected1 = self.expectedBoxesAfterUpDownFlip() - images_expected2 = images - boxes_expected2 = boxes - tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) - images = tensor_dict[fields.InputDataFields.image] - boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - - boxes_diff1 = tf.squared_difference(boxes, boxes_expected1) - boxes_diff2 = tf.squared_difference(boxes, boxes_expected2) - boxes_diff = tf.multiply(boxes_diff1, boxes_diff2) - boxes_diff_expected = tf.zeros_like(boxes_diff) - - images_diff1 = tf.squared_difference(images, images_expected1) - images_diff2 = tf.squared_difference(images, images_expected2) - images_diff = tf.multiply(images_diff1, images_diff2) - images_diff_expected = tf.zeros_like(images_diff) - - with self.test_session() as sess: - (images_diff_, images_diff_expected_, boxes_diff_, - boxes_diff_expected_) = sess.run([images_diff, images_diff_expected, - boxes_diff, boxes_diff_expected]) - self.assertAllClose(boxes_diff_, boxes_diff_expected_) - self.assertAllClose(images_diff_, images_diff_expected_) - - def testRandomVerticalFlipWithEmptyBoxes(self): - preprocess_options = [(preprocessor.random_vertical_flip, {})] - images = self.expectedImagesAfterNormalization() - boxes = self.createEmptyTestBoxes() - tensor_dict = {fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes} - 
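-    # Editor's note: as in the horizontal-flip tests above, the random op may
-    # or may not fire, so the image assertions accept either outcome: the
-    # elementwise product of squared differences against both candidate
-    # results must be zero.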
images_expected1 = self.expectedImagesAfterUpDownFlip() - boxes_expected = self.createEmptyTestBoxes() - images_expected2 = images - tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) - images = tensor_dict[fields.InputDataFields.image] - boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - - images_diff1 = tf.squared_difference(images, images_expected1) - images_diff2 = tf.squared_difference(images, images_expected2) - images_diff = tf.multiply(images_diff1, images_diff2) - images_diff_expected = tf.zeros_like(images_diff) - - with self.test_session() as sess: - (images_diff_, images_diff_expected_, boxes_, - boxes_expected_) = sess.run([images_diff, images_diff_expected, boxes, - boxes_expected]) - self.assertAllClose(boxes_, boxes_expected_) - self.assertAllClose(images_diff_, images_diff_expected_) - - def testRandomVerticalFlipWithCache(self): - keypoint_flip_permutation = self.createKeypointFlipPermutation() - preprocess_options = [ - (preprocessor.random_vertical_flip, - {'keypoint_flip_permutation': keypoint_flip_permutation})] - self._testPreprocessorCache(preprocess_options, - test_boxes=True, - test_masks=True, - test_keypoints=True) - - def testRunRandomVerticalFlipWithMaskAndKeypoints(self): - preprocess_options = [(preprocessor.random_vertical_flip, {})] - image_height = 3 - image_width = 3 - images = tf.random_uniform([1, image_height, image_width, 3]) - boxes = self.createTestBoxes() - masks = self.createTestMasks() - keypoints = self.createTestKeypoints() - keypoint_flip_permutation = self.createKeypointFlipPermutation() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_instance_masks: masks, - fields.InputDataFields.groundtruth_keypoints: keypoints - } - preprocess_options = [ - (preprocessor.random_vertical_flip, - {'keypoint_flip_permutation': keypoint_flip_permutation})] - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_instance_masks=True, include_keypoints=True) - tensor_dict = preprocessor.preprocess( - tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map) - boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks] - keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints] - with self.test_session() as sess: - boxes, masks, keypoints = sess.run([boxes, masks, keypoints]) - self.assertTrue(boxes is not None) - self.assertTrue(masks is not None) - self.assertTrue(keypoints is not None) - - def testRandomRotation90(self): - preprocess_options = [(preprocessor.random_rotation90, {})] - images = self.expectedImagesAfterNormalization() - boxes = self.createTestBoxes() - tensor_dict = {fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes} - images_expected1 = self.expectedImagesAfterRot90() - boxes_expected1 = self.expectedBoxesAfterRot90() - images_expected2 = images - boxes_expected2 = boxes - tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) - images = tensor_dict[fields.InputDataFields.image] - boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - - boxes_diff1 = tf.squared_difference(boxes, boxes_expected1) - boxes_diff2 = tf.squared_difference(boxes, boxes_expected2) - boxes_diff = tf.multiply(boxes_diff1, boxes_diff2) - boxes_diff_expected = tf.zeros_like(boxes_diff) - - images_diff1 = tf.squared_difference(images, images_expected1) - images_diff2 = 
tf.squared_difference(images, images_expected2) - images_diff = tf.multiply(images_diff1, images_diff2) - images_diff_expected = tf.zeros_like(images_diff) - - with self.test_session() as sess: - (images_diff_, images_diff_expected_, boxes_diff_, - boxes_diff_expected_) = sess.run([images_diff, images_diff_expected, - boxes_diff, boxes_diff_expected]) - self.assertAllClose(boxes_diff_, boxes_diff_expected_) - self.assertAllClose(images_diff_, images_diff_expected_) - - def testRandomRotation90WithEmptyBoxes(self): - preprocess_options = [(preprocessor.random_rotation90, {})] - images = self.expectedImagesAfterNormalization() - boxes = self.createEmptyTestBoxes() - tensor_dict = {fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes} - images_expected1 = self.expectedImagesAfterRot90() - boxes_expected = self.createEmptyTestBoxes() - images_expected2 = images - tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) - images = tensor_dict[fields.InputDataFields.image] - boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - - images_diff1 = tf.squared_difference(images, images_expected1) - images_diff2 = tf.squared_difference(images, images_expected2) - images_diff = tf.multiply(images_diff1, images_diff2) - images_diff_expected = tf.zeros_like(images_diff) - - with self.test_session() as sess: - (images_diff_, images_diff_expected_, boxes_, - boxes_expected_) = sess.run([images_diff, images_diff_expected, boxes, - boxes_expected]) - self.assertAllClose(boxes_, boxes_expected_) - self.assertAllClose(images_diff_, images_diff_expected_) - - def testRandomRotation90WithCache(self): - preprocess_options = [(preprocessor.random_rotation90, {})] - self._testPreprocessorCache(preprocess_options, - test_boxes=True, - test_masks=True, - test_keypoints=True) - - def testRunRandomRotation90WithMaskAndKeypoints(self): - preprocess_options = [(preprocessor.random_rotation90, {})] - image_height = 3 - image_width = 3 - images = tf.random_uniform([1, image_height, image_width, 3]) - boxes = self.createTestBoxes() - masks = self.createTestMasks() - keypoints = self.createTestKeypoints() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_instance_masks: masks, - fields.InputDataFields.groundtruth_keypoints: keypoints - } - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_instance_masks=True, include_keypoints=True) - tensor_dict = preprocessor.preprocess( - tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map) - boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks] - keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints] - with self.test_session() as sess: - boxes, masks, keypoints = sess.run([boxes, masks, keypoints]) - self.assertTrue(boxes is not None) - self.assertTrue(masks is not None) - self.assertTrue(keypoints is not None) - - def testRandomPixelValueScale(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_pixel_value_scale, {})) - images = self.createTestImages() - tensor_dict = {fields.InputDataFields.image: images} - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - images_min = 
tf.to_float(images) * 0.9 / 255.0 - images_max = tf.to_float(images) * 1.1 / 255.0 - images = tensor_dict[fields.InputDataFields.image] - values_greater = tf.greater_equal(images, images_min) - values_less = tf.less_equal(images, images_max) - values_true = tf.fill([1, 4, 4, 3], True) - with self.test_session() as sess: - (values_greater_, values_less_, values_true_) = sess.run( - [values_greater, values_less, values_true]) - self.assertAllClose(values_greater_, values_true_) - self.assertAllClose(values_less_, values_true_) - - def testRandomPixelValueScaleWithCache(self): - preprocess_options = [] - preprocess_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocess_options.append((preprocessor.random_pixel_value_scale, {})) - self._testPreprocessorCache(preprocess_options, - test_boxes=True, - test_masks=False, - test_keypoints=False) - - def testRandomImageScale(self): - preprocess_options = [(preprocessor.random_image_scale, {})] - images_original = self.createTestImages() - tensor_dict = {fields.InputDataFields.image: images_original} - tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) - images_scaled = tensor_dict[fields.InputDataFields.image] - images_original_shape = tf.shape(images_original) - images_scaled_shape = tf.shape(images_scaled) - with self.test_session() as sess: - (images_original_shape_, images_scaled_shape_) = sess.run( - [images_original_shape, images_scaled_shape]) - self.assertTrue( - images_original_shape_[1] * 0.5 <= images_scaled_shape_[1]) - self.assertTrue( - images_original_shape_[1] * 2.0 >= images_scaled_shape_[1]) - self.assertTrue( - images_original_shape_[2] * 0.5 <= images_scaled_shape_[2]) - self.assertTrue( - images_original_shape_[2] * 2.0 >= images_scaled_shape_[2]) - - def testRandomImageScaleWithCache(self): - preprocess_options = [(preprocessor.random_image_scale, {})] - self._testPreprocessorCache(preprocess_options, - test_boxes=False, - test_masks=False, - test_keypoints=False) - - def testRandomRGBtoGray(self): - preprocess_options = [(preprocessor.random_rgb_to_gray, {})] - images_original = self.createTestImages() - tensor_dict = {fields.InputDataFields.image: images_original} - tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) - images_gray = tensor_dict[fields.InputDataFields.image] - images_gray_r, images_gray_g, images_gray_b = tf.split( - value=images_gray, num_or_size_splits=3, axis=3) - images_r, images_g, images_b = tf.split( - value=images_original, num_or_size_splits=3, axis=3) - images_r_diff1 = tf.squared_difference(tf.to_float(images_r), - tf.to_float(images_gray_r)) - images_r_diff2 = tf.squared_difference(tf.to_float(images_gray_r), - tf.to_float(images_gray_g)) - images_r_diff = tf.multiply(images_r_diff1, images_r_diff2) - images_g_diff1 = tf.squared_difference(tf.to_float(images_g), - tf.to_float(images_gray_g)) - images_g_diff2 = tf.squared_difference(tf.to_float(images_gray_g), - tf.to_float(images_gray_b)) - images_g_diff = tf.multiply(images_g_diff1, images_g_diff2) - images_b_diff1 = tf.squared_difference(tf.to_float(images_b), - tf.to_float(images_gray_b)) - images_b_diff2 = tf.squared_difference(tf.to_float(images_gray_b), - tf.to_float(images_gray_r)) - images_b_diff = tf.multiply(images_b_diff1, images_b_diff2) - image_zero1 = tf.constant(0, dtype=tf.float32, shape=[1, 4, 4, 1]) - with self.test_session() as sess: - (images_r_diff_, images_g_diff_, images_b_diff_, 
image_zero1_) = sess.run( - [images_r_diff, images_g_diff, images_b_diff, image_zero1]) - self.assertAllClose(images_r_diff_, image_zero1_) - self.assertAllClose(images_g_diff_, image_zero1_) - self.assertAllClose(images_b_diff_, image_zero1_) - - def testRandomRGBtoGrayWithCache(self): - preprocess_options = [( - preprocessor.random_rgb_to_gray, {'probability': 0.5})] - self._testPreprocessorCache(preprocess_options, - test_boxes=False, - test_masks=False, - test_keypoints=False) - - def testRandomAdjustBrightness(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_adjust_brightness, {})) - images_original = self.createTestImages() - tensor_dict = {fields.InputDataFields.image: images_original} - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - images_bright = tensor_dict[fields.InputDataFields.image] - image_original_shape = tf.shape(images_original) - image_bright_shape = tf.shape(images_bright) - with self.test_session() as sess: - (image_original_shape_, image_bright_shape_) = sess.run( - [image_original_shape, image_bright_shape]) - self.assertAllEqual(image_original_shape_, image_bright_shape_) - - def testRandomAdjustBrightnessWithCache(self): - preprocess_options = [] - preprocess_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocess_options.append((preprocessor.random_adjust_brightness, {})) - self._testPreprocessorCache(preprocess_options, - test_boxes=False, - test_masks=False, - test_keypoints=False) - - def testRandomAdjustContrast(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_adjust_contrast, {})) - images_original = self.createTestImages() - tensor_dict = {fields.InputDataFields.image: images_original} - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - images_contrast = tensor_dict[fields.InputDataFields.image] - image_original_shape = tf.shape(images_original) - image_contrast_shape = tf.shape(images_contrast) - with self.test_session() as sess: - (image_original_shape_, image_contrast_shape_) = sess.run( - [image_original_shape, image_contrast_shape]) - self.assertAllEqual(image_original_shape_, image_contrast_shape_) - - def testRandomAdjustContrastWithCache(self): - preprocess_options = [] - preprocess_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocess_options.append((preprocessor.random_adjust_contrast, {})) - self._testPreprocessorCache(preprocess_options, - test_boxes=False, - test_masks=False, - test_keypoints=False) - - def testRandomAdjustHue(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_adjust_hue, {})) - images_original = self.createTestImages() - tensor_dict = {fields.InputDataFields.image: images_original} - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - images_hue = 
tensor_dict[fields.InputDataFields.image] - image_original_shape = tf.shape(images_original) - image_hue_shape = tf.shape(images_hue) - with self.test_session() as sess: - (image_original_shape_, image_hue_shape_) = sess.run( - [image_original_shape, image_hue_shape]) - self.assertAllEqual(image_original_shape_, image_hue_shape_) - - def testRandomAdjustHueWithCache(self): - preprocess_options = [] - preprocess_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocess_options.append((preprocessor.random_adjust_hue, {})) - self._testPreprocessorCache(preprocess_options, - test_boxes=False, - test_masks=False, - test_keypoints=False) - - def testRandomDistortColor(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_distort_color, {})) - images_original = self.createTestImages() - images_original_shape = tf.shape(images_original) - tensor_dict = {fields.InputDataFields.image: images_original} - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - images_distorted_color = tensor_dict[fields.InputDataFields.image] - images_distorted_color_shape = tf.shape(images_distorted_color) - with self.test_session() as sess: - (images_original_shape_, images_distorted_color_shape_) = sess.run( - [images_original_shape, images_distorted_color_shape]) - self.assertAllEqual(images_original_shape_, images_distorted_color_shape_) - - def testRandomDistortColorWithCache(self): - preprocess_options = [] - preprocess_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocess_options.append((preprocessor.random_distort_color, {})) - self._testPreprocessorCache(preprocess_options, - test_boxes=False, - test_masks=False, - test_keypoints=False) - - def testRandomJitterBoxes(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.random_jitter_boxes, {})) - boxes = self.createTestBoxes() - boxes_shape = tf.shape(boxes) - tensor_dict = {fields.InputDataFields.groundtruth_boxes: boxes} - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - distorted_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - distorted_boxes_shape = tf.shape(distorted_boxes) - - with self.test_session() as sess: - (boxes_shape_, distorted_boxes_shape_) = sess.run( - [boxes_shape, distorted_boxes_shape]) - self.assertAllEqual(boxes_shape_, distorted_boxes_shape_) - - def testRandomCropImage(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_crop_image, {})) - images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - distorted_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - distorted_images = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - 
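-    # random_crop_image yields a crop of random size and position, so the
-    # test checks that tensor ranks and the channel count are preserved
-    # rather than comparing exact values.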
boxes_rank = tf.rank(boxes) - distorted_boxes_rank = tf.rank(distorted_boxes) - images_rank = tf.rank(images) - distorted_images_rank = tf.rank(distorted_images) - self.assertEqual(3, distorted_images.get_shape()[3]) - - with self.test_session() as sess: - (boxes_rank_, distorted_boxes_rank_, images_rank_, - distorted_images_rank_) = sess.run([ - boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank - ]) - self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) - self.assertAllEqual(images_rank_, distorted_images_rank_) - - def testRandomCropImageWithCache(self): - preprocess_options = [(preprocessor.random_rgb_to_gray, - {'probability': 0.5}), - (preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1, - }), - (preprocessor.random_crop_image, {})] - self._testPreprocessorCache(preprocess_options, - test_boxes=True, - test_masks=False, - test_keypoints=False) - - def testRandomCropImageGrayscale(self): - preprocessing_options = [(preprocessor.rgb_to_gray, {}), - (preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1, - }), - (preprocessor.random_crop_image, {})] - images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options) - distorted_images = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - boxes_rank = tf.rank(boxes) - distorted_boxes_rank = tf.rank(distorted_boxes) - images_rank = tf.rank(images) - distorted_images_rank = tf.rank(distorted_images) - self.assertEqual(1, distorted_images.get_shape()[3]) - - with self.test_session() as sess: - session_results = sess.run([ - boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank - ]) - (boxes_rank_, distorted_boxes_rank_, images_rank_, - distorted_images_rank_) = session_results - self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) - self.assertAllEqual(images_rank_, distorted_images_rank_) - - def testRandomCropImageWithBoxOutOfImage(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_crop_image, {})) - images = self.createTestImages() - boxes = self.createTestBoxesOutOfImage() - labels = self.createTestLabels() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - distorted_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - distorted_images = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - boxes_rank = tf.rank(boxes) - distorted_boxes_rank = tf.rank(distorted_boxes) - images_rank = tf.rank(images) - distorted_images_rank = tf.rank(distorted_images) - - with self.test_session() as sess: - (boxes_rank_, distorted_boxes_rank_, images_rank_, - distorted_images_rank_) = sess.run( - [boxes_rank, distorted_boxes_rank, images_rank, - distorted_images_rank]) - 
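-      # Boxes extending beyond the image are handled by the crop, so only
-      # rank preservation is asserted here.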
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) - self.assertAllEqual(images_rank_, distorted_images_rank_) - - def testRandomCropImageWithRandomCoefOne(self): - preprocessing_options = [(preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })] - - images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScores() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_label_scores: label_scores - } - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - images = tensor_dict[fields.InputDataFields.image] - - preprocessing_options = [(preprocessor.random_crop_image, { - 'random_coef': 1.0 - })] - distorted_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - - distorted_images = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - distorted_label_scores = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_label_scores] - boxes_shape = tf.shape(boxes) - distorted_boxes_shape = tf.shape(distorted_boxes) - images_shape = tf.shape(images) - distorted_images_shape = tf.shape(distorted_images) - - with self.test_session() as sess: - (boxes_shape_, distorted_boxes_shape_, images_shape_, - distorted_images_shape_, images_, distorted_images_, - boxes_, distorted_boxes_, labels_, distorted_labels_, - label_scores_, distorted_label_scores_) = sess.run( - [boxes_shape, distorted_boxes_shape, images_shape, - distorted_images_shape, images, distorted_images, - boxes, distorted_boxes, labels, distorted_labels, - label_scores, distorted_label_scores]) - self.assertAllEqual(boxes_shape_, distorted_boxes_shape_) - self.assertAllEqual(images_shape_, distorted_images_shape_) - self.assertAllClose(images_, distorted_images_) - self.assertAllClose(boxes_, distorted_boxes_) - self.assertAllEqual(labels_, distorted_labels_) - self.assertAllEqual(label_scores_, distorted_label_scores_) - - def testRandomCropWithMockSampleDistortedBoundingBox(self): - preprocessing_options = [(preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })] - - images = self.createColorfulTestImage() - boxes = tf.constant([[0.1, 0.1, 0.8, 0.3], - [0.2, 0.4, 0.75, 0.75], - [0.3, 0.1, 0.4, 0.7]], dtype=tf.float32) - labels = tf.constant([1, 7, 11], dtype=tf.int32) - - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - images = tensor_dict[fields.InputDataFields.image] - - preprocessing_options = [(preprocessor.random_crop_image, {})] - with mock.patch.object( - tf.image, - 'sample_distorted_bounding_box') as mock_sample_distorted_bounding_box: - mock_sample_distorted_bounding_box.return_value = (tf.constant( - [6, 143, 0], dtype=tf.int32), tf.constant( - [190, 237, -1], dtype=tf.int32), tf.constant( - [[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) - - distorted_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - - 
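-      # With sample_distorted_bounding_box pinned to begin=[6, 143, 0] and
-      # size=[190, 237, -1], the crop window is deterministic: the first box
-      # (label 1) falls outside it and is pruned, and the remaining two are
-      # re-normalized to the 190x237 window, giving the expected values below.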
distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - expected_boxes = tf.constant([[0.178947, 0.07173, 0.75789469, 0.66244733], - [0.28421, 0.0, 0.38947365, 0.57805908]], - dtype=tf.float32) - expected_labels = tf.constant([7, 11], dtype=tf.int32) - - with self.test_session() as sess: - (distorted_boxes_, distorted_labels_, - expected_boxes_, expected_labels_) = sess.run( - [distorted_boxes, distorted_labels, - expected_boxes, expected_labels]) - self.assertAllClose(distorted_boxes_, expected_boxes_) - self.assertAllEqual(distorted_labels_, expected_labels_) - - def testRandomCropImageWithMultiClassScores(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_crop_image, {})) - images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - multiclass_scores = self.createTestMultiClassScores() - - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.multiclass_scores: multiclass_scores - } - distorted_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - distorted_images = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_multiclass_scores = distorted_tensor_dict[ - fields.InputDataFields.multiclass_scores] - boxes_rank = tf.rank(boxes) - distorted_boxes_rank = tf.rank(distorted_boxes) - images_rank = tf.rank(images) - distorted_images_rank = tf.rank(distorted_images) - multiclass_scores_rank = tf.rank(multiclass_scores) - distorted_multiclass_scores_rank = tf.rank(distorted_multiclass_scores) - - with self.test_session() as sess: - (boxes_rank_, distorted_boxes_, distorted_boxes_rank_, images_rank_, - distorted_images_rank_, multiclass_scores_rank_, - distorted_multiclass_scores_rank_, - distorted_multiclass_scores_) = sess.run([ - boxes_rank, distorted_boxes, distorted_boxes_rank, images_rank, - distorted_images_rank, multiclass_scores_rank, - distorted_multiclass_scores_rank, distorted_multiclass_scores - ]) - self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) - self.assertAllEqual(images_rank_, distorted_images_rank_) - self.assertAllEqual(multiclass_scores_rank_, - distorted_multiclass_scores_rank_) - self.assertAllEqual(distorted_boxes_.shape[0], - distorted_multiclass_scores_.shape[0]) - - def testStrictRandomCropImageWithLabelScores(self): - image = self.createColorfulTestImage()[0] - boxes = self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScores() - with mock.patch.object( - tf.image, - 'sample_distorted_bounding_box' - ) as mock_sample_distorted_bounding_box: - mock_sample_distorted_bounding_box.return_value = ( - tf.constant([6, 143, 0], dtype=tf.int32), - tf.constant([190, 237, -1], dtype=tf.int32), - tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) - new_image, new_boxes, new_labels, new_label_scores = ( - preprocessor._strict_random_crop_image( - image, boxes, labels, label_scores)) - with self.test_session() as sess: - new_image, new_boxes, new_labels, new_label_scores = ( - sess.run( - [new_image, 
new_boxes, new_labels, new_label_scores]) - ) - - expected_boxes = np.array( - [[0.0, 0.0, 0.75789469, 1.0], - [0.23157893, 0.24050637, 0.75789469, 1.0]], dtype=np.float32) - self.assertAllEqual(new_image.shape, [190, 237, 3]) - self.assertAllEqual(new_label_scores, [1.0, 0.5]) - self.assertAllClose( - new_boxes.flatten(), expected_boxes.flatten()) - - def testStrictRandomCropImageWithMasks(self): - image = self.createColorfulTestImage()[0] - boxes = self.createTestBoxes() - labels = self.createTestLabels() - masks = tf.random_uniform([2, 200, 400], dtype=tf.float32) - with mock.patch.object( - tf.image, - 'sample_distorted_bounding_box' - ) as mock_sample_distorted_bounding_box: - mock_sample_distorted_bounding_box.return_value = ( - tf.constant([6, 143, 0], dtype=tf.int32), - tf.constant([190, 237, -1], dtype=tf.int32), - tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) - new_image, new_boxes, new_labels, new_masks = ( - preprocessor._strict_random_crop_image( - image, boxes, labels, masks=masks)) - with self.test_session() as sess: - new_image, new_boxes, new_labels, new_masks = sess.run( - [new_image, new_boxes, new_labels, new_masks]) - expected_boxes = np.array( - [[0.0, 0.0, 0.75789469, 1.0], - [0.23157893, 0.24050637, 0.75789469, 1.0]], dtype=np.float32) - self.assertAllEqual(new_image.shape, [190, 237, 3]) - self.assertAllEqual(new_masks.shape, [2, 190, 237]) - self.assertAllClose( - new_boxes.flatten(), expected_boxes.flatten()) - - def testStrictRandomCropImageWithKeypoints(self): - image = self.createColorfulTestImage()[0] - boxes = self.createTestBoxes() - labels = self.createTestLabels() - keypoints = self.createTestKeypoints() - with mock.patch.object( - tf.image, - 'sample_distorted_bounding_box' - ) as mock_sample_distorted_bounding_box: - mock_sample_distorted_bounding_box.return_value = ( - tf.constant([6, 143, 0], dtype=tf.int32), - tf.constant([190, 237, -1], dtype=tf.int32), - tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) - new_image, new_boxes, new_labels, new_keypoints = ( - preprocessor._strict_random_crop_image( - image, boxes, labels, keypoints=keypoints)) - with self.test_session() as sess: - new_image, new_boxes, new_labels, new_keypoints = sess.run( - [new_image, new_boxes, new_labels, new_keypoints]) - - expected_boxes = np.array([ - [0.0, 0.0, 0.75789469, 1.0], - [0.23157893, 0.24050637, 0.75789469, 1.0],], dtype=np.float32) - expected_keypoints = np.array([ - [[np.nan, np.nan], - [np.nan, np.nan], - [np.nan, np.nan]], - [[0.38947368, 0.07173], - [0.49473682, 0.24050637], - [0.60000002, 0.40928277]] - ], dtype=np.float32) - self.assertAllEqual(new_image.shape, [190, 237, 3]) - self.assertAllClose( - new_boxes.flatten(), expected_boxes.flatten()) - self.assertAllClose( - new_keypoints.flatten(), expected_keypoints.flatten()) - - def testRunRandomCropImageWithMasks(self): - image = self.createColorfulTestImage() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - masks = tf.random_uniform([2, 200, 400], dtype=tf.float32) - - tensor_dict = { - fields.InputDataFields.image: image, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_instance_masks: masks, - } - - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_instance_masks=True) - - preprocessing_options = [(preprocessor.random_crop_image, {})] - - with mock.patch.object( - tf.image, - 'sample_distorted_bounding_box' - ) as 
mock_sample_distorted_bounding_box: - mock_sample_distorted_bounding_box.return_value = ( - tf.constant([6, 143, 0], dtype=tf.int32), - tf.constant([190, 237, -1], dtype=tf.int32), - tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - distorted_image = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - distorted_masks = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_instance_masks] - with self.test_session() as sess: - (distorted_image_, distorted_boxes_, distorted_labels_, - distorted_masks_) = sess.run( - [distorted_image, distorted_boxes, distorted_labels, - distorted_masks]) - - expected_boxes = np.array([ - [0.0, 0.0, 0.75789469, 1.0], - [0.23157893, 0.24050637, 0.75789469, 1.0], - ], dtype=np.float32) - self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3]) - self.assertAllEqual(distorted_masks_.shape, [2, 190, 237]) - self.assertAllEqual(distorted_labels_, [1, 2]) - self.assertAllClose( - distorted_boxes_.flatten(), expected_boxes.flatten()) - - def testRunRandomCropImageWithKeypointsInsideCrop(self): - image = self.createColorfulTestImage() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - keypoints = self.createTestKeypointsInsideCrop() - - tensor_dict = { - fields.InputDataFields.image: image, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_keypoints: keypoints - } - - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_keypoints=True) - - preprocessing_options = [(preprocessor.random_crop_image, {})] - - with mock.patch.object( - tf.image, - 'sample_distorted_bounding_box' - ) as mock_sample_distorted_bounding_box: - mock_sample_distorted_bounding_box.return_value = ( - tf.constant([6, 143, 0], dtype=tf.int32), - tf.constant([190, 237, -1], dtype=tf.int32), - tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - distorted_image = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - distorted_keypoints = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_keypoints] - with self.test_session() as sess: - (distorted_image_, distorted_boxes_, distorted_labels_, - distorted_keypoints_) = sess.run( - [distorted_image, distorted_boxes, distorted_labels, - distorted_keypoints]) - - expected_boxes = np.array([ - [0.0, 0.0, 0.75789469, 1.0], - [0.23157893, 0.24050637, 0.75789469, 1.0], - ], dtype=np.float32) - expected_keypoints = np.array([ - [[0.38947368, 0.07173], - [0.49473682, 0.24050637], - [0.60000002, 0.40928277]], - [[0.38947368, 0.07173], - [0.49473682, 0.24050637], - [0.60000002, 0.40928277]] - ]) - self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3]) - self.assertAllEqual(distorted_labels_, [1, 2]) - self.assertAllClose( - distorted_boxes_.flatten(), expected_boxes.flatten()) - self.assertAllClose( - distorted_keypoints_.flatten(), expected_keypoints.flatten()) - - def 
testRunRandomCropImageWithKeypointsOutsideCrop(self): - image = self.createColorfulTestImage() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - keypoints = self.createTestKeypointsOutsideCrop() - - tensor_dict = { - fields.InputDataFields.image: image, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_keypoints: keypoints - } - - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_keypoints=True) - - preprocessing_options = [(preprocessor.random_crop_image, {})] - - with mock.patch.object( - tf.image, - 'sample_distorted_bounding_box' - ) as mock_sample_distorted_bounding_box: - mock_sample_distorted_bounding_box.return_value = ( - tf.constant([6, 143, 0], dtype=tf.int32), - tf.constant([190, 237, -1], dtype=tf.int32), - tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - distorted_image = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - distorted_keypoints = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_keypoints] - with self.test_session() as sess: - (distorted_image_, distorted_boxes_, distorted_labels_, - distorted_keypoints_) = sess.run( - [distorted_image, distorted_boxes, distorted_labels, - distorted_keypoints]) - - expected_boxes = np.array([ - [0.0, 0.0, 0.75789469, 1.0], - [0.23157893, 0.24050637, 0.75789469, 1.0], - ], dtype=np.float32) - expected_keypoints = np.array([ - [[np.nan, np.nan], - [np.nan, np.nan], - [np.nan, np.nan]], - [[np.nan, np.nan], - [np.nan, np.nan], - [np.nan, np.nan]], - ]) - self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3]) - self.assertAllEqual(distorted_labels_, [1, 2]) - self.assertAllClose( - distorted_boxes_.flatten(), expected_boxes.flatten()) - self.assertAllClose( - distorted_keypoints_.flatten(), expected_keypoints.flatten()) - - def testRunRetainBoxesAboveThreshold(self): - boxes = self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScores() - - tensor_dict = { - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_label_scores: label_scores - } - - preprocessing_options = [ - (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6}) - ] - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_label_scores=True) - retained_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - retained_boxes = retained_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - retained_labels = retained_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - retained_label_scores = retained_tensor_dict[ - fields.InputDataFields.groundtruth_label_scores] - - with self.test_session() as sess: - (retained_boxes_, retained_labels_, - retained_label_scores_, expected_retained_boxes_, - expected_retained_labels_, expected_retained_label_scores_) = sess.run( - [retained_boxes, retained_labels, retained_label_scores, - self.expectedBoxesAfterThresholding(), - self.expectedLabelsAfterThresholding(), - self.expectedLabelScoresAfterThresholding()]) - - self.assertAllClose(retained_boxes_, 
expected_retained_boxes_) - self.assertAllClose(retained_labels_, expected_retained_labels_) - self.assertAllClose( - retained_label_scores_, expected_retained_label_scores_) - - def testRunRetainBoxesAboveThresholdWithMasks(self): - boxes = self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScores() - masks = self.createTestMasks() - - tensor_dict = { - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_label_scores: label_scores, - fields.InputDataFields.groundtruth_instance_masks: masks - } - - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_label_scores=True, - include_instance_masks=True) - - preprocessing_options = [ - (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6}) - ] - - retained_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - retained_masks = retained_tensor_dict[ - fields.InputDataFields.groundtruth_instance_masks] - - with self.test_session() as sess: - (retained_masks_, expected_masks_) = sess.run( - [retained_masks, - self.expectedMasksAfterThresholding()]) - self.assertAllClose(retained_masks_, expected_masks_) - - def testRunRetainBoxesAboveThresholdWithKeypoints(self): - boxes = self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScores() - keypoints = self.createTestKeypoints() - - tensor_dict = { - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_label_scores: label_scores, - fields.InputDataFields.groundtruth_keypoints: keypoints - } - - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_label_scores=True, - include_keypoints=True) - - preprocessing_options = [ - (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6}) - ] - - retained_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - retained_keypoints = retained_tensor_dict[ - fields.InputDataFields.groundtruth_keypoints] - - with self.test_session() as sess: - (retained_keypoints_, expected_keypoints_) = sess.run( - [retained_keypoints, - self.expectedKeypointsAfterThresholding()]) - self.assertAllClose(retained_keypoints_, expected_keypoints_) - - def testRandomCropToAspectRatioWithCache(self): - preprocess_options = [(preprocessor.random_crop_to_aspect_ratio, {})] - self._testPreprocessorCache(preprocess_options, - test_boxes=True, - test_masks=False, - test_keypoints=False) - - def testRunRandomCropToAspectRatioWithMasks(self): - image = self.createColorfulTestImage() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - masks = tf.random_uniform([2, 200, 400], dtype=tf.float32) - - tensor_dict = { - fields.InputDataFields.image: image, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_instance_masks: masks - } - - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_instance_masks=True) - - preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})] - - with mock.patch.object(preprocessor, - '_random_integer') as mock_random_integer: - mock_random_integer.return_value = tf.constant(0, dtype=tf.int32) - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, 
func_arg_map=preprocessor_arg_map) - distorted_image = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - distorted_masks = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_instance_masks] - with self.test_session() as sess: - (distorted_image_, distorted_boxes_, distorted_labels_, - distorted_masks_) = sess.run([ - distorted_image, distorted_boxes, distorted_labels, distorted_masks - ]) - - expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32) - self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3]) - self.assertAllEqual(distorted_labels_, [1]) - self.assertAllClose(distorted_boxes_.flatten(), - expected_boxes.flatten()) - self.assertAllEqual(distorted_masks_.shape, [1, 200, 200]) - - def testRunRandomCropToAspectRatioWithKeypoints(self): - image = self.createColorfulTestImage() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - keypoints = self.createTestKeypoints() - - tensor_dict = { - fields.InputDataFields.image: image, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_keypoints: keypoints - } - - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_keypoints=True) - - preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})] - - with mock.patch.object(preprocessor, - '_random_integer') as mock_random_integer: - mock_random_integer.return_value = tf.constant(0, dtype=tf.int32) - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - distorted_image = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - distorted_keypoints = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_keypoints] - with self.test_session() as sess: - (distorted_image_, distorted_boxes_, distorted_labels_, - distorted_keypoints_) = sess.run([ - distorted_image, distorted_boxes, distorted_labels, - distorted_keypoints - ]) - - expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32) - expected_keypoints = np.array( - [[0.1, 0.2], [0.2, 0.4], [0.3, 0.6]], dtype=np.float32) - self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3]) - self.assertAllEqual(distorted_labels_, [1]) - self.assertAllClose(distorted_boxes_.flatten(), - expected_boxes.flatten()) - self.assertAllClose(distorted_keypoints_.flatten(), - expected_keypoints.flatten()) - - def testRandomPadToAspectRatioWithCache(self): - preprocess_options = [(preprocessor.random_pad_to_aspect_ratio, {})] - self._testPreprocessorCache(preprocess_options, - test_boxes=True, - test_masks=True, - test_keypoints=True) - - def testRunRandomPadToAspectRatioWithMinMaxPaddedSizeRatios(self): - image = self.createColorfulTestImage() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - - tensor_dict = { - fields.InputDataFields.image: image, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels - } - - preprocessor_arg_map = preprocessor.get_default_func_arg_map() - preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, - {'min_padded_size_ratio': (4.0, 4.0), - 
'max_padded_size_ratio': (4.0, 4.0)})] - - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - distorted_image = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - with self.test_session() as sess: - distorted_image_, distorted_boxes_, distorted_labels_ = sess.run([ - distorted_image, distorted_boxes, distorted_labels]) - - expected_boxes = np.array( - [[0.0, 0.125, 0.1875, 0.5], [0.0625, 0.25, 0.1875, 0.5]], - dtype=np.float32) - self.assertAllEqual(distorted_image_.shape, [1, 800, 800, 3]) - self.assertAllEqual(distorted_labels_, [1, 2]) - self.assertAllClose(distorted_boxes_.flatten(), - expected_boxes.flatten()) - - def testRunRandomPadToAspectRatioWithMasks(self): - image = self.createColorfulTestImage() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - masks = tf.random_uniform([2, 200, 400], dtype=tf.float32) - - tensor_dict = { - fields.InputDataFields.image: image, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_instance_masks: masks - } - - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_instance_masks=True) - - preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, {})] - - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - distorted_image = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - distorted_masks = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_instance_masks] - with self.test_session() as sess: - (distorted_image_, distorted_boxes_, distorted_labels_, - distorted_masks_) = sess.run([ - distorted_image, distorted_boxes, distorted_labels, distorted_masks - ]) - - expected_boxes = np.array( - [[0.0, 0.25, 0.375, 1.0], [0.125, 0.5, 0.375, 1.0]], dtype=np.float32) - self.assertAllEqual(distorted_image_.shape, [1, 400, 400, 3]) - self.assertAllEqual(distorted_labels_, [1, 2]) - self.assertAllClose(distorted_boxes_.flatten(), - expected_boxes.flatten()) - self.assertAllEqual(distorted_masks_.shape, [2, 400, 400]) - - def testRunRandomPadToAspectRatioWithKeypoints(self): - image = self.createColorfulTestImage() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - keypoints = self.createTestKeypoints() - - tensor_dict = { - fields.InputDataFields.image: image, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_keypoints: keypoints - } - - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_keypoints=True) - - preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, {})] - - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - distorted_image = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - distorted_keypoints = distorted_tensor_dict[ - 
fields.InputDataFields.groundtruth_keypoints] - with self.test_session() as sess: - (distorted_image_, distorted_boxes_, distorted_labels_, - distorted_keypoints_) = sess.run([ - distorted_image, distorted_boxes, distorted_labels, - distorted_keypoints - ]) - - expected_boxes = np.array( - [[0.0, 0.25, 0.375, 1.0], [0.125, 0.5, 0.375, 1.0]], dtype=np.float32) - expected_keypoints = np.array([ - [[0.05, 0.1], [0.1, 0.2], [0.15, 0.3]], - [[0.2, 0.4], [0.25, 0.5], [0.3, 0.6]], - ], dtype=np.float32) - self.assertAllEqual(distorted_image_.shape, [1, 400, 400, 3]) - self.assertAllEqual(distorted_labels_, [1, 2]) - self.assertAllClose(distorted_boxes_.flatten(), - expected_boxes.flatten()) - self.assertAllClose(distorted_keypoints_.flatten(), - expected_keypoints.flatten()) - - def testRandomPadImageWithCache(self): - preprocess_options = [(preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1,}), (preprocessor.random_pad_image, {})] - self._testPreprocessorCache(preprocess_options, - test_boxes=True, - test_masks=True, - test_keypoints=True) - - def testRandomPadImage(self): - preprocessing_options = [(preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })] - - images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - images = tensor_dict[fields.InputDataFields.image] - - preprocessing_options = [(preprocessor.random_pad_image, {})] - padded_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - - padded_images = padded_tensor_dict[fields.InputDataFields.image] - padded_boxes = padded_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - boxes_shape = tf.shape(boxes) - padded_boxes_shape = tf.shape(padded_boxes) - images_shape = tf.shape(images) - padded_images_shape = tf.shape(padded_images) - - with self.test_session() as sess: - (boxes_shape_, padded_boxes_shape_, images_shape_, - padded_images_shape_, boxes_, padded_boxes_) = sess.run( - [boxes_shape, padded_boxes_shape, images_shape, - padded_images_shape, boxes, padded_boxes]) - self.assertAllEqual(boxes_shape_, padded_boxes_shape_) - self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all()) - self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all()) - self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all()) - self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all()) - self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= ( - padded_boxes_[:, 2] - padded_boxes_[:, 0]))) - self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= ( - padded_boxes_[:, 3] - padded_boxes_[:, 1]))) - - def testRandomCropPadImageWithCache(self): - preprocess_options = [(preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1,}), (preprocessor.random_crop_pad_image, {})] - self._testPreprocessorCache(preprocess_options, - test_boxes=True, - test_masks=True, - test_keypoints=True) - - def testRandomCropPadImageWithRandomCoefOne(self): - preprocessing_options = [(preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })] - - 
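# A random_coef of 1.0 should make random_crop_pad_image always keep the - # full image rather than cropping (assuming random_coef is the probability - # of keeping the original image, as in random_crop_image), so the checks - # below exercise only the padding behavior. - 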
images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - images = tensor_dict[fields.InputDataFields.image] - - preprocessing_options = [(preprocessor.random_crop_pad_image, { - 'random_coef': 1.0 - })] - padded_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - - padded_images = padded_tensor_dict[fields.InputDataFields.image] - padded_boxes = padded_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - boxes_shape = tf.shape(boxes) - padded_boxes_shape = tf.shape(padded_boxes) - images_shape = tf.shape(images) - padded_images_shape = tf.shape(padded_images) - - with self.test_session() as sess: - (boxes_shape_, padded_boxes_shape_, images_shape_, - padded_images_shape_, boxes_, padded_boxes_) = sess.run( - [boxes_shape, padded_boxes_shape, images_shape, - padded_images_shape, boxes, padded_boxes]) - self.assertAllEqual(boxes_shape_, padded_boxes_shape_) - self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all()) - self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all()) - self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all()) - self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all()) - self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= ( - padded_boxes_[:, 2] - padded_boxes_[:, 0]))) - self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= ( - padded_boxes_[:, 3] - padded_boxes_[:, 1]))) - - def testRandomCropToAspectRatio(self): - images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - tensor_dict = preprocessor.preprocess(tensor_dict, []) - images = tensor_dict[fields.InputDataFields.image] - - preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, { - 'aspect_ratio': 2.0 - })] - cropped_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - - cropped_images = cropped_tensor_dict[fields.InputDataFields.image] - cropped_boxes = cropped_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - boxes_shape = tf.shape(boxes) - cropped_boxes_shape = tf.shape(cropped_boxes) - images_shape = tf.shape(images) - cropped_images_shape = tf.shape(cropped_images) - - with self.test_session() as sess: - (boxes_shape_, cropped_boxes_shape_, images_shape_, - cropped_images_shape_) = sess.run([ - boxes_shape, cropped_boxes_shape, images_shape, cropped_images_shape - ]) - self.assertAllEqual(boxes_shape_, cropped_boxes_shape_) - self.assertEqual(images_shape_[1], cropped_images_shape_[1] * 2) - self.assertEqual(images_shape_[2], cropped_images_shape_[2]) - - def testRandomPadToAspectRatio(self): - images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - tensor_dict = preprocessor.preprocess(tensor_dict, []) - images = tensor_dict[fields.InputDataFields.image] - - preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, { - 'aspect_ratio': 2.0 - })] - 
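# random_pad_to_aspect_ratio only pads, so reaching an aspect ratio - # (width/height) of 2.0 from the square test images should double the - # width and leave the height unchanged, which the shape assertions - # below verify. - 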
padded_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - - padded_images = padded_tensor_dict[fields.InputDataFields.image] - padded_boxes = padded_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - boxes_shape = tf.shape(boxes) - padded_boxes_shape = tf.shape(padded_boxes) - images_shape = tf.shape(images) - padded_images_shape = tf.shape(padded_images) - - with self.test_session() as sess: - (boxes_shape_, padded_boxes_shape_, images_shape_, - padded_images_shape_) = sess.run([ - boxes_shape, padded_boxes_shape, images_shape, padded_images_shape - ]) - self.assertAllEqual(boxes_shape_, padded_boxes_shape_) - self.assertEqual(images_shape_[1], padded_images_shape_[1]) - self.assertEqual(2 * images_shape_[2], padded_images_shape_[2]) - - def testRandomBlackPatchesWithCache(self): - preprocess_options = [] - preprocess_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocess_options.append((preprocessor.random_black_patches, { - 'size_to_image_ratio': 0.5 - })) - self._testPreprocessorCache(preprocess_options, - test_boxes=True, - test_masks=True, - test_keypoints=True) - - def testRandomBlackPatches(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_black_patches, { - 'size_to_image_ratio': 0.5 - })) - images = self.createTestImages() - tensor_dict = {fields.InputDataFields.image: images} - blacked_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - blacked_images = blacked_tensor_dict[fields.InputDataFields.image] - images_shape = tf.shape(images) - blacked_images_shape = tf.shape(blacked_images) - - with self.test_session() as sess: - (images_shape_, blacked_images_shape_) = sess.run( - [images_shape, blacked_images_shape]) - self.assertAllEqual(images_shape_, blacked_images_shape_) - - def testRandomResizeMethodWithCache(self): - preprocess_options = [] - preprocess_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocess_options.append((preprocessor.random_resize_method, { - 'target_size': (75, 150) - })) - self._testPreprocessorCache(preprocess_options, - test_boxes=True, - test_masks=True, - test_keypoints=True) - - def testRandomResizeMethod(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_resize_method, { - 'target_size': (75, 150) - })) - images = self.createTestImages() - tensor_dict = {fields.InputDataFields.image: images} - resized_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - resized_images = resized_tensor_dict[fields.InputDataFields.image] - resized_images_shape = tf.shape(resized_images) - expected_images_shape = tf.constant([1, 75, 150, 3], dtype=tf.int32) - - with self.test_session() as sess: - (expected_images_shape_, resized_images_shape_) = sess.run( - [expected_images_shape, resized_images_shape]) - self.assertAllEqual(expected_images_shape_, - resized_images_shape_) - - def testResizeImageWithMasks(self): - """Tests image resizing, checking output sizes.""" - in_image_shape_list 
= [[60, 40, 3], [15, 30, 3]] - in_masks_shape_list = [[15, 60, 40], [10, 15, 30]] - height = 50 - width = 100 - expected_image_shape_list = [[50, 100, 3], [50, 100, 3]] - expected_masks_shape_list = [[15, 50, 100], [10, 50, 100]] - - for (in_image_shape, expected_image_shape, in_masks_shape, - expected_mask_shape) in zip(in_image_shape_list, - expected_image_shape_list, - in_masks_shape_list, - expected_masks_shape_list): - in_image = tf.random_uniform(in_image_shape) - in_masks = tf.random_uniform(in_masks_shape) - out_image, out_masks, _ = preprocessor.resize_image( - in_image, in_masks, new_height=height, new_width=width) - out_image_shape = tf.shape(out_image) - out_masks_shape = tf.shape(out_masks) - - with self.test_session() as sess: - out_image_shape, out_masks_shape = sess.run( - [out_image_shape, out_masks_shape]) - self.assertAllEqual(out_image_shape, expected_image_shape) - self.assertAllEqual(out_masks_shape, expected_mask_shape) - - def testResizeImageWithMasksTensorInputHeightAndWidth(self): - """Tests image resizing, checking output sizes.""" - in_image_shape_list = [[60, 40, 3], [15, 30, 3]] - in_masks_shape_list = [[15, 60, 40], [10, 15, 30]] - height = tf.constant(50, dtype=tf.int32) - width = tf.constant(100, dtype=tf.int32) - expected_image_shape_list = [[50, 100, 3], [50, 100, 3]] - expected_masks_shape_list = [[15, 50, 100], [10, 50, 100]] - - for (in_image_shape, expected_image_shape, in_masks_shape, - expected_mask_shape) in zip(in_image_shape_list, - expected_image_shape_list, - in_masks_shape_list, - expected_masks_shape_list): - in_image = tf.random_uniform(in_image_shape) - in_masks = tf.random_uniform(in_masks_shape) - out_image, out_masks, _ = preprocessor.resize_image( - in_image, in_masks, new_height=height, new_width=width) - out_image_shape = tf.shape(out_image) - out_masks_shape = tf.shape(out_masks) - - with self.test_session() as sess: - out_image_shape, out_masks_shape = sess.run( - [out_image_shape, out_masks_shape]) - self.assertAllEqual(out_image_shape, expected_image_shape) - self.assertAllEqual(out_masks_shape, expected_mask_shape) - - def testResizeImageWithNoInstanceMask(self): - """Tests image resizing, checking output sizes.""" - in_image_shape_list = [[60, 40, 3], [15, 30, 3]] - in_masks_shape_list = [[0, 60, 40], [0, 15, 30]] - height = 50 - width = 100 - expected_image_shape_list = [[50, 100, 3], [50, 100, 3]] - expected_masks_shape_list = [[0, 50, 100], [0, 50, 100]] - - for (in_image_shape, expected_image_shape, in_masks_shape, - expected_mask_shape) in zip(in_image_shape_list, - expected_image_shape_list, - in_masks_shape_list, - expected_masks_shape_list): - in_image = tf.random_uniform(in_image_shape) - in_masks = tf.random_uniform(in_masks_shape) - out_image, out_masks, _ = preprocessor.resize_image( - in_image, in_masks, new_height=height, new_width=width) - out_image_shape = tf.shape(out_image) - out_masks_shape = tf.shape(out_masks) - - with self.test_session() as sess: - out_image_shape, out_masks_shape = sess.run( - [out_image_shape, out_masks_shape]) - self.assertAllEqual(out_image_shape, expected_image_shape) - self.assertAllEqual(out_masks_shape, expected_mask_shape) - - def testResizeToRangePreservesStaticSpatialShape(self): - """Tests image resizing, checking output sizes.""" - in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]] - min_dim = 50 - max_dim = 100 - expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]] - - for in_shape, expected_shape in zip(in_shape_list, expected_shape_list): - in_image = 
tf.random_uniform(in_shape) - out_image, _ = preprocessor.resize_to_range( - in_image, min_dimension=min_dim, max_dimension=max_dim) - self.assertAllEqual(out_image.get_shape().as_list(), expected_shape) - - def testResizeToRangeWithDynamicSpatialShape(self): - """Tests image resizing, checking output sizes.""" - in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]] - min_dim = 50 - max_dim = 100 - expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]] - - for in_shape, expected_shape in zip(in_shape_list, expected_shape_list): - in_image = tf.placeholder(tf.float32, shape=(None, None, 3)) - out_image, _ = preprocessor.resize_to_range( - in_image, min_dimension=min_dim, max_dimension=max_dim) - out_image_shape = tf.shape(out_image) - with self.test_session() as sess: - out_image_shape = sess.run(out_image_shape, - feed_dict={in_image: - np.random.randn(*in_shape)}) - self.assertAllEqual(out_image_shape, expected_shape) - - def testResizeToRangeWithPadToMaxDimensionReturnsCorrectShapes(self): - in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]] - min_dim = 50 - max_dim = 100 - expected_shape_list = [[100, 100, 3], [100, 100, 3], [100, 100, 3]] - - for in_shape, expected_shape in zip(in_shape_list, expected_shape_list): - in_image = tf.placeholder(tf.float32, shape=(None, None, 3)) - out_image, _ = preprocessor.resize_to_range( - in_image, - min_dimension=min_dim, - max_dimension=max_dim, - pad_to_max_dimension=True) - self.assertAllEqual(out_image.shape.as_list(), expected_shape) - out_image_shape = tf.shape(out_image) - with self.test_session() as sess: - out_image_shape = sess.run( - out_image_shape, feed_dict={in_image: np.random.randn(*in_shape)}) - self.assertAllEqual(out_image_shape, expected_shape) - - def testResizeToRangeWithPadToMaxDimensionReturnsCorrectTensor(self): - in_image_np = np.array([[[0, 1, 2]]], np.float32) - ex_image_np = np.array( - [[[0, 1, 2], [123.68, 116.779, 103.939]], - [[123.68, 116.779, 103.939], [123.68, 116.779, 103.939]]], np.float32) - min_dim = 1 - max_dim = 2 - - in_image = tf.placeholder(tf.float32, shape=(None, None, 3)) - out_image, _ = preprocessor.resize_to_range( - in_image, - min_dimension=min_dim, - max_dimension=max_dim, - pad_to_max_dimension=True, - per_channel_pad_value=(123.68, 116.779, 103.939)) - - with self.test_session() as sess: - out_image_np = sess.run(out_image, feed_dict={in_image: in_image_np}) - self.assertAllClose(ex_image_np, out_image_np) - - def testResizeToRangeWithMasksPreservesStaticSpatialShape(self): - """Tests image resizing, checking output sizes.""" - in_image_shape_list = [[60, 40, 3], [15, 30, 3]] - in_masks_shape_list = [[15, 60, 40], [10, 15, 30]] - min_dim = 50 - max_dim = 100 - expected_image_shape_list = [[75, 50, 3], [50, 100, 3]] - expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]] - - for (in_image_shape, expected_image_shape, in_masks_shape, - expected_mask_shape) in zip(in_image_shape_list, - expected_image_shape_list, - in_masks_shape_list, - expected_masks_shape_list): - in_image = tf.random_uniform(in_image_shape) - in_masks = tf.random_uniform(in_masks_shape) - out_image, out_masks, _ = preprocessor.resize_to_range( - in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim) - self.assertAllEqual(out_masks.get_shape().as_list(), expected_mask_shape) - self.assertAllEqual(out_image.get_shape().as_list(), expected_image_shape) - - def testResizeToRangeWithMasksAndDynamicSpatialShape(self): - """Tests image resizing, checking output sizes.""" - in_image_shape_list = [[60, 
40, 3], [15, 30, 3]] - in_masks_shape_list = [[15, 60, 40], [10, 15, 30]] - min_dim = 50 - max_dim = 100 - expected_image_shape_list = [[75, 50, 3], [50, 100, 3]] - expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]] - - for (in_image_shape, expected_image_shape, in_masks_shape, - expected_mask_shape) in zip(in_image_shape_list, - expected_image_shape_list, - in_masks_shape_list, - expected_masks_shape_list): - in_image = tf.placeholder(tf.float32, shape=(None, None, 3)) - in_masks = tf.placeholder(tf.float32, shape=(None, None, None)) - out_image, out_masks, _ = preprocessor.resize_to_range( - in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim) - out_image_shape = tf.shape(out_image) - out_masks_shape = tf.shape(out_masks) - - with self.test_session() as sess: - out_image_shape, out_masks_shape = sess.run( - [out_image_shape, out_masks_shape], - feed_dict={ - in_image: np.random.randn(*in_image_shape), - in_masks: np.random.randn(*in_masks_shape) - }) - self.assertAllEqual(out_image_shape, expected_image_shape) - self.assertAllEqual(out_masks_shape, expected_mask_shape) - - def testResizeToRangeWithInstanceMasksTensorOfSizeZero(self): - """Tests image resizing, checking output sizes.""" - in_image_shape_list = [[60, 40, 3], [15, 30, 3]] - in_masks_shape_list = [[0, 60, 40], [0, 15, 30]] - min_dim = 50 - max_dim = 100 - expected_image_shape_list = [[75, 50, 3], [50, 100, 3]] - expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]] - - for (in_image_shape, expected_image_shape, in_masks_shape, - expected_mask_shape) in zip(in_image_shape_list, - expected_image_shape_list, - in_masks_shape_list, - expected_masks_shape_list): - in_image = tf.random_uniform(in_image_shape) - in_masks = tf.random_uniform(in_masks_shape) - out_image, out_masks, _ = preprocessor.resize_to_range( - in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim) - out_image_shape = tf.shape(out_image) - out_masks_shape = tf.shape(out_masks) - - with self.test_session() as sess: - out_image_shape, out_masks_shape = sess.run( - [out_image_shape, out_masks_shape]) - self.assertAllEqual(out_image_shape, expected_image_shape) - self.assertAllEqual(out_masks_shape, expected_mask_shape) - - def testResizeToRange4DImageTensor(self): - image = tf.random_uniform([1, 200, 300, 3]) - with self.assertRaises(ValueError): - preprocessor.resize_to_range(image, 500, 600) - - def testResizeToRangeSameMinMax(self): - """Tests image resizing, checking output sizes.""" - in_shape_list = [[312, 312, 3], [299, 299, 3]] - min_dim = 320 - max_dim = 320 - expected_shape_list = [[320, 320, 3], [320, 320, 3]] - - for in_shape, expected_shape in zip(in_shape_list, expected_shape_list): - in_image = tf.random_uniform(in_shape) - out_image, _ = preprocessor.resize_to_range( - in_image, min_dimension=min_dim, max_dimension=max_dim) - out_image_shape = tf.shape(out_image) - - with self.test_session() as sess: - out_image_shape = sess.run(out_image_shape) - self.assertAllEqual(out_image_shape, expected_shape) - - def testResizeToMinDimensionTensorShapes(self): - in_image_shape_list = [[60, 55, 3], [15, 30, 3]] - in_masks_shape_list = [[15, 60, 55], [10, 15, 30]] - min_dim = 50 - expected_image_shape_list = [[60, 55, 3], [50, 100, 3]] - expected_masks_shape_list = [[15, 60, 55], [10, 50, 100]] - - for (in_image_shape, expected_image_shape, in_masks_shape, - expected_mask_shape) in zip(in_image_shape_list, - expected_image_shape_list, - in_masks_shape_list, - 
expected_masks_shape_list): - in_image = tf.placeholder(tf.float32, shape=(None, None, 3)) - in_masks = tf.placeholder(tf.float32, shape=(None, None, None)) - out_image, out_masks, _ = preprocessor.resize_to_min_dimension( - in_image, in_masks, min_dimension=min_dim) - out_image_shape = tf.shape(out_image) - out_masks_shape = tf.shape(out_masks) - - with self.test_session() as sess: - out_image_shape, out_masks_shape = sess.run( - [out_image_shape, out_masks_shape], - feed_dict={ - in_image: np.random.randn(*in_image_shape), - in_masks: np.random.randn(*in_masks_shape) - }) - self.assertAllEqual(out_image_shape, expected_image_shape) - self.assertAllEqual(out_masks_shape, expected_mask_shape) - - def testResizeToMinDimensionWithInstanceMasksTensorOfSizeZero(self): - """Tests image resizing, checking output sizes.""" - in_image_shape_list = [[60, 40, 3], [15, 30, 3]] - in_masks_shape_list = [[0, 60, 40], [0, 15, 30]] - min_dim = 50 - expected_image_shape_list = [[75, 50, 3], [50, 100, 3]] - expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]] - - for (in_image_shape, expected_image_shape, in_masks_shape, - expected_mask_shape) in zip(in_image_shape_list, - expected_image_shape_list, - in_masks_shape_list, - expected_masks_shape_list): - in_image = tf.random_uniform(in_image_shape) - in_masks = tf.random_uniform(in_masks_shape) - out_image, out_masks, _ = preprocessor.resize_to_min_dimension( - in_image, in_masks, min_dimension=min_dim) - out_image_shape = tf.shape(out_image) - out_masks_shape = tf.shape(out_masks) - - with self.test_session() as sess: - out_image_shape, out_masks_shape = sess.run( - [out_image_shape, out_masks_shape]) - self.assertAllEqual(out_image_shape, expected_image_shape) - self.assertAllEqual(out_masks_shape, expected_mask_shape) - - def testResizeToMinDimensionRaisesErrorOn4DImage(self): - image = tf.random_uniform([1, 200, 300, 3]) - with self.assertRaises(ValueError): - preprocessor.resize_to_min_dimension(image, 500) - - def testScaleBoxesToPixelCoordinates(self): - """Tests box scaling, checking scaled values.""" - in_shape = [60, 40, 3] - in_boxes = [[0.1, 0.2, 0.4, 0.6], - [0.5, 0.3, 0.9, 0.7]] - - expected_boxes = [[6., 8., 24., 24.], - [30., 12., 54., 28.]] - - in_image = tf.random_uniform(in_shape) - in_boxes = tf.constant(in_boxes) - _, out_boxes = preprocessor.scale_boxes_to_pixel_coordinates( - in_image, boxes=in_boxes) - with self.test_session() as sess: - out_boxes = sess.run(out_boxes) - self.assertAllClose(out_boxes, expected_boxes) - - def testScaleBoxesToPixelCoordinatesWithKeypoints(self): - """Tests box and keypoint scaling, checking scaled values.""" - in_shape = [60, 40, 3] - in_boxes = self.createTestBoxes() - in_keypoints = self.createTestKeypoints() - - expected_boxes = [[0., 10., 45., 40.], - [15., 20., 45., 40.]] - expected_keypoints = [ - [[6., 4.], [12., 8.], [18., 12.]], - [[24., 16.], [30., 20.], [36., 24.]], - ] - - in_image = tf.random_uniform(in_shape) - _, out_boxes, out_keypoints = preprocessor.scale_boxes_to_pixel_coordinates( - in_image, boxes=in_boxes, keypoints=in_keypoints) - with self.test_session() as sess: - out_boxes_, out_keypoints_ = sess.run([out_boxes, out_keypoints]) - self.assertAllClose(out_boxes_, expected_boxes) - self.assertAllClose(out_keypoints_, expected_keypoints) - - def testSubtractChannelMean(self): - """Tests whether channel means have been subtracted.""" - with self.test_session(): - image = tf.zeros((240, 320, 3)) - means = [1, 2, 3] - actual = 
preprocessor.subtract_channel_mean(image, means=means) - actual = actual.eval() - - self.assertTrue((actual[:, :, 0] == -1).all()) - self.assertTrue((actual[:, :, 1] == -2).all()) - self.assertTrue((actual[:, :, 2] == -3).all()) - - def testOneHotEncoding(self): - """Tests one hot encoding of multiclass labels.""" - with self.test_session(): - labels = tf.constant([1, 4, 2], dtype=tf.int32) - one_hot = preprocessor.one_hot_encoding(labels, num_classes=5) - one_hot = one_hot.eval() - - self.assertAllEqual([0, 1, 1, 0, 1], one_hot) - - def testSSDRandomCropWithCache(self): - preprocess_options = [ - (preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - }), - (preprocessor.ssd_random_crop, {})] - self._testPreprocessorCache(preprocess_options, - test_boxes=True, - test_masks=False, - test_keypoints=False) - - def testSSDRandomCrop(self): - preprocessing_options = [ - (preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - }), - (preprocessor.ssd_random_crop, {})] - images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - distorted_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - distorted_images = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - - images_rank = tf.rank(images) - distorted_images_rank = tf.rank(distorted_images) - boxes_rank = tf.rank(boxes) - distorted_boxes_rank = tf.rank(distorted_boxes) - - with self.test_session() as sess: - (boxes_rank_, distorted_boxes_rank_, images_rank_, - distorted_images_rank_) = sess.run( - [boxes_rank, distorted_boxes_rank, images_rank, - distorted_images_rank]) - self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) - self.assertAllEqual(images_rank_, distorted_images_rank_) - - def testSSDRandomCropWithMultiClassScores(self): - preprocessing_options = [(preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - }), (preprocessor.ssd_random_crop, {})] - images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - multiclass_scores = self.createTestMultiClassScores() - - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.multiclass_scores: multiclass_scores, - } - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_multiclass_scores=True) - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - distorted_images = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_multiclass_scores = distorted_tensor_dict[ - fields.InputDataFields.multiclass_scores] - - images_rank = tf.rank(images) - distorted_images_rank = tf.rank(distorted_images) - boxes_rank = tf.rank(boxes) - distorted_boxes_rank = tf.rank(distorted_boxes) - multiclass_scores_rank = tf.rank(multiclass_scores) - distorted_multiclass_scores_rank = tf.rank(distorted_multiclass_scores) - - with 
self.test_session() as sess: - (boxes_rank_, distorted_boxes_, distorted_boxes_rank_, images_rank_, - distorted_images_rank_, multiclass_scores_rank_, - distorted_multiclass_scores_, - distorted_multiclass_scores_rank_) = sess.run([ - boxes_rank, distorted_boxes, distorted_boxes_rank, images_rank, - distorted_images_rank, multiclass_scores_rank, - distorted_multiclass_scores, distorted_multiclass_scores_rank - ]) - self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) - self.assertAllEqual(images_rank_, distorted_images_rank_) - self.assertAllEqual(multiclass_scores_rank_, - distorted_multiclass_scores_rank_) - self.assertAllEqual(distorted_boxes_.shape[0], - distorted_multiclass_scores_.shape[0]) - - def testSSDRandomCropPad(self): - images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - preprocessing_options = [ - (preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - }), - (preprocessor.ssd_random_crop_pad, {})] - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - distorted_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - distorted_images = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - - images_rank = tf.rank(images) - distorted_images_rank = tf.rank(distorted_images) - boxes_rank = tf.rank(boxes) - distorted_boxes_rank = tf.rank(distorted_boxes) - - with self.test_session() as sess: - (boxes_rank_, distorted_boxes_rank_, images_rank_, - distorted_images_rank_) = sess.run([ - boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank - ]) - self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) - self.assertAllEqual(images_rank_, distorted_images_rank_) - - def testSSDRandomCropFixedAspectRatioWithCache(self): - preprocess_options = [ - (preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - }), - (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})] - self._testPreprocessorCache(preprocess_options, - test_boxes=True, - test_masks=False, - test_keypoints=False) - - def _testSSDRandomCropFixedAspectRatio(self, - include_label_scores, - include_multiclass_scores, - include_instance_masks, - include_keypoints): - images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - preprocessing_options = [(preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - }), (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})] - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - if include_label_scores: - label_scores = self.createTestLabelScores() - tensor_dict[fields.InputDataFields.groundtruth_label_scores] = ( - label_scores) - if include_multiclass_scores: - multiclass_scores = self.createTestMultiClassScores() - tensor_dict[fields.InputDataFields.multiclass_scores] = ( - multiclass_scores) - if include_instance_masks: - masks = self.createTestMasks() - tensor_dict[fields.InputDataFields.groundtruth_instance_masks] = masks - if include_keypoints: - keypoints = self.createTestKeypoints() - 
tensor_dict[fields.InputDataFields.groundtruth_keypoints] = keypoints - - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_label_scores=include_label_scores, - include_multiclass_scores=include_multiclass_scores, - include_instance_masks=include_instance_masks, - include_keypoints=include_keypoints) - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - distorted_images = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - images_rank = tf.rank(images) - distorted_images_rank = tf.rank(distorted_images) - boxes_rank = tf.rank(boxes) - distorted_boxes_rank = tf.rank(distorted_boxes) - - with self.test_session() as sess: - (boxes_rank_, distorted_boxes_rank_, images_rank_, - distorted_images_rank_) = sess.run( - [boxes_rank, distorted_boxes_rank, images_rank, - distorted_images_rank]) - self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) - self.assertAllEqual(images_rank_, distorted_images_rank_) - - def testSSDRandomCropFixedAspectRatio(self): - self._testSSDRandomCropFixedAspectRatio(include_label_scores=False, - include_multiclass_scores=False, - include_instance_masks=False, - include_keypoints=False) - - def testSSDRandomCropFixedAspectRatioWithMultiClassScores(self): - self._testSSDRandomCropFixedAspectRatio(include_label_scores=False, - include_multiclass_scores=True, - include_instance_masks=False, - include_keypoints=False) - - def testSSDRandomCropFixedAspectRatioWithMasksAndKeypoints(self): - self._testSSDRandomCropFixedAspectRatio(include_label_scores=False, - include_multiclass_scores=False, - include_instance_masks=True, - include_keypoints=True) - - def testSSDRandomCropFixedAspectRatioWithLabelScoresMasksAndKeypoints(self): - self._testSSDRandomCropFixedAspectRatio(include_label_scores=True, - include_multiclass_scores=False, - include_instance_masks=True, - include_keypoints=True) - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/region_similarity_calculator.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/region_similarity_calculator.py deleted file mode 100644 index f344006a3c56c95021dae47fcf5195a1b9743d85..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/region_similarity_calculator.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Region Similarity Calculators for BoxLists. - -Region Similarity Calculators compare a pairwise measure of similarity -between the boxes in two BoxLists. 
-""" -from abc import ABCMeta -from abc import abstractmethod - -import tensorflow as tf - -from object_detection.core import box_list_ops - - -class RegionSimilarityCalculator(object): - """Abstract base class for region similarity calculator.""" - __metaclass__ = ABCMeta - - def compare(self, boxlist1, boxlist2, scope=None): - """Computes matrix of pairwise similarity between BoxLists. - - This op (to be overriden) computes a measure of pairwise similarity between - the boxes in the given BoxLists. Higher values indicate more similarity. - - Note that this method simply measures similarity and does not explicitly - perform a matching. - - Args: - boxlist1: BoxList holding N boxes. - boxlist2: BoxList holding M boxes. - scope: Op scope name. Defaults to 'Compare' if None. - - Returns: - a (float32) tensor of shape [N, M] with pairwise similarity score. - """ - with tf.name_scope(scope, 'Compare', [boxlist1, boxlist2]) as scope: - return self._compare(boxlist1, boxlist2) - - @abstractmethod - def _compare(self, boxlist1, boxlist2): - pass - - -class IouSimilarity(RegionSimilarityCalculator): - """Class to compute similarity based on Intersection over Union (IOU) metric. - - This class computes pairwise similarity between two BoxLists based on IOU. - """ - - def _compare(self, boxlist1, boxlist2): - """Compute pairwise IOU similarity between the two BoxLists. - - Args: - boxlist1: BoxList holding N boxes. - boxlist2: BoxList holding M boxes. - - Returns: - A tensor with shape [N, M] representing pairwise iou scores. - """ - return box_list_ops.iou(boxlist1, boxlist2) - - -class NegSqDistSimilarity(RegionSimilarityCalculator): - """Class to compute similarity based on the squared distance metric. - - This class computes pairwise similarity between two BoxLists based on the - negative squared distance metric. - """ - - def _compare(self, boxlist1, boxlist2): - """Compute matrix of (negated) sq distances. - - Args: - boxlist1: BoxList holding N boxes. - boxlist2: BoxList holding M boxes. - - Returns: - A tensor with shape [N, M] representing negated pairwise squared distance. - """ - return -1 * box_list_ops.sq_dist(boxlist1, boxlist2) - - -class IoaSimilarity(RegionSimilarityCalculator): - """Class to compute similarity based on Intersection over Area (IOA) metric. - - This class computes pairwise similarity between two BoxLists based on their - pairwise intersections divided by the areas of second BoxLists. - """ - - def _compare(self, boxlist1, boxlist2): - """Compute pairwise IOA similarity between the two BoxLists. - - Args: - boxlist1: BoxList holding N boxes. - boxlist2: BoxList holding M boxes. - - Returns: - A tensor with shape [N, M] representing pairwise IOA scores. - """ - return box_list_ops.ioa(boxlist1, boxlist2) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/region_similarity_calculator_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/region_similarity_calculator_test.py deleted file mode 100644 index 162151a3b53468a7724133ca681efc0df5293563..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/region_similarity_calculator_test.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for region_similarity_calculator.""" -import tensorflow as tf - -from object_detection.core import box_list -from object_detection.core import region_similarity_calculator - - -class RegionSimilarityCalculatorTest(tf.test.TestCase): - - def test_get_correct_pairwise_similarity_based_on_iou(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]]) - exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]] - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - iou_similarity_calculator = region_similarity_calculator.IouSimilarity() - iou_similarity = iou_similarity_calculator.compare(boxes1, boxes2) - with self.test_session() as sess: - iou_output = sess.run(iou_similarity) - self.assertAllClose(iou_output, exp_output) - - def test_get_correct_pairwise_similarity_based_on_squared_distances(self): - corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0], - [1.0, 1.0, 0.0, 2.0]]) - corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0], - [-4.0, 0.0, 0.0, 3.0], - [0.0, 0.0, 0.0, 0.0]]) - exp_output = [[-26, -25, 0], [-18, -27, -6]] - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - dist_similarity_calc = region_similarity_calculator.NegSqDistSimilarity() - dist_similarity = dist_similarity_calc.compare(boxes1, boxes2) - with self.test_session() as sess: - dist_output = sess.run(dist_similarity) - self.assertAllClose(dist_output, exp_output) - - def test_get_correct_pairwise_similarity_based_on_ioa(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]]) - exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0], - [1.0 / 12.0, 0.0, 5.0 / 400.0]] - exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0], - [0, 0], - [6.0 / 6.0, 5.0 / 5.0]] - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - ioa_similarity_calculator = region_similarity_calculator.IoaSimilarity() - ioa_similarity_1 = ioa_similarity_calculator.compare(boxes1, boxes2) - ioa_similarity_2 = ioa_similarity_calculator.compare(boxes2, boxes1) - with self.test_session() as sess: - iou_output_1, iou_output_2 = sess.run( - [ioa_similarity_1, ioa_similarity_2]) - self.assertAllClose(iou_output_1, exp_output_1) - self.assertAllClose(iou_output_2, exp_output_2) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/standard_fields.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/standard_fields.py deleted file mode 100644 index 11282da6deca075935d25e3558bfe1a25588fb20..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/standard_fields.py +++ /dev/null @@ -1,227 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Contains classes specifying naming conventions used for object detection. - - -Specifies: - InputDataFields: standard fields used by reader/preprocessor/batcher. - DetectionResultFields: standard fields returned by object detector. - BoxListFields: standard fields used by BoxList. - TfExampleFields: standard fields for tf-example data format (go/tf-example). -""" - - -class InputDataFields(object): - """Names for the input tensors. - - Holds the standard data field names to use for identifying input tensors. This - should be used by the decoder to identify keys for the returned tensor_dict - containing input tensors, and by the model to identify the tensors it needs. - - Attributes: - image: image. - image_additional_channels: additional channels. - original_image: image in the original input size. - key: unique key corresponding to image. - source_id: source of the original image. - filename: original filename of the dataset (without common path). - groundtruth_image_classes: image-level class labels. - groundtruth_boxes: coordinates of the ground truth boxes in the image. - groundtruth_classes: box-level class labels. - groundtruth_label_types: box-level label types (e.g. explicit negative). - groundtruth_is_crowd: [DEPRECATED, use groundtruth_group_of instead] - is the groundtruth a single object or a crowd. - groundtruth_area: area of a groundtruth segment. - groundtruth_difficult: is a `difficult` object. - groundtruth_group_of: is a `group_of` objects, e.g. multiple objects of the - same class, forming a connected group, where instances are heavily - occluding each other. - proposal_boxes: coordinates of object proposal boxes. - proposal_objectness: objectness score of each proposal. - groundtruth_instance_masks: ground truth instance masks. - groundtruth_instance_boundaries: ground truth instance boundaries. - groundtruth_instance_classes: instance mask-level class labels. - groundtruth_keypoints: ground truth keypoints. - groundtruth_keypoint_visibilities: ground truth keypoint visibilities. - groundtruth_label_scores: groundtruth label scores. - groundtruth_weights: groundtruth weight factor for bounding boxes. - num_groundtruth_boxes: number of groundtruth boxes. - true_image_shape: true shapes of images in the resized images, as resized - images can be padded with zeros. - verified_labels: list of human-verified image-level labels (note that a - label can be verified both as positive and negative). - multiclass_scores: the label score per class for each box. 
- """ - image = 'image' - image_additional_channels = 'image_additional_channels' - original_image = 'original_image' - key = 'key' - source_id = 'source_id' - filename = 'filename' - groundtruth_image_classes = 'groundtruth_image_classes' - groundtruth_boxes = 'groundtruth_boxes' - groundtruth_classes = 'groundtruth_classes' - groundtruth_label_types = 'groundtruth_label_types' - groundtruth_is_crowd = 'groundtruth_is_crowd' - groundtruth_area = 'groundtruth_area' - groundtruth_difficult = 'groundtruth_difficult' - groundtruth_group_of = 'groundtruth_group_of' - proposal_boxes = 'proposal_boxes' - proposal_objectness = 'proposal_objectness' - groundtruth_instance_masks = 'groundtruth_instance_masks' - groundtruth_instance_boundaries = 'groundtruth_instance_boundaries' - groundtruth_instance_classes = 'groundtruth_instance_classes' - groundtruth_keypoints = 'groundtruth_keypoints' - groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities' - groundtruth_label_scores = 'groundtruth_label_scores' - groundtruth_weights = 'groundtruth_weights' - num_groundtruth_boxes = 'num_groundtruth_boxes' - true_image_shape = 'true_image_shape' - verified_labels = 'verified_labels' - multiclass_scores = 'multiclass_scores' - - -class DetectionResultFields(object): - """Naming conventions for storing the output of the detector. - - Attributes: - source_id: source of the original image. - key: unique key corresponding to image. - detection_boxes: coordinates of the detection boxes in the image. - detection_scores: detection scores for the detection boxes in the image. - detection_classes: detection-level class labels. - detection_masks: contains a segmentation mask for each detection box. - detection_boundaries: contains an object boundary for each detection box. - detection_keypoints: contains detection keypoints for each detection box. - num_detections: number of detections in the batch. - """ - - source_id = 'source_id' - key = 'key' - detection_boxes = 'detection_boxes' - detection_scores = 'detection_scores' - detection_classes = 'detection_classes' - detection_masks = 'detection_masks' - detection_boundaries = 'detection_boundaries' - detection_keypoints = 'detection_keypoints' - num_detections = 'num_detections' - - -class BoxListFields(object): - """Naming conventions for BoxLists. - - Attributes: - boxes: bounding box coordinates. - classes: classes per bounding box. - scores: scores per bounding box. - weights: sample weights per bounding box. - objectness: objectness score per bounding box. - masks: masks per bounding box. - boundaries: boundaries per bounding box. - keypoints: keypoints per bounding box. - keypoint_heatmaps: keypoint heatmaps per bounding box. - is_crowd: is_crowd annotation per bounding box. - """ - boxes = 'boxes' - classes = 'classes' - scores = 'scores' - weights = 'weights' - objectness = 'objectness' - masks = 'masks' - boundaries = 'boundaries' - keypoints = 'keypoints' - keypoint_heatmaps = 'keypoint_heatmaps' - is_crowd = 'is_crowd' - - -class TfExampleFields(object): - """TF-example proto feature names for object detection. - - Holds the standard feature names to load from an Example proto for object - detection. - - Attributes: - image_encoded: JPEG encoded string - image_format: image format, e.g. "JPEG" - filename: filename - channels: number of channels of image - colorspace: colorspace, e.g. "RGB" - height: height of image in pixels, e.g. 462 - width: width of image in pixels, e.g. 
581 - source_id: original source of the image - image_class_text: image-level label in text format - image_class_label: image-level label in numerical format - object_class_text: labels in text format, e.g. ["person", "cat"] - object_class_label: labels in numbers, e.g. [16, 8] - object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30 - object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40 - object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50 - object_bbox_ymax: ymax coordinates of groundtruth box, e.g. 80, 70 - object_view: viewpoint of object, e.g. ["frontal", "left"] - object_truncated: is object truncated, e.g. [true, false] - object_occluded: is object occluded, e.g. [true, false] - object_difficult: is object difficult, e.g. [true, false] - object_group_of: is object a single object or a group of objects - object_depiction: is object a depiction - object_is_crowd: [DEPRECATED, use object_group_of instead] - is the object a single object or a crowd - object_segment_area: the area of the segment. - object_weight: a weight factor for the object's bounding box. - instance_masks: instance segmentation masks. - instance_boundaries: instance boundaries. - instance_classes: Classes for each instance segmentation mask. - detection_class_label: class label in numbers. - detection_bbox_ymin: ymin coordinates of a detection box. - detection_bbox_xmin: xmin coordinates of a detection box. - detection_bbox_ymax: ymax coordinates of a detection box. - detection_bbox_xmax: xmax coordinates of a detection box. - detection_score: detection score for the class label and box. - """ - image_encoded = 'image/encoded' - image_format = 'image/format' # format is reserved keyword - filename = 'image/filename' - channels = 'image/channels' - colorspace = 'image/colorspace' - height = 'image/height' - width = 'image/width' - source_id = 'image/source_id' - image_class_text = 'image/class/text' - image_class_label = 'image/class/label' - object_class_text = 'image/object/class/text' - object_class_label = 'image/object/class/label' - object_bbox_ymin = 'image/object/bbox/ymin' - object_bbox_xmin = 'image/object/bbox/xmin' - object_bbox_ymax = 'image/object/bbox/ymax' - object_bbox_xmax = 'image/object/bbox/xmax' - object_view = 'image/object/view' - object_truncated = 'image/object/truncated' - object_occluded = 'image/object/occluded' - object_difficult = 'image/object/difficult' - object_group_of = 'image/object/group_of' - object_depiction = 'image/object/depiction' - object_is_crowd = 'image/object/is_crowd' - object_segment_area = 'image/object/segment/area' - object_weight = 'image/object/weight' - instance_masks = 'image/segmentation/object' - instance_boundaries = 'image/boundaries/object' - instance_classes = 'image/segmentation/object/class' - detection_class_label = 'image/detection/label' - detection_bbox_ymin = 'image/detection/bbox/ymin' - detection_bbox_xmin = 'image/detection/bbox/xmin' - detection_bbox_ymax = 'image/detection/bbox/ymax' - detection_bbox_xmax = 'image/detection/bbox/xmax' - detection_score = 'image/detection/score' diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/target_assigner.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/target_assigner.py deleted file mode 100644 index 14e66def1fe0a873c96900288290491718d3d5ab..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/target_assigner.py +++ /dev/null @@ -1,458 +0,0 @@ -# Copyright 
2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Base target assigner module. - -The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and -groundtruth detections (bounding boxes), to assign classification and regression -targets to each anchor as well as weights to each anchor (specifying, e.g., -which anchors should not contribute to training loss). - -It assigns classification/regression targets by performing the following steps: -1) Computing pairwise similarity between anchors and groundtruth boxes using a - provided RegionSimilarity Calculator -2) Computing a matching based on the similarity matrix using a provided Matcher -3) Assigning regression targets based on the matching and a provided BoxCoder -4) Assigning classification targets based on the matching and groundtruth labels - -Note that TargetAssigners only operate on detections from a single -image at a time, so any logic for applying a TargetAssigner to multiple -images must be handled externally. -""" -import tensorflow as tf - -from object_detection.box_coders import faster_rcnn_box_coder -from object_detection.box_coders import mean_stddev_box_coder -from object_detection.core import box_coder as bcoder -from object_detection.core import box_list -from object_detection.core import matcher as mat -from object_detection.core import region_similarity_calculator as sim_calc -from object_detection.core import standard_fields as fields -from object_detection.matchers import argmax_matcher -from object_detection.matchers import bipartite_matcher -from object_detection.utils import shape_utils - - -class TargetAssigner(object): - """Target assigner to compute classification and regression targets.""" - - def __init__(self, similarity_calc, matcher, box_coder, - negative_class_weight=1.0, unmatched_cls_target=None): - """Construct Object Detection Target Assigner. - - Args: - similarity_calc: a RegionSimilarityCalculator - matcher: an object_detection.core.Matcher used to match groundtruth to - anchors. - box_coder: an object_detection.core.BoxCoder used to encode matching - groundtruth boxes with respect to anchors. - negative_class_weight: classification weight to be associated to negative - anchors (default: 1.0). The weight must be in [0., 1.]. - unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k] - which is consistent with the classification target for each - anchor (and can be empty for scalar targets). This shape must thus be - compatible with the groundtruth labels that are passed to the "assign" - function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). - If set to None, unmatched_cls_target is set to be [0] for each anchor. 
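Putting these constructor arguments together, the 'FasterRCNN'/'proposal' configuration produced by the `create_target_assigner` factory further down in this file can be built by hand; a minimal sketch:

```python
from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.core import region_similarity_calculator as sim_calc
from object_detection.core import target_assigner
from object_detection.matchers import argmax_matcher

proposal_assigner = target_assigner.TargetAssigner(
    similarity_calc=sim_calc.IouSimilarity(),
    matcher=argmax_matcher.ArgMaxMatcher(matched_threshold=0.7,
                                         unmatched_threshold=0.3,
                                         force_match_for_each_row=True),
    box_coder=faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=[10.0, 10.0, 5.0, 5.0]))
```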
- - Raises: - ValueError: if similarity_calc is not a RegionSimilarityCalculator or - if matcher is not a Matcher or if box_coder is not a BoxCoder - """ - if not isinstance(similarity_calc, sim_calc.RegionSimilarityCalculator): - raise ValueError('similarity_calc must be a RegionSimilarityCalculator') - if not isinstance(matcher, mat.Matcher): - raise ValueError('matcher must be a Matcher') - if not isinstance(box_coder, bcoder.BoxCoder): - raise ValueError('box_coder must be a BoxCoder') - self._similarity_calc = similarity_calc - self._matcher = matcher - self._box_coder = box_coder - self._negative_class_weight = negative_class_weight - if unmatched_cls_target is None: - self._unmatched_cls_target = tf.constant([0], tf.float32) - else: - self._unmatched_cls_target = unmatched_cls_target - - @property - def box_coder(self): - return self._box_coder - - def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None, - groundtruth_weights=None, **params): - """Assign classification and regression targets to each anchor. - - For a given set of anchors and groundtruth detections, match anchors - to groundtruth_boxes and assign classification and regression targets to - each anchor as well as weights based on the resulting match (specifying, - e.g., which anchors should not contribute to training loss). - - Anchors that are not matched to anything are given a classification target - of self._unmatched_cls_target which can be specified via the constructor. - - Args: - anchors: a BoxList representing N anchors - groundtruth_boxes: a BoxList representing M groundtruth boxes - groundtruth_labels: a tensor of shape [M, d_1, ... d_k] - with labels for each of the ground_truth boxes. The subshape - [d_1, ... d_k] can be empty (corresponding to scalar inputs). When set - to None, groundtruth_labels assumes a binary problem where all - ground_truth boxes get a positive label (of 1). - groundtruth_weights: a float tensor of shape [M] indicating the weight to - assign to all anchors match to a particular groundtruth box. The weights - must be in [0., 1.]. If None, all weights are set to 1. - **params: Additional keyword arguments for specific implementations of - the Matcher. - - Returns: - cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], - where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels - which has shape [num_gt_boxes, d_1, d_2, ... d_k]. - cls_weights: a float32 tensor with shape [num_anchors] - reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension] - reg_weights: a float32 tensor with shape [num_anchors] - match: a matcher.Match object encoding the match between anchors and - groundtruth boxes, with rows corresponding to groundtruth boxes - and columns corresponding to anchors. 
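A minimal call sketch, re-using the `proposal_assigner` built above (box coordinates hypothetical):

```python
import tensorflow as tf
from object_detection.core import box_list

anchors = box_list.BoxList(tf.constant([[0.0, 0.0, 0.5, 0.5],
                                        [0.5, 0.5, 1.0, 0.8]]))
gt_boxes = box_list.BoxList(tf.constant([[0.0, 0.0, 0.5, 0.5]]))

# groundtruth_labels=None treats this as a binary problem: every matched
# anchor gets a positive label of 1, so cls_targets has shape [2, 1].
(cls_targets, cls_weights, reg_targets,
 reg_weights, match) = proposal_assigner.assign(anchors, gt_boxes)
```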
- - Raises: - ValueError: if anchors or groundtruth_boxes are not of type - box_list.BoxList - """ - if not isinstance(anchors, box_list.BoxList): - raise ValueError('anchors must be an BoxList') - if not isinstance(groundtruth_boxes, box_list.BoxList): - raise ValueError('groundtruth_boxes must be an BoxList') - - if groundtruth_labels is None: - groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(), - 0)) - groundtruth_labels = tf.expand_dims(groundtruth_labels, -1) - unmatched_shape_assert = shape_utils.assert_shape_equal( - shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[1:], - shape_utils.combined_static_and_dynamic_shape( - self._unmatched_cls_target)) - labels_and_box_shapes_assert = shape_utils.assert_shape_equal( - shape_utils.combined_static_and_dynamic_shape( - groundtruth_labels)[:1], - shape_utils.combined_static_and_dynamic_shape( - groundtruth_boxes.get())[:1]) - - if groundtruth_weights is None: - num_gt_boxes = groundtruth_boxes.num_boxes_static() - if not num_gt_boxes: - num_gt_boxes = groundtruth_boxes.num_boxes() - groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32) - with tf.control_dependencies( - [unmatched_shape_assert, labels_and_box_shapes_assert]): - match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes, - anchors) - match = self._matcher.match(match_quality_matrix, **params) - reg_targets = self._create_regression_targets(anchors, - groundtruth_boxes, - match) - cls_targets = self._create_classification_targets(groundtruth_labels, - match) - reg_weights = self._create_regression_weights(match, groundtruth_weights) - cls_weights = self._create_classification_weights(match, - groundtruth_weights) - - num_anchors = anchors.num_boxes_static() - if num_anchors is not None: - reg_targets = self._reset_target_shape(reg_targets, num_anchors) - cls_targets = self._reset_target_shape(cls_targets, num_anchors) - reg_weights = self._reset_target_shape(reg_weights, num_anchors) - cls_weights = self._reset_target_shape(cls_weights, num_anchors) - - return cls_targets, cls_weights, reg_targets, reg_weights, match - - def _reset_target_shape(self, target, num_anchors): - """Sets the static shape of the target. - - Args: - target: the target tensor. Its first dimension will be overwritten. - num_anchors: the number of anchors, which is used to override the target's - first dimension. - - Returns: - A tensor with the shape info filled in. - """ - target_shape = target.get_shape().as_list() - target_shape[0] = num_anchors - target.set_shape(target_shape) - return target - - def _create_regression_targets(self, anchors, groundtruth_boxes, match): - """Returns a regression target for each anchor. 
- - Args: - anchors: a BoxList representing N anchors - groundtruth_boxes: a BoxList representing M groundtruth_boxes - match: a matcher.Match object - - Returns: - reg_targets: a float32 tensor with shape [N, box_code_dimension] - """ - matched_gt_boxes = match.gather_based_on_match( - groundtruth_boxes.get(), - unmatched_value=tf.zeros(4), - ignored_value=tf.zeros(4)) - matched_gt_boxlist = box_list.BoxList(matched_gt_boxes) - if groundtruth_boxes.has_field(fields.BoxListFields.keypoints): - groundtruth_keypoints = groundtruth_boxes.get_field( - fields.BoxListFields.keypoints) - matched_keypoints = match.gather_based_on_match( - groundtruth_keypoints, - unmatched_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]), - ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:])) - matched_gt_boxlist.add_field(fields.BoxListFields.keypoints, - matched_keypoints) - matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors) - match_results_shape = shape_utils.combined_static_and_dynamic_shape( - match.match_results) - - # Zero out the unmatched and ignored regression targets. - unmatched_ignored_reg_targets = tf.tile( - self._default_regression_target(), [match_results_shape[0], 1]) - matched_anchors_mask = match.matched_column_indicator() - reg_targets = tf.where(matched_anchors_mask, - matched_reg_targets, - unmatched_ignored_reg_targets) - return reg_targets - - def _default_regression_target(self): - """Returns the default target for anchors to regress to. - - Default regression targets are set to zero (though in - this implementation what these targets are set to should - not matter as the regression weight of any box set to - regress to the default target is zero). - - Returns: - default_target: a float32 tensor with shape [1, box_code_dimension] - """ - return tf.constant([self._box_coder.code_size*[0]], tf.float32) - - def _create_classification_targets(self, groundtruth_labels, match): - """Create classification targets for each anchor. - - Assign a classification target of for each anchor to the matching - groundtruth label that is provided by match. Anchors that are not matched - to anything are given the target self._unmatched_cls_target - - Args: - groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k] - with labels for each of the ground_truth boxes. The subshape - [d_1, ... d_k] can be empty (corresponding to scalar labels). - match: a matcher.Match object that provides a matching between anchors - and groundtruth boxes. - - Returns: - a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], where the - subshape [d_1, ..., d_k] is compatible with groundtruth_labels which has - shape [num_gt_boxes, d_1, d_2, ... d_k]. - """ - return match.gather_based_on_match( - groundtruth_labels, - unmatched_value=self._unmatched_cls_target, - ignored_value=self._unmatched_cls_target) - - def _create_regression_weights(self, match, groundtruth_weights): - """Set regression weight for each anchor. - - Only positive anchors are set to contribute to the regression loss, so this - method returns a weight of 1 for every positive anchor and 0 for every - negative anchor. - - Args: - match: a matcher.Match object that provides a matching between anchors - and groundtruth boxes. - groundtruth_weights: a float tensor of shape [M] indicating the weight to - assign to all anchors match to a particular groundtruth box. - - Returns: - a float32 tensor with shape [num_anchors] representing regression weights. 
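All of the `_create_*` helpers in this class reduce to the same primitive, `match.gather_based_on_match`: for each anchor, look up the matched groundtruth row, or fall back to a default for unmatched (match result -1) and ignored (match result -2) anchors. A conceptual NumPy stand-in, not the real `Match` API:

```python
import numpy as np

def gather_based_on_match(values, match_results,
                          unmatched_value, ignored_value):
  # Illustrative restatement: match_results[i] >= 0 indexes a groundtruth
  # row; -1 means unmatched and -2 means ignored.
  gathered = []
  for m in match_results:
    if m >= 0:
      gathered.append(values[m])
    elif m == -1:
      gathered.append(unmatched_value)
    else:
      gathered.append(ignored_value)
  return np.array(gathered)

# Regression weights: matched anchors inherit the groundtruth weight,
# unmatched and ignored anchors contribute nothing to the localization loss.
print(gather_based_on_match(np.array([1.0, 1.0]), [0, -1, 1, -2],
                            unmatched_value=0.0, ignored_value=0.0))
# -> [1. 0. 1. 0.]
```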
- """ - return match.gather_based_on_match( - groundtruth_weights, ignored_value=0., unmatched_value=0.) - - def _create_classification_weights(self, - match, - groundtruth_weights): - """Create classification weights for each anchor. - - Positive (matched) anchors are associated with a weight of - positive_class_weight and negative (unmatched) anchors are associated with - a weight of negative_class_weight. When anchors are ignored, weights are set - to zero. By default, both positive/negative weights are set to 1.0, - but they can be adjusted to handle class imbalance (which is almost always - the case in object detection). - - Args: - match: a matcher.Match object that provides a matching between anchors - and groundtruth boxes. - groundtruth_weights: a float tensor of shape [M] indicating the weight to - assign to all anchors match to a particular groundtruth box. - - Returns: - a float32 tensor with shape [num_anchors] representing classification - weights. - """ - return match.gather_based_on_match( - groundtruth_weights, - ignored_value=0., - unmatched_value=self._negative_class_weight) - - def get_box_coder(self): - """Get BoxCoder of this TargetAssigner. - - Returns: - BoxCoder object. - """ - return self._box_coder - - -# TODO(rathodv): This method pulls in all the implementation dependencies into -# core. Therefore its best to have this factory method outside of core. -def create_target_assigner(reference, stage=None, - negative_class_weight=1.0, - unmatched_cls_target=None): - """Factory function for creating standard target assigners. - - Args: - reference: string referencing the type of TargetAssigner. - stage: string denoting stage: {proposal, detection}. - negative_class_weight: classification weight to be associated to negative - anchors (default: 1.0) - unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k] - which is consistent with the classification target for each - anchor (and can be empty for scalar targets). This shape must thus be - compatible with the groundtruth labels that are passed to the Assign - function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). - If set to None, unmatched_cls_target is set to be 0 for each anchor. - - Returns: - TargetAssigner: desired target assigner. - - Raises: - ValueError: if combination reference+stage is invalid. - """ - if reference == 'Multibox' and stage == 'proposal': - similarity_calc = sim_calc.NegSqDistSimilarity() - matcher = bipartite_matcher.GreedyBipartiteMatcher() - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() - - elif reference == 'FasterRCNN' and stage == 'proposal': - similarity_calc = sim_calc.IouSimilarity() - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.7, - unmatched_threshold=0.3, - force_match_for_each_row=True) - box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder( - scale_factors=[10.0, 10.0, 5.0, 5.0]) - - elif reference == 'FasterRCNN' and stage == 'detection': - similarity_calc = sim_calc.IouSimilarity() - # Uses all proposals with IOU < 0.5 as candidate negatives. 
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, - negatives_lower_than_unmatched=True) - box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder( - scale_factors=[10.0, 10.0, 5.0, 5.0]) - - elif reference == 'FastRCNN': - similarity_calc = sim_calc.IouSimilarity() - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, - unmatched_threshold=0.1, - force_match_for_each_row=False, - negatives_lower_than_unmatched=False) - box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder() - - else: - raise ValueError('No valid combination of reference and stage.') - - return TargetAssigner(similarity_calc, matcher, box_coder, - negative_class_weight=negative_class_weight, - unmatched_cls_target=unmatched_cls_target) - - -def batch_assign_targets(target_assigner, - anchors_batch, - gt_box_batch, - gt_class_targets_batch, - gt_weights_batch=None): - """Batched assignment of classification and regression targets. - - Args: - target_assigner: a target assigner. - anchors_batch: BoxList representing N box anchors or list of BoxList objects - with length batch_size representing anchor sets. - gt_box_batch: a list of BoxList objects with length batch_size - representing groundtruth boxes for each image in the batch - gt_class_targets_batch: a list of tensors with length batch_size, where - each tensor has shape [num_gt_boxes_i, classification_target_size] and - num_gt_boxes_i is the number of boxes in the ith boxlist of - gt_box_batch. - gt_weights_batch: A list of 1-D tf.float32 tensors of shape - [num_boxes] containing weights for groundtruth boxes. - - Returns: - batch_cls_targets: a tensor with shape [batch_size, num_anchors, - num_classes], - batch_cls_weights: a tensor with shape [batch_size, num_anchors], - batch_reg_targets: a tensor with shape [batch_size, num_anchors, - box_code_dimension] - batch_reg_weights: a tensor with shape [batch_size, num_anchors], - match_list: a list of matcher.Match objects encoding the match between - anchors and groundtruth boxes for each image of the batch, - with rows of the Match objects corresponding to groundtruth boxes - and columns corresponding to anchors. - Raises: - ValueError: if input list lengths are inconsistent, i.e., - batch_size == len(gt_box_batch) == len(gt_class_targets_batch) - and batch_size == len(anchors_batch) unless anchors_batch is a single - BoxList. 
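A minimal call sketch (BoxList contents hypothetical): a single anchor set shared across a batch of two images, as exercised by the tests in target_assigner_test.py below:

```python
(batch_cls_targets, batch_cls_weights, batch_reg_targets,
 batch_reg_weights, match_list) = batch_assign_targets(
     target_assigner=some_assigner,   # any TargetAssigner instance
     anchors_batch=anchors_boxlist,   # shared across the batch
     gt_box_batch=[gt_boxlist_1, gt_boxlist_2],
     gt_class_targets_batch=[class_targets_1, class_targets_2])
```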
- """ - if not isinstance(anchors_batch, list): - anchors_batch = len(gt_box_batch) * [anchors_batch] - if not all( - isinstance(anchors, box_list.BoxList) for anchors in anchors_batch): - raise ValueError('anchors_batch must be a BoxList or list of BoxLists.') - if not (len(anchors_batch) - == len(gt_box_batch) - == len(gt_class_targets_batch)): - raise ValueError('batch size incompatible with lengths of anchors_batch, ' - 'gt_box_batch and gt_class_targets_batch.') - cls_targets_list = [] - cls_weights_list = [] - reg_targets_list = [] - reg_weights_list = [] - match_list = [] - if gt_weights_batch is None: - gt_weights_batch = [None] * len(gt_class_targets_batch) - for anchors, gt_boxes, gt_class_targets, gt_weights in zip( - anchors_batch, gt_box_batch, gt_class_targets_batch, gt_weights_batch): - (cls_targets, cls_weights, reg_targets, - reg_weights, match) = target_assigner.assign( - anchors, gt_boxes, gt_class_targets, gt_weights) - cls_targets_list.append(cls_targets) - cls_weights_list.append(cls_weights) - reg_targets_list.append(reg_targets) - reg_weights_list.append(reg_weights) - match_list.append(match) - batch_cls_targets = tf.stack(cls_targets_list) - batch_cls_weights = tf.stack(cls_weights_list) - batch_reg_targets = tf.stack(reg_targets_list) - batch_reg_weights = tf.stack(reg_weights_list) - return (batch_cls_targets, batch_cls_weights, batch_reg_targets, - batch_reg_weights, match_list) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/target_assigner_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/core/target_assigner_test.py deleted file mode 100644 index 34a35b6435bcd364faf36ba4f130f1310f6d8b22..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/core/target_assigner_test.py +++ /dev/null @@ -1,827 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.core.target_assigner.""" -import numpy as np -import tensorflow as tf - -from object_detection.box_coders import keypoint_box_coder -from object_detection.box_coders import mean_stddev_box_coder -from object_detection.core import box_list -from object_detection.core import region_similarity_calculator -from object_detection.core import standard_fields as fields -from object_detection.core import target_assigner as targetassigner -from object_detection.matchers import argmax_matcher -from object_detection.matchers import bipartite_matcher -from object_detection.utils import test_case - - -class TargetAssignerTest(test_case.TestCase): - - def test_assign_agnostic(self): - def graph_fn(anchor_means, groundtruth_box_corners): - similarity_calc = region_similarity_calculator.IouSimilarity() - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, - unmatched_threshold=0.5) - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) - target_assigner = targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, unmatched_cls_target=None) - anchors_boxlist = box_list.BoxList(anchor_means) - groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) - result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist) - (cls_targets, cls_weights, reg_targets, reg_weights, _) = result - return (cls_targets, cls_weights, reg_targets, reg_weights) - - anchor_means = np.array([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 1.0, 0.8], - [0, 0.5, .5, 1.0]], dtype=np.float32) - groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.9, 0.9]], - dtype=np.float32) - exp_cls_targets = [[1], [1], [0]] - exp_cls_weights = [1, 1, 1] - exp_reg_targets = [[0, 0, 0, 0], - [0, 0, -1, 1], - [0, 0, 0, 0]] - exp_reg_weights = [1, 1, 0] - - (cls_targets_out, - cls_weights_out, reg_targets_out, reg_weights_out) = self.execute( - graph_fn, [anchor_means, groundtruth_box_corners]) - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - self.assertEquals(cls_targets_out.dtype, np.float32) - self.assertEquals(cls_weights_out.dtype, np.float32) - self.assertEquals(reg_targets_out.dtype, np.float32) - self.assertEquals(reg_weights_out.dtype, np.float32) - - def test_assign_class_agnostic_with_ignored_matches(self): - # Note: test is very similar to above. The third box matched with an IOU - # of 0.35, which is between the matched and unmatched threshold. This means - # That like above the expected classification targets are [1, 1, 0]. - # Unlike above, the third target is ignored and therefore expected - # classification weights are [1, 1, 0]. 
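The three-way outcome the comment above describes comes from `ArgMaxMatcher`'s two thresholds; an illustrative restatement:

```python
def match_kind(iou, matched_threshold=0.5, unmatched_threshold=0.3):
  # Illustrative restatement of ArgMaxMatcher's per-anchor decision.
  if iou >= matched_threshold:
    return 'matched'    # contributes to classification and regression loss
  if iou >= unmatched_threshold:
    return 'ignored'    # classification weight 0: excluded from the loss
  return 'unmatched'    # treated as a negative for classification

# The third anchor's best IoU of ~0.35 lands in the ignored band.
assert match_kind(0.35) == 'ignored'
```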
- def graph_fn(anchor_means, groundtruth_box_corners): - similarity_calc = region_similarity_calculator.IouSimilarity() - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, - unmatched_threshold=0.3) - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) - target_assigner = targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, unmatched_cls_target=None) - anchors_boxlist = box_list.BoxList(anchor_means) - groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) - result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist) - (cls_targets, cls_weights, reg_targets, reg_weights, _) = result - return (cls_targets, cls_weights, reg_targets, reg_weights) - - anchor_means = np.array([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 1.0, 0.8], - [0.0, 0.5, .9, 1.0]], dtype=np.float32) - groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.9, 0.9]], dtype=np.float32) - exp_cls_targets = [[1], [1], [0]] - exp_cls_weights = [1, 1, 0] - exp_reg_targets = [[0, 0, 0, 0], - [0, 0, -1, 1], - [0, 0, 0, 0]] - exp_reg_weights = [1, 1, 0] - (cls_targets_out, - cls_weights_out, reg_targets_out, reg_weights_out) = self.execute( - graph_fn, [anchor_means, groundtruth_box_corners]) - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - self.assertEquals(cls_targets_out.dtype, np.float32) - self.assertEquals(cls_weights_out.dtype, np.float32) - self.assertEquals(reg_targets_out.dtype, np.float32) - self.assertEquals(reg_weights_out.dtype, np.float32) - - def test_assign_agnostic_with_keypoints(self): - def graph_fn(anchor_means, groundtruth_box_corners, - groundtruth_keypoints): - similarity_calc = region_similarity_calculator.IouSimilarity() - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, - unmatched_threshold=0.5) - box_coder = keypoint_box_coder.KeypointBoxCoder( - num_keypoints=6, scale_factors=[10.0, 10.0, 5.0, 5.0]) - target_assigner = targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, unmatched_cls_target=None) - anchors_boxlist = box_list.BoxList(anchor_means) - groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) - groundtruth_boxlist.add_field(fields.BoxListFields.keypoints, - groundtruth_keypoints) - result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist) - (cls_targets, cls_weights, reg_targets, reg_weights, _) = result - return (cls_targets, cls_weights, reg_targets, reg_weights) - - anchor_means = np.array([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 1.0, 1.0], - [0.0, 0.5, .9, 1.0]], dtype=np.float32) - groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5], - [0.45, 0.45, 0.95, 0.95]], - dtype=np.float32) - groundtruth_keypoints = np.array( - [[[0.1, 0.2], [0.1, 0.3], [0.2, 0.2], [0.2, 0.2], [0.1, 0.1], [0.9, 0]], - [[0, 0.3], [0.2, 0.4], [0.5, 0.6], [0, 0.6], [0.8, 0.2], [0.2, 0.4]]], - dtype=np.float32) - exp_cls_targets = [[1], [1], [0]] - exp_cls_weights = [1, 1, 1] - exp_reg_targets = [[0, 0, 0, 0, -3, -1, -3, 1, -1, -1, -1, -1, -3, -3, 13, - -5], - [-1, -1, 0, 0, -15, -9, -11, -7, -5, -3, -15, -3, 1, -11, - -11, -7], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]] - exp_reg_weights = [1, 1, 0] - (cls_targets_out, cls_weights_out, reg_targets_out, - reg_weights_out) = self.execute(graph_fn, [anchor_means, - groundtruth_box_corners, - groundtruth_keypoints]) - self.assertAllClose(cls_targets_out, 
exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - self.assertEquals(cls_targets_out.dtype, np.float32) - self.assertEquals(cls_weights_out.dtype, np.float32) - self.assertEquals(reg_targets_out.dtype, np.float32) - self.assertEquals(reg_weights_out.dtype, np.float32) - - def test_assign_class_agnostic_with_keypoints_and_ignored_matches(self): - # Note: test is very similar to above. The third box matched with an IOU - # of 0.35, which is between the matched and unmatched threshold. This means - # That like above the expected classification targets are [1, 1, 0]. - # Unlike above, the third target is ignored and therefore expected - # classification weights are [1, 1, 0]. - def graph_fn(anchor_means, groundtruth_box_corners, - groundtruth_keypoints): - similarity_calc = region_similarity_calculator.IouSimilarity() - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, - unmatched_threshold=0.5) - box_coder = keypoint_box_coder.KeypointBoxCoder( - num_keypoints=6, scale_factors=[10.0, 10.0, 5.0, 5.0]) - target_assigner = targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, unmatched_cls_target=None) - anchors_boxlist = box_list.BoxList(anchor_means) - groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) - groundtruth_boxlist.add_field(fields.BoxListFields.keypoints, - groundtruth_keypoints) - result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist) - (cls_targets, cls_weights, reg_targets, reg_weights, _) = result - return (cls_targets, cls_weights, reg_targets, reg_weights) - - anchor_means = np.array([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 1.0, 1.0], - [0.0, 0.5, .9, 1.0]], dtype=np.float32) - groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5], - [0.45, 0.45, 0.95, 0.95]], - dtype=np.float32) - groundtruth_keypoints = np.array( - [[[0.1, 0.2], [0.1, 0.3], [0.2, 0.2], [0.2, 0.2], [0.1, 0.1], [0.9, 0]], - [[0, 0.3], [0.2, 0.4], [0.5, 0.6], [0, 0.6], [0.8, 0.2], [0.2, 0.4]]], - dtype=np.float32) - exp_cls_targets = [[1], [1], [0]] - exp_cls_weights = [1, 1, 1] - exp_reg_targets = [[0, 0, 0, 0, -3, -1, -3, 1, -1, -1, -1, -1, -3, -3, 13, - -5], - [-1, -1, 0, 0, -15, -9, -11, -7, -5, -3, -15, -3, 1, -11, - -11, -7], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]] - exp_reg_weights = [1, 1, 0] - (cls_targets_out, cls_weights_out, reg_targets_out, - reg_weights_out) = self.execute(graph_fn, [anchor_means, - groundtruth_box_corners, - groundtruth_keypoints]) - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - self.assertEquals(cls_targets_out.dtype, np.float32) - self.assertEquals(cls_weights_out.dtype, np.float32) - self.assertEquals(reg_targets_out.dtype, np.float32) - self.assertEquals(reg_weights_out.dtype, np.float32) - - def test_assign_multiclass(self): - - def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_labels): - similarity_calc = region_similarity_calculator.IouSimilarity() - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, - unmatched_threshold=0.5) - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) - unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32) - target_assigner = targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, - 
unmatched_cls_target=unmatched_cls_target) - - anchors_boxlist = box_list.BoxList(anchor_means) - groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) - result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist, - groundtruth_labels) - (cls_targets, cls_weights, reg_targets, reg_weights, _) = result - return (cls_targets, cls_weights, reg_targets, reg_weights) - - anchor_means = np.array([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 1.0, 0.8], - [0, 0.5, .5, 1.0], - [.75, 0, 1.0, .25]], dtype=np.float32) - groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.9, 0.9], - [.75, 0, .95, .27]], dtype=np.float32) - groundtruth_labels = np.array([[0, 1, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 0], - [0, 0, 0, 1, 0, 0, 0]], dtype=np.float32) - - exp_cls_targets = [[0, 1, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 0], - [1, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 1, 0, 0, 0]] - exp_cls_weights = [1, 1, 1, 1] - exp_reg_targets = [[0, 0, 0, 0], - [0, 0, -1, 1], - [0, 0, 0, 0], - [0, 0, -.5, .2]] - exp_reg_weights = [1, 1, 0, 1] - - (cls_targets_out, - cls_weights_out, reg_targets_out, reg_weights_out) = self.execute( - graph_fn, [anchor_means, groundtruth_box_corners, groundtruth_labels]) - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - self.assertEquals(cls_targets_out.dtype, np.float32) - self.assertEquals(cls_weights_out.dtype, np.float32) - self.assertEquals(reg_targets_out.dtype, np.float32) - self.assertEquals(reg_weights_out.dtype, np.float32) - - def test_assign_multiclass_with_groundtruth_weights(self): - - def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_labels, - groundtruth_weights): - similarity_calc = region_similarity_calculator.IouSimilarity() - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, - unmatched_threshold=0.5) - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) - unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32) - target_assigner = targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, - unmatched_cls_target=unmatched_cls_target) - - anchors_boxlist = box_list.BoxList(anchor_means) - groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) - result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist, - groundtruth_labels, - groundtruth_weights) - (_, cls_weights, _, reg_weights, _) = result - return (cls_weights, reg_weights) - - anchor_means = np.array([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 1.0, 0.8], - [0, 0.5, .5, 1.0], - [.75, 0, 1.0, .25]], dtype=np.float32) - groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.9, 0.9], - [.75, 0, .95, .27]], dtype=np.float32) - groundtruth_labels = np.array([[0, 1, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 0], - [0, 0, 0, 1, 0, 0, 0]], dtype=np.float32) - groundtruth_weights = np.array([0.3, 0., 0.5], dtype=np.float32) - - exp_cls_weights = [0.3, 0., 1, 0.5] # background class gets weight of 1. - exp_reg_weights = [0.3, 0., 0., 0.5] # background class gets weight of 0. 
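These expectations can be read off the match directly: anchors 0, 1 and 3 match groundtruth boxes 0, 1 and 2 and inherit their weights, while the unmatched anchor 2 falls back to `negative_class_weight` (1.0 by default) for classification and 0 for regression. In terms of the illustrative gather sketch shown with target_assigner.py above, with a hypothetical match result:

```python
import numpy as np

gt_weights = np.array([0.3, 0.0, 0.5])
match_results = [0, 1, -1, 2]  # hypothetical: anchor 2 is unmatched

cls_w = gather_based_on_match(gt_weights, match_results,
                              unmatched_value=1.0, ignored_value=0.0)
reg_w = gather_based_on_match(gt_weights, match_results,
                              unmatched_value=0.0, ignored_value=0.0)
# cls_w -> [0.3, 0., 1., 0.5]   reg_w -> [0.3, 0., 0., 0.5]
```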
- - (cls_weights_out, reg_weights_out) = self.execute(graph_fn, [ - anchor_means, groundtruth_box_corners, groundtruth_labels, - groundtruth_weights - ]) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_weights_out, exp_reg_weights) - - def test_assign_multidimensional_class_targets(self): - - def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_labels): - similarity_calc = region_similarity_calculator.IouSimilarity() - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, - unmatched_threshold=0.5) - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) - - unmatched_cls_target = tf.constant([[0, 0], [0, 0]], tf.float32) - target_assigner = targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, - unmatched_cls_target=unmatched_cls_target) - - anchors_boxlist = box_list.BoxList(anchor_means) - groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) - result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist, - groundtruth_labels) - (cls_targets, cls_weights, reg_targets, reg_weights, _) = result - return (cls_targets, cls_weights, reg_targets, reg_weights) - - anchor_means = np.array([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 1.0, 0.8], - [0, 0.5, .5, 1.0], - [.75, 0, 1.0, .25]], dtype=np.float32) - groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.9, 0.9], - [.75, 0, .95, .27]], dtype=np.float32) - - groundtruth_labels = np.array([[[0, 1], [1, 0]], - [[1, 0], [0, 1]], - [[0, 1], [1, .5]]], np.float32) - - exp_cls_targets = [[[0, 1], [1, 0]], - [[1, 0], [0, 1]], - [[0, 0], [0, 0]], - [[0, 1], [1, .5]]] - exp_cls_weights = [1, 1, 1, 1] - exp_reg_targets = [[0, 0, 0, 0], - [0, 0, -1, 1], - [0, 0, 0, 0], - [0, 0, -.5, .2]] - exp_reg_weights = [1, 1, 0, 1] - (cls_targets_out, - cls_weights_out, reg_targets_out, reg_weights_out) = self.execute( - graph_fn, [anchor_means, groundtruth_box_corners, groundtruth_labels]) - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - self.assertEquals(cls_targets_out.dtype, np.float32) - self.assertEquals(cls_weights_out.dtype, np.float32) - self.assertEquals(reg_targets_out.dtype, np.float32) - self.assertEquals(reg_weights_out.dtype, np.float32) - - def test_assign_empty_groundtruth(self): - - def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_labels): - similarity_calc = region_similarity_calculator.IouSimilarity() - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, - unmatched_threshold=0.5) - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) - unmatched_cls_target = tf.constant([0, 0, 0], tf.float32) - anchors_boxlist = box_list.BoxList(anchor_means) - groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) - target_assigner = targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, - unmatched_cls_target=unmatched_cls_target) - result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist, - groundtruth_labels) - (cls_targets, cls_weights, reg_targets, reg_weights, _) = result - return (cls_targets, cls_weights, reg_targets, reg_weights) - - groundtruth_box_corners = np.zeros((0, 4), dtype=np.float32) - groundtruth_labels = np.zeros((0, 3), dtype=np.float32) - anchor_means = np.array([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 1.0, 0.8], - [0, 0.5, .5, 1.0], - [.75, 0, 1.0, .25]], - dtype=np.float32) - 
exp_cls_targets = [[0, 0, 0], - [0, 0, 0], - [0, 0, 0], - [0, 0, 0]] - exp_cls_weights = [1, 1, 1, 1] - exp_reg_targets = [[0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]] - exp_reg_weights = [0, 0, 0, 0] - (cls_targets_out, - cls_weights_out, reg_targets_out, reg_weights_out) = self.execute( - graph_fn, [anchor_means, groundtruth_box_corners, groundtruth_labels]) - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - self.assertEquals(cls_targets_out.dtype, np.float32) - self.assertEquals(cls_weights_out.dtype, np.float32) - self.assertEquals(reg_targets_out.dtype, np.float32) - self.assertEquals(reg_weights_out.dtype, np.float32) - - def test_raises_error_on_incompatible_groundtruth_boxes_and_labels(self): - similarity_calc = region_similarity_calculator.NegSqDistSimilarity() - matcher = bipartite_matcher.GreedyBipartiteMatcher() - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() - unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32) - target_assigner = targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, - unmatched_cls_target=unmatched_cls_target) - - prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 1.0, 0.8], - [0, 0.5, .5, 1.0], - [.75, 0, 1.0, .25]]) - priors = box_list.BoxList(prior_means) - - box_corners = [[0.0, 0.0, 0.5, 0.5], - [0.0, 0.0, 0.5, 0.8], - [0.5, 0.5, 0.9, 0.9], - [.75, 0, .95, .27]] - boxes = box_list.BoxList(tf.constant(box_corners)) - - groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 0], - [0, 0, 0, 1, 0, 0, 0]], tf.float32) - with self.assertRaisesRegexp(ValueError, 'Unequal shapes'): - target_assigner.assign(priors, boxes, groundtruth_labels, - num_valid_rows=3) - - def test_raises_error_on_invalid_groundtruth_labels(self): - similarity_calc = region_similarity_calculator.NegSqDistSimilarity() - matcher = bipartite_matcher.GreedyBipartiteMatcher() - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=1.0) - unmatched_cls_target = tf.constant([[0, 0], [0, 0], [0, 0]], tf.float32) - target_assigner = targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, - unmatched_cls_target=unmatched_cls_target) - - prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5]]) - priors = box_list.BoxList(prior_means) - - box_corners = [[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.9, 0.9], - [.75, 0, .95, .27]] - boxes = box_list.BoxList(tf.constant(box_corners)) - groundtruth_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32) - - with self.assertRaises(ValueError): - target_assigner.assign(priors, boxes, groundtruth_labels, - num_valid_rows=3) - - -class BatchTargetAssignerTest(test_case.TestCase): - - def _get_agnostic_target_assigner(self): - similarity_calc = region_similarity_calculator.IouSimilarity() - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, - unmatched_threshold=0.5) - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) - return targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, - unmatched_cls_target=None) - - def _get_multi_class_target_assigner(self, num_classes): - similarity_calc = region_similarity_calculator.IouSimilarity() - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, - unmatched_threshold=0.5) - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) - unmatched_cls_target = tf.constant([1] + num_classes * [0], 
tf.float32) - return targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, - unmatched_cls_target=unmatched_cls_target) - - def _get_multi_dimensional_target_assigner(self, target_dimensions): - similarity_calc = region_similarity_calculator.IouSimilarity() - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, - unmatched_threshold=0.5) - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) - unmatched_cls_target = tf.constant(np.zeros(target_dimensions), - tf.float32) - return targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, - unmatched_cls_target=unmatched_cls_target) - - def test_batch_assign_targets(self): - - def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2): - box_list1 = box_list.BoxList(groundtruth_boxlist1) - box_list2 = box_list.BoxList(groundtruth_boxlist2) - gt_box_batch = [box_list1, box_list2] - gt_class_targets = [None, None] - anchors_boxlist = box_list.BoxList(anchor_means) - agnostic_target_assigner = self._get_agnostic_target_assigner() - (cls_targets, cls_weights, reg_targets, reg_weights, - _) = targetassigner.batch_assign_targets( - agnostic_target_assigner, anchors_boxlist, gt_box_batch, - gt_class_targets) - return (cls_targets, cls_weights, reg_targets, reg_weights) - - groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32) - groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1], - [0.015789, 0.0985, 0.55789, 0.3842]], - dtype=np.float32) - anchor_means = np.array([[0, 0, .25, .25], - [0, .25, 1, 1], - [0, .1, .5, .5], - [.75, .75, 1, 1]], dtype=np.float32) - - exp_reg_targets = [[[0, 0, -0.5, -0.5], - [0, 0, 0, 0], - [0, 0, 0, 0,], - [0, 0, 0, 0,],], - [[0, 0, 0, 0,], - [0, 0.01231521, 0, 0], - [0.15789001, -0.01500003, 0.57889998, -1.15799987], - [0, 0, 0, 0]]] - exp_cls_weights = [[1, 1, 1, 1], - [1, 1, 1, 1]] - exp_cls_targets = [[[1], [0], [0], [0]], - [[0], [1], [1], [0]]] - exp_reg_weights = [[1, 0, 0, 0], - [0, 1, 1, 0]] - - (cls_targets_out, - cls_weights_out, reg_targets_out, reg_weights_out) = self.execute( - graph_fn, [anchor_means, groundtruth_boxlist1, groundtruth_boxlist2]) - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - - def test_batch_assign_multiclass_targets(self): - - def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2, - class_targets1, class_targets2): - box_list1 = box_list.BoxList(groundtruth_boxlist1) - box_list2 = box_list.BoxList(groundtruth_boxlist2) - gt_box_batch = [box_list1, box_list2] - gt_class_targets = [class_targets1, class_targets2] - anchors_boxlist = box_list.BoxList(anchor_means) - multiclass_target_assigner = self._get_multi_class_target_assigner( - num_classes=3) - (cls_targets, cls_weights, reg_targets, reg_weights, - _) = targetassigner.batch_assign_targets( - multiclass_target_assigner, anchors_boxlist, gt_box_batch, - gt_class_targets) - return (cls_targets, cls_weights, reg_targets, reg_weights) - - groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32) - groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1], - [0.015789, 0.0985, 0.55789, 0.3842]], - dtype=np.float32) - class_targets1 = np.array([[0, 1, 0, 0]], dtype=np.float32) - class_targets2 = np.array([[0, 0, 0, 1], - [0, 0, 1, 0]], dtype=np.float32) - - anchor_means = np.array([[0, 0, .25, .25], - [0, .25, 1, 1], - [0, .1, .5, .5], - [.75, .75, 1, 1]], 
dtype=np.float32) - - exp_reg_targets = [[[0, 0, -0.5, -0.5], - [0, 0, 0, 0], - [0, 0, 0, 0,], - [0, 0, 0, 0,],], - [[0, 0, 0, 0,], - [0, 0.01231521, 0, 0], - [0.15789001, -0.01500003, 0.57889998, -1.15799987], - [0, 0, 0, 0]]] - exp_cls_weights = [[1, 1, 1, 1], - [1, 1, 1, 1]] - exp_cls_targets = [[[0, 1, 0, 0], - [1, 0, 0, 0], - [1, 0, 0, 0], - [1, 0, 0, 0]], - [[1, 0, 0, 0], - [0, 0, 0, 1], - [0, 0, 1, 0], - [1, 0, 0, 0]]] - exp_reg_weights = [[1, 0, 0, 0], - [0, 1, 1, 0]] - - (cls_targets_out, cls_weights_out, reg_targets_out, - reg_weights_out) = self.execute(graph_fn, [ - anchor_means, groundtruth_boxlist1, groundtruth_boxlist2, - class_targets1, class_targets2 - ]) - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - - def test_batch_assign_multiclass_targets_with_padded_groundtruth(self): - - def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2, - class_targets1, class_targets2, groundtruth_weights1, - groundtruth_weights2): - box_list1 = box_list.BoxList(groundtruth_boxlist1) - box_list2 = box_list.BoxList(groundtruth_boxlist2) - gt_box_batch = [box_list1, box_list2] - gt_class_targets = [class_targets1, class_targets2] - gt_weights = [groundtruth_weights1, groundtruth_weights2] - anchors_boxlist = box_list.BoxList(anchor_means) - multiclass_target_assigner = self._get_multi_class_target_assigner( - num_classes=3) - (cls_targets, cls_weights, reg_targets, reg_weights, - _) = targetassigner.batch_assign_targets( - multiclass_target_assigner, anchors_boxlist, gt_box_batch, - gt_class_targets, gt_weights) - return (cls_targets, cls_weights, reg_targets, reg_weights) - - groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2], - [0., 0., 0., 0.]], dtype=np.float32) - groundtruth_weights1 = np.array([1, 0], dtype=np.float32) - groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1], - [0.015789, 0.0985, 0.55789, 0.3842], - [0, 0, 0, 0]], - dtype=np.float32) - groundtruth_weights2 = np.array([1, 1, 0], dtype=np.float32) - class_targets1 = np.array([[0, 1, 0, 0], [0, 0, 0, 0]], dtype=np.float32) - class_targets2 = np.array([[0, 0, 0, 1], - [0, 0, 1, 0], - [0, 0, 0, 0]], dtype=np.float32) - - anchor_means = np.array([[0, 0, .25, .25], - [0, .25, 1, 1], - [0, .1, .5, .5], - [.75, .75, 1, 1]], dtype=np.float32) - - exp_reg_targets = [[[0, 0, -0.5, -0.5], - [0, 0, 0, 0], - [0, 0, 0, 0,], - [0, 0, 0, 0,],], - [[0, 0, 0, 0,], - [0, 0.01231521, 0, 0], - [0.15789001, -0.01500003, 0.57889998, -1.15799987], - [0, 0, 0, 0]]] - exp_cls_weights = [[1, 1, 1, 1], - [1, 1, 1, 1]] - exp_cls_targets = [[[0, 1, 0, 0], - [1, 0, 0, 0], - [1, 0, 0, 0], - [1, 0, 0, 0]], - [[1, 0, 0, 0], - [0, 0, 0, 1], - [0, 0, 1, 0], - [1, 0, 0, 0]]] - exp_reg_weights = [[1, 0, 0, 0], - [0, 1, 1, 0]] - - (cls_targets_out, cls_weights_out, reg_targets_out, - reg_weights_out) = self.execute(graph_fn, [ - anchor_means, groundtruth_boxlist1, groundtruth_boxlist2, - class_targets1, class_targets2, groundtruth_weights1, - groundtruth_weights2 - ]) - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - - def test_batch_assign_multidimensional_targets(self): - - def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2, - class_targets1, class_targets2): - box_list1 = 
box_list.BoxList(groundtruth_boxlist1) - box_list2 = box_list.BoxList(groundtruth_boxlist2) - gt_box_batch = [box_list1, box_list2] - gt_class_targets = [class_targets1, class_targets2] - anchors_boxlist = box_list.BoxList(anchor_means) - multiclass_target_assigner = self._get_multi_dimensional_target_assigner( - target_dimensions=(2, 3)) - (cls_targets, cls_weights, reg_targets, reg_weights, - _) = targetassigner.batch_assign_targets( - multiclass_target_assigner, anchors_boxlist, gt_box_batch, - gt_class_targets) - return (cls_targets, cls_weights, reg_targets, reg_weights) - - groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32) - groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1], - [0.015789, 0.0985, 0.55789, 0.3842]], - dtype=np.float32) - class_targets1 = np.array([[0, 1, 0, 0]], dtype=np.float32) - class_targets2 = np.array([[0, 0, 0, 1], - [0, 0, 1, 0]], dtype=np.float32) - class_targets1 = np.array([[[0, 1, 1], - [1, 1, 0]]], dtype=np.float32) - class_targets2 = np.array([[[0, 1, 1], - [1, 1, 0]], - [[0, 0, 1], - [0, 0, 1]]], dtype=np.float32) - - anchor_means = np.array([[0, 0, .25, .25], - [0, .25, 1, 1], - [0, .1, .5, .5], - [.75, .75, 1, 1]], dtype=np.float32) - - exp_reg_targets = [[[0, 0, -0.5, -0.5], - [0, 0, 0, 0], - [0, 0, 0, 0,], - [0, 0, 0, 0,],], - [[0, 0, 0, 0,], - [0, 0.01231521, 0, 0], - [0.15789001, -0.01500003, 0.57889998, -1.15799987], - [0, 0, 0, 0]]] - exp_cls_weights = [[1, 1, 1, 1], - [1, 1, 1, 1]] - exp_cls_targets = [[[[0., 1., 1.], - [1., 1., 0.]], - [[0., 0., 0.], - [0., 0., 0.]], - [[0., 0., 0.], - [0., 0., 0.]], - [[0., 0., 0.], - [0., 0., 0.]]], - [[[0., 0., 0.], - [0., 0., 0.]], - [[0., 1., 1.], - [1., 1., 0.]], - [[0., 0., 1.], - [0., 0., 1.]], - [[0., 0., 0.], - [0., 0., 0.]]]] - exp_reg_weights = [[1, 0, 0, 0], - [0, 1, 1, 0]] - - (cls_targets_out, cls_weights_out, reg_targets_out, - reg_weights_out) = self.execute(graph_fn, [ - anchor_means, groundtruth_boxlist1, groundtruth_boxlist2, - class_targets1, class_targets2 - ]) - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - - def test_batch_assign_empty_groundtruth(self): - - def graph_fn(anchor_means, groundtruth_box_corners, gt_class_targets): - groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) - gt_box_batch = [groundtruth_boxlist] - gt_class_targets_batch = [gt_class_targets] - anchors_boxlist = box_list.BoxList(anchor_means) - - multiclass_target_assigner = self._get_multi_class_target_assigner( - num_classes=3) - - (cls_targets, cls_weights, reg_targets, reg_weights, - _) = targetassigner.batch_assign_targets( - multiclass_target_assigner, anchors_boxlist, - gt_box_batch, gt_class_targets_batch) - return (cls_targets, cls_weights, reg_targets, reg_weights) - - groundtruth_box_corners = np.zeros((0, 4), dtype=np.float32) - anchor_means = np.array([[0, 0, .25, .25], - [0, .25, 1, 1]], dtype=np.float32) - exp_reg_targets = [[[0, 0, 0, 0], - [0, 0, 0, 0]]] - exp_cls_weights = [[1, 1]] - exp_cls_targets = [[[1, 0, 0, 0], - [1, 0, 0, 0]]] - exp_reg_weights = [[0, 0]] - num_classes = 3 - pad = 1 - gt_class_targets = np.zeros((0, num_classes + pad), dtype=np.float32) - - (cls_targets_out, - cls_weights_out, reg_targets_out, reg_weights_out) = self.execute( - graph_fn, [anchor_means, groundtruth_box_corners, gt_class_targets]) - self.assertAllClose(cls_targets_out, exp_cls_targets) - 
self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - - -class CreateTargetAssignerTest(tf.test.TestCase): - - def test_create_target_assigner(self): - """Tests that named constructor gives working target assigners. - - TODO(rathodv): Make this test more general. - """ - corners = [[0.0, 0.0, 1.0, 1.0]] - groundtruth = box_list.BoxList(tf.constant(corners)) - - priors = box_list.BoxList(tf.constant(corners)) - multibox_ta = (targetassigner - .create_target_assigner('Multibox', stage='proposal')) - multibox_ta.assign(priors, groundtruth) - # No tests on output, as that may vary arbitrarily as new target assigners - # are added. As long as it is constructed correctly and runs without errors, - # tests on the individual assigners cover correctness of the assignments. - - anchors = box_list.BoxList(tf.constant(corners)) - faster_rcnn_proposals_ta = (targetassigner - .create_target_assigner('FasterRCNN', - stage='proposal')) - faster_rcnn_proposals_ta.assign(anchors, groundtruth) - - fast_rcnn_ta = (targetassigner - .create_target_assigner('FastRCNN')) - fast_rcnn_ta.assign(anchors, groundtruth) - - faster_rcnn_detection_ta = (targetassigner - .create_target_assigner('FasterRCNN', - stage='detection')) - faster_rcnn_detection_ta.assign(anchors, groundtruth) - - with self.assertRaises(ValueError): - targetassigner.create_target_assigner('InvalidDetector', - stage='invalid_stage') - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/ava_label_map_v2.1.pbtxt b/research/mlperf_object_detection/Mask_RCNN/object_detection/data/ava_label_map_v2.1.pbtxt deleted file mode 100644 index 5e2c485682830919a09300ac851e6b0e4bdf3efb..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/ava_label_map_v2.1.pbtxt +++ /dev/null @@ -1,240 +0,0 @@ -item { - name: "bend/bow (at the waist)" - id: 1 -} -item { - name: "crouch/kneel" - id: 3 -} -item { - name: "dance" - id: 4 -} -item { - name: "fall down" - id: 5 -} -item { - name: "get up" - id: 6 -} -item { - name: "jump/leap" - id: 7 -} -item { - name: "lie/sleep" - id: 8 -} -item { - name: "martial art" - id: 9 -} -item { - name: "run/jog" - id: 10 -} -item { - name: "sit" - id: 11 -} -item { - name: "stand" - id: 12 -} -item { - name: "swim" - id: 13 -} -item { - name: "walk" - id: 14 -} -item { - name: "answer phone" - id: 15 -} -item { - name: "carry/hold (an object)" - id: 17 -} -item { - name: "climb (e.g., a mountain)" - id: 20 -} -item { - name: "close (e.g., a door, a box)" - id: 22 -} -item { - name: "cut" - id: 24 -} -item { - name: "dress/put on clothing" - id: 26 -} -item { - name: "drink" - id: 27 -} -item { - name: "drive (e.g., a car, a truck)" - id: 28 -} -item { - name: "eat" - id: 29 -} -item { - name: "enter" - id: 30 -} -item { - name: "hit (an object)" - id: 34 -} -item { - name: "lift/pick up" - id: 36 -} -item { - name: "listen (e.g., to music)" - id: 37 -} -item { - name: "open (e.g., a window, a car door)" - id: 38 -} -item { - name: "play musical instrument" - id: 41 -} -item { - name: "point to (an object)" - id: 43 -} -item { - name: "pull (an object)" - id: 45 -} -item { - name: "push (an object)" - id: 46 -} -item { - name: "put down" - id: 47 -} -item { - name: "read" - id: 48 -} -item { - name: "ride (e.g., a bike, a car, a horse)" - id: 49 -} -item { - name: "sail boat" - id: 51 -} -item { - 
name: "shoot" - id: 52 -} -item { - name: "smoke" - id: 54 -} -item { - name: "take a photo" - id: 56 -} -item { - name: "text on/look at a cellphone" - id: 57 -} -item { - name: "throw" - id: 58 -} -item { - name: "touch (an object)" - id: 59 -} -item { - name: "turn (e.g., a screwdriver)" - id: 60 -} -item { - name: "watch (e.g., TV)" - id: 61 -} -item { - name: "work on a computer" - id: 62 -} -item { - name: "write" - id: 63 -} -item { - name: "fight/hit (a person)" - id: 64 -} -item { - name: "give/serve (an object) to (a person)" - id: 65 -} -item { - name: "grab (a person)" - id: 66 -} -item { - name: "hand clap" - id: 67 -} -item { - name: "hand shake" - id: 68 -} -item { - name: "hand wave" - id: 69 -} -item { - name: "hug (a person)" - id: 70 -} -item { - name: "kiss (a person)" - id: 72 -} -item { - name: "lift (a person)" - id: 73 -} -item { - name: "listen to (a person)" - id: 74 -} -item { - name: "push (another person)" - id: 76 -} -item { - name: "sing to (e.g., self, a person, a group)" - id: 77 -} -item { - name: "take (an object) from (a person)" - id: 78 -} -item { - name: "talk to (e.g., self, a person, a group)" - id: 79 -} -item { - name: "watch (a person)" - id: 80 -} diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/kitti_label_map.pbtxt b/research/mlperf_object_detection/Mask_RCNN/object_detection/data/kitti_label_map.pbtxt deleted file mode 100644 index 0afcc6936ebdb37ecbc7c3245929fcf178a02c0b..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/kitti_label_map.pbtxt +++ /dev/null @@ -1,9 +0,0 @@ -item { - id: 1 - name: 'car' -} - -item { - id: 2 - name: 'pedestrian' -} diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/mscoco_label_map.pbtxt b/research/mlperf_object_detection/Mask_RCNN/object_detection/data/mscoco_label_map.pbtxt deleted file mode 100644 index 1f4872bd0c7f53e70beecf88af005c07a5df9e08..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/mscoco_label_map.pbtxt +++ /dev/null @@ -1,400 +0,0 @@ -item { - name: "/m/01g317" - id: 1 - display_name: "person" -} -item { - name: "/m/0199g" - id: 2 - display_name: "bicycle" -} -item { - name: "/m/0k4j" - id: 3 - display_name: "car" -} -item { - name: "/m/04_sv" - id: 4 - display_name: "motorcycle" -} -item { - name: "/m/05czz6l" - id: 5 - display_name: "airplane" -} -item { - name: "/m/01bjv" - id: 6 - display_name: "bus" -} -item { - name: "/m/07jdr" - id: 7 - display_name: "train" -} -item { - name: "/m/07r04" - id: 8 - display_name: "truck" -} -item { - name: "/m/019jd" - id: 9 - display_name: "boat" -} -item { - name: "/m/015qff" - id: 10 - display_name: "traffic light" -} -item { - name: "/m/01pns0" - id: 11 - display_name: "fire hydrant" -} -item { - name: "/m/02pv19" - id: 13 - display_name: "stop sign" -} -item { - name: "/m/015qbp" - id: 14 - display_name: "parking meter" -} -item { - name: "/m/0cvnqh" - id: 15 - display_name: "bench" -} -item { - name: "/m/015p6" - id: 16 - display_name: "bird" -} -item { - name: "/m/01yrx" - id: 17 - display_name: "cat" -} -item { - name: "/m/0bt9lr" - id: 18 - display_name: "dog" -} -item { - name: "/m/03k3r" - id: 19 - display_name: "horse" -} -item { - name: "/m/07bgp" - id: 20 - display_name: "sheep" -} -item { - name: "/m/01xq0k1" - id: 21 - display_name: "cow" -} -item { - name: "/m/0bwd_0j" - id: 22 - display_name: "elephant" -} -item { - name: "/m/01dws" - id: 23 - display_name: 
"bear" -} -item { - name: "/m/0898b" - id: 24 - display_name: "zebra" -} -item { - name: "/m/03bk1" - id: 25 - display_name: "giraffe" -} -item { - name: "/m/01940j" - id: 27 - display_name: "backpack" -} -item { - name: "/m/0hnnb" - id: 28 - display_name: "umbrella" -} -item { - name: "/m/080hkjn" - id: 31 - display_name: "handbag" -} -item { - name: "/m/01rkbr" - id: 32 - display_name: "tie" -} -item { - name: "/m/01s55n" - id: 33 - display_name: "suitcase" -} -item { - name: "/m/02wmf" - id: 34 - display_name: "frisbee" -} -item { - name: "/m/071p9" - id: 35 - display_name: "skis" -} -item { - name: "/m/06__v" - id: 36 - display_name: "snowboard" -} -item { - name: "/m/018xm" - id: 37 - display_name: "sports ball" -} -item { - name: "/m/02zt3" - id: 38 - display_name: "kite" -} -item { - name: "/m/03g8mr" - id: 39 - display_name: "baseball bat" -} -item { - name: "/m/03grzl" - id: 40 - display_name: "baseball glove" -} -item { - name: "/m/06_fw" - id: 41 - display_name: "skateboard" -} -item { - name: "/m/019w40" - id: 42 - display_name: "surfboard" -} -item { - name: "/m/0dv9c" - id: 43 - display_name: "tennis racket" -} -item { - name: "/m/04dr76w" - id: 44 - display_name: "bottle" -} -item { - name: "/m/09tvcd" - id: 46 - display_name: "wine glass" -} -item { - name: "/m/08gqpm" - id: 47 - display_name: "cup" -} -item { - name: "/m/0dt3t" - id: 48 - display_name: "fork" -} -item { - name: "/m/04ctx" - id: 49 - display_name: "knife" -} -item { - name: "/m/0cmx8" - id: 50 - display_name: "spoon" -} -item { - name: "/m/04kkgm" - id: 51 - display_name: "bowl" -} -item { - name: "/m/09qck" - id: 52 - display_name: "banana" -} -item { - name: "/m/014j1m" - id: 53 - display_name: "apple" -} -item { - name: "/m/0l515" - id: 54 - display_name: "sandwich" -} -item { - name: "/m/0cyhj_" - id: 55 - display_name: "orange" -} -item { - name: "/m/0hkxq" - id: 56 - display_name: "broccoli" -} -item { - name: "/m/0fj52s" - id: 57 - display_name: "carrot" -} -item { - name: "/m/01b9xk" - id: 58 - display_name: "hot dog" -} -item { - name: "/m/0663v" - id: 59 - display_name: "pizza" -} -item { - name: "/m/0jy4k" - id: 60 - display_name: "donut" -} -item { - name: "/m/0fszt" - id: 61 - display_name: "cake" -} -item { - name: "/m/01mzpv" - id: 62 - display_name: "chair" -} -item { - name: "/m/02crq1" - id: 63 - display_name: "couch" -} -item { - name: "/m/03fp41" - id: 64 - display_name: "potted plant" -} -item { - name: "/m/03ssj5" - id: 65 - display_name: "bed" -} -item { - name: "/m/04bcr3" - id: 67 - display_name: "dining table" -} -item { - name: "/m/09g1w" - id: 70 - display_name: "toilet" -} -item { - name: "/m/07c52" - id: 72 - display_name: "tv" -} -item { - name: "/m/01c648" - id: 73 - display_name: "laptop" -} -item { - name: "/m/020lf" - id: 74 - display_name: "mouse" -} -item { - name: "/m/0qjjc" - id: 75 - display_name: "remote" -} -item { - name: "/m/01m2v" - id: 76 - display_name: "keyboard" -} -item { - name: "/m/050k8" - id: 77 - display_name: "cell phone" -} -item { - name: "/m/0fx9l" - id: 78 - display_name: "microwave" -} -item { - name: "/m/029bxz" - id: 79 - display_name: "oven" -} -item { - name: "/m/01k6s3" - id: 80 - display_name: "toaster" -} -item { - name: "/m/0130jx" - id: 81 - display_name: "sink" -} -item { - name: "/m/040b_t" - id: 82 - display_name: "refrigerator" -} -item { - name: "/m/0bt_c3" - id: 84 - display_name: "book" -} -item { - name: "/m/01x3z" - id: 85 - display_name: "clock" -} -item { - name: "/m/02s195" - id: 86 - display_name: "vase" -} -item { - name: 
"/m/01lsmm" - id: 87 - display_name: "scissors" -} -item { - name: "/m/0kmg4" - id: 88 - display_name: "teddy bear" -} -item { - name: "/m/03wvsk" - id: 89 - display_name: "hair drier" -} -item { - name: "/m/012xff" - id: 90 - display_name: "toothbrush" -} diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/oid_bbox_trainable_label_map.pbtxt b/research/mlperf_object_detection/Mask_RCNN/object_detection/data/oid_bbox_trainable_label_map.pbtxt deleted file mode 100644 index 863e4f31d719cd148fd56c981e219257334f9c7e..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/oid_bbox_trainable_label_map.pbtxt +++ /dev/null @@ -1,2725 +0,0 @@ -item { - name: "/m/01g317" - id: 1 - display_name: "Person" -} -item { - name: "/m/09j2d" - id: 2 - display_name: "Clothing" -} -item { - name: "/m/04yx4" - id: 3 - display_name: "Man" -} -item { - name: "/m/0dzct" - id: 4 - display_name: "Face" -} -item { - name: "/m/07j7r" - id: 5 - display_name: "Tree" -} -item { - name: "/m/05s2s" - id: 6 - display_name: "Plant" -} -item { - name: "/m/03bt1vf" - id: 7 - display_name: "Woman" -} -item { - name: "/m/07yv9" - id: 8 - display_name: "Vehicle" -} -item { - name: "/m/0cgh4" - id: 9 - display_name: "Building" -} -item { - name: "/m/01prls" - id: 10 - display_name: "Land vehicle" -} -item { - name: "/m/09j5n" - id: 11 - display_name: "Footwear" -} -item { - name: "/m/05r655" - id: 12 - display_name: "Girl" -} -item { - name: "/m/0jbk" - id: 13 - display_name: "Animal" -} -item { - name: "/m/0k4j" - id: 14 - display_name: "Car" -} -item { - name: "/m/02wbm" - id: 15 - display_name: "Food" -} -item { - name: "/m/083wq" - id: 16 - display_name: "Wheel" -} -item { - name: "/m/0c9ph5" - id: 17 - display_name: "Flower" -} -item { - name: "/m/0c_jw" - id: 18 - display_name: "Furniture" -} -item { - name: "/m/0d4v4" - id: 19 - display_name: "Window" -} -item { - name: "/m/03jm5" - id: 20 - display_name: "House" -} -item { - name: "/m/01bl7v" - id: 21 - display_name: "Boy" -} -item { - name: "/m/0463sg" - id: 22 - display_name: "Fashion accessory" -} -item { - name: "/m/04bcr3" - id: 23 - display_name: "Table" -} -item { - name: "/m/0jyfg" - id: 24 - display_name: "Glasses" -} -item { - name: "/m/01xyhv" - id: 25 - display_name: "Suit" -} -item { - name: "/m/08dz3q" - id: 26 - display_name: "Auto part" -} -item { - name: "/m/015p6" - id: 27 - display_name: "Bird" -} -item { - name: "/m/05y5lj" - id: 28 - display_name: "Sports equipment" -} -item { - name: "/m/01d40f" - id: 29 - display_name: "Dress" -} -item { - name: "/m/0bt9lr" - id: 30 - display_name: "Dog" -} -item { - name: "/m/01lrl" - id: 31 - display_name: "Carnivore" -} -item { - name: "/m/02p0tk3" - id: 32 - display_name: "Human body" -} -item { - name: "/m/0fly7" - id: 33 - display_name: "Jeans" -} -item { - name: "/m/04szw" - id: 34 - display_name: "Musical instrument" -} -item { - name: "/m/0271t" - id: 35 - display_name: "Drink" -} -item { - name: "/m/019jd" - id: 36 - display_name: "Boat" -} -item { - name: "/m/03q69" - id: 37 - display_name: "Hair" -} -item { - name: "/m/0h9mv" - id: 38 - display_name: "Tire" -} -item { - name: "/m/04hgtk" - id: 39 - display_name: "Head" -} -item { - name: "/m/01yrx" - id: 40 - display_name: "Cat" -} -item { - name: "/m/01rzcn" - id: 41 - display_name: "Watercraft" -} -item { - name: "/m/01mzpv" - id: 42 - display_name: "Chair" -} -item { - name: "/m/0199g" - id: 43 - display_name: "Bike" -} -item { - name: "/m/01fdzj" - id: 44 - 
display_name: "Tower" -} -item { - name: "/m/04rky" - id: 45 - display_name: "Mammal" -} -item { - name: "/m/079cl" - id: 46 - display_name: "Skyscraper" -} -item { - name: "/m/0dzf4" - id: 47 - display_name: "Arm" -} -item { - name: "/m/0138tl" - id: 48 - display_name: "Toy" -} -item { - name: "/m/06msq" - id: 49 - display_name: "Sculpture" -} -item { - name: "/m/03xxp" - id: 50 - display_name: "Invertebrate" -} -item { - name: "/m/0hg7b" - id: 51 - display_name: "Microphone" -} -item { - name: "/m/01n5jq" - id: 52 - display_name: "Poster" -} -item { - name: "/m/03vt0" - id: 53 - display_name: "Insect" -} -item { - name: "/m/0342h" - id: 54 - display_name: "Guitar" -} -item { - name: "/m/0k0pj" - id: 55 - display_name: "Nose" -} -item { - name: "/m/02dl1y" - id: 56 - display_name: "Hat" -} -item { - name: "/m/04brg2" - id: 57 - display_name: "Tableware" -} -item { - name: "/m/02dgv" - id: 58 - display_name: "Door" -} -item { - name: "/m/01bqk0" - id: 59 - display_name: "Bicycle wheel" -} -item { - name: "/m/017ftj" - id: 60 - display_name: "Sunglasses" -} -item { - name: "/m/052lwg6" - id: 61 - display_name: "Baked goods" -} -item { - name: "/m/014sv8" - id: 62 - display_name: "Eye" -} -item { - name: "/m/0270h" - id: 63 - display_name: "Dessert" -} -item { - name: "/m/0283dt1" - id: 64 - display_name: "Mouth" -} -item { - name: "/m/0k5j" - id: 65 - display_name: "Aircraft" -} -item { - name: "/m/0cmf2" - id: 66 - display_name: "Airplane" -} -item { - name: "/m/07jdr" - id: 67 - display_name: "Train" -} -item { - name: "/m/032b3c" - id: 68 - display_name: "Jacket" -} -item { - name: "/m/033rq4" - id: 69 - display_name: "Street light" -} -item { - name: "/m/0k65p" - id: 70 - display_name: "Hand" -} -item { - name: "/m/01ww8y" - id: 71 - display_name: "Snack" -} -item { - name: "/m/0zvk5" - id: 72 - display_name: "Helmet" -} -item { - name: "/m/07mhn" - id: 73 - display_name: "Trousers" -} -item { - name: "/m/04dr76w" - id: 74 - display_name: "Bottle" -} -item { - name: "/m/03fp41" - id: 75 - display_name: "Houseplant" -} -item { - name: "/m/03k3r" - id: 76 - display_name: "Horse" -} -item { - name: "/m/01y9k5" - id: 77 - display_name: "Desk" -} -item { - name: "/m/0cdl1" - id: 78 - display_name: "Palm tree" -} -item { - name: "/m/0f4s2w" - id: 79 - display_name: "Vegetable" -} -item { - name: "/m/02xwb" - id: 80 - display_name: "Fruit" -} -item { - name: "/m/035r7c" - id: 81 - display_name: "Leg" -} -item { - name: "/m/0bt_c3" - id: 82 - display_name: "Book" -} -item { - name: "/m/01_bhs" - id: 83 - display_name: "Fast food" -} -item { - name: "/m/01599" - id: 84 - display_name: "Beer" -} -item { - name: "/m/03120" - id: 85 - display_name: "Flag" -} -item { - name: "/m/026t6" - id: 86 - display_name: "Drum" -} -item { - name: "/m/01bjv" - id: 87 - display_name: "Bus" -} -item { - name: "/m/07r04" - id: 88 - display_name: "Truck" -} -item { - name: "/m/018xm" - id: 89 - display_name: "Ball" -} -item { - name: "/m/01rkbr" - id: 90 - display_name: "Tie" -} -item { - name: "/m/0fm3zh" - id: 91 - display_name: "Flowerpot" -} -item { - name: "/m/02_n6y" - id: 92 - display_name: "Goggles" -} -item { - name: "/m/04_sv" - id: 93 - display_name: "Motorcycle" -} -item { - name: "/m/06z37_" - id: 94 - display_name: "Picture frame" -} -item { - name: "/m/01bfm9" - id: 95 - display_name: "Shorts" -} -item { - name: "/m/0h8mhzd" - id: 96 - display_name: "Sports uniform" -} -item { - name: "/m/0d_2m" - id: 97 - display_name: "Moths and butterflies" -} -item { - name: "/m/0gjbg72" - id: 98 - display_name: 
"Shelf" -} -item { - name: "/m/01n4qj" - id: 99 - display_name: "Shirt" -} -item { - name: "/m/0ch_cf" - id: 100 - display_name: "Fish" -} -item { - name: "/m/06m11" - id: 101 - display_name: "Rose" -} -item { - name: "/m/01jfm_" - id: 102 - display_name: "Licence plate" -} -item { - name: "/m/02crq1" - id: 103 - display_name: "Couch" -} -item { - name: "/m/083kb" - id: 104 - display_name: "Weapon" -} -item { - name: "/m/01c648" - id: 105 - display_name: "Laptop" -} -item { - name: "/m/09tvcd" - id: 106 - display_name: "Wine glass" -} -item { - name: "/m/0h2r6" - id: 107 - display_name: "Van" -} -item { - name: "/m/081qc" - id: 108 - display_name: "Wine" -} -item { - name: "/m/09ddx" - id: 109 - display_name: "Duck" -} -item { - name: "/m/03p3bw" - id: 110 - display_name: "Bicycle helmet" -} -item { - name: "/m/0cyf8" - id: 111 - display_name: "Butterfly" -} -item { - name: "/m/0b_rs" - id: 112 - display_name: "Swimming pool" -} -item { - name: "/m/039xj_" - id: 113 - display_name: "Ear" -} -item { - name: "/m/021sj1" - id: 114 - display_name: "Office" -} -item { - name: "/m/0dv5r" - id: 115 - display_name: "Camera" -} -item { - name: "/m/01lynh" - id: 116 - display_name: "Stairs" -} -item { - name: "/m/06bt6" - id: 117 - display_name: "Reptile" -} -item { - name: "/m/01226z" - id: 118 - display_name: "Football" -} -item { - name: "/m/0fszt" - id: 119 - display_name: "Cake" -} -item { - name: "/m/050k8" - id: 120 - display_name: "Mobile phone" -} -item { - name: "/m/02wbtzl" - id: 121 - display_name: "Sun hat" -} -item { - name: "/m/02p5f1q" - id: 122 - display_name: "Coffee cup" -} -item { - name: "/m/025nd" - id: 123 - display_name: "Christmas tree" -} -item { - name: "/m/02522" - id: 124 - display_name: "Computer monitor" -} -item { - name: "/m/09ct_" - id: 125 - display_name: "Helicopter" -} -item { - name: "/m/0cvnqh" - id: 126 - display_name: "Bench" -} -item { - name: "/m/0d5gx" - id: 127 - display_name: "Castle" -} -item { - name: "/m/01xygc" - id: 128 - display_name: "Coat" -} -item { - name: "/m/04m6gz" - id: 129 - display_name: "Porch" -} -item { - name: "/m/01gkx_" - id: 130 - display_name: "Swimwear" -} -item { - name: "/m/01s105" - id: 131 - display_name: "Cabinetry" -} -item { - name: "/m/01j61q" - id: 132 - display_name: "Tent" -} -item { - name: "/m/0hnnb" - id: 133 - display_name: "Umbrella" -} -item { - name: "/m/01j51" - id: 134 - display_name: "Balloon" -} -item { - name: "/m/01knjb" - id: 135 - display_name: "Billboard" -} -item { - name: "/m/03__z0" - id: 136 - display_name: "Bookcase" -} -item { - name: "/m/01m2v" - id: 137 - display_name: "Computer keyboard" -} -item { - name: "/m/0167gd" - id: 138 - display_name: "Doll" -} -item { - name: "/m/0284d" - id: 139 - display_name: "Dairy" -} -item { - name: "/m/03ssj5" - id: 140 - display_name: "Bed" -} -item { - name: "/m/02fq_6" - id: 141 - display_name: "Fedora" -} -item { - name: "/m/06nwz" - id: 142 - display_name: "Seafood" -} -item { - name: "/m/0220r2" - id: 143 - display_name: "Fountain" -} -item { - name: "/m/01mqdt" - id: 144 - display_name: "Traffic sign" -} -item { - name: "/m/0268lbt" - id: 145 - display_name: "Hiking equipment" -} -item { - name: "/m/07c52" - id: 146 - display_name: "Television" -} -item { - name: "/m/0grw1" - id: 147 - display_name: "Salad" -} -item { - name: "/m/01h3n" - id: 148 - display_name: "Bee" -} -item { - name: "/m/078n6m" - id: 149 - display_name: "Coffee table" -} -item { - name: "/m/01xq0k1" - id: 150 - display_name: "Cattle" -} -item { - name: "/m/0gd2v" - id: 151 - 
display_name: "Marine mammal" -} -item { - name: "/m/0dbvp" - id: 152 - display_name: "Goose" -} -item { - name: "/m/03rszm" - id: 153 - display_name: "Curtain" -} -item { - name: "/m/0h8n5zk" - id: 154 - display_name: "Kitchen & dining room table" -} -item { - name: "/m/019dx1" - id: 155 - display_name: "Home appliance" -} -item { - name: "/m/03hl4l9" - id: 156 - display_name: "Marine invertebrates" -} -item { - name: "/m/0b3fp9" - id: 157 - display_name: "Countertop" -} -item { - name: "/m/02rdsp" - id: 158 - display_name: "Office supplies" -} -item { - name: "/m/0hf58v5" - id: 159 - display_name: "Luggage and bags" -} -item { - name: "/m/04h7h" - id: 160 - display_name: "Lighthouse" -} -item { - name: "/m/024g6" - id: 161 - display_name: "Cocktail" -} -item { - name: "/m/0cffdh" - id: 162 - display_name: "Maple" -} -item { - name: "/m/03q5c7" - id: 163 - display_name: "Saucer" -} -item { - name: "/m/014y4n" - id: 164 - display_name: "Paddle" -} -item { - name: "/m/01yx86" - id: 165 - display_name: "Bronze sculpture" -} -item { - name: "/m/020jm" - id: 166 - display_name: "Beetle" -} -item { - name: "/m/025dyy" - id: 167 - display_name: "Box" -} -item { - name: "/m/01llwg" - id: 168 - display_name: "Necklace" -} -item { - name: "/m/08pbxl" - id: 169 - display_name: "Monkey" -} -item { - name: "/m/02d9qx" - id: 170 - display_name: "Whiteboard" -} -item { - name: "/m/02pkr5" - id: 171 - display_name: "Plumbing fixture" -} -item { - name: "/m/0h99cwc" - id: 172 - display_name: "Kitchen appliance" -} -item { - name: "/m/050gv4" - id: 173 - display_name: "Plate" -} -item { - name: "/m/02vqfm" - id: 174 - display_name: "Coffee" -} -item { - name: "/m/09kx5" - id: 175 - display_name: "Deer" -} -item { - name: "/m/019w40" - id: 176 - display_name: "Surfboard" -} -item { - name: "/m/09dzg" - id: 177 - display_name: "Turtle" -} -item { - name: "/m/07k1x" - id: 178 - display_name: "Tool" -} -item { - name: "/m/080hkjn" - id: 179 - display_name: "Handbag" -} -item { - name: "/m/07qxg_" - id: 180 - display_name: "Football helmet" -} -item { - name: "/m/0ph39" - id: 181 - display_name: "Canoe" -} -item { - name: "/m/018p4k" - id: 182 - display_name: "Cart" -} -item { - name: "/m/02h19r" - id: 183 - display_name: "Scarf" -} -item { - name: "/m/015h_t" - id: 184 - display_name: "Beard" -} -item { - name: "/m/0fqfqc" - id: 185 - display_name: "Drawer" -} -item { - name: "/m/025rp__" - id: 186 - display_name: "Cowboy hat" -} -item { - name: "/m/01x3z" - id: 187 - display_name: "Clock" -} -item { - name: "/m/0crjs" - id: 188 - display_name: "Convenience store" -} -item { - name: "/m/0l515" - id: 189 - display_name: "Sandwich" -} -item { - name: "/m/015qff" - id: 190 - display_name: "Traffic light" -} -item { - name: "/m/09kmb" - id: 191 - display_name: "Spider" -} -item { - name: "/m/09728" - id: 192 - display_name: "Bread" -} -item { - name: "/m/071qp" - id: 193 - display_name: "Squirrel" -} -item { - name: "/m/02s195" - id: 194 - display_name: "Vase" -} -item { - name: "/m/06c54" - id: 195 - display_name: "Rifle" -} -item { - name: "/m/01xqw" - id: 196 - display_name: "Cello" -} -item { - name: "/m/05zsy" - id: 197 - display_name: "Pumpkin" -} -item { - name: "/m/0bwd_0j" - id: 198 - display_name: "Elephant" -} -item { - name: "/m/04m9y" - id: 199 - display_name: "Lizard" -} -item { - name: "/m/052sf" - id: 200 - display_name: "Mushroom" -} -item { - name: "/m/03grzl" - id: 201 - display_name: "Baseball glove" -} -item { - name: "/m/01z1kdw" - id: 202 - display_name: "Juice" -} -item { - name: 
"/m/02wv6h6" - id: 203 - display_name: "Skirt" -} -item { - name: "/m/016m2d" - id: 204 - display_name: "Skull" -} -item { - name: "/m/0dtln" - id: 205 - display_name: "Lamp" -} -item { - name: "/m/057cc" - id: 206 - display_name: "Musical keyboard" -} -item { - name: "/m/06k2mb" - id: 207 - display_name: "High heels" -} -item { - name: "/m/0f6wt" - id: 208 - display_name: "Falcon" -} -item { - name: "/m/0cxn2" - id: 209 - display_name: "Ice cream" -} -item { - name: "/m/02jvh9" - id: 210 - display_name: "Mug" -} -item { - name: "/m/0gjkl" - id: 211 - display_name: "Watch" -} -item { - name: "/m/01b638" - id: 212 - display_name: "Boot" -} -item { - name: "/m/071p9" - id: 213 - display_name: "Ski" -} -item { - name: "/m/0pg52" - id: 214 - display_name: "Taxi" -} -item { - name: "/m/0ftb8" - id: 215 - display_name: "Sunflower" -} -item { - name: "/m/0hnyx" - id: 216 - display_name: "Pastry" -} -item { - name: "/m/02jz0l" - id: 217 - display_name: "Tap" -} -item { - name: "/m/04kkgm" - id: 218 - display_name: "Bowl" -} -item { - name: "/m/0174n1" - id: 219 - display_name: "Glove" -} -item { - name: "/m/0gv1x" - id: 220 - display_name: "Parrot" -} -item { - name: "/m/09csl" - id: 221 - display_name: "Eagle" -} -item { - name: "/m/02jnhm" - id: 222 - display_name: "Tin can" -} -item { - name: "/m/099ssp" - id: 223 - display_name: "Platter" -} -item { - name: "/m/03nfch" - id: 224 - display_name: "Sandal" -} -item { - name: "/m/07y_7" - id: 225 - display_name: "Violin" -} -item { - name: "/m/05z6w" - id: 226 - display_name: "Penguin" -} -item { - name: "/m/03m3pdh" - id: 227 - display_name: "Sofa bed" -} -item { - name: "/m/09ld4" - id: 228 - display_name: "Frog" -} -item { - name: "/m/09b5t" - id: 229 - display_name: "Chicken" -} -item { - name: "/m/054xkw" - id: 230 - display_name: "Lifejacket" -} -item { - name: "/m/0130jx" - id: 231 - display_name: "Sink" -} -item { - name: "/m/07fbm7" - id: 232 - display_name: "Strawberry" -} -item { - name: "/m/01dws" - id: 233 - display_name: "Bear" -} -item { - name: "/m/01tcjp" - id: 234 - display_name: "Muffin" -} -item { - name: "/m/0dftk" - id: 235 - display_name: "Swan" -} -item { - name: "/m/0c06p" - id: 236 - display_name: "Candle" -} -item { - name: "/m/034c16" - id: 237 - display_name: "Pillow" -} -item { - name: "/m/09d5_" - id: 238 - display_name: "Owl" -} -item { - name: "/m/03hlz0c" - id: 239 - display_name: "Kitchen utensil" -} -item { - name: "/m/0ft9s" - id: 240 - display_name: "Dragonfly" -} -item { - name: "/m/011k07" - id: 241 - display_name: "Tortoise" -} -item { - name: "/m/054_l" - id: 242 - display_name: "Mirror" -} -item { - name: "/m/0jqgx" - id: 243 - display_name: "Lily" -} -item { - name: "/m/0663v" - id: 244 - display_name: "Pizza" -} -item { - name: "/m/0242l" - id: 245 - display_name: "Coin" -} -item { - name: "/m/014trl" - id: 246 - display_name: "Cosmetics" -} -item { - name: "/m/05r5c" - id: 247 - display_name: "Piano" -} -item { - name: "/m/07j87" - id: 248 - display_name: "Tomato" -} -item { - name: "/m/05kyg_" - id: 249 - display_name: "Chest of drawers" -} -item { - name: "/m/0kmg4" - id: 250 - display_name: "Teddy bear" -} -item { - name: "/m/07cmd" - id: 251 - display_name: "Tank" -} -item { - name: "/m/0dv77" - id: 252 - display_name: "Squash" -} -item { - name: "/m/096mb" - id: 253 - display_name: "Lion" -} -item { - name: "/m/01gmv2" - id: 254 - display_name: "Brassiere" -} -item { - name: "/m/07bgp" - id: 255 - display_name: "Sheep" -} -item { - name: "/m/0cmx8" - id: 256 - display_name: "Spoon" -} -item { - 
name: "/m/029tx" - id: 257 - display_name: "Dinosaur" -} -item { - name: "/m/073bxn" - id: 258 - display_name: "Tripod" -} -item { - name: "/m/0bh9flk" - id: 259 - display_name: "Tablet computer" -} -item { - name: "/m/06mf6" - id: 260 - display_name: "Rabbit" -} -item { - name: "/m/06_fw" - id: 261 - display_name: "Skateboard" -} -item { - name: "/m/078jl" - id: 262 - display_name: "Snake" -} -item { - name: "/m/0fbdv" - id: 263 - display_name: "Shellfish" -} -item { - name: "/m/0h23m" - id: 264 - display_name: "Sparrow" -} -item { - name: "/m/014j1m" - id: 265 - display_name: "Apple" -} -item { - name: "/m/03fwl" - id: 266 - display_name: "Goat" -} -item { - name: "/m/02y6n" - id: 267 - display_name: "French fries" -} -item { - name: "/m/06c7f7" - id: 268 - display_name: "Lipstick" -} -item { - name: "/m/026qbn5" - id: 269 - display_name: "studio couch" -} -item { - name: "/m/0cdn1" - id: 270 - display_name: "Hamburger" -} -item { - name: "/m/07clx" - id: 271 - display_name: "Tea" -} -item { - name: "/m/07cx4" - id: 272 - display_name: "Telephone" -} -item { - name: "/m/03g8mr" - id: 273 - display_name: "Baseball bat" -} -item { - name: "/m/0cnyhnx" - id: 274 - display_name: "Bull" -} -item { - name: "/m/01b7fy" - id: 275 - display_name: "Headphones" -} -item { - name: "/m/04gth" - id: 276 - display_name: "Lavender" -} -item { - name: "/m/0cyfs" - id: 277 - display_name: "Parachute" -} -item { - name: "/m/021mn" - id: 278 - display_name: "Cookie" -} -item { - name: "/m/07dm6" - id: 279 - display_name: "Tiger" -} -item { - name: "/m/0k1tl" - id: 280 - display_name: "Pen" -} -item { - name: "/m/0dv9c" - id: 281 - display_name: "Racket" -} -item { - name: "/m/0dt3t" - id: 282 - display_name: "Fork" -} -item { - name: "/m/04yqq2" - id: 283 - display_name: "Bust" -} -item { - name: "/m/01cmb2" - id: 284 - display_name: "Miniskirt" -} -item { - name: "/m/0gd36" - id: 285 - display_name: "Sea lion" -} -item { - name: "/m/033cnk" - id: 286 - display_name: "Egg" -} -item { - name: "/m/06ncr" - id: 287 - display_name: "Saxophone" -} -item { - name: "/m/03bk1" - id: 288 - display_name: "Giraffe" -} -item { - name: "/m/0bjyj5" - id: 289 - display_name: "Waste container" -} -item { - name: "/m/06__v" - id: 290 - display_name: "Snowboard" -} -item { - name: "/m/0qmmr" - id: 291 - display_name: "Wheelchair" -} -item { - name: "/m/01xgg_" - id: 292 - display_name: "Medical equipment" -} -item { - name: "/m/0czz2" - id: 293 - display_name: "Antelope" -} -item { - name: "/m/02l8p9" - id: 294 - display_name: "Harbor seal" -} -item { - name: "/m/09g1w" - id: 295 - display_name: "Toilet" -} -item { - name: "/m/0ll1f78" - id: 296 - display_name: "Shrimp" -} -item { - name: "/m/0cyhj_" - id: 297 - display_name: "Orange" -} -item { - name: "/m/0642b4" - id: 298 - display_name: "Cupboard" -} -item { - name: "/m/0h8mzrc" - id: 299 - display_name: "Wall clock" -} -item { - name: "/m/068zj" - id: 300 - display_name: "Pig" -} -item { - name: "/m/02z51p" - id: 301 - display_name: "Nightstand" -} -item { - name: "/m/0h8nr_l" - id: 302 - display_name: "Bathroom accessory" -} -item { - name: "/m/0388q" - id: 303 - display_name: "Grape" -} -item { - name: "/m/02hj4" - id: 304 - display_name: "Dolphin" -} -item { - name: "/m/01jfsr" - id: 305 - display_name: "Lantern" -} -item { - name: "/m/07gql" - id: 306 - display_name: "Trumpet" -} -item { - name: "/m/0h8my_4" - id: 307 - display_name: "Tennis racket" -} -item { - name: "/m/0n28_" - id: 308 - display_name: "Crab" -} -item { - name: "/m/0120dh" - id: 309 - 
display_name: "Sea turtle" -} -item { - name: "/m/020kz" - id: 310 - display_name: "Cannon" -} -item { - name: "/m/0mkg" - id: 311 - display_name: "Accordion" -} -item { - name: "/m/03c7gz" - id: 312 - display_name: "Door handle" -} -item { - name: "/m/09k_b" - id: 313 - display_name: "Lemon" -} -item { - name: "/m/031n1" - id: 314 - display_name: "Foot" -} -item { - name: "/m/04rmv" - id: 315 - display_name: "Mouse" -} -item { - name: "/m/084rd" - id: 316 - display_name: "Wok" -} -item { - name: "/m/02rgn06" - id: 317 - display_name: "Volleyball" -} -item { - name: "/m/05z55" - id: 318 - display_name: "Pasta" -} -item { - name: "/m/01r546" - id: 319 - display_name: "Earrings" -} -item { - name: "/m/09qck" - id: 320 - display_name: "Banana" -} -item { - name: "/m/012w5l" - id: 321 - display_name: "Ladder" -} -item { - name: "/m/01940j" - id: 322 - display_name: "Backpack" -} -item { - name: "/m/09f_2" - id: 323 - display_name: "Crocodile" -} -item { - name: "/m/02p3w7d" - id: 324 - display_name: "Roller skates" -} -item { - name: "/m/057p5t" - id: 325 - display_name: "Scoreboard" -} -item { - name: "/m/0d8zb" - id: 326 - display_name: "Jellyfish" -} -item { - name: "/m/01nq26" - id: 327 - display_name: "Sock" -} -item { - name: "/m/01x_v" - id: 328 - display_name: "Camel" -} -item { - name: "/m/05gqfk" - id: 329 - display_name: "Plastic bag" -} -item { - name: "/m/0cydv" - id: 330 - display_name: "Caterpillar" -} -item { - name: "/m/07030" - id: 331 - display_name: "Sushi" -} -item { - name: "/m/084zz" - id: 332 - display_name: "Whale" -} -item { - name: "/m/0c29q" - id: 333 - display_name: "Leopard" -} -item { - name: "/m/02zn6n" - id: 334 - display_name: "Barrel" -} -item { - name: "/m/03tw93" - id: 335 - display_name: "Fireplace" -} -item { - name: "/m/0fqt361" - id: 336 - display_name: "Stool" -} -item { - name: "/m/0f9_l" - id: 337 - display_name: "Snail" -} -item { - name: "/m/0gm28" - id: 338 - display_name: "Candy" -} -item { - name: "/m/09rvcxw" - id: 339 - display_name: "Rocket" -} -item { - name: "/m/01nkt" - id: 340 - display_name: "Cheese" -} -item { - name: "/m/04p0qw" - id: 341 - display_name: "Billiard table" -} -item { - name: "/m/03hj559" - id: 342 - display_name: "Mixing bowl" -} -item { - name: "/m/07pj7bq" - id: 343 - display_name: "Bowling equipment" -} -item { - name: "/m/04ctx" - id: 344 - display_name: "Knife" -} -item { - name: "/m/0703r8" - id: 345 - display_name: "Loveseat" -} -item { - name: "/m/03qrc" - id: 346 - display_name: "Hamster" -} -item { - name: "/m/020lf" - id: 347 - display_name: "Mouse" -} -item { - name: "/m/0by6g" - id: 348 - display_name: "Shark" -} -item { - name: "/m/01fh4r" - id: 349 - display_name: "Teapot" -} -item { - name: "/m/07c6l" - id: 350 - display_name: "Trombone" -} -item { - name: "/m/03bj1" - id: 351 - display_name: "Panda" -} -item { - name: "/m/0898b" - id: 352 - display_name: "Zebra" -} -item { - name: "/m/02x984l" - id: 353 - display_name: "Mechanical fan" -} -item { - name: "/m/0fj52s" - id: 354 - display_name: "Carrot" -} -item { - name: "/m/0cd4d" - id: 355 - display_name: "Cheetah" -} -item { - name: "/m/02068x" - id: 356 - display_name: "Gondola" -} -item { - name: "/m/01vbnl" - id: 357 - display_name: "Bidet" -} -item { - name: "/m/0449p" - id: 358 - display_name: "Jaguar" -} -item { - name: "/m/0gj37" - id: 359 - display_name: "Ladybug" -} -item { - name: "/m/0nl46" - id: 360 - display_name: "Crown" -} -item { - name: "/m/0152hh" - id: 361 - display_name: "Snowman" -} -item { - name: "/m/03dnzn" - id: 362 - 
display_name: "Bathtub" -} -item { - name: "/m/05_5p_0" - id: 363 - display_name: "Table tennis racket" -} -item { - name: "/m/02jfl0" - id: 364 - display_name: "Sombrero" -} -item { - name: "/m/01dxs" - id: 365 - display_name: "Brown bear" -} -item { - name: "/m/0cjq5" - id: 366 - display_name: "Lobster" -} -item { - name: "/m/040b_t" - id: 367 - display_name: "Refrigerator" -} -item { - name: "/m/0_cp5" - id: 368 - display_name: "Oyster" -} -item { - name: "/m/0gxl3" - id: 369 - display_name: "Handgun" -} -item { - name: "/m/029bxz" - id: 370 - display_name: "Oven" -} -item { - name: "/m/02zt3" - id: 371 - display_name: "Kite" -} -item { - name: "/m/03d443" - id: 372 - display_name: "Rhinoceros" -} -item { - name: "/m/0306r" - id: 373 - display_name: "Fox" -} -item { - name: "/m/0h8l4fh" - id: 374 - display_name: "Light bulb" -} -item { - name: "/m/0633h" - id: 375 - display_name: "Polar bear" -} -item { - name: "/m/01s55n" - id: 376 - display_name: "Suitcase" -} -item { - name: "/m/0hkxq" - id: 377 - display_name: "Broccoli" -} -item { - name: "/m/0cn6p" - id: 378 - display_name: "Otter" -} -item { - name: "/m/0dbzx" - id: 379 - display_name: "Mule" -} -item { - name: "/m/01dy8n" - id: 380 - display_name: "Woodpecker" -} -item { - name: "/m/01h8tj" - id: 381 - display_name: "Starfish" -} -item { - name: "/m/03s_tn" - id: 382 - display_name: "Kettle" -} -item { - name: "/m/01xs3r" - id: 383 - display_name: "Jet ski" -} -item { - name: "/m/031b6r" - id: 384 - display_name: "Window blind" -} -item { - name: "/m/06j2d" - id: 385 - display_name: "Raven" -} -item { - name: "/m/0hqkz" - id: 386 - display_name: "Grapefruit" -} -item { - name: "/m/01_5g" - id: 387 - display_name: "Chopsticks" -} -item { - name: "/m/02zvsm" - id: 388 - display_name: "Tart" -} -item { - name: "/m/0kpqd" - id: 389 - display_name: "Watermelon" -} -item { - name: "/m/015x4r" - id: 390 - display_name: "Cucumber" -} -item { - name: "/m/061hd_" - id: 391 - display_name: "Infant bed" -} -item { - name: "/m/04ylt" - id: 392 - display_name: "Missile" -} -item { - name: "/m/02wv84t" - id: 393 - display_name: "Gas stove" -} -item { - name: "/m/04y4h8h" - id: 394 - display_name: "Bathroom cabinet" -} -item { - name: "/m/01gllr" - id: 395 - display_name: "Beehive" -} -item { - name: "/m/0pcr" - id: 396 - display_name: "Alpaca" -} -item { - name: "/m/0jy4k" - id: 397 - display_name: "Doughnut" -} -item { - name: "/m/09f20" - id: 398 - display_name: "Hippopotamus" -} -item { - name: "/m/0mcx2" - id: 399 - display_name: "Ipod" -} -item { - name: "/m/04c0y" - id: 400 - display_name: "Kangaroo" -} -item { - name: "/m/0_k2" - id: 401 - display_name: "Ant" -} -item { - name: "/m/0jg57" - id: 402 - display_name: "Bell pepper" -} -item { - name: "/m/03fj2" - id: 403 - display_name: "Goldfish" -} -item { - name: "/m/03ldnb" - id: 404 - display_name: "Ceiling fan" -} -item { - name: "/m/06nrc" - id: 405 - display_name: "Shotgun" -} -item { - name: "/m/01btn" - id: 406 - display_name: "Barge" -} -item { - name: "/m/05vtc" - id: 407 - display_name: "Potato" -} -item { - name: "/m/08hvt4" - id: 408 - display_name: "Jug" -} -item { - name: "/m/0fx9l" - id: 409 - display_name: "Microwave oven" -} -item { - name: "/m/01h44" - id: 410 - display_name: "Bat" -} -item { - name: "/m/05n4y" - id: 411 - display_name: "Ostrich" -} -item { - name: "/m/0jly1" - id: 412 - display_name: "Turkey" -} -item { - name: "/m/06y5r" - id: 413 - display_name: "Sword" -} -item { - name: "/m/05ctyq" - id: 414 - display_name: "Tennis ball" -} -item { - name: 
"/m/0fp6w" - id: 415 - display_name: "Pineapple" -} -item { - name: "/m/0d4w1" - id: 416 - display_name: "Closet" -} -item { - name: "/m/02pv19" - id: 417 - display_name: "Stop sign" -} -item { - name: "/m/07crc" - id: 418 - display_name: "Taco" -} -item { - name: "/m/01dwwc" - id: 419 - display_name: "Pancake" -} -item { - name: "/m/01b9xk" - id: 420 - display_name: "Hot dog" -} -item { - name: "/m/013y1f" - id: 421 - display_name: "Organ" -} -item { - name: "/m/0m53l" - id: 422 - display_name: "Rays and skates" -} -item { - name: "/m/0174k2" - id: 423 - display_name: "Washing machine" -} -item { - name: "/m/01dwsz" - id: 424 - display_name: "Waffle" -} -item { - name: "/m/04vv5k" - id: 425 - display_name: "Snowplow" -} -item { - name: "/m/04cp_" - id: 426 - display_name: "Koala" -} -item { - name: "/m/0fz0h" - id: 427 - display_name: "Honeycomb" -} -item { - name: "/m/0llzx" - id: 428 - display_name: "Sewing machine" -} -item { - name: "/m/0319l" - id: 429 - display_name: "Horn" -} -item { - name: "/m/04v6l4" - id: 430 - display_name: "Frying pan" -} -item { - name: "/m/0dkzw" - id: 431 - display_name: "Seat belt" -} -item { - name: "/m/027pcv" - id: 432 - display_name: "Zucchini" -} -item { - name: "/m/0323sq" - id: 433 - display_name: "Golf cart" -} -item { - name: "/m/054fyh" - id: 434 - display_name: "Pitcher" -} -item { - name: "/m/01pns0" - id: 435 - display_name: "Fire hydrant" -} -item { - name: "/m/012n7d" - id: 436 - display_name: "Ambulance" -} -item { - name: "/m/044r5d" - id: 437 - display_name: "Golf ball" -} -item { - name: "/m/01krhy" - id: 438 - display_name: "Tiara" -} -item { - name: "/m/0dq75" - id: 439 - display_name: "Raccoon" -} -item { - name: "/m/0176mf" - id: 440 - display_name: "Belt" -} -item { - name: "/m/0h8lkj8" - id: 441 - display_name: "Corded phone" -} -item { - name: "/m/04tn4x" - id: 442 - display_name: "Swim cap" -} -item { - name: "/m/06l9r" - id: 443 - display_name: "Red panda" -} -item { - name: "/m/0cjs7" - id: 444 - display_name: "Asparagus" -} -item { - name: "/m/01lsmm" - id: 445 - display_name: "Scissors" -} -item { - name: "/m/01lcw4" - id: 446 - display_name: "Limousine" -} -item { - name: "/m/047j0r" - id: 447 - display_name: "Filing cabinet" -} -item { - name: "/m/01fb_0" - id: 448 - display_name: "Bagel" -} -item { - name: "/m/04169hn" - id: 449 - display_name: "Wood-burning stove" -} -item { - name: "/m/076bq" - id: 450 - display_name: "Segway" -} -item { - name: "/m/0hdln" - id: 451 - display_name: "Ruler" -} -item { - name: "/m/01g3x7" - id: 452 - display_name: "Bow and arrow" -} -item { - name: "/m/0l3ms" - id: 453 - display_name: "Balance beam" -} -item { - name: "/m/058qzx" - id: 454 - display_name: "Kitchen knife" -} -item { - name: "/m/0h8n6ft" - id: 455 - display_name: "Cake stand" -} -item { - name: "/m/018j2" - id: 456 - display_name: "Banjo" -} -item { - name: "/m/0l14j_" - id: 457 - display_name: "Flute" -} -item { - name: "/m/0wdt60w" - id: 458 - display_name: "Rugby ball" -} -item { - name: "/m/02gzp" - id: 459 - display_name: "Dagger" -} -item { - name: "/m/0h8n6f9" - id: 460 - display_name: "Dog bed" -} -item { - name: "/m/0fbw6" - id: 461 - display_name: "Cabbage" -} -item { - name: "/m/07kng9" - id: 462 - display_name: "Picnic basket" -} -item { - name: "/m/0dj6p" - id: 463 - display_name: "Peach" -} -item { - name: "/m/06pcq" - id: 464 - display_name: "Submarine sandwich" -} -item { - name: "/m/061_f" - id: 465 - display_name: "Pear" -} -item { - name: "/m/04g2r" - id: 466 - display_name: "Lynx" -} -item { - name: 
"/m/0jwn_" - id: 467 - display_name: "Pomegranate" -} -item { - name: "/m/02f9f_" - id: 468 - display_name: "Shower" -} -item { - name: "/m/01f8m5" - id: 469 - display_name: "Blue jay" -} -item { - name: "/m/01m4t" - id: 470 - display_name: "Printer" -} -item { - name: "/m/0cl4p" - id: 471 - display_name: "Hedgehog" -} -item { - name: "/m/07xyvk" - id: 472 - display_name: "Coffeemaker" -} -item { - name: "/m/084hf" - id: 473 - display_name: "Worm" -} -item { - name: "/m/03v5tg" - id: 474 - display_name: "Drinking straw" -} -item { - name: "/m/0qjjc" - id: 475 - display_name: "Remote control" -} -item { - name: "/m/015x5n" - id: 476 - display_name: "Radish" -} -item { - name: "/m/0ccs93" - id: 477 - display_name: "Canary" -} -item { - name: "/m/0nybt" - id: 478 - display_name: "Seahorse" -} -item { - name: "/m/02vkqh8" - id: 479 - display_name: "Wardrobe" -} -item { - name: "/m/09gtd" - id: 480 - display_name: "Toilet paper" -} -item { - name: "/m/019h78" - id: 481 - display_name: "Centipede" -} -item { - name: "/m/015wgc" - id: 482 - display_name: "Croissant" -} -item { - name: "/m/01x3jk" - id: 483 - display_name: "Snowmobile" -} -item { - name: "/m/01j3zr" - id: 484 - display_name: "Burrito" -} -item { - name: "/m/0c568" - id: 485 - display_name: "Porcupine" -} -item { - name: "/m/02pdsw" - id: 486 - display_name: "Cutting board" -} -item { - name: "/m/029b3" - id: 487 - display_name: "Dice" -} -item { - name: "/m/03q5t" - id: 488 - display_name: "Harpsichord" -} -item { - name: "/m/0p833" - id: 489 - display_name: "Perfume" -} -item { - name: "/m/01d380" - id: 490 - display_name: "Drill" -} -item { - name: "/m/024d2" - id: 491 - display_name: "Calculator" -} -item { - name: "/m/0mw_6" - id: 492 - display_name: "Willow" -} -item { - name: "/m/01f91_" - id: 493 - display_name: "Pretzel" -} -item { - name: "/m/02g30s" - id: 494 - display_name: "Guacamole" -} -item { - name: "/m/01hrv5" - id: 495 - display_name: "Popcorn" -} -item { - name: "/m/03m5k" - id: 496 - display_name: "Harp" -} -item { - name: "/m/0162_1" - id: 497 - display_name: "Towel" -} -item { - name: "/m/063rgb" - id: 498 - display_name: "Mixer" -} -item { - name: "/m/06_72j" - id: 499 - display_name: "Digital clock" -} -item { - name: "/m/046dlr" - id: 500 - display_name: "Alarm clock" -} -item { - name: "/m/047v4b" - id: 501 - display_name: "Artichoke" -} -item { - name: "/m/04zpv" - id: 502 - display_name: "Milk" -} -item { - name: "/m/043nyj" - id: 503 - display_name: "Common fig" -} -item { - name: "/m/03bbps" - id: 504 - display_name: "Power plugs and sockets" -} -item { - name: "/m/02w3r3" - id: 505 - display_name: "Paper towel" -} -item { - name: "/m/02pjr4" - id: 506 - display_name: "Blender" -} -item { - name: "/m/0755b" - id: 507 - display_name: "Scorpion" -} -item { - name: "/m/02lbcq" - id: 508 - display_name: "Stretcher" -} -item { - name: "/m/0fldg" - id: 509 - display_name: "Mango" -} -item { - name: "/m/012074" - id: 510 - display_name: "Magpie" -} -item { - name: "/m/035vxb" - id: 511 - display_name: "Isopod" -} -item { - name: "/m/02w3_ws" - id: 512 - display_name: "Personal care" -} -item { - name: "/m/0f6nr" - id: 513 - display_name: "Unicycle" -} -item { - name: "/m/0420v5" - id: 514 - display_name: "Punching bag" -} -item { - name: "/m/0frqm" - id: 515 - display_name: "Envelope" -} -item { - name: "/m/03txqz" - id: 516 - display_name: "Scale" -} -item { - name: "/m/0271qf7" - id: 517 - display_name: "Wine rack" -} -item { - name: "/m/074d1" - id: 518 - display_name: "Submarine" -} -item { - name: 
"/m/08p92x" - id: 519 - display_name: "Cream" -} -item { - name: "/m/01j4z9" - id: 520 - display_name: "Chainsaw" -} -item { - name: "/m/0kpt_" - id: 521 - display_name: "Cantaloupe" -} -item { - name: "/m/0h8n27j" - id: 522 - display_name: "Serving tray" -} -item { - name: "/m/03y6mg" - id: 523 - display_name: "Food processor" -} -item { - name: "/m/04h8sr" - id: 524 - display_name: "Dumbbell" -} -item { - name: "/m/065h6l" - id: 525 - display_name: "Jacuzzi" -} -item { - name: "/m/02tsc9" - id: 526 - display_name: "Slow cooker" -} -item { - name: "/m/012ysf" - id: 527 - display_name: "Syringe" -} -item { - name: "/m/0ky7b" - id: 528 - display_name: "Dishwasher" -} -item { - name: "/m/02wg_p" - id: 529 - display_name: "Tree house" -} -item { - name: "/m/0584n8" - id: 530 - display_name: "Briefcase" -} -item { - name: "/m/03kt2w" - id: 531 - display_name: "Stationary bicycle" -} -item { - name: "/m/05kms" - id: 532 - display_name: "Oboe" -} -item { - name: "/m/030610" - id: 533 - display_name: "Treadmill" -} -item { - name: "/m/0lt4_" - id: 534 - display_name: "Binoculars" -} -item { - name: "/m/076lb9" - id: 535 - display_name: "Bench" -} -item { - name: "/m/02ctlc" - id: 536 - display_name: "Cricket ball" -} -item { - name: "/m/02x8cch" - id: 537 - display_name: "Salt and pepper shakers" -} -item { - name: "/m/09gys" - id: 538 - display_name: "Squid" -} -item { - name: "/m/03jbxj" - id: 539 - display_name: "Light switch" -} -item { - name: "/m/012xff" - id: 540 - display_name: "Toothbrush" -} -item { - name: "/m/0h8kx63" - id: 541 - display_name: "Spice rack" -} -item { - name: "/m/073g6" - id: 542 - display_name: "Stethoscope" -} -item { - name: "/m/02cvgx" - id: 543 - display_name: "Winter melon" -} -item { - name: "/m/027rl48" - id: 544 - display_name: "Ladle" -} -item { - name: "/m/01kb5b" - id: 545 - display_name: "Flashlight" -} diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/oid_object_detection_challenge_500_label_map.pbtxt b/research/mlperf_object_detection/Mask_RCNN/object_detection/data/oid_object_detection_challenge_500_label_map.pbtxt deleted file mode 100644 index 044f6d4c813729a693cac761f43a2246e07f7b6a..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/oid_object_detection_challenge_500_label_map.pbtxt +++ /dev/null @@ -1,2500 +0,0 @@ -item { - name: "/m/061hd_" - id: 1 - display_name: "Infant bed" -} -item { - name: "/m/06m11" - id: 2 - display_name: "Rose" -} -item { - name: "/m/03120" - id: 3 - display_name: "Flag" -} -item { - name: "/m/01kb5b" - id: 4 - display_name: "Flashlight" -} -item { - name: "/m/0120dh" - id: 5 - display_name: "Sea turtle" -} -item { - name: "/m/0dv5r" - id: 6 - display_name: "Camera" -} -item { - name: "/m/0jbk" - id: 7 - display_name: "Animal" -} -item { - name: "/m/0174n1" - id: 8 - display_name: "Glove" -} -item { - name: "/m/09f_2" - id: 9 - display_name: "Crocodile" -} -item { - name: "/m/01xq0k1" - id: 10 - display_name: "Cattle" -} -item { - name: "/m/03jm5" - id: 11 - display_name: "House" -} -item { - name: "/m/02g30s" - id: 12 - display_name: "Guacamole" -} -item { - name: "/m/05z6w" - id: 13 - display_name: "Penguin" -} -item { - name: "/m/01jfm_" - id: 14 - display_name: "Vehicle registration plate" -} -item { - name: "/m/076lb9" - id: 15 - display_name: "Training bench" -} -item { - name: "/m/0gj37" - id: 16 - display_name: "Ladybug" -} -item { - name: "/m/0k0pj" - id: 17 - display_name: "Human nose" -} -item { - name: "/m/0kpqd" - id: 18 
- display_name: "Watermelon" -} -item { - name: "/m/0l14j_" - id: 19 - display_name: "Flute" -} -item { - name: "/m/0cyf8" - id: 20 - display_name: "Butterfly" -} -item { - name: "/m/0174k2" - id: 21 - display_name: "Washing machine" -} -item { - name: "/m/0dq75" - id: 22 - display_name: "Raccoon" -} -item { - name: "/m/076bq" - id: 23 - display_name: "Segway" -} -item { - name: "/m/07crc" - id: 24 - display_name: "Taco" -} -item { - name: "/m/0d8zb" - id: 25 - display_name: "Jellyfish" -} -item { - name: "/m/0fszt" - id: 26 - display_name: "Cake" -} -item { - name: "/m/0k1tl" - id: 27 - display_name: "Pen" -} -item { - name: "/m/020kz" - id: 28 - display_name: "Cannon" -} -item { - name: "/m/09728" - id: 29 - display_name: "Bread" -} -item { - name: "/m/07j7r" - id: 30 - display_name: "Tree" -} -item { - name: "/m/0fbdv" - id: 31 - display_name: "Shellfish" -} -item { - name: "/m/03ssj5" - id: 32 - display_name: "Bed" -} -item { - name: "/m/03qrc" - id: 33 - display_name: "Hamster" -} -item { - name: "/m/02dl1y" - id: 34 - display_name: "Hat" -} -item { - name: "/m/01k6s3" - id: 35 - display_name: "Toaster" -} -item { - name: "/m/02jfl0" - id: 36 - display_name: "Sombrero" -} -item { - name: "/m/01krhy" - id: 37 - display_name: "Tiara" -} -item { - name: "/m/04kkgm" - id: 38 - display_name: "Bowl" -} -item { - name: "/m/0ft9s" - id: 39 - display_name: "Dragonfly" -} -item { - name: "/m/0d_2m" - id: 40 - display_name: "Moths and butterflies" -} -item { - name: "/m/0czz2" - id: 41 - display_name: "Antelope" -} -item { - name: "/m/0f4s2w" - id: 42 - display_name: "Vegetable" -} -item { - name: "/m/07dd4" - id: 43 - display_name: "Torch" -} -item { - name: "/m/0cgh4" - id: 44 - display_name: "Building" -} -item { - name: "/m/03bbps" - id: 45 - display_name: "Power plugs and sockets" -} -item { - name: "/m/02pjr4" - id: 46 - display_name: "Blender" -} -item { - name: "/m/04p0qw" - id: 47 - display_name: "Billiard table" -} -item { - name: "/m/02pdsw" - id: 48 - display_name: "Cutting board" -} -item { - name: "/m/01yx86" - id: 49 - display_name: "Bronze sculpture" -} -item { - name: "/m/09dzg" - id: 50 - display_name: "Turtle" -} -item { - name: "/m/0hkxq" - id: 51 - display_name: "Broccoli" -} -item { - name: "/m/07dm6" - id: 52 - display_name: "Tiger" -} -item { - name: "/m/054_l" - id: 53 - display_name: "Mirror" -} -item { - name: "/m/01dws" - id: 54 - display_name: "Bear" -} -item { - name: "/m/027pcv" - id: 55 - display_name: "Zucchini" -} -item { - name: "/m/01d40f" - id: 56 - display_name: "Dress" -} -item { - name: "/m/02rgn06" - id: 57 - display_name: "Volleyball" -} -item { - name: "/m/0342h" - id: 58 - display_name: "Guitar" -} -item { - name: "/m/06bt6" - id: 59 - display_name: "Reptile" -} -item { - name: "/m/0323sq" - id: 60 - display_name: "Golf cart" -} -item { - name: "/m/02zvsm" - id: 61 - display_name: "Tart" -} -item { - name: "/m/02fq_6" - id: 62 - display_name: "Fedora" -} -item { - name: "/m/01lrl" - id: 63 - display_name: "Carnivore" -} -item { - name: "/m/0k4j" - id: 64 - display_name: "Car" -} -item { - name: "/m/04h7h" - id: 65 - display_name: "Lighthouse" -} -item { - name: "/m/07xyvk" - id: 66 - display_name: "Coffeemaker" -} -item { - name: "/m/03y6mg" - id: 67 - display_name: "Food processor" -} -item { - name: "/m/07r04" - id: 68 - display_name: "Truck" -} -item { - name: "/m/03__z0" - id: 69 - display_name: "Bookcase" -} -item { - name: "/m/019w40" - id: 70 - display_name: "Surfboard" -} -item { - name: "/m/09j5n" - id: 71 - display_name: "Footwear" -} -item { 
- name: "/m/0cvnqh" - id: 72 - display_name: "Bench" -} -item { - name: "/m/01llwg" - id: 73 - display_name: "Necklace" -} -item { - name: "/m/0c9ph5" - id: 74 - display_name: "Flower" -} -item { - name: "/m/015x5n" - id: 75 - display_name: "Radish" -} -item { - name: "/m/0gd2v" - id: 76 - display_name: "Marine mammal" -} -item { - name: "/m/04v6l4" - id: 77 - display_name: "Frying pan" -} -item { - name: "/m/02jz0l" - id: 78 - display_name: "Tap" -} -item { - name: "/m/0dj6p" - id: 79 - display_name: "Peach" -} -item { - name: "/m/04ctx" - id: 80 - display_name: "Knife" -} -item { - name: "/m/080hkjn" - id: 81 - display_name: "Handbag" -} -item { - name: "/m/01c648" - id: 82 - display_name: "Laptop" -} -item { - name: "/m/01j61q" - id: 83 - display_name: "Tent" -} -item { - name: "/m/012n7d" - id: 84 - display_name: "Ambulance" -} -item { - name: "/m/025nd" - id: 85 - display_name: "Christmas tree" -} -item { - name: "/m/09csl" - id: 86 - display_name: "Eagle" -} -item { - name: "/m/01lcw4" - id: 87 - display_name: "Limousine" -} -item { - name: "/m/0h8n5zk" - id: 88 - display_name: "Kitchen & dining room table" -} -item { - name: "/m/0633h" - id: 89 - display_name: "Polar bear" -} -item { - name: "/m/01fdzj" - id: 90 - display_name: "Tower" -} -item { - name: "/m/01226z" - id: 91 - display_name: "Football" -} -item { - name: "/m/0mw_6" - id: 92 - display_name: "Willow" -} -item { - name: "/m/04hgtk" - id: 93 - display_name: "Human head" -} -item { - name: "/m/02pv19" - id: 94 - display_name: "Stop sign" -} -item { - name: "/m/09qck" - id: 95 - display_name: "Banana" -} -item { - name: "/m/063rgb" - id: 96 - display_name: "Mixer" -} -item { - name: "/m/0lt4_" - id: 97 - display_name: "Binoculars" -} -item { - name: "/m/0270h" - id: 98 - display_name: "Dessert" -} -item { - name: "/m/01h3n" - id: 99 - display_name: "Bee" -} -item { - name: "/m/01mzpv" - id: 100 - display_name: "Chair" -} -item { - name: "/m/04169hn" - id: 101 - display_name: "Wood-burning stove" -} -item { - name: "/m/0fm3zh" - id: 102 - display_name: "Flowerpot" -} -item { - name: "/m/0d20w4" - id: 103 - display_name: "Beaker" -} -item { - name: "/m/0_cp5" - id: 104 - display_name: "Oyster" -} -item { - name: "/m/01dy8n" - id: 105 - display_name: "Woodpecker" -} -item { - name: "/m/03m5k" - id: 106 - display_name: "Harp" -} -item { - name: "/m/03dnzn" - id: 107 - display_name: "Bathtub" -} -item { - name: "/m/0h8mzrc" - id: 108 - display_name: "Wall clock" -} -item { - name: "/m/0h8mhzd" - id: 109 - display_name: "Sports uniform" -} -item { - name: "/m/03d443" - id: 110 - display_name: "Rhinoceros" -} -item { - name: "/m/01gllr" - id: 111 - display_name: "Beehive" -} -item { - name: "/m/0642b4" - id: 112 - display_name: "Cupboard" -} -item { - name: "/m/09b5t" - id: 113 - display_name: "Chicken" -} -item { - name: "/m/04yx4" - id: 114 - display_name: "Man" -} -item { - name: "/m/01f8m5" - id: 115 - display_name: "Blue jay" -} -item { - name: "/m/015x4r" - id: 116 - display_name: "Cucumber" -} -item { - name: "/m/01j51" - id: 117 - display_name: "Balloon" -} -item { - name: "/m/02zt3" - id: 118 - display_name: "Kite" -} -item { - name: "/m/03tw93" - id: 119 - display_name: "Fireplace" -} -item { - name: "/m/01jfsr" - id: 120 - display_name: "Lantern" -} -item { - name: "/m/04ylt" - id: 121 - display_name: "Missile" -} -item { - name: "/m/0bt_c3" - id: 122 - display_name: "Book" -} -item { - name: "/m/0cmx8" - id: 123 - display_name: "Spoon" -} -item { - name: "/m/0hqkz" - id: 124 - display_name: "Grapefruit" -} -item { - 
name: "/m/071qp" - id: 125 - display_name: "Squirrel" -} -item { - name: "/m/0cyhj_" - id: 126 - display_name: "Orange" -} -item { - name: "/m/01xygc" - id: 127 - display_name: "Coat" -} -item { - name: "/m/0420v5" - id: 128 - display_name: "Punching bag" -} -item { - name: "/m/0898b" - id: 129 - display_name: "Zebra" -} -item { - name: "/m/01knjb" - id: 130 - display_name: "Billboard" -} -item { - name: "/m/0199g" - id: 131 - display_name: "Bicycle" -} -item { - name: "/m/03c7gz" - id: 132 - display_name: "Door handle" -} -item { - name: "/m/02x984l" - id: 133 - display_name: "Mechanical fan" -} -item { - name: "/m/04zwwv" - id: 134 - display_name: "Ring binder" -} -item { - name: "/m/04bcr3" - id: 135 - display_name: "Table" -} -item { - name: "/m/0gv1x" - id: 136 - display_name: "Parrot" -} -item { - name: "/m/01nq26" - id: 137 - display_name: "Sock" -} -item { - name: "/m/02s195" - id: 138 - display_name: "Vase" -} -item { - name: "/m/083kb" - id: 139 - display_name: "Weapon" -} -item { - name: "/m/06nrc" - id: 140 - display_name: "Shotgun" -} -item { - name: "/m/0jyfg" - id: 141 - display_name: "Glasses" -} -item { - name: "/m/0nybt" - id: 142 - display_name: "Seahorse" -} -item { - name: "/m/0176mf" - id: 143 - display_name: "Belt" -} -item { - name: "/m/01rzcn" - id: 144 - display_name: "Watercraft" -} -item { - name: "/m/0d4v4" - id: 145 - display_name: "Window" -} -item { - name: "/m/03bk1" - id: 146 - display_name: "Giraffe" -} -item { - name: "/m/096mb" - id: 147 - display_name: "Lion" -} -item { - name: "/m/0h9mv" - id: 148 - display_name: "Tire" -} -item { - name: "/m/07yv9" - id: 149 - display_name: "Vehicle" -} -item { - name: "/m/0ph39" - id: 150 - display_name: "Canoe" -} -item { - name: "/m/01rkbr" - id: 151 - display_name: "Tie" -} -item { - name: "/m/0gjbg72" - id: 152 - display_name: "Shelf" -} -item { - name: "/m/06z37_" - id: 153 - display_name: "Picture frame" -} -item { - name: "/m/01m4t" - id: 154 - display_name: "Printer" -} -item { - name: "/m/035r7c" - id: 155 - display_name: "Human leg" -} -item { - name: "/m/019jd" - id: 156 - display_name: "Boat" -} -item { - name: "/m/02tsc9" - id: 157 - display_name: "Slow cooker" -} -item { - name: "/m/015wgc" - id: 158 - display_name: "Croissant" -} -item { - name: "/m/0c06p" - id: 159 - display_name: "Candle" -} -item { - name: "/m/01dwwc" - id: 160 - display_name: "Pancake" -} -item { - name: "/m/034c16" - id: 161 - display_name: "Pillow" -} -item { - name: "/m/0242l" - id: 162 - display_name: "Coin" -} -item { - name: "/m/02lbcq" - id: 163 - display_name: "Stretcher" -} -item { - name: "/m/03nfch" - id: 164 - display_name: "Sandal" -} -item { - name: "/m/03bt1vf" - id: 165 - display_name: "Woman" -} -item { - name: "/m/01lynh" - id: 166 - display_name: "Stairs" -} -item { - name: "/m/03q5t" - id: 167 - display_name: "Harpsichord" -} -item { - name: "/m/0fqt361" - id: 168 - display_name: "Stool" -} -item { - name: "/m/01bjv" - id: 169 - display_name: "Bus" -} -item { - name: "/m/01s55n" - id: 170 - display_name: "Suitcase" -} -item { - name: "/m/0283dt1" - id: 171 - display_name: "Human mouth" -} -item { - name: "/m/01z1kdw" - id: 172 - display_name: "Juice" -} -item { - name: "/m/016m2d" - id: 173 - display_name: "Skull" -} -item { - name: "/m/02dgv" - id: 174 - display_name: "Door" -} -item { - name: "/m/07y_7" - id: 175 - display_name: "Violin" -} -item { - name: "/m/01_5g" - id: 176 - display_name: "Chopsticks" -} -item { - name: "/m/06_72j" - id: 177 - display_name: "Digital clock" -} -item { - name: "/m/0ftb8" - 
id: 178 - display_name: "Sunflower" -} -item { - name: "/m/0c29q" - id: 179 - display_name: "Leopard" -} -item { - name: "/m/0jg57" - id: 180 - display_name: "Bell pepper" -} -item { - name: "/m/02l8p9" - id: 181 - display_name: "Harbor seal" -} -item { - name: "/m/078jl" - id: 182 - display_name: "Snake" -} -item { - name: "/m/0llzx" - id: 183 - display_name: "Sewing machine" -} -item { - name: "/m/0dbvp" - id: 184 - display_name: "Goose" -} -item { - name: "/m/09ct_" - id: 185 - display_name: "Helicopter" -} -item { - name: "/m/0dkzw" - id: 186 - display_name: "Seat belt" -} -item { - name: "/m/02p5f1q" - id: 187 - display_name: "Coffee cup" -} -item { - name: "/m/0fx9l" - id: 188 - display_name: "Microwave oven" -} -item { - name: "/m/01b9xk" - id: 189 - display_name: "Hot dog" -} -item { - name: "/m/0b3fp9" - id: 190 - display_name: "Countertop" -} -item { - name: "/m/0h8n27j" - id: 191 - display_name: "Serving tray" -} -item { - name: "/m/0h8n6f9" - id: 192 - display_name: "Dog bed" -} -item { - name: "/m/01599" - id: 193 - display_name: "Beer" -} -item { - name: "/m/017ftj" - id: 194 - display_name: "Sunglasses" -} -item { - name: "/m/044r5d" - id: 195 - display_name: "Golf ball" -} -item { - name: "/m/01dwsz" - id: 196 - display_name: "Waffle" -} -item { - name: "/m/0cdl1" - id: 197 - display_name: "Palm tree" -} -item { - name: "/m/07gql" - id: 198 - display_name: "Trumpet" -} -item { - name: "/m/0hdln" - id: 199 - display_name: "Ruler" -} -item { - name: "/m/0zvk5" - id: 200 - display_name: "Helmet" -} -item { - name: "/m/012w5l" - id: 201 - display_name: "Ladder" -} -item { - name: "/m/021sj1" - id: 202 - display_name: "Office building" -} -item { - name: "/m/0bh9flk" - id: 203 - display_name: "Tablet computer" -} -item { - name: "/m/09gtd" - id: 204 - display_name: "Toilet paper" -} -item { - name: "/m/0jwn_" - id: 205 - display_name: "Pomegranate" -} -item { - name: "/m/02wv6h6" - id: 206 - display_name: "Skirt" -} -item { - name: "/m/02wv84t" - id: 207 - display_name: "Gas stove" -} -item { - name: "/m/021mn" - id: 208 - display_name: "Cookie" -} -item { - name: "/m/018p4k" - id: 209 - display_name: "Cart" -} -item { - name: "/m/06j2d" - id: 210 - display_name: "Raven" -} -item { - name: "/m/033cnk" - id: 211 - display_name: "Egg" -} -item { - name: "/m/01j3zr" - id: 212 - display_name: "Burrito" -} -item { - name: "/m/03fwl" - id: 213 - display_name: "Goat" -} -item { - name: "/m/058qzx" - id: 214 - display_name: "Kitchen knife" -} -item { - name: "/m/06_fw" - id: 215 - display_name: "Skateboard" -} -item { - name: "/m/02x8cch" - id: 216 - display_name: "Salt and pepper shakers" -} -item { - name: "/m/04g2r" - id: 217 - display_name: "Lynx" -} -item { - name: "/m/01b638" - id: 218 - display_name: "Boot" -} -item { - name: "/m/099ssp" - id: 219 - display_name: "Platter" -} -item { - name: "/m/071p9" - id: 220 - display_name: "Ski" -} -item { - name: "/m/01gkx_" - id: 221 - display_name: "Swimwear" -} -item { - name: "/m/0b_rs" - id: 222 - display_name: "Swimming pool" -} -item { - name: "/m/03v5tg" - id: 223 - display_name: "Drinking straw" -} -item { - name: "/m/01j5ks" - id: 224 - display_name: "Wrench" -} -item { - name: "/m/026t6" - id: 225 - display_name: "Drum" -} -item { - name: "/m/0_k2" - id: 226 - display_name: "Ant" -} -item { - name: "/m/039xj_" - id: 227 - display_name: "Human ear" -} -item { - name: "/m/01b7fy" - id: 228 - display_name: "Headphones" -} -item { - name: "/m/0220r2" - id: 229 - display_name: "Fountain" -} -item { - name: "/m/015p6" - id: 230 - 
display_name: "Bird" -} -item { - name: "/m/0fly7" - id: 231 - display_name: "Jeans" -} -item { - name: "/m/07c52" - id: 232 - display_name: "Television" -} -item { - name: "/m/0n28_" - id: 233 - display_name: "Crab" -} -item { - name: "/m/0hg7b" - id: 234 - display_name: "Microphone" -} -item { - name: "/m/019dx1" - id: 235 - display_name: "Home appliance" -} -item { - name: "/m/04vv5k" - id: 236 - display_name: "Snowplow" -} -item { - name: "/m/020jm" - id: 237 - display_name: "Beetle" -} -item { - name: "/m/047v4b" - id: 238 - display_name: "Artichoke" -} -item { - name: "/m/01xs3r" - id: 239 - display_name: "Jet ski" -} -item { - name: "/m/03kt2w" - id: 240 - display_name: "Stationary bicycle" -} -item { - name: "/m/03q69" - id: 241 - display_name: "Human hair" -} -item { - name: "/m/01dxs" - id: 242 - display_name: "Brown bear" -} -item { - name: "/m/01h8tj" - id: 243 - display_name: "Starfish" -} -item { - name: "/m/0dt3t" - id: 244 - display_name: "Fork" -} -item { - name: "/m/0cjq5" - id: 245 - display_name: "Lobster" -} -item { - name: "/m/0h8lkj8" - id: 246 - display_name: "Corded phone" -} -item { - name: "/m/0271t" - id: 247 - display_name: "Drink" -} -item { - name: "/m/03q5c7" - id: 248 - display_name: "Saucer" -} -item { - name: "/m/0fj52s" - id: 249 - display_name: "Carrot" -} -item { - name: "/m/03vt0" - id: 250 - display_name: "Insect" -} -item { - name: "/m/01x3z" - id: 251 - display_name: "Clock" -} -item { - name: "/m/0d5gx" - id: 252 - display_name: "Castle" -} -item { - name: "/m/0h8my_4" - id: 253 - display_name: "Tennis racket" -} -item { - name: "/m/03ldnb" - id: 254 - display_name: "Ceiling fan" -} -item { - name: "/m/0cjs7" - id: 255 - display_name: "Asparagus" -} -item { - name: "/m/0449p" - id: 256 - display_name: "Jaguar" -} -item { - name: "/m/04szw" - id: 257 - display_name: "Musical instrument" -} -item { - name: "/m/07jdr" - id: 258 - display_name: "Train" -} -item { - name: "/m/01yrx" - id: 259 - display_name: "Cat" -} -item { - name: "/m/06c54" - id: 260 - display_name: "Rifle" -} -item { - name: "/m/04h8sr" - id: 261 - display_name: "Dumbbell" -} -item { - name: "/m/050k8" - id: 262 - display_name: "Mobile phone" -} -item { - name: "/m/0pg52" - id: 263 - display_name: "Taxi" -} -item { - name: "/m/02f9f_" - id: 264 - display_name: "Shower" -} -item { - name: "/m/054fyh" - id: 265 - display_name: "Pitcher" -} -item { - name: "/m/09k_b" - id: 266 - display_name: "Lemon" -} -item { - name: "/m/03xxp" - id: 267 - display_name: "Invertebrate" -} -item { - name: "/m/0jly1" - id: 268 - display_name: "Turkey" -} -item { - name: "/m/06k2mb" - id: 269 - display_name: "High heels" -} -item { - name: "/m/04yqq2" - id: 270 - display_name: "Bust" -} -item { - name: "/m/0bwd_0j" - id: 271 - display_name: "Elephant" -} -item { - name: "/m/02h19r" - id: 272 - display_name: "Scarf" -} -item { - name: "/m/02zn6n" - id: 273 - display_name: "Barrel" -} -item { - name: "/m/07c6l" - id: 274 - display_name: "Trombone" -} -item { - name: "/m/05zsy" - id: 275 - display_name: "Pumpkin" -} -item { - name: "/m/025dyy" - id: 276 - display_name: "Box" -} -item { - name: "/m/07j87" - id: 277 - display_name: "Tomato" -} -item { - name: "/m/09ld4" - id: 278 - display_name: "Frog" -} -item { - name: "/m/01vbnl" - id: 279 - display_name: "Bidet" -} -item { - name: "/m/0dzct" - id: 280 - display_name: "Human face" -} -item { - name: "/m/03fp41" - id: 281 - display_name: "Houseplant" -} -item { - name: "/m/0h2r6" - id: 282 - display_name: "Van" -} -item { - name: "/m/0by6g" - id: 283 - 
display_name: "Shark" -} -item { - name: "/m/0cxn2" - id: 284 - display_name: "Ice cream" -} -item { - name: "/m/04tn4x" - id: 285 - display_name: "Swim cap" -} -item { - name: "/m/0f6wt" - id: 286 - display_name: "Falcon" -} -item { - name: "/m/05n4y" - id: 287 - display_name: "Ostrich" -} -item { - name: "/m/0gxl3" - id: 288 - display_name: "Handgun" -} -item { - name: "/m/02d9qx" - id: 289 - display_name: "Whiteboard" -} -item { - name: "/m/04m9y" - id: 290 - display_name: "Lizard" -} -item { - name: "/m/05z55" - id: 291 - display_name: "Pasta" -} -item { - name: "/m/01x3jk" - id: 292 - display_name: "Snowmobile" -} -item { - name: "/m/0h8l4fh" - id: 293 - display_name: "Light bulb" -} -item { - name: "/m/031b6r" - id: 294 - display_name: "Window blind" -} -item { - name: "/m/01tcjp" - id: 295 - display_name: "Muffin" -} -item { - name: "/m/01f91_" - id: 296 - display_name: "Pretzel" -} -item { - name: "/m/02522" - id: 297 - display_name: "Computer monitor" -} -item { - name: "/m/0319l" - id: 298 - display_name: "Horn" -} -item { - name: "/m/0c_jw" - id: 299 - display_name: "Furniture" -} -item { - name: "/m/0l515" - id: 300 - display_name: "Sandwich" -} -item { - name: "/m/0306r" - id: 301 - display_name: "Fox" -} -item { - name: "/m/0crjs" - id: 302 - display_name: "Convenience store" -} -item { - name: "/m/0ch_cf" - id: 303 - display_name: "Fish" -} -item { - name: "/m/02xwb" - id: 304 - display_name: "Fruit" -} -item { - name: "/m/01r546" - id: 305 - display_name: "Earrings" -} -item { - name: "/m/03rszm" - id: 306 - display_name: "Curtain" -} -item { - name: "/m/0388q" - id: 307 - display_name: "Grape" -} -item { - name: "/m/03m3pdh" - id: 308 - display_name: "Sofa bed" -} -item { - name: "/m/03k3r" - id: 309 - display_name: "Horse" -} -item { - name: "/m/0hf58v5" - id: 310 - display_name: "Luggage and bags" -} -item { - name: "/m/01y9k5" - id: 311 - display_name: "Desk" -} -item { - name: "/m/05441v" - id: 312 - display_name: "Crutch" -} -item { - name: "/m/03p3bw" - id: 313 - display_name: "Bicycle helmet" -} -item { - name: "/m/0175cv" - id: 314 - display_name: "Tick" -} -item { - name: "/m/0cmf2" - id: 315 - display_name: "Airplane" -} -item { - name: "/m/0ccs93" - id: 316 - display_name: "Canary" -} -item { - name: "/m/02d1br" - id: 317 - display_name: "Spatula" -} -item { - name: "/m/0gjkl" - id: 318 - display_name: "Watch" -} -item { - name: "/m/0jqgx" - id: 319 - display_name: "Lily" -} -item { - name: "/m/0h99cwc" - id: 320 - display_name: "Kitchen appliance" -} -item { - name: "/m/047j0r" - id: 321 - display_name: "Filing cabinet" -} -item { - name: "/m/0k5j" - id: 322 - display_name: "Aircraft" -} -item { - name: "/m/0h8n6ft" - id: 323 - display_name: "Cake stand" -} -item { - name: "/m/0gm28" - id: 324 - display_name: "Candy" -} -item { - name: "/m/0130jx" - id: 325 - display_name: "Sink" -} -item { - name: "/m/04rmv" - id: 326 - display_name: "Mouse" -} -item { - name: "/m/081qc" - id: 327 - display_name: "Wine" -} -item { - name: "/m/0qmmr" - id: 328 - display_name: "Wheelchair" -} -item { - name: "/m/03fj2" - id: 329 - display_name: "Goldfish" -} -item { - name: "/m/040b_t" - id: 330 - display_name: "Refrigerator" -} -item { - name: "/m/02y6n" - id: 331 - display_name: "French fries" -} -item { - name: "/m/0fqfqc" - id: 332 - display_name: "Drawer" -} -item { - name: "/m/030610" - id: 333 - display_name: "Treadmill" -} -item { - name: "/m/07kng9" - id: 334 - display_name: "Picnic basket" -} -item { - name: "/m/029b3" - id: 335 - display_name: "Dice" -} -item { - 
name: "/m/0fbw6" - id: 336 - display_name: "Cabbage" -} -item { - name: "/m/07qxg_" - id: 337 - display_name: "Football helmet" -} -item { - name: "/m/068zj" - id: 338 - display_name: "Pig" -} -item { - name: "/m/01g317" - id: 339 - display_name: "Person" -} -item { - name: "/m/01bfm9" - id: 340 - display_name: "Shorts" -} -item { - name: "/m/02068x" - id: 341 - display_name: "Gondola" -} -item { - name: "/m/0fz0h" - id: 342 - display_name: "Honeycomb" -} -item { - name: "/m/0jy4k" - id: 343 - display_name: "Doughnut" -} -item { - name: "/m/05kyg_" - id: 344 - display_name: "Chest of drawers" -} -item { - name: "/m/01prls" - id: 345 - display_name: "Land vehicle" -} -item { - name: "/m/01h44" - id: 346 - display_name: "Bat" -} -item { - name: "/m/08pbxl" - id: 347 - display_name: "Monkey" -} -item { - name: "/m/02gzp" - id: 348 - display_name: "Dagger" -} -item { - name: "/m/04brg2" - id: 349 - display_name: "Tableware" -} -item { - name: "/m/031n1" - id: 350 - display_name: "Human foot" -} -item { - name: "/m/02jvh9" - id: 351 - display_name: "Mug" -} -item { - name: "/m/046dlr" - id: 352 - display_name: "Alarm clock" -} -item { - name: "/m/0h8ntjv" - id: 353 - display_name: "Pressure cooker" -} -item { - name: "/m/0k65p" - id: 354 - display_name: "Human hand" -} -item { - name: "/m/011k07" - id: 355 - display_name: "Tortoise" -} -item { - name: "/m/03grzl" - id: 356 - display_name: "Baseball glove" -} -item { - name: "/m/06y5r" - id: 357 - display_name: "Sword" -} -item { - name: "/m/061_f" - id: 358 - display_name: "Pear" -} -item { - name: "/m/01cmb2" - id: 359 - display_name: "Miniskirt" -} -item { - name: "/m/01mqdt" - id: 360 - display_name: "Traffic sign" -} -item { - name: "/m/05r655" - id: 361 - display_name: "Girl" -} -item { - name: "/m/02p3w7d" - id: 362 - display_name: "Roller skates" -} -item { - name: "/m/029tx" - id: 363 - display_name: "Dinosaur" -} -item { - name: "/m/04m6gz" - id: 364 - display_name: "Porch" -} -item { - name: "/m/015h_t" - id: 365 - display_name: "Human beard" -} -item { - name: "/m/06pcq" - id: 366 - display_name: "Submarine sandwich" -} -item { - name: "/m/01bms0" - id: 367 - display_name: "Screwdriver" -} -item { - name: "/m/07fbm7" - id: 368 - display_name: "Strawberry" -} -item { - name: "/m/09tvcd" - id: 369 - display_name: "Wine glass" -} -item { - name: "/m/06nwz" - id: 370 - display_name: "Seafood" -} -item { - name: "/m/0dv9c" - id: 371 - display_name: "Racket" -} -item { - name: "/m/083wq" - id: 372 - display_name: "Wheel" -} -item { - name: "/m/0gd36" - id: 373 - display_name: "Sea lion" -} -item { - name: "/m/0138tl" - id: 374 - display_name: "Toy" -} -item { - name: "/m/07clx" - id: 375 - display_name: "Tea" -} -item { - name: "/m/05ctyq" - id: 376 - display_name: "Tennis ball" -} -item { - name: "/m/0bjyj5" - id: 377 - display_name: "Waste container" -} -item { - name: "/m/0dbzx" - id: 378 - display_name: "Mule" -} -item { - name: "/m/02ctlc" - id: 379 - display_name: "Cricket ball" -} -item { - name: "/m/0fp6w" - id: 380 - display_name: "Pineapple" -} -item { - name: "/m/0djtd" - id: 381 - display_name: "Coconut" -} -item { - name: "/m/0167gd" - id: 382 - display_name: "Doll" -} -item { - name: "/m/078n6m" - id: 383 - display_name: "Coffee table" -} -item { - name: "/m/0152hh" - id: 384 - display_name: "Snowman" -} -item { - name: "/m/04gth" - id: 385 - display_name: "Lavender" -} -item { - name: "/m/0ll1f78" - id: 386 - display_name: "Shrimp" -} -item { - name: "/m/0cffdh" - id: 387 - display_name: "Maple" -} -item { - name: 
"/m/025rp__" - id: 388 - display_name: "Cowboy hat" -} -item { - name: "/m/02_n6y" - id: 389 - display_name: "Goggles" -} -item { - name: "/m/0wdt60w" - id: 390 - display_name: "Rugby ball" -} -item { - name: "/m/0cydv" - id: 391 - display_name: "Caterpillar" -} -item { - name: "/m/01n5jq" - id: 392 - display_name: "Poster" -} -item { - name: "/m/09rvcxw" - id: 393 - display_name: "Rocket" -} -item { - name: "/m/013y1f" - id: 394 - display_name: "Organ" -} -item { - name: "/m/06ncr" - id: 395 - display_name: "Saxophone" -} -item { - name: "/m/015qff" - id: 396 - display_name: "Traffic light" -} -item { - name: "/m/024g6" - id: 397 - display_name: "Cocktail" -} -item { - name: "/m/05gqfk" - id: 398 - display_name: "Plastic bag" -} -item { - name: "/m/0dv77" - id: 399 - display_name: "Squash" -} -item { - name: "/m/052sf" - id: 400 - display_name: "Mushroom" -} -item { - name: "/m/0cdn1" - id: 401 - display_name: "Hamburger" -} -item { - name: "/m/03jbxj" - id: 402 - display_name: "Light switch" -} -item { - name: "/m/0cyfs" - id: 403 - display_name: "Parachute" -} -item { - name: "/m/0kmg4" - id: 404 - display_name: "Teddy bear" -} -item { - name: "/m/02cvgx" - id: 405 - display_name: "Winter melon" -} -item { - name: "/m/09kx5" - id: 406 - display_name: "Deer" -} -item { - name: "/m/057cc" - id: 407 - display_name: "Musical keyboard" -} -item { - name: "/m/02pkr5" - id: 408 - display_name: "Plumbing fixture" -} -item { - name: "/m/057p5t" - id: 409 - display_name: "Scoreboard" -} -item { - name: "/m/03g8mr" - id: 410 - display_name: "Baseball bat" -} -item { - name: "/m/0frqm" - id: 411 - display_name: "Envelope" -} -item { - name: "/m/03m3vtv" - id: 412 - display_name: "Adhesive tape" -} -item { - name: "/m/0584n8" - id: 413 - display_name: "Briefcase" -} -item { - name: "/m/014y4n" - id: 414 - display_name: "Paddle" -} -item { - name: "/m/01g3x7" - id: 415 - display_name: "Bow and arrow" -} -item { - name: "/m/07cx4" - id: 416 - display_name: "Telephone" -} -item { - name: "/m/07bgp" - id: 417 - display_name: "Sheep" -} -item { - name: "/m/032b3c" - id: 418 - display_name: "Jacket" -} -item { - name: "/m/01bl7v" - id: 419 - display_name: "Boy" -} -item { - name: "/m/0663v" - id: 420 - display_name: "Pizza" -} -item { - name: "/m/0cn6p" - id: 421 - display_name: "Otter" -} -item { - name: "/m/02rdsp" - id: 422 - display_name: "Office supplies" -} -item { - name: "/m/02crq1" - id: 423 - display_name: "Couch" -} -item { - name: "/m/01xqw" - id: 424 - display_name: "Cello" -} -item { - name: "/m/0cnyhnx" - id: 425 - display_name: "Bull" -} -item { - name: "/m/01x_v" - id: 426 - display_name: "Camel" -} -item { - name: "/m/018xm" - id: 427 - display_name: "Ball" -} -item { - name: "/m/09ddx" - id: 428 - display_name: "Duck" -} -item { - name: "/m/084zz" - id: 429 - display_name: "Whale" -} -item { - name: "/m/01n4qj" - id: 430 - display_name: "Shirt" -} -item { - name: "/m/07cmd" - id: 431 - display_name: "Tank" -} -item { - name: "/m/04_sv" - id: 432 - display_name: "Motorcycle" -} -item { - name: "/m/0mkg" - id: 433 - display_name: "Accordion" -} -item { - name: "/m/09d5_" - id: 434 - display_name: "Owl" -} -item { - name: "/m/0c568" - id: 435 - display_name: "Porcupine" -} -item { - name: "/m/02wbtzl" - id: 436 - display_name: "Sun hat" -} -item { - name: "/m/05bm6" - id: 437 - display_name: "Nail" -} -item { - name: "/m/01lsmm" - id: 438 - display_name: "Scissors" -} -item { - name: "/m/0dftk" - id: 439 - display_name: "Swan" -} -item { - name: "/m/0dtln" - id: 440 - display_name: "Lamp" 
-} -item { - name: "/m/0nl46" - id: 441 - display_name: "Crown" -} -item { - name: "/m/05r5c" - id: 442 - display_name: "Piano" -} -item { - name: "/m/06msq" - id: 443 - display_name: "Sculpture" -} -item { - name: "/m/0cd4d" - id: 444 - display_name: "Cheetah" -} -item { - name: "/m/05kms" - id: 445 - display_name: "Oboe" -} -item { - name: "/m/02jnhm" - id: 446 - display_name: "Tin can" -} -item { - name: "/m/0fldg" - id: 447 - display_name: "Mango" -} -item { - name: "/m/073bxn" - id: 448 - display_name: "Tripod" -} -item { - name: "/m/029bxz" - id: 449 - display_name: "Oven" -} -item { - name: "/m/020lf" - id: 450 - display_name: "Computer mouse" -} -item { - name: "/m/01btn" - id: 451 - display_name: "Barge" -} -item { - name: "/m/02vqfm" - id: 452 - display_name: "Coffee" -} -item { - name: "/m/06__v" - id: 453 - display_name: "Snowboard" -} -item { - name: "/m/043nyj" - id: 454 - display_name: "Common fig" -} -item { - name: "/m/0grw1" - id: 455 - display_name: "Salad" -} -item { - name: "/m/03hl4l9" - id: 456 - display_name: "Marine invertebrates" -} -item { - name: "/m/0hnnb" - id: 457 - display_name: "Umbrella" -} -item { - name: "/m/04c0y" - id: 458 - display_name: "Kangaroo" -} -item { - name: "/m/0dzf4" - id: 459 - display_name: "Human arm" -} -item { - name: "/m/07v9_z" - id: 460 - display_name: "Measuring cup" -} -item { - name: "/m/0f9_l" - id: 461 - display_name: "Snail" -} -item { - name: "/m/0703r8" - id: 462 - display_name: "Loveseat" -} -item { - name: "/m/01xyhv" - id: 463 - display_name: "Suit" -} -item { - name: "/m/01fh4r" - id: 464 - display_name: "Teapot" -} -item { - name: "/m/04dr76w" - id: 465 - display_name: "Bottle" -} -item { - name: "/m/0pcr" - id: 466 - display_name: "Alpaca" -} -item { - name: "/m/03s_tn" - id: 467 - display_name: "Kettle" -} -item { - name: "/m/07mhn" - id: 468 - display_name: "Trousers" -} -item { - name: "/m/01hrv5" - id: 469 - display_name: "Popcorn" -} -item { - name: "/m/019h78" - id: 470 - display_name: "Centipede" -} -item { - name: "/m/09kmb" - id: 471 - display_name: "Spider" -} -item { - name: "/m/0h23m" - id: 472 - display_name: "Sparrow" -} -item { - name: "/m/050gv4" - id: 473 - display_name: "Plate" -} -item { - name: "/m/01fb_0" - id: 474 - display_name: "Bagel" -} -item { - name: "/m/02w3_ws" - id: 475 - display_name: "Personal care" -} -item { - name: "/m/014j1m" - id: 476 - display_name: "Apple" -} -item { - name: "/m/01gmv2" - id: 477 - display_name: "Brassiere" -} -item { - name: "/m/04y4h8h" - id: 478 - display_name: "Bathroom cabinet" -} -item { - name: "/m/026qbn5" - id: 479 - display_name: "Studio couch" -} -item { - name: "/m/01m2v" - id: 480 - display_name: "Computer keyboard" -} -item { - name: "/m/05_5p_0" - id: 481 - display_name: "Table tennis racket" -} -item { - name: "/m/07030" - id: 482 - display_name: "Sushi" -} -item { - name: "/m/01s105" - id: 483 - display_name: "Cabinetry" -} -item { - name: "/m/033rq4" - id: 484 - display_name: "Street light" -} -item { - name: "/m/0162_1" - id: 485 - display_name: "Towel" -} -item { - name: "/m/02z51p" - id: 486 - display_name: "Nightstand" -} -item { - name: "/m/06mf6" - id: 487 - display_name: "Rabbit" -} -item { - name: "/m/02hj4" - id: 488 - display_name: "Dolphin" -} -item { - name: "/m/0bt9lr" - id: 489 - display_name: "Dog" -} -item { - name: "/m/08hvt4" - id: 490 - display_name: "Jug" -} -item { - name: "/m/084rd" - id: 491 - display_name: "Wok" -} -item { - name: "/m/01pns0" - id: 492 - display_name: "Fire hydrant" -} -item { - name: "/m/014sv8" - id: 
493 - display_name: "Human eye" -} -item { - name: "/m/079cl" - id: 494 - display_name: "Skyscraper" -} -item { - name: "/m/01940j" - id: 495 - display_name: "Backpack" -} -item { - name: "/m/05vtc" - id: 496 - display_name: "Potato" -} -item { - name: "/m/02w3r3" - id: 497 - display_name: "Paper towel" -} -item { - name: "/m/054xkw" - id: 498 - display_name: "Lifejacket" -} -item { - name: "/m/01bqk0" - id: 499 - display_name: "Bicycle wheel" -} -item { - name: "/m/09g1w" - id: 500 - display_name: "Toilet" -} diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/pascal_label_map.pbtxt b/research/mlperf_object_detection/Mask_RCNN/object_detection/data/pascal_label_map.pbtxt deleted file mode 100644 index c9e9e2affcd73ae5cb272a51b44306a74cf22eea..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/pascal_label_map.pbtxt +++ /dev/null @@ -1,99 +0,0 @@ -item { - id: 1 - name: 'aeroplane' -} - -item { - id: 2 - name: 'bicycle' -} - -item { - id: 3 - name: 'bird' -} - -item { - id: 4 - name: 'boat' -} - -item { - id: 5 - name: 'bottle' -} - -item { - id: 6 - name: 'bus' -} - -item { - id: 7 - name: 'car' -} - -item { - id: 8 - name: 'cat' -} - -item { - id: 9 - name: 'chair' -} - -item { - id: 10 - name: 'cow' -} - -item { - id: 11 - name: 'diningtable' -} - -item { - id: 12 - name: 'dog' -} - -item { - id: 13 - name: 'horse' -} - -item { - id: 14 - name: 'motorbike' -} - -item { - id: 15 - name: 'person' -} - -item { - id: 16 - name: 'pottedplant' -} - -item { - id: 17 - name: 'sheep' -} - -item { - id: 18 - name: 'sofa' -} - -item { - id: 19 - name: 'train' -} - -item { - id: 20 - name: 'tvmonitor' -} diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/pet_label_map.pbtxt b/research/mlperf_object_detection/Mask_RCNN/object_detection/data/pet_label_map.pbtxt deleted file mode 100644 index 54d7d3518941ceb0d2dc3465bdf702d4eaac3f07..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/data/pet_label_map.pbtxt +++ /dev/null @@ -1,184 +0,0 @@ -item { - id: 1 - name: 'Abyssinian' -} - -item { - id: 2 - name: 'american_bulldog' -} - -item { - id: 3 - name: 'american_pit_bull_terrier' -} - -item { - id: 4 - name: 'basset_hound' -} - -item { - id: 5 - name: 'beagle' -} - -item { - id: 6 - name: 'Bengal' -} - -item { - id: 7 - name: 'Birman' -} - -item { - id: 8 - name: 'Bombay' -} - -item { - id: 9 - name: 'boxer' -} - -item { - id: 10 - name: 'British_Shorthair' -} - -item { - id: 11 - name: 'chihuahua' -} - -item { - id: 12 - name: 'Egyptian_Mau' -} - -item { - id: 13 - name: 'english_cocker_spaniel' -} - -item { - id: 14 - name: 'english_setter' -} - -item { - id: 15 - name: 'german_shorthaired' -} - -item { - id: 16 - name: 'great_pyrenees' -} - -item { - id: 17 - name: 'havanese' -} - -item { - id: 18 - name: 'japanese_chin' -} - -item { - id: 19 - name: 'keeshond' -} - -item { - id: 20 - name: 'leonberger' -} - -item { - id: 21 - name: 'Maine_Coon' -} - -item { - id: 22 - name: 'miniature_pinscher' -} - -item { - id: 23 - name: 'newfoundland' -} - -item { - id: 24 - name: 'Persian' -} - -item { - id: 25 - name: 'pomeranian' -} - -item { - id: 26 - name: 'pug' -} - -item { - id: 27 - name: 'Ragdoll' -} - -item { - id: 28 - name: 'Russian_Blue' -} - -item { - id: 29 - name: 'saint_bernard' -} - -item { - id: 30 - name: 'samoyed' -} - -item { - id: 31 - name: 'scottish_terrier' -} - -item { - id: 32 - name: 'shiba_inu' -} - -item { - id: 
33 - name: 'Siamese' -} - -item { - id: 34 - name: 'Sphynx' -} - -item { - id: 35 - name: 'staffordshire_bull_terrier' -} - -item { - id: 36 - name: 'wheaten_terrier' -} - -item { - id: 37 - name: 'yorkshire_terrier' -} diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data_decoders/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/data_decoders/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data_decoders/tf_example_decoder.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/data_decoders/tf_example_decoder.py deleted file mode 100644 index 8480a14b4ade6497e57db505875ae0795b191063..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/data_decoders/tf_example_decoder.py +++ /dev/null @@ -1,439 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tensorflow Example proto decoder for object detection. - -A decoder to decode string tensors containing serialized tensorflow.Example -protos for object detection. -""" -import tensorflow as tf - -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import math_ops -from object_detection.core import data_decoder -from object_detection.core import standard_fields as fields -from object_detection.protos import input_reader_pb2 -from object_detection.utils import label_map_util - -slim_example_decoder = tf.contrib.slim.tfexample_decoder - - -# TODO(lzc): keep LookupTensor and BackupHandler in sync with -# tf.contrib.slim.tfexample_decoder version. -class LookupTensor(slim_example_decoder.Tensor): - """An ItemHandler that returns a parsed Tensor, the result of a lookup.""" - - def __init__(self, - tensor_key, - table, - shape_keys=None, - shape=None, - default_value=''): - """Initializes the LookupTensor handler. - - Simply calls a vocabulary (most often, a label mapping) lookup. - - Args: - tensor_key: the name of the `TFExample` feature to read the tensor from. - table: A tf.lookup table. - shape_keys: Optional name or list of names of the TF-Example feature in - which the tensor shape is stored. If a list, then each corresponds to - one dimension of the shape. - shape: Optional output shape of the `Tensor`. If provided, the `Tensor` is - reshaped accordingly. - default_value: The value used when the `tensor_key` is not found in a - particular `TFExample`. - - Raises: - ValueError: if both `shape_keys` and `shape` are specified. 
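The label maps above pair each class name with an integer id; the decoder defined below turns such a pairing into a TF lookup table so that 'image/object/class/text' features can be mapped to ids inside the graph. A minimal sketch of that flow, assuming the tf.contrib.lookup API this file already uses; the two-class map here is made up, standing in for what label_map_util.get_label_map_dict() would return for one of the pbtxt files above:

import tensorflow as tf

# Hypothetical two-class label map, in the dict form that
# label_map_util.get_label_map_dict() returns for a pbtxt file.
label_map = {'cat': 1, 'dog': 2}

# Build a string -> int64 lookup table; -1 marks unknown class names,
# mirroring the default_value used by TfExampleDecoder below.
table = tf.contrib.lookup.HashTable(
    initializer=tf.contrib.lookup.KeyValueTensorInitializer(
        keys=tf.constant(list(label_map.keys())),
        values=tf.constant(list(label_map.values()), dtype=tf.int64)),
    default_value=-1)

class_ids = table.lookup(tf.constant(['dog', 'cat', 'zebra']))

with tf.Session() as sess:
  sess.run(tf.tables_initializer())
  print(sess.run(class_ids))  # [ 2  1 -1]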
- """ - self._table = table - super(LookupTensor, self).__init__(tensor_key, shape_keys, shape, - default_value) - - def tensors_to_item(self, keys_to_tensors): - unmapped_tensor = super(LookupTensor, self).tensors_to_item(keys_to_tensors) - return self._table.lookup(unmapped_tensor) - - -class BackupHandler(slim_example_decoder.ItemHandler): - """An ItemHandler that tries two ItemHandlers in order.""" - - def __init__(self, handler, backup): - """Initializes the BackupHandler handler. - - If the first Handler's tensors_to_item returns a Tensor with no elements, - the second Handler is used. - - Args: - handler: The primary ItemHandler. - backup: The backup ItemHandler. - - Raises: - ValueError: if either is not an ItemHandler. - """ - if not isinstance(handler, slim_example_decoder.ItemHandler): - raise ValueError('Primary handler is of type %s instead of ItemHandler' % - type(handler)) - if not isinstance(backup, slim_example_decoder.ItemHandler): - raise ValueError( - 'Backup handler is of type %s instead of ItemHandler' % type(backup)) - self._handler = handler - self._backup = backup - super(BackupHandler, self).__init__(handler.keys + backup.keys) - - def tensors_to_item(self, keys_to_tensors): - item = self._handler.tensors_to_item(keys_to_tensors) - return control_flow_ops.cond( - pred=math_ops.equal(math_ops.reduce_prod(array_ops.shape(item)), 0), - true_fn=lambda: self._backup.tensors_to_item(keys_to_tensors), - false_fn=lambda: item) - - -class TfExampleDecoder(data_decoder.DataDecoder): - """Tensorflow Example proto decoder.""" - - def __init__(self, - load_instance_masks=False, - instance_mask_type=input_reader_pb2.NUMERICAL_MASKS, - label_map_proto_file=None, - use_display_name=False, - dct_method='', - num_keypoints=0, - num_additional_channels=0): - """Constructor sets keys_to_features and items_to_handlers. - - Args: - load_instance_masks: whether or not to load and handle instance masks. - instance_mask_type: type of instance masks. Options are provided in - input_reader.proto. This is only used if `load_instance_masks` is True. - label_map_proto_file: a file path to a - object_detection.protos.StringIntLabelMap proto. If provided, then the - mapped IDs of 'image/object/class/text' will take precedence over the - existing 'image/object/class/label' ID. Also, if provided, it is - assumed that 'image/object/class/text' will be in the data. - use_display_name: whether or not to use the `display_name` for label - mapping (instead of `name`). Only used if label_map_proto_file is - provided. - dct_method: An optional string. Defaults to None. It only takes - effect when image format is jpeg, used to specify a hint about the - algorithm used for jpeg decompression. Currently valid values - are ['INTEGER_FAST', 'INTEGER_ACCURATE']. The hint may be ignored, for - example, the jpeg library does not have that specific option. - num_keypoints: the number of keypoints per object. - num_additional_channels: how many additional channels to use. - - Raises: - ValueError: If `instance_mask_type` option is not one of - input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL, or - input_reader_pb2.PNG_MASKS. 
- """ - self.keys_to_features = { - 'image/encoded': - tf.FixedLenFeature((), tf.string, default_value=''), - 'image/format': - tf.FixedLenFeature((), tf.string, default_value='jpeg'), - 'image/filename': - tf.FixedLenFeature((), tf.string, default_value=''), - 'image/key/sha256': - tf.FixedLenFeature((), tf.string, default_value=''), - 'image/source_id': - tf.FixedLenFeature((), tf.string, default_value=''), - 'image/height': - tf.FixedLenFeature((), tf.int64, default_value=1), - 'image/width': - tf.FixedLenFeature((), tf.int64, default_value=1), - # Object boxes and classes. - 'image/object/bbox/xmin': - tf.VarLenFeature(tf.float32), - 'image/object/bbox/xmax': - tf.VarLenFeature(tf.float32), - 'image/object/bbox/ymin': - tf.VarLenFeature(tf.float32), - 'image/object/bbox/ymax': - tf.VarLenFeature(tf.float32), - 'image/object/class/label': - tf.VarLenFeature(tf.int64), - 'image/object/class/text': - tf.VarLenFeature(tf.string), - 'image/object/area': - tf.VarLenFeature(tf.float32), - 'image/object/is_crowd': - tf.VarLenFeature(tf.int64), - 'image/object/difficult': - tf.VarLenFeature(tf.int64), - 'image/object/group_of': - tf.VarLenFeature(tf.int64), - 'image/object/weight': - tf.VarLenFeature(tf.float32), - } - # We are checking `dct_method` instead of passing it directly in order to - # ensure TF version 1.6 compatibility. - if dct_method: - image = slim_example_decoder.Image( - image_key='image/encoded', - format_key='image/format', - channels=3, - dct_method=dct_method) - additional_channel_image = slim_example_decoder.Image( - image_key='image/additional_channels/encoded', - format_key='image/format', - channels=1, - repeated=True, - dct_method=dct_method) - else: - image = slim_example_decoder.Image( - image_key='image/encoded', format_key='image/format', channels=3) - additional_channel_image = slim_example_decoder.Image( - image_key='image/additional_channels/encoded', - format_key='image/format', - channels=1, - repeated=True) - self.items_to_handlers = { - fields.InputDataFields.image: - image, - fields.InputDataFields.source_id: ( - slim_example_decoder.Tensor('image/source_id')), - fields.InputDataFields.key: ( - slim_example_decoder.Tensor('image/key/sha256')), - fields.InputDataFields.filename: ( - slim_example_decoder.Tensor('image/filename')), - # Object boxes and classes. - fields.InputDataFields.groundtruth_boxes: ( - slim_example_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'], - 'image/object/bbox/')), - fields.InputDataFields.groundtruth_area: - slim_example_decoder.Tensor('image/object/area'), - fields.InputDataFields.groundtruth_is_crowd: ( - slim_example_decoder.Tensor('image/object/is_crowd')), - fields.InputDataFields.groundtruth_difficult: ( - slim_example_decoder.Tensor('image/object/difficult')), - fields.InputDataFields.groundtruth_group_of: ( - slim_example_decoder.Tensor('image/object/group_of')), - fields.InputDataFields.groundtruth_weights: ( - slim_example_decoder.Tensor('image/object/weight')), - } - if num_additional_channels > 0: - self.keys_to_features[ - 'image/additional_channels/encoded'] = tf.FixedLenFeature( - (num_additional_channels,), tf.string) - self.items_to_handlers[ - fields.InputDataFields. 
- image_additional_channels] = additional_channel_image - self._num_keypoints = num_keypoints - if num_keypoints > 0: - self.keys_to_features['image/object/keypoint/x'] = ( - tf.VarLenFeature(tf.float32)) - self.keys_to_features['image/object/keypoint/y'] = ( - tf.VarLenFeature(tf.float32)) - self.items_to_handlers[fields.InputDataFields.groundtruth_keypoints] = ( - slim_example_decoder.ItemHandlerCallback( - ['image/object/keypoint/y', 'image/object/keypoint/x'], - self._reshape_keypoints)) - if load_instance_masks: - if instance_mask_type in (input_reader_pb2.DEFAULT, - input_reader_pb2.NUMERICAL_MASKS): - self.keys_to_features['image/object/mask'] = ( - tf.VarLenFeature(tf.float32)) - self.items_to_handlers[ - fields.InputDataFields.groundtruth_instance_masks] = ( - slim_example_decoder.ItemHandlerCallback( - ['image/object/mask', 'image/height', 'image/width'], - self._reshape_instance_masks)) - elif instance_mask_type == input_reader_pb2.PNG_MASKS: - self.keys_to_features['image/object/mask'] = tf.VarLenFeature(tf.string) - self.items_to_handlers[ - fields.InputDataFields.groundtruth_instance_masks] = ( - slim_example_decoder.ItemHandlerCallback( - ['image/object/mask', 'image/height', 'image/width'], - self._decode_png_instance_masks)) - else: - raise ValueError('Did not recognize the `instance_mask_type` option.') - if label_map_proto_file: - label_map = label_map_util.get_label_map_dict(label_map_proto_file, - use_display_name) - # We use a default_value of -1, but we expect all labels to be contained - # in the label map. - table = tf.contrib.lookup.HashTable( - initializer=tf.contrib.lookup.KeyValueTensorInitializer( - keys=tf.constant(list(label_map.keys())), - values=tf.constant(list(label_map.values()), dtype=tf.int64)), - default_value=-1) - # If the label_map_proto is provided, try to use it in conjunction with - # the class text, and fall back to a materialized ID. - # TODO(lzc): note that here we are using BackupHandler defined in this - # file (which is a fork of slim_example_decoder.BackupHandler). Need to - # switch back to slim_example_decoder.BackupHandler once tf 1.5 becomes - # more popular. - label_handler = BackupHandler( - LookupTensor('image/object/class/text', table, default_value=''), - slim_example_decoder.Tensor('image/object/class/label')) - else: - label_handler = slim_example_decoder.Tensor('image/object/class/label') - self.items_to_handlers[ - fields.InputDataFields.groundtruth_classes] = label_handler - - def decode(self, tf_example_string_tensor): - """Decodes serialized tensorflow example and returns a tensor dictionary. - - Args: - tf_example_string_tensor: a string tensor holding a serialized tensorflow - example proto. - - Returns: - A dictionary of the following tensors. - fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3] - containing the image. - fields.InputDataFields.source_id - string tensor containing original - image id. - fields.InputDataFields.key - string tensor with unique sha256 hash key. - fields.InputDataFields.filename - string tensor with original dataset - filename. - fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape - [None, 4] containing box corners. - fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape - [None] containing classes for the boxes. - fields.InputDataFields.groundtruth_weights - 1D float32 tensor of - shape [None] indicating the weights of groundtruth boxes.
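End-to-end, the decoder is constructed once and then applied to each serialized example string. A short usage sketch under stated assumptions: 'train.record' is a hypothetical TFRecord path (the configs earlier in this diff point at files like coco_train.record), and the tables initializer is only strictly needed when a label map is supplied:

import tensorflow as tf
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder
from object_detection.protos import input_reader_pb2

# One serialized tf.train.Example, e.g. the first record of a TFRecord file.
serialized = next(tf.python_io.tf_record_iterator('train.record'))

decoder = tf_example_decoder.TfExampleDecoder(
    load_instance_masks=True,
    instance_mask_type=input_reader_pb2.PNG_MASKS)
tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

with tf.Session() as sess:
  sess.run(tf.tables_initializer())
  out = sess.run(tensor_dict)
  print(out[fields.InputDataFields.groundtruth_boxes].shape)  # (num_boxes, 4)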
- fields.InputDataFields.num_groundtruth_boxes - int32 scalar indicating - the number of groundtruth_boxes. - fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape - [None] containing object mask area in pixels squared. - fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape - [None] indicating if the boxes enclose a crowd. - - Optional: - fields.InputDataFields.image_additional_channels - 3D uint8 tensor of - shape [None, None, num_additional_channels]. 1st dim is height; 2nd dim - is width; 3rd dim is the number of additional channels. - fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape - [None] indicating if the boxes represent `difficult` instances. - fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape - [None] indicating if the boxes represent `group_of` instances. - fields.InputDataFields.groundtruth_keypoints - 3D float32 tensor of - shape [None, None, 2] containing keypoints, where the coordinates of - the keypoints are ordered (y, x). - fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of - shape [None, None, None] containing instance masks. - """ - serialized_example = tf.reshape(tf_example_string_tensor, shape=[]) - decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features, - self.items_to_handlers) - keys = decoder.list_items() - tensors = decoder.decode(serialized_example, items=keys) - tensor_dict = dict(zip(keys, tensors)) - is_crowd = fields.InputDataFields.groundtruth_is_crowd - tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool) - tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3]) - tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape( - tensor_dict[fields.InputDataFields.groundtruth_boxes])[0] - - if fields.InputDataFields.image_additional_channels in tensor_dict: - channels = tensor_dict[fields.InputDataFields.image_additional_channels] - channels = tf.squeeze(channels, axis=3) - channels = tf.transpose(channels, perm=[1, 2, 0]) - tensor_dict[fields.InputDataFields.image_additional_channels] = channels - - def default_groundtruth_weights(): - return tf.ones( - [tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]], - dtype=tf.float32) - - tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond( - tf.greater( - tf.shape( - tensor_dict[fields.InputDataFields.groundtruth_weights])[0], - 0), lambda: tensor_dict[fields.InputDataFields.groundtruth_weights], - default_groundtruth_weights) - return tensor_dict - - def _reshape_keypoints(self, keys_to_tensors): - """Reshape keypoints. - - The keypoints are reshaped to [num_instances, num_keypoints, 2]. - - Args: - keys_to_tensors: a dictionary from keys to tensors. - - Returns: - A 3-D float tensor of shape [num_instances, num_keypoints, 2] containing - the (y, x) keypoint coordinates. - """ - y = keys_to_tensors['image/object/keypoint/y'] - if isinstance(y, tf.SparseTensor): - y = tf.sparse_tensor_to_dense(y) - y = tf.expand_dims(y, 1) - x = keys_to_tensors['image/object/keypoint/x'] - if isinstance(x, tf.SparseTensor): - x = tf.sparse_tensor_to_dense(x) - x = tf.expand_dims(x, 1) - keypoints = tf.concat([y, x], 1) - keypoints = tf.reshape(keypoints, [-1, self._num_keypoints, 2]) - return keypoints - - def _reshape_instance_masks(self, keys_to_tensors): - """Reshape instance segmentation masks. - - The instance segmentation masks are reshaped to [num_instances, height, - width]. - - Args: - keys_to_tensors: a dictionary from keys to tensors.
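_reshape_keypoints receives flat y and x lists (num_instances * num_keypoints values each) and interleaves them into a [num_instances, num_keypoints, 2] tensor. A small numeric sketch of that reshape, assuming 2 instances with 3 keypoints each (the same fixture values the keypoint test below uses):

import tensorflow as tf

num_keypoints = 3
# Flat per-example features: 2 instances x 3 keypoints.
y = tf.constant([0.0, 1.0, 2.0, 3.0, 4.0, 5.0])
x = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])

# Same steps as _reshape_keypoints: pair up (y, x) columns, then fold the
# flat keypoint axis into [num_instances, num_keypoints, 2].
keypoints = tf.concat([tf.expand_dims(y, 1), tf.expand_dims(x, 1)], 1)
keypoints = tf.reshape(keypoints, [-1, num_keypoints, 2])

with tf.Session() as sess:
  print(sess.run(keypoints).shape)  # (2, 3, 2)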
- - Returns: - A 3-D float tensor of shape [num_instances, height, width] with values - in {0, 1}. - """ - height = keys_to_tensors['image/height'] - width = keys_to_tensors['image/width'] - to_shape = tf.cast(tf.stack([-1, height, width]), tf.int32) - masks = keys_to_tensors['image/object/mask'] - if isinstance(masks, tf.SparseTensor): - masks = tf.sparse_tensor_to_dense(masks) - masks = tf.reshape(tf.to_float(tf.greater(masks, 0.0)), to_shape) - return tf.cast(masks, tf.float32) - - def _decode_png_instance_masks(self, keys_to_tensors): - """Decode PNG instance segmentation masks and stack into dense tensor. - - The instance segmentation masks are reshaped to [num_instances, height, - width]. - - Args: - keys_to_tensors: a dictionary from keys to tensors. - - Returns: - A 3-D float tensor of shape [num_instances, height, width] with values - in {0, 1}. - """ - - def decode_png_mask(image_buffer): - image = tf.squeeze( - tf.image.decode_image(image_buffer, channels=1), axis=2) - image.set_shape([None, None]) - image = tf.to_float(tf.greater(image, 0)) - return image - - png_masks = keys_to_tensors['image/object/mask'] - height = keys_to_tensors['image/height'] - width = keys_to_tensors['image/width'] - if isinstance(png_masks, tf.SparseTensor): - png_masks = tf.sparse_tensor_to_dense(png_masks, default_value='') - return tf.cond( - tf.greater(tf.size(png_masks), 0), - lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32), - lambda: tf.zeros(tf.to_int32(tf.stack([0, height, width])))) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/data_decoders/tf_example_decoder_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/data_decoders/tf_example_decoder_test.py deleted file mode 100644 index b567b8c20f442f135653b49ace7e85088fd67ad1..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/data_decoders/tf_example_decoder_test.py +++ /dev/null @@ -1,767 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
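_decode_png_instance_masks maps a PNG decode over the string tensor and binarizes each mask, and its tf.cond branch keeps the output a well-shaped [0, height, width] tensor even when an example carries no masks. A minimal standalone sketch of the per-mask decode step, assuming one synthetic 10x10 mask:

import numpy as np
import tensorflow as tf

# One synthetic 10x10 single-channel binary mask, PNG-encoded.
mask = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
encoded_png = tf.image.encode_png(tf.constant(mask))

def decode_png_mask(image_buffer):
  # Decode to [H, W] and binarize, as in _decode_png_instance_masks.
  image = tf.squeeze(tf.image.decode_image(image_buffer, channels=1), axis=2)
  image.set_shape([None, None])
  return tf.to_float(tf.greater(image, 0))

with tf.Session() as sess:
  decoded = sess.run(decode_png_mask(encoded_png))
  print(decoded.shape)  # (10, 10), values in {0.0, 1.0}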
-# ============================================================================== - -"""Tests for object_detection.data_decoders.tf_example_decoder.""" - -import os -import numpy as np -import tensorflow as tf - -from tensorflow.core.example import example_pb2 -from tensorflow.core.example import feature_pb2 -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import lookup_ops -from tensorflow.python.ops import parsing_ops -from object_detection.core import standard_fields as fields -from object_detection.data_decoders import tf_example_decoder -from object_detection.protos import input_reader_pb2 - -slim_example_decoder = tf.contrib.slim.tfexample_decoder - - -class TfExampleDecoderTest(tf.test.TestCase): - - def _EncodeImage(self, image_tensor, encoding_type='jpeg'): - with self.test_session(): - if encoding_type == 'jpeg': - image_encoded = tf.image.encode_jpeg(tf.constant(image_tensor)).eval() - elif encoding_type == 'png': - image_encoded = tf.image.encode_png(tf.constant(image_tensor)).eval() - else: - raise ValueError('Invalid encoding type.') - return image_encoded - - def _DecodeImage(self, image_encoded, encoding_type='jpeg'): - with self.test_session(): - if encoding_type == 'jpeg': - image_decoded = tf.image.decode_jpeg(tf.constant(image_encoded)).eval() - elif encoding_type == 'png': - image_decoded = tf.image.decode_png(tf.constant(image_encoded)).eval() - else: - raise ValueError('Invalid encoding type.') - return image_decoded - - def _Int64Feature(self, value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) - - def _FloatFeature(self, value): - return tf.train.Feature(float_list=tf.train.FloatList(value=value)) - - def _BytesFeature(self, value): - if isinstance(value, list): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - - def _Int64FeatureFromList(self, ndarray): - return feature_pb2.Feature( - int64_list=feature_pb2.Int64List(value=ndarray.flatten().tolist())) - - def _BytesFeatureFromList(self, ndarray): - values = ndarray.flatten().tolist() - return feature_pb2.Feature(bytes_list=feature_pb2.BytesList(value=values)) - - def testDecodeAdditionalChannels(self): - image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - - additional_channel_tensor = np.random.randint( - 256, size=(4, 5, 1)).astype(np.uint8) - encoded_additional_channel = self._EncodeImage(additional_channel_tensor) - decoded_additional_channel = self._DecodeImage(encoded_additional_channel) - - example = tf.train.Example( - features=tf.train.Features( - feature={ - 'image/encoded': - self._BytesFeature(encoded_jpeg), - 'image/additional_channels/encoded': - self._BytesFeatureFromList( - np.array([encoded_additional_channel] * 2)), - 'image/format': - self._BytesFeature('jpeg'), - 'image/source_id': - self._BytesFeature('image_id'), - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder( - num_additional_channels=2) - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - self.assertAllEqual( - np.concatenate([decoded_additional_channel] * 2, axis=2), - tensor_dict[fields.InputDataFields.image_additional_channels]) - - def 
testDecodeExampleWithBranchedBackupHandler(self): - example1 = example_pb2.Example( - features=feature_pb2.Features( - feature={ - 'image/object/class/text': - self._BytesFeatureFromList( - np.array(['cat', 'dog', 'guinea pig'])), - 'image/object/class/label': - self._Int64FeatureFromList(np.array([42, 10, 900])) - })) - example2 = example_pb2.Example( - features=feature_pb2.Features( - feature={ - 'image/object/class/text': - self._BytesFeatureFromList( - np.array(['cat', 'dog', 'guinea pig'])), - })) - example3 = example_pb2.Example( - features=feature_pb2.Features( - feature={ - 'image/object/class/label': - self._Int64FeatureFromList(np.array([42, 10, 901])) - })) - # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2 - table = lookup_ops.index_table_from_tensor( - constant_op.constant(['dog', 'guinea pig', 'cat'])) - keys_to_features = { - 'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string), - 'image/object/class/label': parsing_ops.VarLenFeature(dtypes.int64), - } - backup_handler = tf_example_decoder.BackupHandler( - handler=slim_example_decoder.Tensor('image/object/class/label'), - backup=tf_example_decoder.LookupTensor('image/object/class/text', - table)) - items_to_handlers = { - 'labels': backup_handler, - } - decoder = slim_example_decoder.TFExampleDecoder(keys_to_features, - items_to_handlers) - obtained_class_ids_each_example = [] - with self.test_session() as sess: - sess.run(lookup_ops.tables_initializer()) - for example in [example1, example2, example3]: - serialized_example = array_ops.reshape( - example.SerializeToString(), shape=[]) - obtained_class_ids_each_example.append( - decoder.decode(serialized_example)[0].eval()) - - self.assertAllClose([42, 10, 900], obtained_class_ids_each_example[0]) - self.assertAllClose([2, 0, 1], obtained_class_ids_each_example[1]) - self.assertAllClose([42, 10, 901], obtained_class_ids_each_example[2]) - - def testDecodeExampleWithBranchedLookup(self): - - example = example_pb2.Example(features=feature_pb2.Features(feature={ - 'image/object/class/text': self._BytesFeatureFromList( - np.array(['cat', 'dog', 'guinea pig'])), - })) - serialized_example = example.SerializeToString() - # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2 - table = lookup_ops.index_table_from_tensor( - constant_op.constant(['dog', 'guinea pig', 'cat'])) - - with self.test_session() as sess: - sess.run(lookup_ops.tables_initializer()) - - serialized_example = array_ops.reshape(serialized_example, shape=[]) - - keys_to_features = { - 'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string), - } - - items_to_handlers = { - 'labels': - tf_example_decoder.LookupTensor('image/object/class/text', table), - } - - decoder = slim_example_decoder.TFExampleDecoder(keys_to_features, - items_to_handlers) - obtained_class_ids = decoder.decode(serialized_example)[0].eval() - - self.assertAllClose([2, 0, 1], obtained_class_ids) - - def testDecodeJpegImage(self): - image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - decoded_jpeg = self._DecodeImage(encoded_jpeg) - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/source_id': self._BytesFeature('image_id'), - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[fields.InputDataFields.image]. 
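The tests build their fixtures inline as tf.train.Example protos; producers of the TFRecord files referenced in the configs follow the same pattern. A hedged sketch of a minimal detection example with one box, using the feature keys the decoder declares (the pixel values and box coordinates here are made up):

import numpy as np
import tensorflow as tf

image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
with tf.Session():
  encoded_jpeg = tf.image.encode_jpeg(tf.constant(image)).eval()

def bytes_feature(value):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def float_feature(value):
  return tf.train.Feature(float_list=tf.train.FloatList(value=value))

# One groundtruth box in normalized [ymin, xmin, ymax, xmax] coordinates,
# stored as four parallel float lists to match keys_to_features above.
example = tf.train.Example(features=tf.train.Features(feature={
    'image/encoded': bytes_feature(encoded_jpeg),
    'image/format': bytes_feature(b'jpeg'),
    'image/object/bbox/ymin': float_feature([0.1]),
    'image/object/bbox/xmin': float_feature([0.2]),
    'image/object/bbox/ymax': float_feature([0.8]),
    'image/object/bbox/xmax': float_feature([0.9]),
}))
serialized = example.SerializeToString()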
- get_shape().as_list()), [None, None, 3]) - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image]) - self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id]) - - def testDecodeImageKeyAndFilename(self): - image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/key/sha256': self._BytesFeature('abc'), - 'image/filename': self._BytesFeature('filename') - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertEqual('abc', tensor_dict[fields.InputDataFields.key]) - self.assertEqual('filename', tensor_dict[fields.InputDataFields.filename]) - - def testDecodePngImage(self): - image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) - encoded_png = self._EncodeImage(image_tensor, encoding_type='png') - decoded_png = self._DecodeImage(encoded_png, encoding_type='png') - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_png), - 'image/format': self._BytesFeature('png'), - 'image/source_id': self._BytesFeature('image_id') - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[fields.InputDataFields.image]. - get_shape().as_list()), [None, None, 3]) - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image]) - self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id]) - - def testDecodePngInstanceMasks(self): - image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - mask_1 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8) - mask_2 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8) - encoded_png_1 = self._EncodeImage(mask_1, encoding_type='png') - decoded_png_1 = np.squeeze(mask_1.astype(np.float32)) - encoded_png_2 = self._EncodeImage(mask_2, encoding_type='png') - decoded_png_2 = np.squeeze(mask_2.astype(np.float32)) - encoded_masks = [encoded_png_1, encoded_png_2] - decoded_masks = np.stack([decoded_png_1, decoded_png_2]) - example = tf.train.Example( - features=tf.train.Features( - feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/object/mask': self._BytesFeature(encoded_masks) - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder( - load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS) - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual( - decoded_masks, - tensor_dict[fields.InputDataFields.groundtruth_instance_masks]) - - def testDecodeEmptyPngInstanceMasks(self): - image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - encoded_masks = [] - example = tf.train.Example( - features=tf.train.Features( - 
feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/object/mask': self._BytesFeature(encoded_masks), - 'image/height': self._Int64Feature([10]), - 'image/width': self._Int64Feature([10]), - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder( - load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS) - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - self.assertAllEqual( - tensor_dict[fields.InputDataFields.groundtruth_instance_masks].shape, - [0, 10, 10]) - - def testDecodeBoundingBox(self): - image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - bbox_ymins = [0.0, 4.0] - bbox_xmins = [1.0, 5.0] - bbox_ymaxs = [2.0, 6.0] - bbox_xmaxs = [3.0, 7.0] - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/object/bbox/ymin': self._FloatFeature(bbox_ymins), - 'image/object/bbox/xmin': self._FloatFeature(bbox_xmins), - 'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs), - 'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs), - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]. - get_shape().as_list()), [None, 4]) - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - expected_boxes = np.vstack([bbox_ymins, bbox_xmins, - bbox_ymaxs, bbox_xmaxs]).transpose() - self.assertAllEqual(expected_boxes, - tensor_dict[fields.InputDataFields.groundtruth_boxes]) - self.assertAllEqual( - 2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes]) - - @test_util.enable_c_shapes - def testDecodeKeypoint(self): - image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - bbox_ymins = [0.0, 4.0] - bbox_xmins = [1.0, 5.0] - bbox_ymaxs = [2.0, 6.0] - bbox_xmaxs = [3.0, 7.0] - keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] - keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/object/bbox/ymin': self._FloatFeature(bbox_ymins), - 'image/object/bbox/xmin': self._FloatFeature(bbox_xmins), - 'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs), - 'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs), - 'image/object/keypoint/y': self._FloatFeature(keypoint_ys), - 'image/object/keypoint/x': self._FloatFeature(keypoint_xs), - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3) - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]. - get_shape().as_list()), [None, 4]) - self.assertAllEqual((tensor_dict[fields.InputDataFields. - groundtruth_keypoints]. 
- get_shape().as_list()), [2, 3, 2]) - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - expected_boxes = np.vstack([bbox_ymins, bbox_xmins, - bbox_ymaxs, bbox_xmaxs]).transpose() - self.assertAllEqual(expected_boxes, - tensor_dict[fields.InputDataFields.groundtruth_boxes]) - self.assertAllEqual( - 2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes]) - - expected_keypoints = ( - np.vstack([keypoint_ys, keypoint_xs]).transpose().reshape((2, 3, 2))) - self.assertAllEqual(expected_keypoints, - tensor_dict[ - fields.InputDataFields.groundtruth_keypoints]) - - def testDecodeDefaultGroundtruthWeights(self): - image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - bbox_ymins = [0.0, 4.0] - bbox_xmins = [1.0, 5.0] - bbox_ymaxs = [2.0, 6.0] - bbox_xmaxs = [3.0, 7.0] - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/object/bbox/ymin': self._FloatFeature(bbox_ymins), - 'image/object/bbox/xmin': self._FloatFeature(bbox_xmins), - 'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs), - 'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs), - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]. - get_shape().as_list()), [None, 4]) - - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllClose(tensor_dict[fields.InputDataFields.groundtruth_weights], - np.ones(2, dtype=np.float32)) - - @test_util.enable_c_shapes - def testDecodeObjectLabel(self): - image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - bbox_classes = [0, 1] - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/object/class/label': self._Int64Feature(bbox_classes), - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[ - fields.InputDataFields.groundtruth_classes].get_shape().as_list()), - [2]) - - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual(bbox_classes, - tensor_dict[fields.InputDataFields.groundtruth_classes]) - - def testDecodeObjectLabelNoText(self): - image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - bbox_classes = [1, 2] - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/object/class/label': self._Int64Feature(bbox_classes), - })).SerializeToString() - label_map_string = """ - item { - id:1 - name:'cat' - } - item { - id:2 - name:'dog' - } - """ - label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') - with tf.gfile.Open(label_map_path, 'wb') as f: - f.write(label_map_string) - - example_decoder = tf_example_decoder.TfExampleDecoder( - label_map_proto_file=label_map_path) - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[ - 
fields.InputDataFields.groundtruth_classes].get_shape().as_list()), - [None]) - - init = tf.tables_initializer() - with self.test_session() as sess: - sess.run(init) - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual(bbox_classes, - tensor_dict[fields.InputDataFields.groundtruth_classes]) - - def testDecodeObjectLabelUnrecognizedName(self): - image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - bbox_classes_text = ['cat', 'cheetah'] - example = tf.train.Example( - features=tf.train.Features( - feature={ - 'image/encoded': - self._BytesFeature(encoded_jpeg), - 'image/format': - self._BytesFeature('jpeg'), - 'image/object/class/text': - self._BytesFeature(bbox_classes_text), - })).SerializeToString() - - label_map_string = """ - item { - id:2 - name:'cat' - } - item { - id:1 - name:'dog' - } - """ - label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') - with tf.gfile.Open(label_map_path, 'wb') as f: - f.write(label_map_string) - example_decoder = tf_example_decoder.TfExampleDecoder( - label_map_proto_file=label_map_path) - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes] - .get_shape().as_list()), [None]) - - with self.test_session() as sess: - sess.run(tf.tables_initializer()) - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual([2, -1], - tensor_dict[fields.InputDataFields.groundtruth_classes]) - - def testDecodeObjectLabelWithMapping(self): - image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - bbox_classes_text = ['cat', 'dog'] - example = tf.train.Example( - features=tf.train.Features( - feature={ - 'image/encoded': - self._BytesFeature(encoded_jpeg), - 'image/format': - self._BytesFeature('jpeg'), - 'image/object/class/text': - self._BytesFeature(bbox_classes_text), - })).SerializeToString() - - label_map_string = """ - item { - id:3 - name:'cat' - } - item { - id:1 - name:'dog' - } - """ - label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') - with tf.gfile.Open(label_map_path, 'wb') as f: - f.write(label_map_string) - example_decoder = tf_example_decoder.TfExampleDecoder( - label_map_proto_file=label_map_path) - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes] - .get_shape().as_list()), [None]) - - with self.test_session() as sess: - sess.run(tf.tables_initializer()) - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual([3, 1], - tensor_dict[fields.InputDataFields.groundtruth_classes]) - - @test_util.enable_c_shapes - def testDecodeObjectArea(self): - image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - object_area = [100., 174.] - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/object/area': self._FloatFeature(object_area), - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area]. 
- get_shape().as_list()), [2]) - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual(object_area, - tensor_dict[fields.InputDataFields.groundtruth_area]) - - @test_util.enable_c_shapes - def testDecodeObjectIsCrowd(self): - image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - object_is_crowd = [0, 1] - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/object/is_crowd': self._Int64Feature(object_is_crowd), - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[ - fields.InputDataFields.groundtruth_is_crowd].get_shape().as_list()), - [2]) - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual([bool(item) for item in object_is_crowd], - tensor_dict[ - fields.InputDataFields.groundtruth_is_crowd]) - - @test_util.enable_c_shapes - def testDecodeObjectDifficult(self): - image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - object_difficult = [0, 1] - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/object/difficult': self._Int64Feature(object_difficult), - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[ - fields.InputDataFields.groundtruth_difficult].get_shape().as_list()), - [2]) - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual([bool(item) for item in object_difficult], - tensor_dict[ - fields.InputDataFields.groundtruth_difficult]) - - @test_util.enable_c_shapes - def testDecodeObjectGroupOf(self): - image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - object_group_of = [0, 1] - example = tf.train.Example(features=tf.train.Features( - feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/object/group_of': self._Int64Feature(object_group_of), - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[ - fields.InputDataFields.groundtruth_group_of].get_shape().as_list()), - [2]) - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual( - [bool(item) for item in object_group_of], - tensor_dict[fields.InputDataFields.groundtruth_group_of]) - - def testDecodeObjectWeight(self): - image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - object_weights = [0.75, 1.0] - example = tf.train.Example(features=tf.train.Features( - feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/object/weight': self._FloatFeature(object_weights), - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - 
self.assertAllEqual((tensor_dict[ - fields.InputDataFields.groundtruth_weights].get_shape().as_list()), - [None]) - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual( - object_weights, - tensor_dict[fields.InputDataFields.groundtruth_weights]) - - @test_util.enable_c_shapes - def testDecodeInstanceSegmentation(self): - num_instances = 4 - image_height = 5 - image_width = 3 - - # Randomly generate image. - image_tensor = np.random.randint(256, size=(image_height, - image_width, - 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - - # Randomly generate instance segmentation masks. - instance_masks = ( - np.random.randint(2, size=(num_instances, - image_height, - image_width)).astype(np.float32)) - instance_masks_flattened = np.reshape(instance_masks, [-1]) - - # Randomly generate class labels for each instance. - object_classes = np.random.randint( - 100, size=(num_instances)).astype(np.int64) - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/height': self._Int64Feature([image_height]), - 'image/width': self._Int64Feature([image_width]), - 'image/object/mask': self._FloatFeature(instance_masks_flattened), - 'image/object/class/label': self._Int64Feature( - object_classes)})).SerializeToString() - example_decoder = tf_example_decoder.TfExampleDecoder( - load_instance_masks=True) - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual(( - tensor_dict[fields.InputDataFields.groundtruth_instance_masks]. - get_shape().as_list()), [4, 5, 3]) - - self.assertAllEqual(( - tensor_dict[fields.InputDataFields.groundtruth_classes]. - get_shape().as_list()), [4]) - - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual( - instance_masks.astype(np.float32), - tensor_dict[fields.InputDataFields.groundtruth_instance_masks]) - self.assertAllEqual( - object_classes, - tensor_dict[fields.InputDataFields.groundtruth_classes]) - - def testInstancesNotAvailableByDefault(self): - num_instances = 4 - image_height = 5 - image_width = 3 - # Randomly generate image. - image_tensor = np.random.randint(256, size=(image_height, - image_width, - 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - - # Randomly generate instance segmentation masks. - instance_masks = ( - np.random.randint(2, size=(num_instances, - image_height, - image_width)).astype(np.float32)) - instance_masks_flattened = np.reshape(instance_masks, [-1]) - - # Randomly generate class labels for each instance. 
- object_classes = np.random.randint( - 100, size=(num_instances)).astype(np.int64) - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/height': self._Int64Feature([image_height]), - 'image/width': self._Int64Feature([image_width]), - 'image/object/mask': self._FloatFeature(instance_masks_flattened), - 'image/object/class/label': self._Int64Feature( - object_classes)})).SerializeToString() - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - self.assertTrue(fields.InputDataFields.groundtruth_instance_masks - not in tensor_dict) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_coco_tf_record.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_coco_tf_record.py deleted file mode 100644 index 9928443d805effb24b46f599929c4b7db73fb2c8..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_coco_tf_record.py +++ /dev/null @@ -1,273 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Convert raw COCO dataset to TFRecord for object_detection. - -Example usage: - python create_coco_tf_record.py --logtostderr \ - --train_image_dir="${TRAIN_IMAGE_DIR}" \ - --val_image_dir="${VAL_IMAGE_DIR}" \ - --test_image_dir="${TEST_IMAGE_DIR}" \ - --train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \ - --val_annotations_file="${VAL_ANNOTATIONS_FILE}" \ - --testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \ - --output_dir="${OUTPUT_DIR}" -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import hashlib -import io -import json -import os -import numpy as np -import PIL.Image - -from pycocotools import mask -import tensorflow as tf - -from object_detection.utils import dataset_util -from object_detection.utils import label_map_util - - -flags = tf.app.flags -tf.flags.DEFINE_boolean('include_masks', False, - 'Whether to include instance segmentation masks ' - '(PNG encoded) in the result.
default: False.') -tf.flags.DEFINE_string('train_image_dir', '', - 'Training image directory.') -tf.flags.DEFINE_string('val_image_dir', '', - 'Validation image directory.') -tf.flags.DEFINE_string('test_image_dir', '', - 'Test image directory.') -tf.flags.DEFINE_string('train_annotations_file', '', - 'Training annotations JSON file.') -tf.flags.DEFINE_string('val_annotations_file', '', - 'Validation annotations JSON file.') -tf.flags.DEFINE_string('testdev_annotations_file', '', - 'Test-dev annotations JSON file.') -tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.') - -FLAGS = flags.FLAGS - -tf.logging.set_verbosity(tf.logging.INFO) - - -def create_tf_example(image, - annotations_list, - image_dir, - category_index, - include_masks=False): - """Converts image and annotations to a tf.Example proto. - - Args: - image: dict with keys: - [u'license', u'file_name', u'coco_url', u'height', u'width', - u'date_captured', u'flickr_url', u'id'] - annotations_list: - list of dicts with keys: - [u'segmentation', u'area', u'iscrowd', u'image_id', - u'bbox', u'category_id', u'id'] - Notice that bounding box coordinates in the official COCO dataset are - given as [x, y, width, height] tuples using absolute coordinates where - x, y represent the top-left (0-indexed) corner. This function converts - to the format expected by the Tensorflow Object Detection API (which is - [ymin, xmin, ymax, xmax] with coordinates normalized relative - to image size). - image_dir: directory containing the image files. - category_index: a dict containing COCO category information keyed - by the 'id' field of each category. See the - label_map_util.create_category_index function. - include_masks: Whether to include instance segmentation masks - (PNG encoded) in the result. default: False. - Returns: - example: The converted tf.Example - num_annotations_skipped: Number of (invalid) annotations that were ignored.
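A worked instance of the box conversion described in the docstring above, using made-up numbers:

# COCO supplies [x, y, width, height] in absolute pixels; on a 640x480 image
# the box [100, 200, 50, 80] becomes:
x, y, width, height = 100.0, 200.0, 50.0, 80.0
image_width, image_height = 640, 480
xmin, xmax = x / image_width, (x + width) / image_width      # 0.15625, 0.234375
ymin, ymax = y / image_height, (y + height) / image_height   # ~0.4167, ~0.5833
# The tf.Example stores the normalized corners in [ymin, xmin, ymax, xmax] order.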
- - Raises: - ValueError: if the image pointed to by data['filename'] is not a valid JPEG - """ - image_height = image['height'] - image_width = image['width'] - filename = image['file_name'] - image_id = image['id'] - - full_path = os.path.join(image_dir, filename) - with tf.gfile.GFile(full_path, 'rb') as fid: - encoded_jpg = fid.read() - encoded_jpg_io = io.BytesIO(encoded_jpg) - image = PIL.Image.open(encoded_jpg_io) - key = hashlib.sha256(encoded_jpg).hexdigest() - - xmin = [] - xmax = [] - ymin = [] - ymax = [] - is_crowd = [] - category_names = [] - category_ids = [] - area = [] - encoded_mask_png = [] - num_annotations_skipped = 0 - for object_annotations in annotations_list: - (x, y, width, height) = tuple(object_annotations['bbox']) - if width <= 0 or height <= 0: - num_annotations_skipped += 1 - continue - if x + width > image_width or y + height > image_height: - num_annotations_skipped += 1 - continue - xmin.append(float(x) / image_width) - xmax.append(float(x + width) / image_width) - ymin.append(float(y) / image_height) - ymax.append(float(y + height) / image_height) - is_crowd.append(object_annotations['iscrowd']) - category_id = int(object_annotations['category_id']) - category_ids.append(category_id) - category_names.append(category_index[category_id]['name'].encode('utf8')) - area.append(object_annotations['area']) - - if include_masks: - run_len_encoding = mask.frPyObjects(object_annotations['segmentation'], - image_height, image_width) - binary_mask = mask.decode(run_len_encoding) - if not object_annotations['iscrowd']: - binary_mask = np.amax(binary_mask, axis=2) - pil_image = PIL.Image.fromarray(binary_mask) - output_io = io.BytesIO() - pil_image.save(output_io, format='PNG') - encoded_mask_png.append(output_io.getvalue()) - feature_dict = { - 'image/height': - dataset_util.int64_feature(image_height), - 'image/width': - dataset_util.int64_feature(image_width), - 'image/filename': - dataset_util.bytes_feature(filename.encode('utf8')), - 'image/source_id': - dataset_util.bytes_feature(str(image_id).encode('utf8')), - 'image/key/sha256': - dataset_util.bytes_feature(key.encode('utf8')), - 'image/encoded': - dataset_util.bytes_feature(encoded_jpg), - 'image/format': - dataset_util.bytes_feature('jpeg'.encode('utf8')), - 'image/object/bbox/xmin': - dataset_util.float_list_feature(xmin), - 'image/object/bbox/xmax': - dataset_util.float_list_feature(xmax), - 'image/object/bbox/ymin': - dataset_util.float_list_feature(ymin), - 'image/object/bbox/ymax': - dataset_util.float_list_feature(ymax), - 'image/object/class/label': - dataset_util.int64_list_feature(category_ids), - 'image/object/is_crowd': - dataset_util.int64_list_feature(is_crowd), - 'image/object/area': - dataset_util.float_list_feature(area), - } - if include_masks: - feature_dict['image/object/mask'] = ( - dataset_util.bytes_list_feature(encoded_mask_png)) - example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) - return key, example, num_annotations_skipped - - -def _create_tf_record_from_coco_annotations( - annotations_file, image_dir, output_path, include_masks): - """Loads COCO annotation json files and converts to tf.Record format. - - Args: - annotations_file: JSON file containing bounding box annotations. - image_dir: Directory containing the image files. - output_path: Path to output tf.Record file. - include_masks: Whether to include instance segmentation masks - (PNG encoded) in the result. default: False.
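A hypothetical invocation of the helper documented above; the paths are placeholders, not values from the original file:

_create_tf_record_from_coco_annotations(
    annotations_file='/data/coco/annotations/instances_val2017.json',  # placeholder
    image_dir='/data/coco/val2017',                                    # placeholder
    output_path='/data/coco/coco_val.record',                          # placeholder
    include_masks=True)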
- """ - with tf.gfile.GFile(annotations_file, 'r') as fid: - groundtruth_data = json.load(fid) - images = groundtruth_data['images'] - category_index = label_map_util.create_category_index( - groundtruth_data['categories']) - - annotations_index = {} - if 'annotations' in groundtruth_data: - tf.logging.info( - 'Found groundtruth annotations. Building annotations index.') - for annotation in groundtruth_data['annotations']: - image_id = annotation['image_id'] - if image_id not in annotations_index: - annotations_index[image_id] = [] - annotations_index[image_id].append(annotation) - missing_annotation_count = 0 - for image in images: - image_id = image['id'] - if image_id not in annotations_index: - missing_annotation_count += 1 - annotations_index[image_id] = [] - tf.logging.info('%d images are missing annotations.', - missing_annotation_count) - - tf.logging.info('writing to output path: %s', output_path) - writer = tf.python_io.TFRecordWriter(output_path) - total_num_annotations_skipped = 0 - for idx, image in enumerate(images): - if idx % 100 == 0: - tf.logging.info('On image %d of %d', idx, len(images)) - annotations_list = annotations_index[image['id']] - _, tf_example, num_annotations_skipped = create_tf_example( - image, annotations_list, image_dir, category_index, include_masks) - total_num_annotations_skipped += num_annotations_skipped - writer.write(tf_example.SerializeToString()) - writer.close() - tf.logging.info('Finished writing, skipped %d annotations.', - total_num_annotations_skipped) - - -def main(_): - assert FLAGS.train_image_dir, '`train_image_dir` missing.' - assert FLAGS.val_image_dir, '`val_image_dir` missing.' - assert FLAGS.test_image_dir, '`test_image_dir` missing.' - assert FLAGS.train_annotations_file, '`train_annotations_file` missing.' - assert FLAGS.val_annotations_file, '`val_annotations_file` missing.' - assert FLAGS.testdev_annotations_file, '`testdev_annotations_file` missing.' - - if not tf.gfile.IsDirectory(FLAGS.output_dir): - tf.gfile.MakeDirs(FLAGS.output_dir) - train_output_path = os.path.join(FLAGS.output_dir, 'coco_train.record') - val_output_path = os.path.join(FLAGS.output_dir, 'coco_val.record') - testdev_output_path = os.path.join(FLAGS.output_dir, 'coco_testdev.record') - - _create_tf_record_from_coco_annotations( - FLAGS.train_annotations_file, - FLAGS.train_image_dir, - train_output_path, - FLAGS.include_masks) - _create_tf_record_from_coco_annotations( - FLAGS.val_annotations_file, - FLAGS.val_image_dir, - val_output_path, - FLAGS.include_masks) - _create_tf_record_from_coco_annotations( - FLAGS.testdev_annotations_file, - FLAGS.test_image_dir, - testdev_output_path, - FLAGS.include_masks) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_coco_tf_record_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_coco_tf_record_test.py deleted file mode 100644 index 45697eeff5bc9f103621fda2cb729ee71ef7c4d6..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_coco_tf_record_test.py +++ /dev/null @@ -1,188 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Test for create_coco_tf_record.py.""" - -import io -import os - -import numpy as np -import PIL.Image -import tensorflow as tf - -from object_detection.dataset_tools import create_coco_tf_record - - -class CreateCocoTFRecordTest(tf.test.TestCase): - - def _assertProtoEqual(self, proto_field, expectation): - """Helper function to assert if a proto field equals some value. - - Args: - proto_field: The protobuf field to compare. - expectation: The expected value of the protobuf field. - """ - proto_list = [p for p in proto_field] - self.assertListEqual(proto_list, expectation) - - def test_create_tf_example(self): - image_file_name = 'tmp_image.jpg' - image_data = np.random.rand(256, 256, 3) - tmp_dir = self.get_temp_dir() - save_path = os.path.join(tmp_dir, image_file_name) - image = PIL.Image.fromarray(image_data, 'RGB') - image.save(save_path) - - image = { - 'file_name': image_file_name, - 'height': 256, - 'width': 256, - 'id': 11, - } - - annotations_list = [{ - 'area': .5, - 'iscrowd': False, - 'image_id': 11, - 'bbox': [64, 64, 128, 128], - 'category_id': 2, - 'id': 1000, - }] - - image_dir = tmp_dir - category_index = { - 1: { - 'name': 'dog', - 'id': 1 - }, - 2: { - 'name': 'cat', - 'id': 2 - }, - 3: { - 'name': 'human', - 'id': 3 - } - } - - (_, example, - num_annotations_skipped) = create_coco_tf_record.create_tf_example( - image, annotations_list, image_dir, category_index) - - self.assertEqual(num_annotations_skipped, 0) - self._assertProtoEqual( - example.features.feature['image/height'].int64_list.value, [256]) - self._assertProtoEqual( - example.features.feature['image/width'].int64_list.value, [256]) - self._assertProtoEqual( - example.features.feature['image/filename'].bytes_list.value, - [image_file_name]) - self._assertProtoEqual( - example.features.feature['image/source_id'].bytes_list.value, - [str(image['id'])]) - self._assertProtoEqual( - example.features.feature['image/format'].bytes_list.value, ['jpeg']) - self._assertProtoEqual( - example.features.feature['image/object/bbox/xmin'].float_list.value, - [0.25]) - self._assertProtoEqual( - example.features.feature['image/object/bbox/ymin'].float_list.value, - [0.25]) - self._assertProtoEqual( - example.features.feature['image/object/bbox/xmax'].float_list.value, - [0.75]) - self._assertProtoEqual( - example.features.feature['image/object/bbox/ymax'].float_list.value, - [0.75]) - - def test_create_tf_example_with_instance_masks(self): - image_file_name = 'tmp_image.jpg' - image_data = np.random.rand(8, 8, 3) - tmp_dir = self.get_temp_dir() - save_path = os.path.join(tmp_dir, image_file_name) - image = PIL.Image.fromarray(image_data, 'RGB') - image.save(save_path) - - image = { - 'file_name': image_file_name, - 'height': 8, - 'width': 8, - 'id': 11, - } - - annotations_list = [{ - 'area': .5, - 'iscrowd': False, - 'image_id': 11, - 'bbox': [0, 0, 8, 8], - 'segmentation': [[4, 0, 0, 0, 0, 4], [8, 4, 4, 8, 8, 8]], - 'category_id': 1, - 'id': 1000, - }] - - image_dir = tmp_dir - category_index = { - 1: { - 'name': 'dog', - 'id': 1 - 
}, - } - - (_, example, - num_annotations_skipped) = create_coco_tf_record.create_tf_example( - image, annotations_list, image_dir, category_index, include_masks=True) - - self.assertEqual(num_annotations_skipped, 0) - self._assertProtoEqual( - example.features.feature['image/height'].int64_list.value, [8]) - self._assertProtoEqual( - example.features.feature['image/width'].int64_list.value, [8]) - self._assertProtoEqual( - example.features.feature['image/filename'].bytes_list.value, - [image_file_name]) - self._assertProtoEqual( - example.features.feature['image/source_id'].bytes_list.value, - [str(image['id'])]) - self._assertProtoEqual( - example.features.feature['image/format'].bytes_list.value, ['jpeg']) - self._assertProtoEqual( - example.features.feature['image/object/bbox/xmin'].float_list.value, - [0]) - self._assertProtoEqual( - example.features.feature['image/object/bbox/ymin'].float_list.value, - [0]) - self._assertProtoEqual( - example.features.feature['image/object/bbox/xmax'].float_list.value, - [1]) - self._assertProtoEqual( - example.features.feature['image/object/bbox/ymax'].float_list.value, - [1]) - encoded_mask_pngs = [ - io.BytesIO(encoded_masks) for encoded_masks in example.features.feature[ - 'image/object/mask'].bytes_list.value - ] - pil_masks = [ - np.array(PIL.Image.open(encoded_mask_png)) - for encoded_mask_png in encoded_mask_pngs - ] - self.assertTrue(len(pil_masks) == 1) - self.assertAllEqual(pil_masks[0], - [[1, 1, 1, 0, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0, 0, 0], - [1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 1, 1], - [0, 0, 0, 0, 0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 1, 1]]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_kitti_tf_record.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_kitti_tf_record.py deleted file mode 100644 index c612db99166114689b8c40112bc03be53db44eef..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_kitti_tf_record.py +++ /dev/null @@ -1,310 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Convert raw KITTI detection dataset to TFRecord for object_detection. - -Converts KITTI detection dataset to TFRecords with a standard format allowing - to use this dataset to train object detectors. The raw dataset can be - downloaded from: - http://kitti.is.tue.mpg.de/kitti/data_object_image_2.zip. - http://kitti.is.tue.mpg.de/kitti/data_object_label_2.zip - Permission can be requested at the main website. - - KITTI detection dataset contains 7481 training images. Using this code with - the default settings will set aside the first 500 images as a validation set. - This can be altered using the flags, see details below. 
- -Example usage: - python object_detection/dataset_tools/create_kitti_tf_record.py \ - --data_dir=/home/user/kitti \ - --output_path=/home/user/kitti.record -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import hashlib -import io -import os - -import numpy as np -import PIL.Image as pil -import tensorflow as tf - -from object_detection.utils import dataset_util -from object_detection.utils import label_map_util -from object_detection.utils.np_box_ops import iou - -tf.app.flags.DEFINE_string('data_dir', '', 'Location of root directory for the ' - 'data. Folder structure is assumed to be: ' - '/training/label_2 (annotations) and ' - '/data_object_image_2/training/image_2 ' - '(images).') -tf.app.flags.DEFINE_string('output_path', '', 'Path to which TFRecord files ' - 'will be written. The TFRecord with the training set ' - 'will be located at: _train.tfrecord. ' - 'And the TFRecord with the validation set will be ' - 'located at: _val.tfrecord') -tf.app.flags.DEFINE_string('classes_to_use', 'car,pedestrian,dontcare', - 'Comma separated list of class names that will be ' - 'used. Adding the dontcare class will remove all ' - 'bboxes in the dontcare regions.') -tf.app.flags.DEFINE_string('label_map_path', 'data/kitti_label_map.pbtxt', - 'Path to label map proto.') -tf.app.flags.DEFINE_integer('validation_set_size', 500, 'Number of images to ' - 'be used as a validation set.') -FLAGS = tf.app.flags.FLAGS - - -def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use, - label_map_path, validation_set_size): - """Convert the KITTI detection dataset to TFRecords. - - Args: - data_dir: The full path to the unzipped folder containing the unzipped data - from data_object_image_2 and data_object_label_2.zip. - Folder structure is assumed to be: data_dir/training/label_2 (annotations) - and data_dir/data_object_image_2/training/image_2 (images). - output_path: The path to which TFRecord files will be written. The TFRecord - with the training set will be located at: _train.tfrecord - And the TFRecord with the validation set will be located at: - _val.tfrecord - classes_to_use: List of strings naming the classes for which data should be - converted. Use the same names as presented in the KITTI README file. - Adding dontcare class will remove all other bounding boxes that overlap - with areas marked as dontcare regions. - label_map_path: Path to label map proto - validation_set_size: How many images should be left as the validation set. - (First `validation_set_size` examples are selected to be in the - validation set). - """ - label_map_dict = label_map_util.get_label_map_dict(label_map_path) - train_count = 0 - val_count = 0 - - annotation_dir = os.path.join(data_dir, - 'training', - 'label_2') - - image_dir = os.path.join(data_dir, - 'data_object_image_2', - 'training', - 'image_2') - - train_writer = tf.python_io.TFRecordWriter('%s_train.tfrecord'% - output_path) - val_writer = tf.python_io.TFRecordWriter('%s_val.tfrecord'% - output_path) - - images = sorted(tf.gfile.ListDirectory(image_dir)) - for img_name in images: - img_num = int(img_name.split('.')[0]) - is_validation_img = img_num < validation_set_size - img_anno = read_annotation_file(os.path.join(annotation_dir, - str(img_num).zfill(6)+'.txt')) - - image_path = os.path.join(image_dir, img_name) - - # Filter all bounding boxes of this frame that are of a legal class, and - # don't overlap with a dontcare region.
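Two behaviors at this point in the loop, sketched for clarity (the file names below are illustrative):

# Train/val split: with the default validation_set_size of 500, the first 500
# images by numeric file name land in the validation shard.
assert int('000123.png'.split('.')[0]) < 500       # -> written to _val.tfrecord
assert not int('007000.png'.split('.')[0]) < 500   # -> written to _train.tfrecord
# Dontcare handling: filter_annotations (defined below) keeps only boxes whose
# class is in classes_to_use, then drops any kept box that overlaps a dontcare
# region with IoU > 0.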
- # TODO(talremez) filter out targets that are truncated or heavily occluded. - annotation_for_image = filter_annotations(img_anno, classes_to_use) - - example = prepare_example(image_path, annotation_for_image, label_map_dict) - if is_validation_img: - val_writer.write(example.SerializeToString()) - val_count += 1 - else: - train_writer.write(example.SerializeToString()) - train_count += 1 - - train_writer.close() - val_writer.close() - - -def prepare_example(image_path, annotations, label_map_dict): - """Converts a dictionary with annotations for an image to tf.Example proto. - - Args: - image_path: The complete path to image. - annotations: A dictionary of arrays holding the annotations for all objects - that appear in the image. - label_map_dict: A map from string label names to integer ids. - - Returns: - example: The converted tf.Example. - """ - with tf.gfile.GFile(image_path, 'rb') as fid: - encoded_png = fid.read() - encoded_png_io = io.BytesIO(encoded_png) - image = pil.open(encoded_png_io) - image = np.asarray(image) - - key = hashlib.sha256(encoded_png).hexdigest() - - width = int(image.shape[1]) - height = int(image.shape[0]) - - xmin_norm = annotations['2d_bbox_left'] / float(width) - ymin_norm = annotations['2d_bbox_top'] / float(height) - xmax_norm = annotations['2d_bbox_right'] / float(width) - ymax_norm = annotations['2d_bbox_bottom'] / float(height) - - difficult_obj = [0]*len(xmin_norm) - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/height': dataset_util.int64_feature(height), - 'image/width': dataset_util.int64_feature(width), - 'image/filename': dataset_util.bytes_feature(image_path.encode('utf8')), - 'image/source_id': dataset_util.bytes_feature(image_path.encode('utf8')), - 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), - 'image/encoded': dataset_util.bytes_feature(encoded_png), - 'image/format': dataset_util.bytes_feature('png'.encode('utf8')), - 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin_norm), - 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax_norm), - 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin_norm), - 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax_norm), - 'image/object/class/text': dataset_util.bytes_list_feature( - [x.encode('utf8') for x in annotations['type']]), - 'image/object/class/label': dataset_util.int64_list_feature( - [label_map_dict[x] for x in annotations['type']]), - 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), - 'image/object/truncated': dataset_util.float_list_feature( - annotations['truncated']), - 'image/object/alpha': dataset_util.float_list_feature( - annotations['alpha']), - 'image/object/3d_bbox/height': dataset_util.float_list_feature( - annotations['3d_bbox_height']), - 'image/object/3d_bbox/width': dataset_util.float_list_feature( - annotations['3d_bbox_width']), - 'image/object/3d_bbox/length': dataset_util.float_list_feature( - annotations['3d_bbox_length']), - 'image/object/3d_bbox/x': dataset_util.float_list_feature( - annotations['3d_bbox_x']), - 'image/object/3d_bbox/y': dataset_util.float_list_feature( - annotations['3d_bbox_y']), - 'image/object/3d_bbox/z': dataset_util.float_list_feature( - annotations['3d_bbox_z']), - 'image/object/3d_bbox/rot_y': dataset_util.float_list_feature( - annotations['3d_bbox_rot_y']), - })) - - return example - - -def filter_annotations(img_all_annotations, used_classes): - """Filters out annotations from the unused classes and dontcare regions.
- - Filters out the annotations that belong to classes we do not wish to use and - (optionally) also removes all boxes that overlap with dontcare regions. - - Args: - img_all_annotations: A dictionary of annotation arrays. See documentation of - read_annotation_file for more details about the format of the annotations. - used_classes: A list of strings listing the classes we want to keep; if the - list contains "dontcare", all bounding boxes overlapping with dontcare - regions will also be filtered out. - - Returns: - img_filtered_annotations: A dictionary of annotation arrays that have passed - the filtering. - """ - - img_filtered_annotations = {} - - # Filter the type of the objects. - relevant_annotation_indices = [ - i for i, x in enumerate(img_all_annotations['type']) if x in used_classes - ] - - for key in img_all_annotations.keys(): - img_filtered_annotations[key] = ( - img_all_annotations[key][relevant_annotation_indices]) - - if 'dontcare' in used_classes: - dont_care_indices = [i for i, - x in enumerate(img_filtered_annotations['type']) - if x == 'dontcare'] - - # bounding box format [y_min, x_min, y_max, x_max] - all_boxes = np.stack([img_filtered_annotations['2d_bbox_top'], - img_filtered_annotations['2d_bbox_left'], - img_filtered_annotations['2d_bbox_bottom'], - img_filtered_annotations['2d_bbox_right']], - axis=1) - - ious = iou(boxes1=all_boxes, - boxes2=all_boxes[dont_care_indices]) - - # Remove all bounding boxes that overlap with a dontcare region. - if ious.size > 0: - boxes_to_remove = np.amax(ious, axis=1) > 0.0 - for key in img_all_annotations.keys(): - img_filtered_annotations[key] = ( - img_filtered_annotations[key][np.logical_not(boxes_to_remove)]) - - return img_filtered_annotations - - -def read_annotation_file(filename): - """Reads a KITTI annotation file. - - Converts a KITTI annotation file into a dictionary containing all the - relevant information. - - Args: - filename: the path to the annotation text file. - - Returns: - anno: A dictionary with the converted annotation information. See annotation - README file for details on the different fields.
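For reference, one line of a KITTI label file carries 15 whitespace-separated fields, consumed by the parser below in this order (the sample values are made up):

# type truncated occluded alpha | x1 y1 x2 y2 | h w l | x y z | rot_y
line = 'Car 0.00 0 1.85 387.63 181.54 423.81 203.12 1.67 1.87 3.69 -16.53 2.39 58.49 1.57'
fields = line.strip().split(' ')
assert fields[0] == 'Car' and len(fields) == 15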
- """ - with open(filename) as f: - content = f.readlines() - content = [x.strip().split(' ') for x in content] - - anno = {} - anno['type'] = np.array([x[0].lower() for x in content]) - anno['truncated'] = np.array([float(x[1]) for x in content]) - anno['occluded'] = np.array([int(x[2]) for x in content]) - anno['alpha'] = np.array([float(x[3]) for x in content]) - - anno['2d_bbox_left'] = np.array([float(x[4]) for x in content]) - anno['2d_bbox_top'] = np.array([float(x[5]) for x in content]) - anno['2d_bbox_right'] = np.array([float(x[6]) for x in content]) - anno['2d_bbox_bottom'] = np.array([float(x[7]) for x in content]) - - anno['3d_bbox_height'] = np.array([float(x[8]) for x in content]) - anno['3d_bbox_width'] = np.array([float(x[9]) for x in content]) - anno['3d_bbox_length'] = np.array([float(x[10]) for x in content]) - anno['3d_bbox_x'] = np.array([float(x[11]) for x in content]) - anno['3d_bbox_y'] = np.array([float(x[12]) for x in content]) - anno['3d_bbox_z'] = np.array([float(x[13]) for x in content]) - anno['3d_bbox_rot_y'] = np.array([float(x[14]) for x in content]) - - return anno - - -def main(_): - convert_kitti_to_tfrecords( - data_dir=FLAGS.data_dir, - output_path=FLAGS.output_path, - classes_to_use=FLAGS.classes_to_use.split(','), - label_map_path=FLAGS.label_map_path, - validation_set_size=FLAGS.validation_set_size) - -if __name__ == '__main__': - tf.app.run() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_kitti_tf_record_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_kitti_tf_record_test.py deleted file mode 100644 index 37ac4b8b19d65f8533ecefec318b409df12bce5f..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_kitti_tf_record_test.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Test for create_kitti_tf_record.py.""" - -import os - -import numpy as np -import PIL.Image -import tensorflow as tf - -from object_detection.dataset_tools import create_kitti_tf_record - - -class CreateKittiTFRecordTest(tf.test.TestCase): - - def _assertProtoEqual(self, proto_field, expectation): - """Helper function to assert if a proto field equals some value. - - Args: - proto_field: The protobuf field to compare. - expectation: The expected value of the protobuf field. 
- """ - proto_list = [p for p in proto_field] - self.assertListEqual(proto_list, expectation) - - def test_dict_to_tf_example(self): - image_file_name = 'tmp_image.jpg' - image_data = np.random.rand(256, 256, 3) - save_path = os.path.join(self.get_temp_dir(), image_file_name) - image = PIL.Image.fromarray(image_data, 'RGB') - image.save(save_path) - - annotations = {} - annotations['2d_bbox_left'] = np.array([64]) - annotations['2d_bbox_top'] = np.array([64]) - annotations['2d_bbox_right'] = np.array([192]) - annotations['2d_bbox_bottom'] = np.array([192]) - annotations['type'] = ['car'] - annotations['truncated'] = np.array([1]) - annotations['alpha'] = np.array([2]) - annotations['3d_bbox_height'] = np.array([10]) - annotations['3d_bbox_width'] = np.array([11]) - annotations['3d_bbox_length'] = np.array([12]) - annotations['3d_bbox_x'] = np.array([13]) - annotations['3d_bbox_y'] = np.array([14]) - annotations['3d_bbox_z'] = np.array([15]) - annotations['3d_bbox_rot_y'] = np.array([4]) - - label_map_dict = { - 'background': 0, - 'car': 1, - } - - example = create_kitti_tf_record.prepare_example( - save_path, - annotations, - label_map_dict) - - self._assertProtoEqual( - example.features.feature['image/height'].int64_list.value, [256]) - self._assertProtoEqual( - example.features.feature['image/width'].int64_list.value, [256]) - self._assertProtoEqual( - example.features.feature['image/filename'].bytes_list.value, - [save_path]) - self._assertProtoEqual( - example.features.feature['image/source_id'].bytes_list.value, - [save_path]) - self._assertProtoEqual( - example.features.feature['image/format'].bytes_list.value, ['png']) - self._assertProtoEqual( - example.features.feature['image/object/bbox/xmin'].float_list.value, - [0.25]) - self._assertProtoEqual( - example.features.feature['image/object/bbox/ymin'].float_list.value, - [0.25]) - self._assertProtoEqual( - example.features.feature['image/object/bbox/xmax'].float_list.value, - [0.75]) - self._assertProtoEqual( - example.features.feature['image/object/bbox/ymax'].float_list.value, - [0.75]) - self._assertProtoEqual( - example.features.feature['image/object/class/text'].bytes_list.value, - ['car']) - self._assertProtoEqual( - example.features.feature['image/object/class/label'].int64_list.value, - [1]) - self._assertProtoEqual( - example.features.feature['image/object/truncated'].float_list.value, - [1]) - self._assertProtoEqual( - example.features.feature['image/object/alpha'].float_list.value, - [2]) - self._assertProtoEqual(example.features.feature[ - 'image/object/3d_bbox/height'].float_list.value, [10]) - self._assertProtoEqual( - example.features.feature['image/object/3d_bbox/width'].float_list.value, - [11]) - self._assertProtoEqual(example.features.feature[ - 'image/object/3d_bbox/length'].float_list.value, [12]) - self._assertProtoEqual( - example.features.feature['image/object/3d_bbox/x'].float_list.value, - [13]) - self._assertProtoEqual( - example.features.feature['image/object/3d_bbox/y'].float_list.value, - [14]) - self._assertProtoEqual( - example.features.feature['image/object/3d_bbox/z'].float_list.value, - [15]) - self._assertProtoEqual( - example.features.feature['image/object/3d_bbox/rot_y'].float_list.value, - [4]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_oid_tf_record.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_oid_tf_record.py deleted file mode 100644 index 
26d9699c8ee4ec17ef329f91e0df31ca79d50c99..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_oid_tf_record.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Creates TFRecords of Open Images dataset for object detection. - -Example usage: - python object_detection/dataset_tools/create_oid_tf_record.py \ - --input_box_annotations_csv=/path/to/input/annotations-human-bbox.csv \ - --input_image_label_annotations_csv=/path/to/input/annotations-label.csv \ - --input_images_directory=/path/to/input/image_pixels_directory \ - --input_label_map=/path/to/input/labels_bbox_545.labelmap \ - --output_tf_record_path_prefix=/path/to/output/prefix.tfrecord - -CSVs with bounding box annotations and image metadata (including the image URLs) -can be downloaded from the Open Images GitHub repository: -https://github.com/openimages/dataset - -This script will include every image found in the input_images_directory in the -output TFRecord, even if the image has no corresponding bounding box annotations -in the input_box_annotations_csv. If input_image_label_annotations_csv is -specified, it will add image-level labels as well. Note that the information of -whether a label is positively or negatively verified is NOT added to the -TFRecord. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import contextlib2 -import pandas as pd -import tensorflow as tf - -from object_detection.dataset_tools import oid_tfrecord_creation -from object_detection.dataset_tools import tf_record_creation_util -from object_detection.utils import label_map_util - -tf.flags.DEFINE_string('input_box_annotations_csv', None, - 'Path to CSV containing image bounding box annotations') -tf.flags.DEFINE_string('input_images_directory', None, - 'Directory containing the image pixels ' - 'downloaded from the OpenImages GitHub repository.') -tf.flags.DEFINE_string('input_image_label_annotations_csv', None, - 'Path to CSV containing image-level label annotations') -tf.flags.DEFINE_string('input_label_map', None, 'Path to the label map proto') -tf.flags.DEFINE_string( - 'output_tf_record_path_prefix', None, - 'Path to the output TFRecord.
The shard index and the number of shards ' - 'will be appended for each output shard.') -tf.flags.DEFINE_integer('num_shards', 100, 'Number of TFRecord shards') - -FLAGS = tf.flags.FLAGS - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - - required_flags = [ - 'input_box_annotations_csv', 'input_images_directory', 'input_label_map', - 'output_tf_record_path_prefix' - ] - for flag_name in required_flags: - if not getattr(FLAGS, flag_name): - raise ValueError('Flag --{} is required'.format(flag_name)) - - label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map) - all_box_annotations = pd.read_csv(FLAGS.input_box_annotations_csv) - if FLAGS.input_image_label_annotations_csv: - all_label_annotations = pd.read_csv(FLAGS.input_image_label_annotations_csv) - all_label_annotations.rename( - columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True) - else: - all_label_annotations = None - all_images = tf.gfile.Glob( - os.path.join(FLAGS.input_images_directory, '*.jpg')) - all_image_ids = [os.path.splitext(os.path.basename(v))[0] for v in all_images] - all_image_ids = pd.DataFrame({'ImageID': all_image_ids}) - all_annotations = pd.concat( - [all_box_annotations, all_image_ids, all_label_annotations]) - - tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids)) - - with contextlib2.ExitStack() as tf_record_close_stack: - output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords( - tf_record_close_stack, FLAGS.output_tf_record_path_prefix, - FLAGS.num_shards) - - for counter, image_data in enumerate(all_annotations.groupby('ImageID')): - tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 1000, - counter) - - image_id, image_annotations = image_data - # In OID image file names are formed by appending ".jpg" to the image ID. - image_path = os.path.join(FLAGS.input_images_directory, image_id + '.jpg') - with tf.gfile.Open(image_path) as image_file: - encoded_image = image_file.read() - - tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame( - image_annotations, label_map, encoded_image) - if tf_example: - shard_idx = int(image_id, 16) % FLAGS.num_shards - output_tfrecords[shard_idx].write(tf_example.SerializeToString()) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_pascal_tf_record.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_pascal_tf_record.py deleted file mode 100644 index 813071c924ae457453190710181be2d702b439ce..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_pascal_tf_record.py +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Convert raw PASCAL dataset to TFRecord for object_detection. 
- -Example usage: - python object_detection/dataset_tools/create_pascal_tf_record.py \ - --data_dir=/home/user/VOCdevkit \ - --year=VOC2012 \ - --output_path=/home/user/pascal.record -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import hashlib -import io -import logging -import os - -from lxml import etree -import PIL.Image -import tensorflow as tf - -from object_detection.utils import dataset_util -from object_detection.utils import label_map_util - - -flags = tf.app.flags -flags.DEFINE_string('data_dir', '', 'Root directory of the raw PASCAL VOC dataset.') -flags.DEFINE_string('set', 'train', 'Convert training set, validation set or ' - 'merged set.') -flags.DEFINE_string('annotations_dir', 'Annotations', - '(Relative) path to annotations directory.') -flags.DEFINE_string('year', 'VOC2007', 'Desired challenge year.') -flags.DEFINE_string('output_path', '', 'Path to output TFRecord') -flags.DEFINE_string('label_map_path', 'data/pascal_label_map.pbtxt', - 'Path to label map proto') -flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to ignore ' - 'difficult instances') -FLAGS = flags.FLAGS - -SETS = ['train', 'val', 'trainval', 'test'] -YEARS = ['VOC2007', 'VOC2012', 'merged'] - - -def dict_to_tf_example(data, - dataset_directory, - label_map_dict, - ignore_difficult_instances=False, - image_subdirectory='JPEGImages'): - """Convert XML derived dict to tf.Example proto. - - Notice that this function normalizes the bounding box coordinates provided - by the raw data. - - Args: - data: dict holding PASCAL XML fields for a single image (obtained by - running dataset_util.recursive_parse_xml_to_dict) - dataset_directory: Path to root directory holding PASCAL dataset - label_map_dict: A map from string label names to integer ids. - ignore_difficult_instances: Whether to skip difficult instances in the - dataset (default: False). - image_subdirectory: String specifying subdirectory within the - PASCAL dataset directory holding the actual image data. - - Returns: - example: The converted tf.Example.
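A minimal sketch of the `data` dict this function consumes; the structure mirrors what dataset_util.recursive_parse_xml_to_dict yields for a PASCAL annotation (leaf values arrive as strings), and every value here is hypothetical:

data = {
    'folder': 'VOC2012',            # hypothetical
    'filename': '2008_000001.jpg',  # hypothetical
    'size': {'width': '500', 'height': '375'},
    'object': [{
        'name': 'person',
        'difficult': '0',
        'truncated': '0',
        'pose': 'Frontal',
        'bndbox': {'xmin': '53', 'ymin': '87', 'xmax': '471', 'ymax': '320'},
    }],
}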
- - Raises: - ValueError: if the image pointed to by data['filename'] is not a valid JPEG - """ - img_path = os.path.join(data['folder'], image_subdirectory, data['filename']) - full_path = os.path.join(dataset_directory, img_path) - with tf.gfile.GFile(full_path, 'rb') as fid: - encoded_jpg = fid.read() - encoded_jpg_io = io.BytesIO(encoded_jpg) - image = PIL.Image.open(encoded_jpg_io) - if image.format != 'JPEG': - raise ValueError('Image format not JPEG') - key = hashlib.sha256(encoded_jpg).hexdigest() - - width = int(data['size']['width']) - height = int(data['size']['height']) - - xmin = [] - ymin = [] - xmax = [] - ymax = [] - classes = [] - classes_text = [] - truncated = [] - poses = [] - difficult_obj = [] - if 'object' in data: - for obj in data['object']: - difficult = bool(int(obj['difficult'])) - if ignore_difficult_instances and difficult: - continue - - difficult_obj.append(int(difficult)) - - xmin.append(float(obj['bndbox']['xmin']) / width) - ymin.append(float(obj['bndbox']['ymin']) / height) - xmax.append(float(obj['bndbox']['xmax']) / width) - ymax.append(float(obj['bndbox']['ymax']) / height) - classes_text.append(obj['name'].encode('utf8')) - classes.append(label_map_dict[obj['name']]) - truncated.append(int(obj['truncated'])) - poses.append(obj['pose'].encode('utf8')) - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/height': dataset_util.int64_feature(height), - 'image/width': dataset_util.int64_feature(width), - 'image/filename': dataset_util.bytes_feature( - data['filename'].encode('utf8')), - 'image/source_id': dataset_util.bytes_feature( - data['filename'].encode('utf8')), - 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), - 'image/encoded': dataset_util.bytes_feature(encoded_jpg), - 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), - 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), - 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), - 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), - 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), - 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), - 'image/object/class/label': dataset_util.int64_list_feature(classes), - 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), - 'image/object/truncated': dataset_util.int64_list_feature(truncated), - 'image/object/view': dataset_util.bytes_list_feature(poses), - })) - return example - - -def main(_): - if FLAGS.set not in SETS: - raise ValueError('set must be in : {}'.format(SETS)) - if FLAGS.year not in YEARS: - raise ValueError('year must be in : {}'.format(YEARS)) - - data_dir = FLAGS.data_dir - years = ['VOC2007', 'VOC2012'] - if FLAGS.year != 'merged': - years = [FLAGS.year] - - writer = tf.python_io.TFRecordWriter(FLAGS.output_path) - - label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path) - - for year in years: - logging.info('Reading from PASCAL %s dataset.', year) - examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main', - 'aeroplane_' + FLAGS.set + '.txt') - annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir) - examples_list = dataset_util.read_examples_list(examples_path) - for idx, example in enumerate(examples_list): - if idx % 100 == 0: - logging.info('On image %d of %d', idx, len(examples_list)) - path = os.path.join(annotations_dir, example + '.xml') - with tf.gfile.GFile(path, 'r') as fid: - xml_str = fid.read() - xml = etree.fromstring(xml_str) - 
data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation'] - - tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict, - FLAGS.ignore_difficult_instances) - writer.write(tf_example.SerializeToString()) - - writer.close() - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_pascal_tf_record_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_pascal_tf_record_test.py deleted file mode 100644 index 66929bd466a3db5acc9b79460993486c1cd10f34..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_pascal_tf_record_test.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Test for create_pascal_tf_record.py.""" - -import os - -import numpy as np -import PIL.Image -import tensorflow as tf - -from object_detection.dataset_tools import create_pascal_tf_record - - -class CreatePascalTFRecordTest(tf.test.TestCase): - - def _assertProtoEqual(self, proto_field, expectation): - """Helper function to assert if a proto field equals some value. - - Args: - proto_field: The protobuf field to compare. - expectation: The expected value of the protobuf field. 
- """ - proto_list = [p for p in proto_field] - self.assertListEqual(proto_list, expectation) - - def test_dict_to_tf_example(self): - image_file_name = 'tmp_image.jpg' - image_data = np.random.rand(256, 256, 3) - save_path = os.path.join(self.get_temp_dir(), image_file_name) - image = PIL.Image.fromarray(image_data, 'RGB') - image.save(save_path) - - data = { - 'folder': '', - 'filename': image_file_name, - 'size': { - 'height': 256, - 'width': 256, - }, - 'object': [ - { - 'difficult': 1, - 'bndbox': { - 'xmin': 64, - 'ymin': 64, - 'xmax': 192, - 'ymax': 192, - }, - 'name': 'person', - 'truncated': 0, - 'pose': '', - }, - ], - } - - label_map_dict = { - 'background': 0, - 'person': 1, - 'notperson': 2, - } - - example = create_pascal_tf_record.dict_to_tf_example( - data, self.get_temp_dir(), label_map_dict, image_subdirectory='') - self._assertProtoEqual( - example.features.feature['image/height'].int64_list.value, [256]) - self._assertProtoEqual( - example.features.feature['image/width'].int64_list.value, [256]) - self._assertProtoEqual( - example.features.feature['image/filename'].bytes_list.value, - [image_file_name]) - self._assertProtoEqual( - example.features.feature['image/source_id'].bytes_list.value, - [image_file_name]) - self._assertProtoEqual( - example.features.feature['image/format'].bytes_list.value, ['jpeg']) - self._assertProtoEqual( - example.features.feature['image/object/bbox/xmin'].float_list.value, - [0.25]) - self._assertProtoEqual( - example.features.feature['image/object/bbox/ymin'].float_list.value, - [0.25]) - self._assertProtoEqual( - example.features.feature['image/object/bbox/xmax'].float_list.value, - [0.75]) - self._assertProtoEqual( - example.features.feature['image/object/bbox/ymax'].float_list.value, - [0.75]) - self._assertProtoEqual( - example.features.feature['image/object/class/text'].bytes_list.value, - ['person']) - self._assertProtoEqual( - example.features.feature['image/object/class/label'].int64_list.value, - [1]) - self._assertProtoEqual( - example.features.feature['image/object/difficult'].int64_list.value, - [1]) - self._assertProtoEqual( - example.features.feature['image/object/truncated'].int64_list.value, - [0]) - self._assertProtoEqual( - example.features.feature['image/object/view'].bytes_list.value, ['']) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_pet_tf_record.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_pet_tf_record.py deleted file mode 100644 index 9b3b55c60009fb14d7384097d8c7fad02c5d345a..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/create_pet_tf_record.py +++ /dev/null @@ -1,318 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -r"""Convert the Oxford pet dataset to TFRecord for object_detection. - -See: O. M. Parkhi, A. Vedaldi, A. Zisserman, C. V. Jawahar - Cats and Dogs - IEEE Conference on Computer Vision and Pattern Recognition, 2012 - http://www.robots.ox.ac.uk/~vgg/data/pets/ - -Example usage: - python object_detection/dataset_tools/create_pet_tf_record.py \ - --data_dir=/home/user/pet \ - --output_dir=/home/user/pet/output -""" - -import hashlib -import io -import logging -import os -import random -import re - -import contextlib2 -from lxml import etree -import numpy as np -import PIL.Image -import tensorflow as tf - -from object_detection.dataset_tools import tf_record_creation_util -from object_detection.utils import dataset_util -from object_detection.utils import label_map_util - -flags = tf.app.flags -flags.DEFINE_string('data_dir', '', 'Root directory to raw pet dataset.') -flags.DEFINE_string('output_dir', '', 'Path to directory to output TFRecords.') -flags.DEFINE_string('label_map_path', 'data/pet_label_map.pbtxt', - 'Path to label map proto') -flags.DEFINE_boolean('faces_only', True, 'If True, generates bounding boxes ' - 'for pet faces. Otherwise generates bounding boxes (as ' - 'well as segmentations for full pet bodies). Note that ' - 'in the latter case, the resulting files are much larger.') -flags.DEFINE_string('mask_type', 'png', 'How to represent instance ' - 'segmentation masks. Options are "png" or "numerical".') -flags.DEFINE_integer('num_shards', 10, 'Number of TFRecord shards') - -FLAGS = flags.FLAGS - - -def get_class_name_from_filename(file_name): - """Gets the class name from a file. - - Args: - file_name: The file name to get the class name from. - ie. "american_pit_bull_terrier_105.jpg" - - Returns: - A string of the class name. - """ - match = re.match(r'([A-Za-z_]+)(_[0-9]+\.jpg)', file_name, re.I) - return match.groups()[0] - - -def dict_to_tf_example(data, - mask_path, - label_map_dict, - image_subdirectory, - ignore_difficult_instances=False, - faces_only=True, - mask_type='png'): - """Convert XML derived dict to tf.Example proto. - - Notice that this function normalizes the bounding box coordinates provided - by the raw data. - - Args: - data: dict holding PASCAL XML fields for a single image (obtained by - running dataset_util.recursive_parse_xml_to_dict) - mask_path: String path to PNG encoded mask. - label_map_dict: A map from string label names to integers ids. - image_subdirectory: String specifying subdirectory within the - Pascal dataset directory holding the actual image data. - ignore_difficult_instances: Whether to skip difficult instances in the - dataset (default: False). - faces_only: If True, generates bounding boxes for pet faces. Otherwise - generates bounding boxes (as well as segmentations for full pet bodies). - mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to - smaller file sizes. - - Returns: - example: The converted tf.Example. 
- - Raises: - ValueError: if the image pointed to by data['filename'] is not a valid JPEG - """ - img_path = os.path.join(image_subdirectory, data['filename']) - with tf.gfile.GFile(img_path, 'rb') as fid: - encoded_jpg = fid.read() - encoded_jpg_io = io.BytesIO(encoded_jpg) - image = PIL.Image.open(encoded_jpg_io) - if image.format != 'JPEG': - raise ValueError('Image format not JPEG') - key = hashlib.sha256(encoded_jpg).hexdigest() - - with tf.gfile.GFile(mask_path, 'rb') as fid: - encoded_mask_png = fid.read() - encoded_png_io = io.BytesIO(encoded_mask_png) - mask = PIL.Image.open(encoded_png_io) - if mask.format != 'PNG': - raise ValueError('Mask format not PNG') - - mask_np = np.asarray(mask) - nonbackground_indices_x = np.any(mask_np != 2, axis=0) - nonbackground_indices_y = np.any(mask_np != 2, axis=1) - nonzero_x_indices = np.where(nonbackground_indices_x) - nonzero_y_indices = np.where(nonbackground_indices_y) - - width = int(data['size']['width']) - height = int(data['size']['height']) - - xmins = [] - ymins = [] - xmaxs = [] - ymaxs = [] - classes = [] - classes_text = [] - truncated = [] - poses = [] - difficult_obj = [] - masks = [] - if 'object' in data: - for obj in data['object']: - difficult = bool(int(obj['difficult'])) - if ignore_difficult_instances and difficult: - continue - difficult_obj.append(int(difficult)) - - if faces_only: - xmin = float(obj['bndbox']['xmin']) - xmax = float(obj['bndbox']['xmax']) - ymin = float(obj['bndbox']['ymin']) - ymax = float(obj['bndbox']['ymax']) - else: - xmin = float(np.min(nonzero_x_indices)) - xmax = float(np.max(nonzero_x_indices)) - ymin = float(np.min(nonzero_y_indices)) - ymax = float(np.max(nonzero_y_indices)) - - xmins.append(xmin / width) - ymins.append(ymin / height) - xmaxs.append(xmax / width) - ymaxs.append(ymax / height) - class_name = get_class_name_from_filename(data['filename']) - classes_text.append(class_name.encode('utf8')) - classes.append(label_map_dict[class_name]) - truncated.append(int(obj['truncated'])) - poses.append(obj['pose'].encode('utf8')) - if not faces_only: - mask_remapped = (mask_np != 2).astype(np.uint8) - masks.append(mask_remapped) - - feature_dict = { - 'image/height': dataset_util.int64_feature(height), - 'image/width': dataset_util.int64_feature(width), - 'image/filename': dataset_util.bytes_feature( - data['filename'].encode('utf8')), - 'image/source_id': dataset_util.bytes_feature( - data['filename'].encode('utf8')), - 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), - 'image/encoded': dataset_util.bytes_feature(encoded_jpg), - 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), - 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), - 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), - 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), - 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), - 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), - 'image/object/class/label': dataset_util.int64_list_feature(classes), - 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), - 'image/object/truncated': dataset_util.int64_list_feature(truncated), - 'image/object/view': dataset_util.bytes_list_feature(poses), - } - if not faces_only: - if mask_type == 'numerical': - mask_stack = np.stack(masks).astype(np.float32) - masks_flattened = np.reshape(mask_stack, [-1]) - feature_dict['image/object/mask'] = ( - dataset_util.float_list_feature(masks_flattened.tolist())) - elif 
mask_type == 'png': - encoded_mask_png_list = [] - for mask in masks: - img = PIL.Image.fromarray(mask) - output = io.BytesIO() - img.save(output, format='PNG') - encoded_mask_png_list.append(output.getvalue()) - feature_dict['image/object/mask'] = ( - dataset_util.bytes_list_feature(encoded_mask_png_list)) - - example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) - return example - - -def create_tf_record(output_filename, - num_shards, - label_map_dict, - annotations_dir, - image_dir, - examples, - faces_only=True, - mask_type='png'): - """Creates a TFRecord file from examples. - - Args: - output_filename: Path to where output file is saved. - num_shards: Number of shards for output file. - label_map_dict: The label map dictionary. - annotations_dir: Directory where annotation files are stored. - image_dir: Directory where image files are stored. - examples: Examples to parse and save to tf record. - faces_only: If True, generates bounding boxes for pet faces. Otherwise - generates bounding boxes (as well as segmentations for full pet bodies). - mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to - smaller file sizes. - """ - with contextlib2.ExitStack() as tf_record_close_stack: - output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords( - tf_record_close_stack, output_filename, num_shards) - for idx, example in enumerate(examples): - if idx % 100 == 0: - logging.info('On image %d of %d', idx, len(examples)) - xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml') - mask_path = os.path.join(annotations_dir, 'trimaps', example + '.png') - - if not os.path.exists(xml_path): - logging.warning('Could not find %s, ignoring example.', xml_path) - continue - with tf.gfile.GFile(xml_path, 'r') as fid: - xml_str = fid.read() - xml = etree.fromstring(xml_str) - data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation'] - - try: - tf_example = dict_to_tf_example( - data, - mask_path, - label_map_dict, - image_dir, - faces_only=faces_only, - mask_type=mask_type) - if tf_example: - shard_idx = idx % num_shards - output_tfrecords[shard_idx].write(tf_example.SerializeToString()) - except ValueError: - logging.warning('Invalid example: %s, ignoring.', xml_path) - - -# TODO(derekjchow): Add test for pet/PASCAL main files. -def main(_): - data_dir = FLAGS.data_dir - label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path) - - logging.info('Reading from Pet dataset.') - image_dir = os.path.join(data_dir, 'images') - annotations_dir = os.path.join(data_dir, 'annotations') - examples_path = os.path.join(annotations_dir, 'trainval.txt') - examples_list = dataset_util.read_examples_list(examples_path) - - # Test images are not included in the downloaded data set, so we shall perform - # our own split. 
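The create_tf_record helper above writes examples round-robin into shards and leans on contextlib2.ExitStack so every shard writer is closed when the block exits. A minimal usage sketch of that pattern, assuming the TF 1.x API used throughout this file and a hypothetical output prefix:

```python
import contextlib2

from object_detection.dataset_tools import tf_record_creation_util

# Hypothetical prefix; shards are named <prefix>-00000-of-00003 and so on.
output_prefix = '/tmp/pets.record'
num_shards = 3

with contextlib2.ExitStack() as close_stack:
    writers = tf_record_creation_util.open_sharded_output_tfrecords(
        close_stack, output_prefix, num_shards)
    for idx in range(9):
        # Round-robin over shards, mirroring `idx % num_shards` above.
        writers[idx % num_shards].write(b'serialized-example')
# All shard writers are closed here by the exit stack.
```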
- random.seed(42) - random.shuffle(examples_list) - num_examples = len(examples_list) - num_train = int(0.7 * num_examples) - train_examples = examples_list[:num_train] - val_examples = examples_list[num_train:] - logging.info('%d training and %d validation examples.', - len(train_examples), len(val_examples)) - - train_output_path = os.path.join(FLAGS.output_dir, 'pet_faces_train.record') - val_output_path = os.path.join(FLAGS.output_dir, 'pet_faces_val.record') - if not FLAGS.faces_only: - train_output_path = os.path.join(FLAGS.output_dir, - 'pets_fullbody_with_masks_train.record') - val_output_path = os.path.join(FLAGS.output_dir, - 'pets_fullbody_with_masks_val.record') - create_tf_record( - train_output_path, - FLAGS.num_shards, - label_map_dict, - annotations_dir, - image_dir, - train_examples, - faces_only=FLAGS.faces_only, - mask_type=FLAGS.mask_type) - create_tf_record( - val_output_path, - FLAGS.num_shards, - label_map_dict, - annotations_dir, - image_dir, - val_examples, - faces_only=FLAGS.faces_only, - mask_type=FLAGS.mask_type) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/download_and_preprocess_mscoco.sh b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/download_and_preprocess_mscoco.sh deleted file mode 100644 index 843ba86938d35eed18dd6f7968ea87c90551fc13..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/download_and_preprocess_mscoco.sh +++ /dev/null @@ -1,106 +0,0 @@ -#!/bin/bash -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# Script to download and preprocess the MSCOCO data set for detection. -# -# The outputs of this script are TFRecord files containing serialized -# tf.Example protocol buffers. See create_coco_tf_record.py for details of how -# the tf.Example protocol buffers are constructed and see -# http://cocodataset.org/#overview for an overview of the dataset. -# -# usage: -# bash object_detection/dataset_tools/download_and_preprocess_mscoco.sh \ -# /tmp/mscoco -set -e - -if [ -z "$1" ]; then - echo "usage download_and_preprocess_mscoco.sh [data dir]" - exit -fi - -if [ "$(uname)" == "Darwin" ]; then - UNZIP="tar -xf" -else - UNZIP="unzip -nq" -fi - -# Create the output directories. -OUTPUT_DIR="${1%/}" -SCRATCH_DIR="${OUTPUT_DIR}/raw-data" -mkdir -p "${OUTPUT_DIR}" -mkdir -p "${SCRATCH_DIR}" -CURRENT_DIR=$(pwd) - -# Helper function to download and unpack a .zip file. -function download_and_unzip() { - local BASE_URL=${1} - local FILENAME=${2} - - if [ ! -f ${FILENAME} ]; then - echo "Downloading ${FILENAME} to $(pwd)" - wget -nd -c "${BASE_URL}/${FILENAME}" - else - echo "Skipping download of ${FILENAME}" - fi - echo "Unzipping ${FILENAME}" - ${UNZIP} ${FILENAME} -} - -cd ${SCRATCH_DIR} - -# Download the images. 
-BASE_IMAGE_URL="http://images.cocodataset.org/zips" - -TRAIN_IMAGE_FILE="train2017.zip" -download_and_unzip ${BASE_IMAGE_URL} ${TRAIN_IMAGE_FILE} -TRAIN_IMAGE_DIR="${SCRATCH_DIR}/train2017" - -VAL_IMAGE_FILE="val2017.zip" -download_and_unzip ${BASE_IMAGE_URL} ${VAL_IMAGE_FILE} -VAL_IMAGE_DIR="${SCRATCH_DIR}/val2017" - -TEST_IMAGE_FILE="test2017.zip" -download_and_unzip ${BASE_IMAGE_URL} ${TEST_IMAGE_FILE} -TEST_IMAGE_DIR="${SCRATCH_DIR}/test2017" - -# Download the annotations. -BASE_INSTANCES_URL="http://images.cocodataset.org/annotations" -INSTANCES_FILE="annotations_trainval2017.zip" -download_and_unzip ${BASE_INSTANCES_URL} ${INSTANCES_FILE} - -TRAIN_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/instances_train2017.json" -VAL_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/instances_val2017.json" - -# Download the test image info. -BASE_IMAGE_INFO_URL="http://images.cocodataset.org/annotations" -IMAGE_INFO_FILE="image_info_test2017.zip" -download_and_unzip ${BASE_IMAGE_INFO_URL} ${IMAGE_INFO_FILE} - -TESTDEV_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/image_info_test-dev2017.json" - -# Build TFRecords of the image data. -cd "${CURRENT_DIR}" -python object_detection/dataset_tools/create_coco_tf_record.py \ - --logtostderr \ - --include_masks \ - --train_image_dir="${TRAIN_IMAGE_DIR}" \ - --val_image_dir="${VAL_IMAGE_DIR}" \ - --test_image_dir="${TEST_IMAGE_DIR}" \ - --train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \ - --val_annotations_file="${VAL_ANNOTATIONS_FILE}" \ - --testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \ - --output_dir="${OUTPUT_DIR}" - diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/oid_hierarchical_labels_expansion.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/oid_hierarchical_labels_expansion.py deleted file mode 100644 index 6c00ac429102841ccff77de78e5bf06a0d3d6a5a..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/oid_hierarchical_labels_expansion.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""A class and executable to hierarchically expand image-level labels and boxes. - -Example usage: - ./hierarchical_labels_expansion <hierarchy json file> <input annotations file> <output annotations file> [optional]labels_file -""" - -import json -import sys - - -def _update_dict(initial_dict, update): - """Updates dictionary with update content. - - Args: - initial_dict: initial dictionary. - update: dictionary whose entries are merged into initial_dict. - """ - - for key, value_list in update.iteritems(): - if key in initial_dict: - initial_dict[key].extend(value_list) - else: - initial_dict[key] = value_list - - -def _build_plain_hierarchy(hierarchy, skip_root=False): - """Expands tree hierarchy representation to parent-child dictionary. - - Args: - hierarchy: labels hierarchy, parsed from a JSON file into a dictionary.
- skip_root: if True, skips the root node (used when all classes in the - hierarchy are collected under a virtual root node). - - Returns: - keyed_parent - dictionary mapping each parent to all of its child nodes. - keyed_child - dictionary mapping each child to all of its parent nodes. - children - all children of the current node. - """ - all_children = [] - all_keyed_parent = {} - all_keyed_child = {} - if 'Subcategory' in hierarchy: - for node in hierarchy['Subcategory']: - keyed_parent, keyed_child, children = _build_plain_hierarchy(node) - # Update is not done through dict.update() since some children have - # multiple parents in the hierarchy. - _update_dict(all_keyed_parent, keyed_parent) - _update_dict(all_keyed_child, keyed_child) - all_children.extend(children) - - if not skip_root: - all_keyed_parent[hierarchy['LabelName']] = all_children - all_children = [hierarchy['LabelName']] + all_children - for child, _ in all_keyed_child.iteritems(): - all_keyed_child[child].append(hierarchy['LabelName']) - all_keyed_child[hierarchy['LabelName']] = [] - - return all_keyed_parent, all_keyed_child, all_children - - -class OIDHierarchicalLabelsExpansion(object): - """Main class to perform hierarchical expansion of labels.""" - - def __init__(self, hierarchy): - """Constructor. - - Args: - hierarchy: labels hierarchy, parsed from a JSON file into a dictionary. - """ - - self._hierarchy_keyed_parent, self._hierarchy_keyed_child, _ = ( - _build_plain_hierarchy(hierarchy, skip_root=True)) - - def expand_boxes_from_csv(self, csv_row): - """Expands a row containing bounding boxes from CSV file. - - Args: - csv_row: a single row of Open Images released groundtruth file. - - Returns: - a list of strings (including the initial row) corresponding to the ground - truth expanded to multiple annotations for evaluation with the Open Images - Challenge 2018 metric. - """ - # Row header is expected to be exactly: - # ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded, - # IsTruncated,IsGroupOf,IsDepiction,IsInside - cvs_row_splited = csv_row.split(',') - assert len(cvs_row_splited) == 13 - result = [csv_row] - assert cvs_row_splited[2] in self._hierarchy_keyed_child - parent_nodes = self._hierarchy_keyed_child[cvs_row_splited[2]] - for parent_node in parent_nodes: - cvs_row_splited[2] = parent_node - result.append(','.join(cvs_row_splited)) - return result - - def expand_labels_from_csv(self, csv_row): - """Expands a row containing an image-level label from CSV file. - - Args: - csv_row: a single row of Open Images released groundtruth file. - - Returns: - a list of strings (including the initial row) corresponding to the ground - truth expanded to multiple annotations for evaluation with the Open Images - Challenge 2018 metric. - """ - # Row header is expected to be exactly: - # ImageID,Source,LabelName,Confidence - cvs_row_splited = csv_row.split(',') - assert len(cvs_row_splited) == 4 - result = [csv_row] - if int(cvs_row_splited[3]) == 1: - assert cvs_row_splited[2] in self._hierarchy_keyed_child - parent_nodes = self._hierarchy_keyed_child[cvs_row_splited[2]] - for parent_node in parent_nodes: - cvs_row_splited[2] = parent_node - result.append(','.join(cvs_row_splited)) - else: - assert cvs_row_splited[2] in self._hierarchy_keyed_parent - child_nodes = self._hierarchy_keyed_parent[cvs_row_splited[2]] - for child_node in child_nodes: - cvs_row_splited[2] = child_node - result.append(','.join(cvs_row_splited)) - return result - - -def main(argv): - - if len(argv) < 4: - print """Missing arguments.
\nUsage: ./hierarchical_labels_expansion <hierarchy json file> <input annotations file> <output annotations file> [optional]labels_file""" - return - with open(argv[1]) as f: - hierarchy = json.load(f) - expansion_generator = OIDHierarchicalLabelsExpansion(hierarchy) - labels_file = False - if len(argv) > 4 and argv[4] == 'labels_file': - labels_file = True - with open(argv[2], 'r') as source: - with open(argv[3], 'w') as target: - header_skipped = False - for line in source: - if not header_skipped: - header_skipped = True - continue - if labels_file: - expanded_lines = expansion_generator.expand_labels_from_csv(line) - else: - expanded_lines = expansion_generator.expand_boxes_from_csv(line) - target.writelines(expanded_lines) - - -if __name__ == '__main__': - main(sys.argv) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/oid_hierarchical_labels_expansion_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/oid_hierarchical_labels_expansion_test.py deleted file mode 100644 index cd62b9cff28b052dba542a354eb7c87a8f332f8b..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/oid_hierarchical_labels_expansion_test.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
-# ============================================================================== -"""Tests for the OpenImages label expansion (OIDHierarchicalLabelsExpansion).""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from object_detection.dataset_tools import oid_hierarchical_labels_expansion - - -def create_test_data(): - hierarchy = { - 'LabelName': - 'a', - 'Subcategory': [{ - 'LabelName': 'b' - }, { - 'LabelName': 'c', - 'Subcategory': [{ - 'LabelName': 'd' - }, { - 'LabelName': 'e' - }] - }, { - 'LabelName': 'f', - 'Subcategory': [{ - 'LabelName': 'd' - },] - }] - } - bbox_rows = [ - '123,xclick,b,1,0.1,0.2,0.1,0.2,1,1,0,0,0', - '123,xclick,d,1,0.2,0.3,0.1,0.2,1,1,0,0,0' - ] - label_rows = [ - '123,verification,b,0', '123,verification,c,0', '124,verification,d,1' - ] - return hierarchy, bbox_rows, label_rows - - -class HierarchicalLabelsExpansionTest(tf.test.TestCase): - - def test_bbox_expansion(self): - hierarchy, bbox_rows, _ = create_test_data() - expansion_generator = ( - oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion( - hierarchy)) - all_result_rows = [] - for row in bbox_rows: - all_result_rows.extend(expansion_generator.expand_boxes_from_csv(row)) - self.assertItemsEqual([ - '123,xclick,b,1,0.1,0.2,0.1,0.2,1,1,0,0,0', - '123,xclick,d,1,0.2,0.3,0.1,0.2,1,1,0,0,0', - '123,xclick,f,1,0.2,0.3,0.1,0.2,1,1,0,0,0', - '123,xclick,c,1,0.2,0.3,0.1,0.2,1,1,0,0,0' - ], all_result_rows) - - def test_labels_expansion(self): - hierarchy, _, label_rows = create_test_data() - expansion_generator = ( - oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion( - hierarchy)) - all_result_rows = [] - for row in label_rows: - all_result_rows.extend(expansion_generator.expand_labels_from_csv(row)) - self.assertItemsEqual([ - '123,verification,b,0', '123,verification,c,0', '123,verification,d,0', - '123,verification,e,0', '124,verification,d,1', '124,verification,f,1', - '124,verification,c,1' - ], all_result_rows) - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/oid_tfrecord_creation.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/oid_tfrecord_creation.py deleted file mode 100644 index 706280985d98bf4caaef8f9a2e30735913f33420..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/oid_tfrecord_creation.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Utilities for creating TFRecords of TF examples for the Open Images dataset. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from object_detection.core import standard_fields -from object_detection.utils import dataset_util - - -def tf_example_from_annotations_data_frame(annotations_data_frame, label_map, - encoded_image): - """Populates a TF Example message with image annotations from a data frame. - - Args: - annotations_data_frame: Data frame containing the annotations for a single - image. - label_map: String to integer label map. - encoded_image: The encoded image string - - Returns: - The populated TF Example, if the label of at least one object is present in - label_map. Otherwise, returns None. - """ - - filtered_data_frame = annotations_data_frame[ - annotations_data_frame.LabelName.isin(label_map)] - filtered_data_frame_boxes = filtered_data_frame[ - ~filtered_data_frame.YMin.isnull()] - filtered_data_frame_labels = filtered_data_frame[ - filtered_data_frame.YMin.isnull()] - image_id = annotations_data_frame.ImageID.iloc[0] - - feature_map = { - standard_fields.TfExampleFields.object_bbox_ymin: - dataset_util.float_list_feature( - filtered_data_frame_boxes.YMin.as_matrix()), - standard_fields.TfExampleFields.object_bbox_xmin: - dataset_util.float_list_feature( - filtered_data_frame_boxes.XMin.as_matrix()), - standard_fields.TfExampleFields.object_bbox_ymax: - dataset_util.float_list_feature( - filtered_data_frame_boxes.YMax.as_matrix()), - standard_fields.TfExampleFields.object_bbox_xmax: - dataset_util.float_list_feature( - filtered_data_frame_boxes.XMax.as_matrix()), - standard_fields.TfExampleFields.object_class_text: - dataset_util.bytes_list_feature( - filtered_data_frame_boxes.LabelName.as_matrix()), - standard_fields.TfExampleFields.object_class_label: - dataset_util.int64_list_feature( - filtered_data_frame_boxes.LabelName.map(lambda x: label_map[x]) - .as_matrix()), - standard_fields.TfExampleFields.filename: - dataset_util.bytes_feature('{}.jpg'.format(image_id)), - standard_fields.TfExampleFields.source_id: - dataset_util.bytes_feature(image_id), - standard_fields.TfExampleFields.image_encoded: - dataset_util.bytes_feature(encoded_image), - } - - if 'IsGroupOf' in filtered_data_frame.columns: - feature_map[standard_fields.TfExampleFields. - object_group_of] = dataset_util.int64_list_feature( - filtered_data_frame_boxes.IsGroupOf.as_matrix().astype(int)) - if 'IsOccluded' in filtered_data_frame.columns: - feature_map[standard_fields.TfExampleFields. - object_occluded] = dataset_util.int64_list_feature( - filtered_data_frame_boxes.IsOccluded.as_matrix().astype( - int)) - if 'IsTruncated' in filtered_data_frame.columns: - feature_map[standard_fields.TfExampleFields. - object_truncated] = dataset_util.int64_list_feature( - filtered_data_frame_boxes.IsTruncated.as_matrix().astype( - int)) - if 'IsDepiction' in filtered_data_frame.columns: - feature_map[standard_fields.TfExampleFields. - object_depiction] = dataset_util.int64_list_feature( - filtered_data_frame_boxes.IsDepiction.as_matrix().astype( - int)) - - if 'ConfidenceImageLabel' in filtered_data_frame_labels.columns: - feature_map[standard_fields.TfExampleFields. - image_class_label] = dataset_util.int64_list_feature( - filtered_data_frame_labels.LabelName.map( - lambda x: label_map[x]).as_matrix()) - feature_map[standard_fields.TfExampleFields. 
- image_class_text] = dataset_util.bytes_list_feature( - filtered_data_frame_labels.LabelName.as_matrix()) - return tf.train.Example(features=tf.train.Features(feature=feature_map)) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/oid_tfrecord_creation_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/oid_tfrecord_creation_test.py deleted file mode 100644 index 44ef852165ca115ba109e665f27352935386cb69..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/oid_tfrecord_creation_test.py +++ /dev/null @@ -1,200 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for oid_tfrecord_creation.py.""" - -import pandas as pd -import tensorflow as tf - -from object_detection.dataset_tools import oid_tfrecord_creation - - -def create_test_data(): - data = { - 'ImageID': ['i1', 'i1', 'i1', 'i1', 'i1', 'i2', 'i2'], - 'LabelName': ['a', 'a', 'b', 'b', 'c', 'b', 'c'], - 'YMin': [0.3, 0.6, 0.8, 0.1, None, 0.0, 0.0], - 'XMin': [0.1, 0.3, 0.7, 0.0, None, 0.1, 0.1], - 'XMax': [0.2, 0.3, 0.8, 0.5, None, 0.9, 0.9], - 'YMax': [0.3, 0.6, 1, 0.8, None, 0.8, 0.8], - 'IsOccluded': [0, 1, 1, 0, None, 0, 0], - 'IsTruncated': [0, 0, 0, 1, None, 0, 0], - 'IsGroupOf': [0, 0, 0, 0, None, 0, 1], - 'IsDepiction': [1, 0, 0, 0, None, 0, 0], - 'ConfidenceImageLabel': [None, None, None, None, 0, None, None], - } - df = pd.DataFrame(data=data) - label_map = {'a': 0, 'b': 1, 'c': 2} - return label_map, df - - -class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase): - - def test_simple(self): - label_map, df = create_test_data() - - tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame( - df[df.ImageID == 'i1'], label_map, 'encoded_image_test') - self.assertProtoEquals( - """ - features { - feature { - key: "image/encoded" - value { bytes_list { value: "encoded_image_test" } } } - feature { - key: "image/filename" - value { bytes_list { value: "i1.jpg" } } } - feature { - key: "image/object/bbox/ymin" - value { float_list { value: [0.3, 0.6, 0.8, 0.1] } } } - feature { - key: "image/object/bbox/xmin" - value { float_list { value: [0.1, 0.3, 0.7, 0.0] } } } - feature { - key: "image/object/bbox/ymax" - value { float_list { value: [0.3, 0.6, 1.0, 0.8] } } } - feature { - key: "image/object/bbox/xmax" - value { float_list { value: [0.2, 0.3, 0.8, 0.5] } } } - feature { - key: "image/object/class/label" - value { int64_list { value: [0, 0, 1, 1] } } } - feature { - key: "image/object/class/text" - value { bytes_list { value: ["a", "a", "b", "b"] } } } - feature { - key: "image/source_id" - value { bytes_list { value: "i1" } } } - feature { - key: "image/object/depiction" - value { int64_list { value: [1, 0, 0, 0] } } } - feature { - key: "image/object/group_of" - value { int64_list { value: [0, 0, 0, 0] } } } - feature
{ - key: "image/object/occluded" - value { int64_list { value: [0, 1, 1, 0] } } } - feature { - key: "image/object/truncated" - value { int64_list { value: [0, 0, 0, 1] } } } - feature { - key: "image/class/label" - value { int64_list { value: [2] } } } - feature { - key: "image/class/text" - value { bytes_list { value: ["c"] } } } } - """, tf_example) - - def test_no_attributes(self): - label_map, df = create_test_data() - - del df['IsDepiction'] - del df['IsGroupOf'] - del df['IsOccluded'] - del df['IsTruncated'] - del df['ConfidenceImageLabel'] - - tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame( - df[df.ImageID == 'i2'], label_map, 'encoded_image_test') - self.assertProtoEquals(""" - features { - feature { - key: "image/encoded" - value { bytes_list { value: "encoded_image_test" } } } - feature { - key: "image/filename" - value { bytes_list { value: "i2.jpg" } } } - feature { - key: "image/object/bbox/ymin" - value { float_list { value: [0.0, 0.0] } } } - feature { - key: "image/object/bbox/xmin" - value { float_list { value: [0.1, 0.1] } } } - feature { - key: "image/object/bbox/ymax" - value { float_list { value: [0.8, 0.8] } } } - feature { - key: "image/object/bbox/xmax" - value { float_list { value: [0.9, 0.9] } } } - feature { - key: "image/object/class/label" - value { int64_list { value: [1, 2] } } } - feature { - key: "image/object/class/text" - value { bytes_list { value: ["b", "c"] } } } - feature { - key: "image/source_id" - value { bytes_list { value: "i2" } } } } - """, tf_example) - - def test_label_filtering(self): - label_map, df = create_test_data() - - label_map = {'a': 0} - - tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame( - df[df.ImageID == 'i1'], label_map, 'encoded_image_test') - self.assertProtoEquals( - """ - features { - feature { - key: "image/encoded" - value { bytes_list { value: "encoded_image_test" } } } - feature { - key: "image/filename" - value { bytes_list { value: "i1.jpg" } } } - feature { - key: "image/object/bbox/ymin" - value { float_list { value: [0.3, 0.6] } } } - feature { - key: "image/object/bbox/xmin" - value { float_list { value: [0.1, 0.3] } } } - feature { - key: "image/object/bbox/ymax" - value { float_list { value: [0.3, 0.6] } } } - feature { - key: "image/object/bbox/xmax" - value { float_list { value: [0.2, 0.3] } } } - feature { - key: "image/object/class/label" - value { int64_list { value: [0, 0] } } } - feature { - key: "image/object/class/text" - value { bytes_list { value: ["a", "a"] } } } - feature { - key: "image/source_id" - value { bytes_list { value: "i1" } } } - feature { - key: "image/object/depiction" - value { int64_list { value: [1, 0] } } } - feature { - key: "image/object/group_of" - value { int64_list { value: [0, 0] } } } - feature { - key: "image/object/occluded" - value { int64_list { value: [0, 1] } } } - feature { - key: "image/object/truncated" - value { int64_list { value: [0, 0] } } } - feature { - key: "image/class/label" - value { int64_list { } } } - feature { - key: "image/class/text" - value { bytes_list { } } } } - """, tf_example) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/tf_record_creation_util.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/tf_record_creation_util.py deleted file mode 100644 index e8da2291d620b2218cbf724dedb6464e8050f1f7..0000000000000000000000000000000000000000 --- 
a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/tf_record_creation_util.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Utilities for creating TFRecords of TF examples for the Open Images dataset. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - - -def open_sharded_output_tfrecords(exit_stack, base_path, num_shards): - """Opens all TFRecord shards for writing and adds them to an exit stack. - - Args: - exit_stack: A context2.ExitStack used to automatically closed the TFRecords - opened in this function. - base_path: The base path for all shards - num_shards: The number of shards - - Returns: - The list of opened TFRecords. Position k in the list corresponds to shard k. - """ - tf_record_output_filenames = [ - '{}-{:05d}-of-{:05d}'.format(base_path, idx, num_shards) - for idx in range(num_shards) - ] - - tfrecords = [ - exit_stack.enter_context(tf.python_io.TFRecordWriter(file_name)) - for file_name in tf_record_output_filenames - ] - - return tfrecords diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/tf_record_creation_util_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/tf_record_creation_util_test.py deleted file mode 100644 index f1231f8bb7b86ec6a066a6ec373e3e6e8af63386..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/dataset_tools/tf_record_creation_util_test.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for tf_record_creation_util.py.""" - -import os -import contextlib2 -import tensorflow as tf - -from object_detection.dataset_tools import tf_record_creation_util - - -class OpenOutputTfrecordsTests(tf.test.TestCase): - - def test_sharded_tfrecord_writes(self): - with contextlib2.ExitStack() as tf_record_close_stack: - output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords( - tf_record_close_stack, - os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), 10) - for idx in range(10): - output_tfrecords[idx].write('test_{}'.format(idx)) - - for idx in range(10): - tf_record_path = '{}-{:05d}-of-00010'.format( - os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), idx) - records = list(tf.python_io.tf_record_iterator(tf_record_path)) - self.assertAllEqual(records, ['test_{}'.format(idx)]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/eval.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/eval.py deleted file mode 100644 index f546442e27d19b41a121c68022549367ba36bc90..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/eval.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Evaluation executable for detection models. - -This executable is used to evaluate DetectionModels. There are two ways of -configuring the eval job. - -1) A single pipeline_pb2.TrainEvalPipelineConfig file may be specified. -In this mode, the --eval_training_data flag may be given to force the pipeline -to evaluate on training data instead. - -Example usage: - ./eval \ - --logtostderr \ - --checkpoint_dir=path/to/checkpoint_dir \ - --eval_dir=path/to/eval_dir \ - --pipeline_config_path=pipeline_config.pbtxt - -2) Three configuration files may be provided: a model_pb2.DetectionModel -configuration file to define what type of DetectionModel is being evaluated, an -input_reader_pb2.InputReader file to specify what data the model is evaluating, -and an eval_pb2.EvalConfig file to configure evaluation parameters.
- -Example usage: - ./eval \ - --logtostderr \ - --checkpoint_dir=path/to/checkpoint_dir \ - --eval_dir=path/to/eval_dir \ - --eval_config_path=eval_config.pbtxt \ - --model_config_path=model_config.pbtxt \ - --input_config_path=eval_input_config.pbtxt -""" -import functools -import os -import tensorflow as tf - -from object_detection import evaluator -from object_detection.builders import dataset_builder -from object_detection.builders import graph_rewriter_builder -from object_detection.builders import model_builder -from object_detection.utils import config_util -from object_detection.utils import dataset_util -from object_detection.utils import label_map_util - - -tf.logging.set_verbosity(tf.logging.INFO) - -flags = tf.app.flags -flags.DEFINE_boolean('eval_training_data', False, - 'If training data should be evaluated for this job.') -flags.DEFINE_string('checkpoint_dir', '', - 'Directory containing checkpoints to evaluate, typically ' - 'set to `train_dir` used in the training job.') -flags.DEFINE_string('eval_dir', '', - 'Directory to write eval summaries to.') -flags.DEFINE_string('pipeline_config_path', '', - 'Path to a pipeline_pb2.TrainEvalPipelineConfig config ' - 'file. If provided, other configs are ignored') -flags.DEFINE_string('eval_config_path', '', - 'Path to an eval_pb2.EvalConfig config file.') -flags.DEFINE_string('input_config_path', '', - 'Path to an input_reader_pb2.InputReader config file.') -flags.DEFINE_string('model_config_path', '', - 'Path to a model_pb2.DetectionModel config file.') -flags.DEFINE_boolean('run_once', False, 'Option to only run a single pass of ' - 'evaluation. Overrides the `max_evals` parameter in the ' - 'provided config.') -FLAGS = flags.FLAGS - - -def main(unused_argv): - assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.' - assert FLAGS.eval_dir, '`eval_dir` is missing.' 
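For orientation before the body below: in the single-file mode, config_util returns one dictionary and this script reads only a handful of its keys. A minimal sketch, with a hypothetical config path:

```python
from object_detection.utils import config_util

# Hypothetical path; in this script it comes from --pipeline_config_path.
configs = config_util.get_configs_from_pipeline_file('path/to/pipeline.config')

model_config = configs['model']              # model_pb2.DetectionModel
eval_config = configs['eval_config']         # eval_pb2.EvalConfig
input_config = configs['eval_input_config']  # input_reader_pb2.InputReader
```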
- tf.gfile.MakeDirs(FLAGS.eval_dir) - if FLAGS.pipeline_config_path: - configs = config_util.get_configs_from_pipeline_file( - FLAGS.pipeline_config_path) - tf.gfile.Copy(FLAGS.pipeline_config_path, - os.path.join(FLAGS.eval_dir, 'pipeline.config'), - overwrite=True) - else: - configs = config_util.get_configs_from_multiple_files( - model_config_path=FLAGS.model_config_path, - eval_config_path=FLAGS.eval_config_path, - eval_input_config_path=FLAGS.input_config_path) - for name, config in [('model.config', FLAGS.model_config_path), - ('eval.config', FLAGS.eval_config_path), - ('input.config', FLAGS.input_config_path)]: - tf.gfile.Copy(config, - os.path.join(FLAGS.eval_dir, name), - overwrite=True) - - model_config = configs['model'] - eval_config = configs['eval_config'] - input_config = configs['eval_input_config'] - if FLAGS.eval_training_data: - input_config = configs['train_input_config'] - - model_fn = functools.partial( - model_builder.build, - model_config=model_config, - is_training=False) - - def get_next(config): - return dataset_util.make_initializable_iterator( - dataset_builder.build(config)).get_next() - - create_input_dict_fn = functools.partial(get_next, input_config) - - label_map = label_map_util.load_labelmap(input_config.label_map_path) - max_num_classes = max([item.id for item in label_map.item]) - categories = label_map_util.convert_label_map_to_categories( - label_map, max_num_classes) - - if FLAGS.run_once: - eval_config.max_evals = 1 - - graph_rewriter_fn = None - if 'graph_rewriter_config' in configs: - graph_rewriter_fn = graph_rewriter_builder.build( - configs['graph_rewriter_config'], is_training=False) - - evaluator.evaluate( - create_input_dict_fn, - model_fn, - eval_config, - categories, - FLAGS.checkpoint_dir, - FLAGS.eval_dir, - graph_hook_fn=graph_rewriter_fn) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/eval_util.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/eval_util.py deleted file mode 100644 index 67b62a4421a963dbd4c09266315252d172594ba9..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/eval_util.py +++ /dev/null @@ -1,645 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Common utility functions for evaluation.""" -import collections -import logging -import os -import time - -import numpy as np -import tensorflow as tf - -from object_detection.core import box_list -from object_detection.core import box_list_ops -from object_detection.core import keypoint_ops -from object_detection.core import standard_fields as fields -from object_detection.metrics import coco_evaluation -from object_detection.utils import label_map_util -from object_detection.utils import ops -from object_detection.utils import visualization_utils as vis_utils - -slim = tf.contrib.slim - - -def write_metrics(metrics, global_step, summary_dir): - """Write metrics to a summary directory. - - Args: - metrics: A dictionary containing metric names and values. - global_step: Global step at which the metrics are computed. - summary_dir: Directory to write tensorflow summaries to. - """ - logging.info('Writing metrics to tf summary.') - summary_writer = tf.summary.FileWriterCache.get(summary_dir) - for key in sorted(metrics): - summary = tf.Summary(value=[ - tf.Summary.Value(tag=key, simple_value=metrics[key]), - ]) - summary_writer.add_summary(summary, global_step) - logging.info('%s: %f', key, metrics[key]) - logging.info('Metrics written to tf summary.') - - -# TODO(rathodv): Add tests. -def visualize_detection_results(result_dict, - tag, - global_step, - categories, - summary_dir='', - export_dir='', - agnostic_mode=False, - show_groundtruth=False, - groundtruth_box_visualization_color='black', - min_score_thresh=.5, - max_num_predictions=20, - skip_scores=False, - skip_labels=False, - keep_image_id_for_visualization_export=False): - """Visualizes detection results and writes visualizations to image summaries. - - This function visualizes an image with its detected bounding boxes and writes - to image summaries which can be viewed on tensorboard. It optionally also - writes images to a directory. In the case of missing entry in the label map, - unknown class name in the visualization is shown as "N/A". - - Args: - result_dict: a dictionary holding groundtruth and detection - data corresponding to each image being evaluated. The following keys - are required: - 'original_image': a numpy array representing the image with shape - [1, height, width, 3] or [1, height, width, 1] - 'detection_boxes': a numpy array of shape [N, 4] - 'detection_scores': a numpy array of shape [N] - 'detection_classes': a numpy array of shape [N] - The following keys are optional: - 'groundtruth_boxes': a numpy array of shape [N, 4] - 'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2] - Detections are assumed to be provided in decreasing order of score and for - display, and we assume that scores are probabilities between 0 and 1. - tag: tensorboard tag (string) to associate with image. - global_step: global step at which the visualization are generated. - categories: a list of dictionaries representing all possible categories. - Each dict in this list has the following keys: - 'id': (required) an integer id uniquely identifying this category - 'name': (required) string representing category name - e.g., 'cat', 'dog', 'pizza' - 'supercategory': (optional) string representing the supercategory - e.g., 'animal', 'vehicle', 'food', etc - summary_dir: the output directory to which the image summaries are written. - export_dir: the output directory to which images are written. 
If this is - empty (default), then images are not exported. - agnostic_mode: boolean (default: False) controlling whether to evaluate in - class-agnostic mode or not. - show_groundtruth: boolean (default: False) controlling whether to show - groundtruth boxes in addition to detected boxes - groundtruth_box_visualization_color: box color for visualizing groundtruth - boxes - min_score_thresh: minimum score threshold for a box to be visualized - max_num_predictions: maximum number of detections to visualize - skip_scores: whether to skip score when drawing a single detection - skip_labels: whether to skip label when drawing a single detection - keep_image_id_for_visualization_export: whether to keep image identifier in - filename when exported to export_dir - Raises: - ValueError: if result_dict does not contain the expected keys (i.e., - 'original_image', 'detection_boxes', 'detection_scores', - 'detection_classes') - """ - detection_fields = fields.DetectionResultFields - input_fields = fields.InputDataFields - if not set([ - input_fields.original_image, - detection_fields.detection_boxes, - detection_fields.detection_scores, - detection_fields.detection_classes, - ]).issubset(set(result_dict.keys())): - raise ValueError('result_dict does not contain all expected keys.') - if show_groundtruth and input_fields.groundtruth_boxes not in result_dict: - raise ValueError('If show_groundtruth is enabled, result_dict must contain ' - 'groundtruth_boxes.') - logging.info('Creating detection visualizations.') - category_index = label_map_util.create_category_index(categories) - - image = np.squeeze(result_dict[input_fields.original_image], axis=0) - if image.shape[2] == 1: # If one channel image, repeat in RGB. - image = np.tile(image, [1, 1, 3]) - detection_boxes = result_dict[detection_fields.detection_boxes] - detection_scores = result_dict[detection_fields.detection_scores] - detection_classes = np.int32((result_dict[ - detection_fields.detection_classes])) - detection_keypoints = result_dict.get(detection_fields.detection_keypoints) - detection_masks = result_dict.get(detection_fields.detection_masks) - detection_boundaries = result_dict.get(detection_fields.detection_boundaries) - - # Plot groundtruth underneath detections - if show_groundtruth: - groundtruth_boxes = result_dict[input_fields.groundtruth_boxes] - groundtruth_keypoints = result_dict.get(input_fields.groundtruth_keypoints) - vis_utils.visualize_boxes_and_labels_on_image_array( - image=image, - boxes=groundtruth_boxes, - classes=None, - scores=None, - category_index=category_index, - keypoints=groundtruth_keypoints, - use_normalized_coordinates=False, - max_boxes_to_draw=None, - groundtruth_box_visualization_color=groundtruth_box_visualization_color) - vis_utils.visualize_boxes_and_labels_on_image_array( - image, - detection_boxes, - detection_classes, - detection_scores, - category_index, - instance_masks=detection_masks, - instance_boundaries=detection_boundaries, - keypoints=detection_keypoints, - use_normalized_coordinates=False, - max_boxes_to_draw=max_num_predictions, - min_score_thresh=min_score_thresh, - agnostic_mode=agnostic_mode, - skip_scores=skip_scores, - skip_labels=skip_labels) - - if export_dir: - if keep_image_id_for_visualization_export and result_dict[fields. 
-                                                  InputDataFields()
-                                                  .key]:
-      export_path = os.path.join(export_dir, 'export-{}-{}.png'.format(
-          tag, result_dict[fields.InputDataFields().key]))
-    else:
-      export_path = os.path.join(export_dir, 'export-{}.png'.format(tag))
-    vis_utils.save_image_array_as_png(image, export_path)
-
-  summary = tf.Summary(value=[
-      tf.Summary.Value(
-          tag=tag,
-          image=tf.Summary.Image(
-              encoded_image_string=vis_utils.encode_image_array_as_png_str(
-                  image)))
-  ])
-  summary_writer = tf.summary.FileWriterCache.get(summary_dir)
-  summary_writer.add_summary(summary, global_step)
-
-  logging.info('Detection visualizations written to summary with tag %s.', tag)
-
-
-def _run_checkpoint_once(tensor_dict,
-                         evaluators=None,
-                         batch_processor=None,
-                         checkpoint_dirs=None,
-                         variables_to_restore=None,
-                         restore_fn=None,
-                         num_batches=1,
-                         master='',
-                         save_graph=False,
-                         save_graph_dir='',
-                         losses_dict=None):
-  """Evaluates metrics defined in evaluators and returns summaries.
-
-  This function loads the latest checkpoint in checkpoint_dirs and evaluates
-  all metrics defined in evaluators. The metrics are processed in batch by the
-  batch_processor.
-
-  Args:
-    tensor_dict: a dictionary holding tensors representing a batch of detections
-      and corresponding groundtruth annotations.
-    evaluators: a list of objects of type DetectionEvaluator to be used for
-      evaluation. Note that the metric names produced by different evaluators
-      must be unique.
-    batch_processor: a function taking four arguments:
-      1. tensor_dict: the same tensor_dict that is passed in as the first
-         argument to this function.
-      2. sess: a tensorflow session
-      3. batch_index: an integer representing the index of the batch amongst
-         all batches
-      4. counters: a dictionary holding 'success' and 'skipped' counts that the
-         processor is expected to update
-      By default, batch_processor is None, which defaults to running:
-        return sess.run(tensor_dict)
-      To skip an image, it suffices to return an empty dictionary in place of
-      result_dict.
-    checkpoint_dirs: list of directories to load into an EnsembleModel. If it
-      has only one directory, EnsembleModel will not be used -- a
-      DetectionModel will be instantiated directly. Not used if restore_fn is
-      set.
-    variables_to_restore: None, or a dictionary mapping variable names found in
-      a checkpoint to model variables. The dictionary would normally be
-      generated by creating a tf.train.ExponentialMovingAverage object and
-      calling its variables_to_restore() method. Not used if restore_fn is set.
-    restore_fn: None, or a function that takes a tf.Session object and correctly
-      restores all necessary variables from the correct checkpoint file. If
-      None, attempts to restore from the first directory in checkpoint_dirs.
-    num_batches: the number of batches to use for evaluation.
-    master: the location of the Tensorflow session.
-    save_graph: whether or not the Tensorflow graph is stored as a pbtxt file.
-    save_graph_dir: where to store the Tensorflow graph on disk. If save_graph
-      is True this must be non-empty.
-    losses_dict: optional dictionary of scalar detection losses.
-
-  Returns:
-    global_step: the count of global steps.
-    all_evaluator_metrics: A dictionary containing metric names and values.
-
-  Raises:
-    ValueError: if restore_fn is None and checkpoint_dirs doesn't have at least
-      one element.
-    ValueError: if save_graph is True and save_graph_dir is not defined.
- """ - if save_graph and not save_graph_dir: - raise ValueError('`save_graph_dir` must be defined.') - sess = tf.Session(master, graph=tf.get_default_graph()) - sess.run(tf.global_variables_initializer()) - sess.run(tf.local_variables_initializer()) - sess.run(tf.tables_initializer()) - if restore_fn: - restore_fn(sess) - else: - if not checkpoint_dirs: - raise ValueError('`checkpoint_dirs` must have at least one entry.') - checkpoint_file = tf.train.latest_checkpoint(checkpoint_dirs[0]) - saver = tf.train.Saver(variables_to_restore) - saver.restore(sess, checkpoint_file) - - if save_graph: - tf.train.write_graph(sess.graph_def, save_graph_dir, 'eval.pbtxt') - - counters = {'skipped': 0, 'success': 0} - aggregate_result_losses_dict = collections.defaultdict(list) - with tf.contrib.slim.queues.QueueRunners(sess): - try: - for batch in range(int(num_batches)): - if (batch + 1) % 100 == 0: - logging.info('Running eval ops batch %d/%d', batch + 1, num_batches) - if not batch_processor: - try: - if not losses_dict: - losses_dict = {} - result_dict, result_losses_dict = sess.run([tensor_dict, - losses_dict]) - counters['success'] += 1 - except tf.errors.InvalidArgumentError: - logging.info('Skipping image') - counters['skipped'] += 1 - result_dict = {} - else: - result_dict, result_losses_dict = batch_processor( - tensor_dict, sess, batch, counters, losses_dict=losses_dict) - if not result_dict: - continue - for key, value in iter(result_losses_dict.items()): - aggregate_result_losses_dict[key].append(value) - for evaluator in evaluators: - # TODO(b/65130867): Use image_id tensor once we fix the input data - # decoders to return correct image_id. - # TODO(akuznetsa): result_dict contains batches of images, while - # add_single_ground_truth_image_info expects a single image. Fix - evaluator.add_single_ground_truth_image_info( - image_id=batch, groundtruth_dict=result_dict) - evaluator.add_single_detected_image_info( - image_id=batch, detections_dict=result_dict) - logging.info('Running eval batches done.') - except tf.errors.OutOfRangeError: - logging.info('Done evaluating -- epoch limit reached') - finally: - # When done, ask the threads to stop. - logging.info('# success: %d', counters['success']) - logging.info('# skipped: %d', counters['skipped']) - all_evaluator_metrics = {} - for evaluator in evaluators: - metrics = evaluator.evaluate() - evaluator.clear() - if any(key in all_evaluator_metrics for key in metrics): - raise ValueError('Metric names between evaluators must not collide.') - all_evaluator_metrics.update(metrics) - global_step = tf.train.global_step(sess, tf.train.get_global_step()) - - for key, value in iter(aggregate_result_losses_dict.items()): - all_evaluator_metrics['Losses/' + key] = np.mean(value) - sess.close() - return (global_step, all_evaluator_metrics) - - -# TODO(rathodv): Add tests. -def repeated_checkpoint_run(tensor_dict, - summary_dir, - evaluators, - batch_processor=None, - checkpoint_dirs=None, - variables_to_restore=None, - restore_fn=None, - num_batches=1, - eval_interval_secs=120, - max_number_of_evaluations=None, - master='', - save_graph=False, - save_graph_dir='', - losses_dict=None): - """Periodically evaluates desired tensors using checkpoint_dirs or restore_fn. - - This function repeatedly loads a checkpoint and evaluates a desired - set of tensors (provided by tensor_dict) and hands the resulting numpy - arrays to a function result_processor which can be used to further - process/save/visualize the results. 
-
-
-# TODO(rathodv): Add tests.
-def repeated_checkpoint_run(tensor_dict,
-                            summary_dir,
-                            evaluators,
-                            batch_processor=None,
-                            checkpoint_dirs=None,
-                            variables_to_restore=None,
-                            restore_fn=None,
-                            num_batches=1,
-                            eval_interval_secs=120,
-                            max_number_of_evaluations=None,
-                            master='',
-                            save_graph=False,
-                            save_graph_dir='',
-                            losses_dict=None):
-  """Periodically evaluates desired tensors using checkpoint_dirs or restore_fn.
-
-  This function repeatedly loads a checkpoint and evaluates a desired
-  set of tensors (provided by tensor_dict) and hands the resulting numpy
-  arrays to the evaluators and the optional batch_processor, which can be used
-  to further process/save/visualize the results.
-
-  Args:
-    tensor_dict: a dictionary holding tensors representing a batch of detections
-      and corresponding groundtruth annotations.
-    summary_dir: a directory to write metrics summaries.
-    evaluators: a list of objects of type DetectionEvaluator to be used for
-      evaluation. Note that the metric names produced by different evaluators
-      must be unique.
-    batch_processor: a function taking four arguments:
-      1. tensor_dict: the same tensor_dict that is passed in as the first
-         argument to this function.
-      2. sess: a tensorflow session
-      3. batch_index: an integer representing the index of the batch amongst
-         all batches
-      4. counters: a dictionary holding 'success' and 'skipped' counts that the
-         processor is expected to update
-      By default, batch_processor is None, which defaults to running:
-        return sess.run(tensor_dict)
-    checkpoint_dirs: list of directories to load into a DetectionModel or an
-      EnsembleModel if restore_fn isn't set. Also used to determine when to run
-      next evaluation. Must have at least one element.
-    variables_to_restore: None, or a dictionary mapping variable names found in
-      a checkpoint to model variables. The dictionary would normally be
-      generated by creating a tf.train.ExponentialMovingAverage object and
-      calling its variables_to_restore() method. Not used if restore_fn is set.
-    restore_fn: a function that takes a tf.Session object and correctly restores
-      all necessary variables from the correct checkpoint file.
-    num_batches: the number of batches to use for evaluation.
-    eval_interval_secs: the number of seconds between each evaluation run.
-    max_number_of_evaluations: the max number of iterations of the evaluation.
-      If the value is left as None the evaluation continues indefinitely.
-    master: the location of the Tensorflow session.
-    save_graph: whether or not the Tensorflow graph is saved as a pbtxt file.
-    save_graph_dir: where to save on disk the Tensorflow graph. If save_graph
-      is True this must be non-empty.
-    losses_dict: optional dictionary of scalar detection losses.
-
-  Returns:
-    metrics: A dictionary containing metric names and values in the latest
-      evaluation.
-
-  Raises:
-    ValueError: if max_number_of_evaluations is neither None nor a positive
-      number.
-    ValueError: if checkpoint_dirs doesn't have at least one element.
-  """
-  if max_number_of_evaluations and max_number_of_evaluations <= 0:
-    raise ValueError('`max_number_of_evaluations` must be either None or a '
-                     'positive number.')
-
-  if not checkpoint_dirs:
-    raise ValueError('`checkpoint_dirs` must have at least one entry.')
-
-  last_evaluated_model_path = None
-  number_of_evaluations = 0
-  while True:
-    start = time.time()
-    logging.info('Starting evaluation at ' + time.strftime(
-        '%Y-%m-%d-%H:%M:%S', time.gmtime()))
-    model_path = tf.train.latest_checkpoint(checkpoint_dirs[0])
-    if not model_path:
-      logging.info('No model found in %s. Will try again in %d seconds',
-                   checkpoint_dirs[0], eval_interval_secs)
-    elif model_path == last_evaluated_model_path:
-      logging.info('Found already evaluated checkpoint. 
Will try again in %d ' - 'seconds', eval_interval_secs) - else: - last_evaluated_model_path = model_path - global_step, metrics = _run_checkpoint_once(tensor_dict, evaluators, - batch_processor, - checkpoint_dirs, - variables_to_restore, - restore_fn, num_batches, - master, save_graph, - save_graph_dir, - losses_dict=losses_dict) - write_metrics(metrics, global_step, summary_dir) - number_of_evaluations += 1 - - if (max_number_of_evaluations and - number_of_evaluations >= max_number_of_evaluations): - logging.info('Finished evaluation!') - break - time_to_next_eval = start + eval_interval_secs - time.time() - if time_to_next_eval > 0: - time.sleep(time_to_next_eval) - - return metrics - - -def result_dict_for_single_example(image, - key, - detections, - groundtruth=None, - class_agnostic=False, - scale_to_absolute=False): - """Merges all detection and groundtruth information for a single example. - - Note that evaluation tools require classes that are 1-indexed, and so this - function performs the offset. If `class_agnostic` is True, all output classes - have label 1. - - Args: - image: A single 4D uint8 image tensor of shape [1, H, W, C]. - key: A single string tensor identifying the image. - detections: A dictionary of detections, returned from - DetectionModel.postprocess(). - groundtruth: (Optional) Dictionary of groundtruth items, with fields: - 'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in - normalized coordinates. - 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes. - 'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional) - 'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional) - 'groundtruth_difficult': [num_boxes] int64 tensor. (Optional) - 'groundtruth_group_of': [num_boxes] int64 tensor. (Optional) - 'groundtruth_instance_masks': 3D int64 tensor of instance masks - (Optional). - class_agnostic: Boolean indicating whether the detections are class-agnostic - (i.e. binary). Default False. - scale_to_absolute: Boolean indicating whether boxes and keypoints should be - scaled to absolute coordinates. Note that for IoU based evaluations, it - does not matter whether boxes are expressed in absolute or relative - coordinates. Default False. - - Returns: - A dictionary with: - 'original_image': A [1, H, W, C] uint8 image tensor. - 'key': A string tensor with image identifier. - 'detection_boxes': [max_detections, 4] float32 tensor of boxes, in - normalized or absolute coordinates, depending on the value of - `scale_to_absolute`. - 'detection_scores': [max_detections] float32 tensor of scores. - 'detection_classes': [max_detections] int64 tensor of 1-indexed classes. - 'detection_masks': [max_detections, H, W] float32 tensor of binarized - masks, reframed to full image masks. - 'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in - normalized or absolute coordinates, depending on the value of - `scale_to_absolute`. (Optional) - 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes. - (Optional) - 'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional) - 'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional) - 'groundtruth_difficult': [num_boxes] int64 tensor. (Optional) - 'groundtruth_group_of': [num_boxes] int64 tensor. (Optional) - 'groundtruth_instance_masks': 3D int64 tensor of instance masks - (Optional). 
- - """ - label_id_offset = 1 # Applying label id offset (b/63711816) - - input_data_fields = fields.InputDataFields - output_dict = { - input_data_fields.original_image: image, - input_data_fields.key: key, - } - - detection_fields = fields.DetectionResultFields - detection_boxes = detections[detection_fields.detection_boxes][0] - image_shape = tf.shape(image) - detection_scores = detections[detection_fields.detection_scores][0] - - if class_agnostic: - detection_classes = tf.ones_like(detection_scores, dtype=tf.int64) - else: - detection_classes = ( - tf.to_int64(detections[detection_fields.detection_classes][0]) + - label_id_offset) - - num_detections = tf.to_int32(detections[detection_fields.num_detections][0]) - detection_boxes = tf.slice( - detection_boxes, begin=[0, 0], size=[num_detections, -1]) - detection_classes = tf.slice( - detection_classes, begin=[0], size=[num_detections]) - detection_scores = tf.slice( - detection_scores, begin=[0], size=[num_detections]) - - if scale_to_absolute: - absolute_detection_boxlist = box_list_ops.to_absolute_coordinates( - box_list.BoxList(detection_boxes), image_shape[1], image_shape[2]) - output_dict[detection_fields.detection_boxes] = ( - absolute_detection_boxlist.get()) - else: - output_dict[detection_fields.detection_boxes] = detection_boxes - output_dict[detection_fields.detection_classes] = detection_classes - output_dict[detection_fields.detection_scores] = detection_scores - - if detection_fields.detection_masks in detections: - detection_masks = detections[detection_fields.detection_masks][0] - # TODO(rathodv): This should be done in model's postprocess - # function ideally. - detection_masks = tf.slice( - detection_masks, begin=[0, 0, 0], size=[num_detections, -1, -1]) - detection_masks_reframed = ops.reframe_box_masks_to_image_masks( - detection_masks, detection_boxes, image_shape[1], image_shape[2]) - detection_masks_reframed = tf.cast( - tf.greater(detection_masks_reframed, 0.5), tf.uint8) - output_dict[detection_fields.detection_masks] = detection_masks_reframed - if detection_fields.detection_keypoints in detections: - detection_keypoints = detections[detection_fields.detection_keypoints][0] - output_dict[detection_fields.detection_keypoints] = detection_keypoints - if scale_to_absolute: - absolute_detection_keypoints = keypoint_ops.scale( - detection_keypoints, image_shape[1], image_shape[2]) - output_dict[detection_fields.detection_keypoints] = ( - absolute_detection_keypoints) - - if groundtruth: - if input_data_fields.groundtruth_instance_masks in groundtruth: - groundtruth[input_data_fields.groundtruth_instance_masks] = tf.cast( - groundtruth[input_data_fields.groundtruth_instance_masks], tf.uint8) - output_dict.update(groundtruth) - if scale_to_absolute: - groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes] - absolute_gt_boxlist = box_list_ops.to_absolute_coordinates( - box_list.BoxList(groundtruth_boxes), image_shape[1], image_shape[2]) - output_dict[input_data_fields.groundtruth_boxes] = ( - absolute_gt_boxlist.get()) - # For class-agnostic models, groundtruth classes all become 1. 
-    if class_agnostic:
-      groundtruth_classes = groundtruth[input_data_fields.groundtruth_classes]
-      groundtruth_classes = tf.ones_like(groundtruth_classes, dtype=tf.int64)
-      output_dict[input_data_fields.groundtruth_classes] = groundtruth_classes
-
-  return output_dict
-
-
-def get_eval_metric_ops_for_evaluators(evaluation_metrics,
-                                       categories,
-                                       eval_dict,
-                                       include_metrics_per_category=False):
-  """Returns a dictionary of eval metric ops to use with `tf.EstimatorSpec`.
-
-  Args:
-    evaluation_metrics: List of evaluation metric names. Current options are
-      'coco_detection_metrics' and 'coco_mask_metrics'.
-    categories: A list of dicts, each of which has the following keys -
-      'id': (required) an integer id uniquely identifying this category.
-      'name': (required) string representing category name e.g., 'cat', 'dog'.
-    eval_dict: An evaluation dictionary, returned from
-      result_dict_for_single_example().
-    include_metrics_per_category: If True, additionally include per-category
-      metrics.
-
-  Returns:
-    A dictionary of metric names to tuple of value_op and update_op that can be
-    used as eval metric ops in tf.EstimatorSpec.
-
-  Raises:
-    ValueError: If any of the metrics in `evaluation_metrics` is not
-      'coco_detection_metrics' or 'coco_mask_metrics'.
-  """
-  evaluation_metrics = list(set(evaluation_metrics))
-
-  input_data_fields = fields.InputDataFields
-  detection_fields = fields.DetectionResultFields
-  eval_metric_ops = {}
-  for metric in evaluation_metrics:
-    if metric == 'coco_detection_metrics':
-      coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
-          categories, include_metrics_per_category=include_metrics_per_category)
-      eval_metric_ops.update(
-          coco_evaluator.get_estimator_eval_metric_ops(
-              image_id=eval_dict[input_data_fields.key],
-              groundtruth_boxes=eval_dict[input_data_fields.groundtruth_boxes],
-              groundtruth_classes=eval_dict[
-                  input_data_fields.groundtruth_classes],
-              detection_boxes=eval_dict[detection_fields.detection_boxes],
-              detection_scores=eval_dict[detection_fields.detection_scores],
-              detection_classes=eval_dict[detection_fields.detection_classes],
-              groundtruth_is_crowd=eval_dict.get(
-                  input_data_fields.groundtruth_is_crowd)))
-    elif metric == 'coco_mask_metrics':
-      coco_mask_evaluator = coco_evaluation.CocoMaskEvaluator(
-          categories, include_metrics_per_category=include_metrics_per_category)
-      eval_metric_ops.update(
-          coco_mask_evaluator.get_estimator_eval_metric_ops(
-              image_id=eval_dict[input_data_fields.key],
-              groundtruth_boxes=eval_dict[input_data_fields.groundtruth_boxes],
-              groundtruth_classes=eval_dict[
-                  input_data_fields.groundtruth_classes],
-              groundtruth_instance_masks=eval_dict[
-                  input_data_fields.groundtruth_instance_masks],
-              detection_scores=eval_dict[detection_fields.detection_scores],
-              detection_classes=eval_dict[detection_fields.detection_classes],
-              detection_masks=eval_dict[detection_fields.detection_masks],
-              groundtruth_is_crowd=eval_dict.get(
-                  input_data_fields.groundtruth_is_crowd),))
-    else:
-      raise ValueError('The only evaluation metrics supported are '
-                       '"coco_detection_metrics" and "coco_mask_metrics". '
-                       'Found {} in the evaluation metrics'.format(metric))
-
-  return eval_metric_ops
-
-
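For orientation, here is a minimal sketch (not part of the deleted files) of how the metric ops built by get_eval_metric_ops_for_evaluators plug into an Estimator; the tensors `image`, `key`, `detections`, `groundtruth`, `total_loss` and the `categories` list are hypothetical stand-ins produced elsewhere in a model_fn:

import tensorflow as tf

from object_detection import eval_util

def make_eval_spec(image, key, detections, groundtruth, total_loss,
                   categories):
  # Merge detections and groundtruth for a single example, then build the
  # COCO value/update ops that tf.estimator expects.
  eval_dict = eval_util.result_dict_for_single_example(
      image, key, detections, groundtruth, scale_to_absolute=True)
  eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
      ['coco_detection_metrics'], categories, eval_dict)
  return tf.estimator.EstimatorSpec(
      mode=tf.estimator.ModeKeys.EVAL,
      loss=total_loss,
      eval_metric_ops=eval_metric_ops)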
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/eval_util_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/eval_util_test.py
deleted file mode 100644
index e4b0ca3d866774189731658be2ee7b5fa7293113..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/eval_util_test.py
+++ /dev/null
@@ -1,112 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for eval_util."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf
-
-
-from object_detection import eval_util
-from object_detection.core import standard_fields as fields
-
-
-class EvalUtilTest(tf.test.TestCase):
-
-  def _get_categories_list(self):
-    return [{'id': 0, 'name': 'person'},
-            {'id': 1, 'name': 'dog'},
-            {'id': 2, 'name': 'cat'}]
-
-  def _make_evaluation_dict(self):
-    input_data_fields = fields.InputDataFields
-    detection_fields = fields.DetectionResultFields
-
-    image = tf.zeros(shape=[1, 20, 20, 3], dtype=tf.uint8)
-    key = tf.constant('image1')
-    detection_boxes = tf.constant([[[0., 0., 1., 1.]]])
-    detection_scores = tf.constant([[0.8]])
-    detection_classes = tf.constant([[0]])
-    detection_masks = tf.ones(shape=[1, 1, 20, 20], dtype=tf.float32)
-    num_detections = tf.constant([1])
-    groundtruth_boxes = tf.constant([[0., 0., 1., 1.]])
-    groundtruth_classes = tf.constant([1])
-    groundtruth_instance_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8)
-    detections = {
-        detection_fields.detection_boxes: detection_boxes,
-        detection_fields.detection_scores: detection_scores,
-        detection_fields.detection_classes: detection_classes,
-        detection_fields.detection_masks: detection_masks,
-        detection_fields.num_detections: num_detections
-    }
-    groundtruth = {
-        input_data_fields.groundtruth_boxes: groundtruth_boxes,
-        input_data_fields.groundtruth_classes: groundtruth_classes,
-        input_data_fields.groundtruth_instance_masks: groundtruth_instance_masks
-    }
-    return eval_util.result_dict_for_single_example(image, key, detections,
-                                                    groundtruth)
-
-  def test_get_eval_metric_ops_for_coco_detections(self):
-    evaluation_metrics = ['coco_detection_metrics']
-    categories = self._get_categories_list()
-    eval_dict = self._make_evaluation_dict()
-    metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
-        evaluation_metrics, categories, eval_dict)
-    _, update_op = metric_ops['DetectionBoxes_Precision/mAP']
-
-    with self.test_session() as sess:
-      metrics = {}
-      # items() keeps this compatible with both Python 2 and Python 3.
-      for key, (value_op, _) in metric_ops.items():
-        metrics[key] = value_op
-      sess.run(update_op)
-      metrics = sess.run(metrics)
-      print(metrics)
-      self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP'])
-      self.assertNotIn('DetectionMasks_Precision/mAP', metrics)
-
-  def test_get_eval_metric_ops_for_coco_detections_and_masks(self):
-    evaluation_metrics = ['coco_detection_metrics',
-                          'coco_mask_metrics']
-    categories = self._get_categories_list()
-    eval_dict = self._make_evaluation_dict()
-    metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
-        evaluation_metrics, categories, eval_dict)
-    _, update_op_boxes = metric_ops['DetectionBoxes_Precision/mAP']
-    _, update_op_masks = metric_ops['DetectionMasks_Precision/mAP']
-
-    with self.test_session() as sess:
-      metrics = {}
-      for key, (value_op, _) in metric_ops.items():
-        metrics[key] = value_op
-      sess.run(update_op_boxes)
-      sess.run(update_op_masks)
-      metrics = sess.run(metrics)
-      self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP'])
-      self.assertAlmostEqual(1.0, metrics['DetectionMasks_Precision/mAP'])
-
-  def test_get_eval_metric_ops_raises_error_with_unsupported_metric(self):
-    evaluation_metrics = ['unsupported_metrics']
-    categories = self._get_categories_list()
-    eval_dict = self._make_evaluation_dict()
-    with self.assertRaises(ValueError):
-      eval_util.get_eval_metric_ops_for_evaluators(
-          evaluation_metrics, categories, eval_dict)
-
-
-if __name__ == '__main__':
-  tf.test.main()
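The tests above exercise the happy path; for completeness, a sketch (not from the deleted files) of a custom batch_processor satisfying the four-argument contract documented in _run_checkpoint_once; evaluator.py's _process_batch below follows the same shape:

import logging

import tensorflow as tf

def simple_batch_processor(tensor_dict, sess, batch_index, counters,
                           losses_dict=None):
  """Runs one eval batch; returns ({}, {}) to skip a bad example."""
  try:
    result_dict, result_losses_dict = sess.run(
        [tensor_dict, losses_dict or {}])
    counters['success'] += 1
    return result_dict, result_losses_dict
  except tf.errors.InvalidArgumentError:
    logging.info('Skipping batch %d', batch_index)
    counters['skipped'] += 1
    return {}, {}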
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/evaluator.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/evaluator.py
deleted file mode 100644
index 7352af52208451ca70b8db99266f76cba800e384..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/evaluator.py
+++ /dev/null
@@ -1,278 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Detection model evaluator.
-
-This file provides a generic evaluation method that can be used to evaluate a
-DetectionModel.
-"""
-
-import logging
-import tensorflow as tf
-
-from object_detection import eval_util
-from object_detection.core import prefetcher
-from object_detection.core import standard_fields as fields
-from object_detection.metrics import coco_evaluation
-from object_detection.utils import object_detection_evaluation
-
-# A dictionary of metric names to classes that implement the metric. The
-# classes in the dictionary must implement the
-# utils.object_detection_evaluation.DetectionEvaluator interface.
-EVAL_METRICS_CLASS_DICT = {
-    'pascal_voc_detection_metrics':
-        object_detection_evaluation.PascalDetectionEvaluator,
-    'weighted_pascal_voc_detection_metrics':
-        object_detection_evaluation.WeightedPascalDetectionEvaluator,
-    'pascal_voc_instance_segmentation_metrics':
-        object_detection_evaluation.PascalInstanceSegmentationEvaluator,
-    'weighted_pascal_voc_instance_segmentation_metrics':
-        object_detection_evaluation.WeightedPascalInstanceSegmentationEvaluator,
-    'open_images_V2_detection_metrics':
-        object_detection_evaluation.OpenImagesDetectionEvaluator,
-    'coco_detection_metrics':
-        coco_evaluation.CocoDetectionEvaluator,
-    'coco_mask_metrics':
-        coco_evaluation.CocoMaskEvaluator,
-    'oid_challenge_object_detection_metrics':
-        object_detection_evaluation.OpenImagesDetectionChallengeEvaluator,
-}
-
-EVAL_DEFAULT_METRIC = 'pascal_voc_detection_metrics'
-
-
-def _extract_predictions_and_losses(model,
-                                    create_input_dict_fn,
-                                    ignore_groundtruth=False):
-  """Constructs tensorflow detection graph and returns output tensors.
-
-  Args:
-    model: model to perform predictions with.
-    create_input_dict_fn: function to create input tensor dictionaries.
-    ignore_groundtruth: whether groundtruth should be ignored.
-
-  Returns:
-    prediction_groundtruth_dict: A dictionary with postprocessed tensors (keyed
-      by standard_fields.DetectionResultFields) and optional groundtruth
-      tensors (keyed by standard_fields.InputDataFields).
-    losses_dict: A dictionary containing detection losses. This is empty when
-      ignore_groundtruth is true.
-  """
-  input_dict = create_input_dict_fn()
-  prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
-  input_dict = prefetch_queue.dequeue()
-  original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0)
-  preprocessed_image, true_image_shapes = model.preprocess(
-      tf.to_float(original_image))
-  prediction_dict = model.predict(preprocessed_image, true_image_shapes)
-  detections = model.postprocess(prediction_dict, true_image_shapes)
-
-  groundtruth = None
-  losses_dict = {}
-  if not ignore_groundtruth:
-    groundtruth = {
-        fields.InputDataFields.groundtruth_boxes:
-            input_dict[fields.InputDataFields.groundtruth_boxes],
-        fields.InputDataFields.groundtruth_classes:
-            input_dict[fields.InputDataFields.groundtruth_classes],
-        fields.InputDataFields.groundtruth_area:
-            input_dict[fields.InputDataFields.groundtruth_area],
-        fields.InputDataFields.groundtruth_is_crowd:
-            input_dict[fields.InputDataFields.groundtruth_is_crowd],
-        fields.InputDataFields.groundtruth_difficult:
-            input_dict[fields.InputDataFields.groundtruth_difficult]
-    }
-    if fields.InputDataFields.groundtruth_group_of in input_dict:
-      groundtruth[fields.InputDataFields.groundtruth_group_of] = (
-          input_dict[fields.InputDataFields.groundtruth_group_of])
-    groundtruth_masks_list = None
-    if fields.DetectionResultFields.detection_masks in detections:
-      groundtruth[fields.InputDataFields.groundtruth_instance_masks] = (
-          input_dict[fields.InputDataFields.groundtruth_instance_masks])
-      groundtruth_masks_list = [
-          input_dict[fields.InputDataFields.groundtruth_instance_masks]]
-    groundtruth_keypoints_list = None
-    if fields.DetectionResultFields.detection_keypoints in detections:
-      groundtruth[fields.InputDataFields.groundtruth_keypoints] = (
-          input_dict[fields.InputDataFields.groundtruth_keypoints])
-      groundtruth_keypoints_list = [
-          input_dict[fields.InputDataFields.groundtruth_keypoints]]
-    label_id_offset = 1
-    model.provide_groundtruth(
-        [input_dict[fields.InputDataFields.groundtruth_boxes]],
-        [tf.one_hot(input_dict[fields.InputDataFields.groundtruth_classes]
-                    - label_id_offset, depth=model.num_classes)],
-        groundtruth_masks_list, groundtruth_keypoints_list)
-    losses_dict.update(model.loss(prediction_dict, true_image_shapes))
-
-  result_dict = eval_util.result_dict_for_single_example(
-      original_image,
-      input_dict[fields.InputDataFields.source_id],
-      detections,
-      groundtruth,
-      class_agnostic=(
-          fields.DetectionResultFields.detection_classes not in detections),
-      scale_to_absolute=True)
-  return result_dict, losses_dict
-
-
-def get_evaluators(eval_config, categories):
-  """Returns evaluator instances according to eval_config, valid for categories.
-
-  Args:
-    eval_config: evaluation configurations.
-    categories: a list of categories to evaluate.
-  Returns:
-    A list of instances of DetectionEvaluator.
-
-  Raises:
-    ValueError: if metric is not in the metric class dictionary.
-  """
-  eval_metric_fn_keys = eval_config.metrics_set
-  if not eval_metric_fn_keys:
-    eval_metric_fn_keys = [EVAL_DEFAULT_METRIC]
-  evaluators_list = []
-  for eval_metric_fn_key in eval_metric_fn_keys:
-    if eval_metric_fn_key not in EVAL_METRICS_CLASS_DICT:
-      raise ValueError('Metric not found: {}'.format(eval_metric_fn_key))
-    evaluators_list.append(
-        EVAL_METRICS_CLASS_DICT[eval_metric_fn_key](categories=categories))
-  return evaluators_list
-
-
-def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
-             checkpoint_dir, eval_dir, graph_hook_fn=None, evaluator_list=None):
-  """Evaluation function for detection models.
-
-  Args:
-    create_input_dict_fn: a function to create a tensor input dictionary.
-    create_model_fn: a function that creates a DetectionModel.
-    eval_config: an eval_pb2.EvalConfig protobuf.
-    categories: a list of category dictionaries. Each dict in the list should
-      have an integer 'id' field and string 'name' field.
-    checkpoint_dir: directory to load the checkpoints to evaluate from.
-    eval_dir: directory to write evaluation metrics summary to.
-    graph_hook_fn: Optional function that is called after the eval graph is
-      completely built. This is helpful to perform additional changes to the
-      graph such as optimizing batchnorm. The function should modify the
-      default graph.
-    evaluator_list: Optional list of instances of DetectionEvaluator. If not
-      given, this list of metrics is created according to the eval_config.
-
-  Returns:
-    metrics: A dictionary containing metric names and values from the latest
-      run.
-  """
-  with tf.Graph().as_default():
-    model = create_model_fn()
-
-    if eval_config.ignore_groundtruth and not eval_config.export_path:
-      logging.fatal('If ignore_groundtruth=True then an export_path is '
-                    'required. Aborting!!!')
-
-    tensor_dict, losses_dict = _extract_predictions_and_losses(
-        model=model,
-        create_input_dict_fn=create_input_dict_fn,
-        ignore_groundtruth=eval_config.ignore_groundtruth)
-
-    def _process_batch(tensor_dict, sess, batch_index, counters,
-                       losses_dict=None):
-      """Evaluates tensors in tensor_dict, losses_dict and visualizes examples.
-
-      This function calls sess.run on tensor_dict, evaluating the original_image
-      tensor only on the first K examples and visualizing detections overlaid
-      on this original_image.
-
-      Args:
-        tensor_dict: a dictionary of tensors
-        sess: tensorflow session
-        batch_index: the index of the batch amongst all batches in the run.
-        counters: a dictionary holding 'success' and 'skipped' fields which can
-          be updated to keep track of the number of successful and failed runs,
-          respectively. If these fields are not updated, then the
-          success/skipped counter values shown at the end of evaluation will be
-          incorrect.
-        losses_dict: Optional dictionary of scalar loss tensors.
-
-      Returns:
-        result_dict: a dictionary of numpy arrays
-        result_losses_dict: a dictionary of scalar losses. This is empty if
-          input losses_dict is None.
-      """
-      try:
-        if not losses_dict:
-          losses_dict = {}
-        result_dict, result_losses_dict = sess.run([tensor_dict, losses_dict])
-        counters['success'] += 1
-      except tf.errors.InvalidArgumentError:
-        logging.info('Skipping image')
-        counters['skipped'] += 1
-        return {}, {}
-      global_step = tf.train.global_step(sess, tf.train.get_global_step())
-      if batch_index < eval_config.num_visualizations:
-        tag = 'image-{}'.format(batch_index)
-        eval_util.visualize_detection_results(
-            result_dict,
-            tag,
-            global_step,
-            categories=categories,
-            summary_dir=eval_dir,
-            export_dir=eval_config.visualization_export_dir,
-            show_groundtruth=eval_config.visualize_groundtruth_boxes,
-            groundtruth_box_visualization_color=eval_config.
-            groundtruth_box_visualization_color,
-            min_score_thresh=eval_config.min_score_threshold,
-            max_num_predictions=eval_config.max_num_boxes_to_visualize,
-            skip_scores=eval_config.skip_scores,
-            skip_labels=eval_config.skip_labels,
-            keep_image_id_for_visualization_export=eval_config.
-            keep_image_id_for_visualization_export)
-      return result_dict, result_losses_dict
-
-    variables_to_restore = tf.global_variables()
-    global_step = tf.train.get_or_create_global_step()
-    variables_to_restore.append(global_step)
-
-    if graph_hook_fn: graph_hook_fn()
-
-    if eval_config.use_moving_averages:
-      variable_averages = tf.train.ExponentialMovingAverage(0.0)
-      variables_to_restore = variable_averages.variables_to_restore()
-    saver = tf.train.Saver(variables_to_restore)
-
-    def _restore_latest_checkpoint(sess):
-      latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
-      saver.restore(sess, latest_checkpoint)
-
-    if not evaluator_list:
-      evaluator_list = get_evaluators(eval_config, categories)
-
-    metrics = eval_util.repeated_checkpoint_run(
-        tensor_dict=tensor_dict,
-        summary_dir=eval_dir,
-        evaluators=evaluator_list,
-        batch_processor=_process_batch,
-        checkpoint_dirs=[checkpoint_dir],
-        variables_to_restore=None,
-        restore_fn=_restore_latest_checkpoint,
-        num_batches=eval_config.num_examples,
-        eval_interval_secs=eval_config.eval_interval_secs,
-        max_number_of_evaluations=(1 if eval_config.ignore_groundtruth else
-                                   eval_config.max_evals
-                                   if eval_config.max_evals else None),
-        master=eval_config.eval_master,
-        save_graph=eval_config.save_graph,
-        save_graph_dir=(eval_dir if eval_config.save_graph else ''),
-        losses_dict=losses_dict)
-
-  return metrics
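Before the next file, a sketch (not from the deleted files) of the minimal interface a custom metric class must expose to be usable through get_evaluators() and eval_util._run_checkpoint_once; the method names and the `categories` constructor argument are taken from the calls in those functions, while the counting metric itself is a placeholder:

class NumImagesEvaluator(object):
  """Toy DetectionEvaluator-style class that just counts evaluated images."""

  def __init__(self, categories):
    self._categories = categories
    self._num_images = 0

  def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
    pass  # a real evaluator accumulates groundtruth here

  def add_single_detected_image_info(self, image_id, detections_dict):
    self._num_images += 1

  def evaluate(self):
    return {'NumImagesEvaluator/num_images': float(self._num_images)}

  def clear(self):
    self._num_images = 0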
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/export_inference_graph.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/export_inference_graph.py
deleted file mode 100644
index 5d0699f199848255c2989f7f46f4282ee4674765..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/export_inference_graph.py
+++ /dev/null
@@ -1,147 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-r"""Tool to export an object detection model for inference.
-
-Prepares an object detection tensorflow graph for inference using model
-configuration and an optional trained checkpoint. Outputs an inference
-graph, associated checkpoint files, a frozen inference graph and a
-SavedModel (https://tensorflow.github.io/serving/serving_basic.html).
-
-The inference graph contains one of three input nodes depending on the
-user-specified option.
-  * `image_tensor`: Accepts a uint8 4-D tensor of shape [None, None, None, 3]
-  * `encoded_image_string_tensor`: Accepts a 1-D string tensor of shape [None]
-    containing encoded PNG or JPEG images. Image resolutions are expected to be
-    the same if more than 1 image is provided.
-  * `tf_example`: Accepts a 1-D string tensor of shape [None] containing
-    serialized TFExample protos. Image resolutions are expected to be the same
-    if more than 1 image is provided.
-
-and the following output nodes returned by the model.postprocess(..):
-  * `num_detections`: Outputs float32 tensors of the form [batch]
-    that specifies the number of valid boxes per image in the batch.
-  * `detection_boxes`: Outputs float32 tensors of the form
-    [batch, num_boxes, 4] containing detected boxes.
-  * `detection_scores`: Outputs float32 tensors of the form
-    [batch, num_boxes] containing class scores for the detections.
-  * `detection_classes`: Outputs float32 tensors of the form
-    [batch, num_boxes] containing classes for the detections.
-  * `detection_masks`: Outputs float32 tensors of the form
-    [batch, num_boxes, mask_height, mask_width] containing predicted instance
-    masks for each box if it is present in the dictionary of postprocessed
-    tensors returned by the model.
-
-Notes:
- * This tool uses `use_moving_averages` from eval_config to decide which
-   weights to freeze.
-
-Example Usage:
---------------
-python export_inference_graph \
-    --input_type image_tensor \
-    --pipeline_config_path path/to/ssd_inception_v2.config \
-    --trained_checkpoint_prefix path/to/model.ckpt \
-    --output_directory path/to/exported_model_directory
-
-The expected output would be in the directory
-path/to/exported_model_directory (which is created if it does not exist)
-with contents:
- - graph.pbtxt
- - model.ckpt.data-00000-of-00001
- - model.ckpt.info
- - model.ckpt.meta
- - frozen_inference_graph.pb
- + saved_model (a directory)
-
-Config overrides (see the `config_override` flag) are text protobufs
-(also of type pipeline_pb2.TrainEvalPipelineConfig) which are used to override
-certain fields in the provided pipeline_config_path. These are useful for
-making small changes to the inference graph that differ from the training or
-eval config.
-
-Example Usage (in which we change the second stage post-processing score
-threshold to be 0.5):
-
-python export_inference_graph \
-    --input_type image_tensor \
-    --pipeline_config_path path/to/ssd_inception_v2.config \
-    --trained_checkpoint_prefix path/to/model.ckpt \
-    --output_directory path/to/exported_model_directory \
-    --config_override " \
-        model{ \
-          faster_rcnn { \
-            second_stage_post_processing { \
-              batch_non_max_suppression { \
-                score_threshold: 0.5 \
-              } \
-            } \
-          } \
-        }"
-"""
-import tensorflow as tf
-from google.protobuf import text_format
-from object_detection import exporter
-from object_detection.protos import pipeline_pb2
-
-slim = tf.contrib.slim
-flags = tf.app.flags
-
-flags.DEFINE_string('input_type', 'image_tensor', 'Type of input node. Can be '
                    'one of [`image_tensor`, `encoded_image_string_tensor`, '
                    '`tf_example`]')
-flags.DEFINE_string('input_shape', None,
                    'If input_type is `image_tensor`, this can explicitly set '
                    'the shape of this input tensor to a fixed size. The '
                    'dimensions are to be provided as a comma-separated list '
                    'of integers. A value of -1 can be used for unknown '
                    'dimensions. If not specified, for an `image_tensor`, the '
                    'default shape will be partially specified as '
                    '`[None, None, None, 3]`.')
-flags.DEFINE_string('pipeline_config_path', None,
                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
                    'file.')
-flags.DEFINE_string('trained_checkpoint_prefix', None,
                    'Path to trained checkpoint, typically of the form '
                    'path/to/model.ckpt')
-flags.DEFINE_string('output_directory', None, 'Path to write outputs.')
-flags.DEFINE_string('config_override', '',
                    'pipeline_pb2.TrainEvalPipelineConfig '
                    'text proto to override pipeline_config_path.')
-tf.app.flags.mark_flag_as_required('pipeline_config_path')
-tf.app.flags.mark_flag_as_required('trained_checkpoint_prefix')
-tf.app.flags.mark_flag_as_required('output_directory')
-FLAGS = flags.FLAGS
-
-
-def main(_):
-  pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
-  with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
-    text_format.Merge(f.read(), pipeline_config)
-  text_format.Merge(FLAGS.config_override, pipeline_config)
-  if FLAGS.input_shape:
-    input_shape = [
-        int(dim) if dim != '-1' else None
-        for dim in FLAGS.input_shape.split(',')
-    ]
-  else:
-    input_shape = None
-  exporter.export_inference_graph(FLAGS.input_type, pipeline_config,
-                                  FLAGS.trained_checkpoint_prefix,
-                                  FLAGS.output_directory, input_shape)
-
-
-if __name__ == '__main__':
-  tf.app.run()
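As a companion to the exporter below, a sketch (not from the deleted files) of consuming the frozen graph this tool writes; the tensor names come from the module docstring above, while the path and dummy image are stand-ins:

import numpy as np
import tensorflow as tf

graph_def = tf.GraphDef()
with tf.gfile.GFile(
    'path/to/exported_model_directory/frozen_inference_graph.pb', 'rb') as f:
  graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as graph:
  tf.import_graph_def(graph_def, name='')
  with tf.Session(graph=graph) as sess:
    image = np.zeros((1, 300, 300, 3), dtype=np.uint8)  # dummy input batch
    boxes, scores, classes, num = sess.run(
        ['detection_boxes:0', 'detection_scores:0',
         'detection_classes:0', 'num_detections:0'],
        feed_dict={'image_tensor:0': image})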
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/exporter.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/exporter.py
deleted file mode 100644
index 05b09b1f72ae12682c77bbe4e790469ab5864b43..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/exporter.py
+++ /dev/null
@@ -1,465 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Functions to export object detection inference graph."""
-import logging
-import os
-import tempfile
-import tensorflow as tf
-from google.protobuf import text_format
-from tensorflow.core.protobuf import saver_pb2
-from tensorflow.python import pywrap_tensorflow
-from tensorflow.python.client import session
-from tensorflow.python.framework import graph_util
-from tensorflow.python.platform import gfile
-from tensorflow.python.saved_model import signature_constants
-from tensorflow.python.training import saver as saver_lib
-from object_detection.builders import model_builder
-from object_detection.core import standard_fields as fields
-from object_detection.data_decoders import tf_example_decoder
-
-slim = tf.contrib.slim
-
-
-# TODO(derekjchow): Replace with freeze_graph.freeze_graph_with_def_protos when
-# newer version of Tensorflow becomes more common.
-def freeze_graph_with_def_protos(
-    input_graph_def,
-    input_saver_def,
-    input_checkpoint,
-    output_node_names,
-    restore_op_name,
-    filename_tensor_name,
-    clear_devices,
-    initializer_nodes,
-    variable_names_blacklist=''):
-  """Converts all variables in a graph and checkpoint into constants."""
-  del restore_op_name, filename_tensor_name  # Unused by updated loading code.
-
-  # 'input_checkpoint' may be a prefix if we're using Saver V2 format
-  if not saver_lib.checkpoint_exists(input_checkpoint):
-    raise ValueError(
-        'Input checkpoint "' + input_checkpoint + '" does not exist!')
-
-  if not output_node_names:
-    raise ValueError(
-        'You must supply the name of a node to --output_node_names.')
-
-  # Remove all the explicit device specifications for this node. This helps to
-  # make the graph more portable.
-  if clear_devices:
-    for node in input_graph_def.node:
-      node.device = ''
-
-  with tf.Graph().as_default():
-    tf.import_graph_def(input_graph_def, name='')
-    config = tf.ConfigProto(graph_options=tf.GraphOptions())
-    with session.Session(config=config) as sess:
-      if input_saver_def:
-        saver = saver_lib.Saver(saver_def=input_saver_def)
-        saver.restore(sess, input_checkpoint)
-      else:
-        var_list = {}
-        reader = pywrap_tensorflow.NewCheckpointReader(input_checkpoint)
-        var_to_shape_map = reader.get_variable_to_shape_map()
-        for key in var_to_shape_map:
-          try:
-            tensor = sess.graph.get_tensor_by_name(key + ':0')
-          except KeyError:
-            # This tensor doesn't exist in the graph (for example it's
-            # 'global_step' or a similar housekeeping element) so skip it.
-            continue
-          var_list[key] = tensor
-        saver = saver_lib.Saver(var_list=var_list)
-        saver.restore(sess, input_checkpoint)
-      if initializer_nodes:
-        sess.run(initializer_nodes)
-
-      variable_names_blacklist = (variable_names_blacklist.split(',') if
-                                  variable_names_blacklist else None)
-      output_graph_def = graph_util.convert_variables_to_constants(
-          sess,
-          input_graph_def,
-          output_node_names.split(','),
-          variable_names_blacklist=variable_names_blacklist)
-
-  return output_graph_def
-
-
-def replace_variable_values_with_moving_averages(graph,
-                                                 current_checkpoint_file,
-                                                 new_checkpoint_file):
-  """Replaces variable values in the checkpoint with their moving averages.
-
-  If the current checkpoint has shadow variables maintaining moving averages of
-  the variables defined in the graph, this function generates a new checkpoint
-  where the variables contain the values of their moving averages.
-
-  Args:
-    graph: a tf.Graph object.
- current_checkpoint_file: a checkpoint containing both original variables and - their moving averages. - new_checkpoint_file: file path to write a new checkpoint. - """ - with graph.as_default(): - variable_averages = tf.train.ExponentialMovingAverage(0.0) - ema_variables_to_restore = variable_averages.variables_to_restore() - with tf.Session() as sess: - read_saver = tf.train.Saver(ema_variables_to_restore) - read_saver.restore(sess, current_checkpoint_file) - write_saver = tf.train.Saver() - write_saver.save(sess, new_checkpoint_file) - - -def _image_tensor_input_placeholder(input_shape=None): - """Returns input placeholder and a 4-D uint8 image tensor.""" - if input_shape is None: - input_shape = (None, None, None, 3) - input_tensor = tf.placeholder( - dtype=tf.uint8, shape=input_shape, name='image_tensor') - return input_tensor, input_tensor - - -def _tf_example_input_placeholder(): - """Returns input that accepts a batch of strings with tf examples. - - Returns: - a tuple of input placeholder and the output decoded images. - """ - batch_tf_example_placeholder = tf.placeholder( - tf.string, shape=[None], name='tf_example') - def decode(tf_example_string_tensor): - tensor_dict = tf_example_decoder.TfExampleDecoder().decode( - tf_example_string_tensor) - image_tensor = tensor_dict[fields.InputDataFields.image] - return image_tensor - return (batch_tf_example_placeholder, - tf.map_fn(decode, - elems=batch_tf_example_placeholder, - dtype=tf.uint8, - parallel_iterations=32, - back_prop=False)) - - -def _encoded_image_string_tensor_input_placeholder(): - """Returns input that accepts a batch of PNG or JPEG strings. - - Returns: - a tuple of input placeholder and the output decoded images. - """ - batch_image_str_placeholder = tf.placeholder( - dtype=tf.string, - shape=[None], - name='encoded_image_string_tensor') - def decode(encoded_image_string_tensor): - image_tensor = tf.image.decode_image(encoded_image_string_tensor, - channels=3) - image_tensor.set_shape((None, None, 3)) - return image_tensor - return (batch_image_str_placeholder, - tf.map_fn( - decode, - elems=batch_image_str_placeholder, - dtype=tf.uint8, - parallel_iterations=32, - back_prop=False)) - - -input_placeholder_fn_map = { - 'image_tensor': _image_tensor_input_placeholder, - 'encoded_image_string_tensor': - _encoded_image_string_tensor_input_placeholder, - 'tf_example': _tf_example_input_placeholder, -} - - -def _add_output_tensor_nodes(postprocessed_tensors, - output_collection_name='inference_op'): - """Adds output nodes for detection boxes and scores. - - Adds the following nodes for output tensors - - * num_detections: float32 tensor of shape [batch_size]. - * detection_boxes: float32 tensor of shape [batch_size, num_boxes, 4] - containing detected boxes. - * detection_scores: float32 tensor of shape [batch_size, num_boxes] - containing scores for the detected boxes. - * detection_classes: float32 tensor of shape [batch_size, num_boxes] - containing class predictions for the detected boxes. - * detection_keypoints: (Optional) float32 tensor of shape - [batch_size, num_boxes, num_keypoints, 2] containing keypoints for each - detection box. - * detection_masks: (Optional) float32 tensor of shape - [batch_size, num_boxes, mask_height, mask_width] containing masks for each - detection box. 
-
-  Args:
-    postprocessed_tensors: a dictionary containing the following fields
-      'detection_boxes': [batch, max_detections, 4]
-      'detection_scores': [batch, max_detections]
-      'detection_classes': [batch, max_detections]
-      'detection_masks': [batch, max_detections, mask_height, mask_width]
-        (optional).
-      'num_detections': [batch]
-    output_collection_name: Name of collection to add output tensors to.
-
-  Returns:
-    A tensor dict containing the added output tensor nodes.
-  """
-  detection_fields = fields.DetectionResultFields
-  label_id_offset = 1
-  boxes = postprocessed_tensors.get(detection_fields.detection_boxes)
-  scores = postprocessed_tensors.get(detection_fields.detection_scores)
-  classes = postprocessed_tensors.get(
-      detection_fields.detection_classes) + label_id_offset
-  keypoints = postprocessed_tensors.get(detection_fields.detection_keypoints)
-  masks = postprocessed_tensors.get(detection_fields.detection_masks)
-  num_detections = postprocessed_tensors.get(detection_fields.num_detections)
-  outputs = {}
-  outputs[detection_fields.detection_boxes] = tf.identity(
-      boxes, name=detection_fields.detection_boxes)
-  outputs[detection_fields.detection_scores] = tf.identity(
-      scores, name=detection_fields.detection_scores)
-  outputs[detection_fields.detection_classes] = tf.identity(
-      classes, name=detection_fields.detection_classes)
-  outputs[detection_fields.num_detections] = tf.identity(
-      num_detections, name=detection_fields.num_detections)
-  if keypoints is not None:
-    outputs[detection_fields.detection_keypoints] = tf.identity(
-        keypoints, name=detection_fields.detection_keypoints)
-  if masks is not None:
-    outputs[detection_fields.detection_masks] = tf.identity(
-        masks, name=detection_fields.detection_masks)
-  # The loop below adds every output (including masks, when present) to the
-  # collection, so no separate re-add for masks is needed.
-  for output_key in outputs:
-    tf.add_to_collection(output_collection_name, outputs[output_key])
-  return outputs
-
-
-def write_frozen_graph(frozen_graph_path, frozen_graph_def):
-  """Writes frozen graph to disk.
-
-  Args:
-    frozen_graph_path: Path to write inference graph.
-    frozen_graph_def: tf.GraphDef holding frozen graph.
-  """
-  with gfile.GFile(frozen_graph_path, 'wb') as f:
-    f.write(frozen_graph_def.SerializeToString())
-  logging.info('%d ops in the final graph.', len(frozen_graph_def.node))
-
-
-def write_saved_model(saved_model_path,
-                      frozen_graph_def,
-                      inputs,
-                      outputs):
-  """Writes SavedModel to disk.
-
-  The weights are already baked into the given frozen graph, so the resulting
-  SavedModel needs no checkpoint files during inference. Moving-average
-  handling, when requested, happens earlier in _export_inference_graph.
-
-  Args:
-    saved_model_path: Path to write SavedModel.
-    frozen_graph_def: tf.GraphDef holding frozen graph.
-    inputs: The input image tensor to use for detection.
-    outputs: A tensor dictionary containing the outputs of a DetectionModel.
- """ - with tf.Graph().as_default(): - with session.Session() as sess: - - tf.import_graph_def(frozen_graph_def, name='') - - builder = tf.saved_model.builder.SavedModelBuilder(saved_model_path) - - tensor_info_inputs = { - 'inputs': tf.saved_model.utils.build_tensor_info(inputs)} - tensor_info_outputs = {} - for k, v in outputs.items(): - tensor_info_outputs[k] = tf.saved_model.utils.build_tensor_info(v) - - detection_signature = ( - tf.saved_model.signature_def_utils.build_signature_def( - inputs=tensor_info_inputs, - outputs=tensor_info_outputs, - method_name=signature_constants.PREDICT_METHOD_NAME)) - - builder.add_meta_graph_and_variables( - sess, [tf.saved_model.tag_constants.SERVING], - signature_def_map={ - signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: - detection_signature, - }, - ) - builder.save() - - -def write_graph_and_checkpoint(inference_graph_def, - model_path, - input_saver_def, - trained_checkpoint_prefix): - """Writes the graph and the checkpoint into disk.""" - for node in inference_graph_def.node: - node.device = '' - with tf.Graph().as_default(): - tf.import_graph_def(inference_graph_def, name='') - with session.Session() as sess: - saver = saver_lib.Saver(saver_def=input_saver_def, - save_relative_paths=True) - saver.restore(sess, trained_checkpoint_prefix) - saver.save(sess, model_path) - - -def _get_outputs_from_inputs(input_tensors, detection_model, - output_collection_name): - inputs = tf.to_float(input_tensors) - preprocessed_inputs, true_image_shapes = detection_model.preprocess(inputs) - output_tensors = detection_model.predict( - preprocessed_inputs, true_image_shapes) - postprocessed_tensors = detection_model.postprocess( - output_tensors, true_image_shapes) - return _add_output_tensor_nodes(postprocessed_tensors, - output_collection_name) - - -def _build_detection_graph(input_type, detection_model, input_shape, - output_collection_name, graph_hook_fn): - """Build the detection graph.""" - if input_type not in input_placeholder_fn_map: - raise ValueError('Unknown input type: {}'.format(input_type)) - placeholder_args = {} - if input_shape is not None: - if input_type != 'image_tensor': - raise ValueError('Can only specify input shape for `image_tensor` ' - 'inputs.') - placeholder_args['input_shape'] = input_shape - placeholder_tensor, input_tensors = input_placeholder_fn_map[input_type]( - **placeholder_args) - outputs = _get_outputs_from_inputs( - input_tensors=input_tensors, - detection_model=detection_model, - output_collection_name=output_collection_name) - - # Add global step to the graph. 
-  slim.get_or_create_global_step()
-
-  if graph_hook_fn: graph_hook_fn()
-
-  return outputs, placeholder_tensor
-
-
-def _export_inference_graph(input_type,
-                            detection_model,
-                            use_moving_averages,
-                            trained_checkpoint_prefix,
-                            output_directory,
-                            additional_output_tensor_names=None,
-                            input_shape=None,
-                            output_collection_name='inference_op',
-                            graph_hook_fn=None):
-  """Export helper."""
-  tf.gfile.MakeDirs(output_directory)
-  frozen_graph_path = os.path.join(output_directory,
-                                   'frozen_inference_graph.pb')
-  saved_model_path = os.path.join(output_directory, 'saved_model')
-  model_path = os.path.join(output_directory, 'model.ckpt')
-
-  outputs, placeholder_tensor = _build_detection_graph(
-      input_type=input_type,
-      detection_model=detection_model,
-      input_shape=input_shape,
-      output_collection_name=output_collection_name,
-      graph_hook_fn=graph_hook_fn)
-
-  saver_kwargs = {}
-  if use_moving_averages:
-    # This check is to be compatible with both versions of SaverDef.
-    if os.path.isfile(trained_checkpoint_prefix):
-      saver_kwargs['write_version'] = saver_pb2.SaverDef.V1
-      temp_checkpoint_prefix = tempfile.NamedTemporaryFile().name
-    else:
-      temp_checkpoint_prefix = tempfile.mkdtemp()
-    replace_variable_values_with_moving_averages(
-        tf.get_default_graph(), trained_checkpoint_prefix,
-        temp_checkpoint_prefix)
-    checkpoint_to_use = temp_checkpoint_prefix
-  else:
-    checkpoint_to_use = trained_checkpoint_prefix
-
-  saver = tf.train.Saver(**saver_kwargs)
-  input_saver_def = saver.as_saver_def()
-
-  write_graph_and_checkpoint(
-      inference_graph_def=tf.get_default_graph().as_graph_def(),
-      model_path=model_path,
-      input_saver_def=input_saver_def,
-      trained_checkpoint_prefix=checkpoint_to_use)
-
-  if additional_output_tensor_names is not None:
-    # list() keeps this working on Python 3, where dict.keys() is a view.
-    output_node_names = ','.join(
-        list(outputs.keys()) + additional_output_tensor_names)
-  else:
-    output_node_names = ','.join(outputs.keys())
-
-  frozen_graph_def = freeze_graph_with_def_protos(
-      input_graph_def=tf.get_default_graph().as_graph_def(),
-      input_saver_def=input_saver_def,
-      input_checkpoint=checkpoint_to_use,
-      output_node_names=output_node_names,
-      restore_op_name='save/restore_all',
-      filename_tensor_name='save/Const:0',
-      clear_devices=True,
-      initializer_nodes='')
-  write_frozen_graph(frozen_graph_path, frozen_graph_def)
-  write_saved_model(saved_model_path, frozen_graph_def,
-                    placeholder_tensor, outputs)
-
-
-def export_inference_graph(input_type,
-                           pipeline_config,
-                           trained_checkpoint_prefix,
-                           output_directory,
-                           input_shape=None,
-                           output_collection_name='inference_op',
-                           additional_output_tensor_names=None):
-  """Exports inference graph for the model specified in the pipeline config.
-
-  Args:
-    input_type: Type of input for the graph. Can be one of [`image_tensor`,
-      `encoded_image_string_tensor`, `tf_example`].
-    pipeline_config: pipeline_pb2.TrainEvalPipelineConfig proto.
-    trained_checkpoint_prefix: Path to the trained checkpoint file.
-    output_directory: Path to write outputs.
-    input_shape: Sets a fixed shape for an `image_tensor` input. If not
-      specified, will default to [None, None, None, 3].
-    output_collection_name: Name of collection to add output tensors to.
-      If None, does not add output tensors to a collection.
-    additional_output_tensor_names: list of additional output
-      tensors to include in the frozen graph.
- """ - detection_model = model_builder.build(pipeline_config.model, - is_training=False) - _export_inference_graph(input_type, detection_model, - pipeline_config.eval_config.use_moving_averages, - trained_checkpoint_prefix, - output_directory, additional_output_tensor_names, - input_shape, output_collection_name, - graph_hook_fn=None) - pipeline_config.eval_config.use_moving_averages = False - config_text = text_format.MessageToString(pipeline_config) - with tf.gfile.Open( - os.path.join(output_directory, 'pipeline.config'), 'wb') as f: - f.write(config_text) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/exporter_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/exporter_test.py deleted file mode 100644 index cf6e85b200f881ec5f1db6b9d1ce5e5e074904b0..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/exporter_test.py +++ /dev/null @@ -1,864 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.export_inference_graph.""" -import os -import numpy as np -import six -import tensorflow as tf -from google.protobuf import text_format -from object_detection import exporter -from object_detection.builders import model_builder -from object_detection.core import model -from object_detection.protos import pipeline_pb2 - -if six.PY2: - import mock # pylint: disable=g-import-not-at-top -else: - from unittest import mock # pylint: disable=g-import-not-at-top - -slim = tf.contrib.slim - - -class FakeModel(model.DetectionModel): - - def __init__(self, add_detection_keypoints=False, add_detection_masks=False): - self._add_detection_keypoints = add_detection_keypoints - self._add_detection_masks = add_detection_masks - - def preprocess(self, inputs): - true_image_shapes = [] # Doesn't matter for the fake model. 
- return tf.identity(inputs), true_image_shapes - - def predict(self, preprocessed_inputs, true_image_shapes): - return {'image': tf.layers.conv2d(preprocessed_inputs, 3, 1)} - - def postprocess(self, prediction_dict, true_image_shapes): - with tf.control_dependencies(prediction_dict.values()): - postprocessed_tensors = { - 'detection_boxes': tf.constant([[[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.8, 0.8]], - [[0.5, 0.5, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0]]], tf.float32), - 'detection_scores': tf.constant([[0.7, 0.6], - [0.9, 0.0]], tf.float32), - 'detection_classes': tf.constant([[0, 1], - [1, 0]], tf.float32), - 'num_detections': tf.constant([2, 1], tf.float32) - } - if self._add_detection_keypoints: - postprocessed_tensors['detection_keypoints'] = tf.constant( - np.arange(48).reshape([2, 2, 6, 2]), tf.float32) - if self._add_detection_masks: - postprocessed_tensors['detection_masks'] = tf.constant( - np.arange(64).reshape([2, 2, 4, 4]), tf.float32) - return postprocessed_tensors - - def restore_map(self, checkpoint_path, fine_tune_checkpoint_type): - pass - - def loss(self, prediction_dict, true_image_shapes): - pass - - -class ExportInferenceGraphTest(tf.test.TestCase): - - def _save_checkpoint_from_mock_model(self, checkpoint_path, - use_moving_averages): - g = tf.Graph() - with g.as_default(): - mock_model = FakeModel() - preprocessed_inputs, true_image_shapes = mock_model.preprocess( - tf.placeholder(tf.float32, shape=[None, None, None, 3])) - predictions = mock_model.predict(preprocessed_inputs, true_image_shapes) - mock_model.postprocess(predictions, true_image_shapes) - if use_moving_averages: - tf.train.ExponentialMovingAverage(0.0).apply() - slim.get_or_create_global_step() - saver = tf.train.Saver() - init = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init) - saver.save(sess, checkpoint_path) - - def _load_inference_graph(self, inference_graph_path): - od_graph = tf.Graph() - with od_graph.as_default(): - od_graph_def = tf.GraphDef() - with tf.gfile.GFile(inference_graph_path) as fid: - serialized_graph = fid.read() - od_graph_def.ParseFromString(serialized_graph) - tf.import_graph_def(od_graph_def, name='') - return od_graph - - def _create_tf_example(self, image_array): - with self.test_session(): - encoded_image = tf.image.encode_jpeg(tf.constant(image_array)).eval() - def _bytes_feature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': _bytes_feature(encoded_image), - 'image/format': _bytes_feature('jpg'), - 'image/source_id': _bytes_feature('image_id') - })).SerializeToString() - return example - - def test_export_graph_with_image_tensor_input(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=False) - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel() - output_directory = os.path.join(tmp_dir, 'output') - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='image_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - self.assertTrue(os.path.exists(os.path.join( - output_directory, 'saved_model', 'saved_model.pb'))) - - def 
test_export_graph_with_fixed_size_image_tensor_input(self): - input_shape = [1, 320, 320, 3] - - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model( - trained_checkpoint_prefix, use_moving_averages=False) - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel() - output_directory = os.path.join(tmp_dir, 'output') - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='image_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory, - input_shape=input_shape) - saved_model_path = os.path.join(output_directory, 'saved_model') - self.assertTrue( - os.path.exists(os.path.join(saved_model_path, 'saved_model.pb'))) - - with tf.Graph().as_default() as od_graph: - with self.test_session(graph=od_graph) as sess: - meta_graph = tf.saved_model.loader.load( - sess, [tf.saved_model.tag_constants.SERVING], saved_model_path) - signature = meta_graph.signature_def['serving_default'] - input_tensor_name = signature.inputs['inputs'].name - image_tensor = od_graph.get_tensor_by_name(input_tensor_name) - self.assertSequenceEqual(image_tensor.get_shape().as_list(), - input_shape) - - def test_export_graph_with_tf_example_input(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=False) - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel() - output_directory = os.path.join(tmp_dir, 'output') - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='tf_example', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - self.assertTrue(os.path.exists(os.path.join( - output_directory, 'saved_model', 'saved_model.pb'))) - - def test_export_graph_with_encoded_image_string_input(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=False) - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel() - output_directory = os.path.join(tmp_dir, 'output') - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='encoded_image_string_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - self.assertTrue(os.path.exists(os.path.join( - output_directory, 'saved_model', 'saved_model.pb'))) - - def _get_variables_in_checkpoint(self, checkpoint_file): - return set([ - var_name - for var_name, _ in tf.train.list_variables(checkpoint_file)]) - - def test_replace_variable_values_with_moving_averages(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - new_checkpoint_prefix = os.path.join(tmp_dir, 'new.ckpt') - 
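-    # The checkpoint saved below contains both the raw variables and their
-    # ExponentialMovingAverage copies; the helper under test should produce a
-    # new checkpoint in which the averaged values replace the raw ones.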
self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=True) - graph = tf.Graph() - with graph.as_default(): - fake_model = FakeModel() - preprocessed_inputs, true_image_shapes = fake_model.preprocess( - tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3])) - predictions = fake_model.predict(preprocessed_inputs, true_image_shapes) - fake_model.postprocess(predictions, true_image_shapes) - exporter.replace_variable_values_with_moving_averages( - graph, trained_checkpoint_prefix, new_checkpoint_prefix) - - expected_variables = set(['conv2d/bias', 'conv2d/kernel']) - variables_in_old_ckpt = self._get_variables_in_checkpoint( - trained_checkpoint_prefix) - self.assertIn('conv2d/bias/ExponentialMovingAverage', - variables_in_old_ckpt) - self.assertIn('conv2d/kernel/ExponentialMovingAverage', - variables_in_old_ckpt) - variables_in_new_ckpt = self._get_variables_in_checkpoint( - new_checkpoint_prefix) - self.assertTrue(expected_variables.issubset(variables_in_new_ckpt)) - self.assertNotIn('conv2d/bias/ExponentialMovingAverage', - variables_in_new_ckpt) - self.assertNotIn('conv2d/kernel/ExponentialMovingAverage', - variables_in_new_ckpt) - - def test_export_graph_with_moving_averages(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=True) - output_directory = os.path.join(tmp_dir, 'output') - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel() - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = True - exporter.export_inference_graph( - input_type='image_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - self.assertTrue(os.path.exists(os.path.join( - output_directory, 'saved_model', 'saved_model.pb'))) - expected_variables = set(['conv2d/bias', 'conv2d/kernel', 'global_step']) - actual_variables = set( - [var_name for var_name, _ in tf.train.list_variables(output_directory)]) - self.assertTrue(expected_variables.issubset(actual_variables)) - - def test_export_model_with_all_output_nodes(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=True) - output_directory = os.path.join(tmp_dir, 'output') - inference_graph_path = os.path.join(output_directory, - 'frozen_inference_graph.pb') - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel( - add_detection_keypoints=True, add_detection_masks=True) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - exporter.export_inference_graph( - input_type='image_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - inference_graph = self._load_inference_graph(inference_graph_path) - with self.test_session(graph=inference_graph): - inference_graph.get_tensor_by_name('image_tensor:0') - inference_graph.get_tensor_by_name('detection_boxes:0') - inference_graph.get_tensor_by_name('detection_scores:0') - inference_graph.get_tensor_by_name('detection_classes:0') - inference_graph.get_tensor_by_name('detection_keypoints:0') - 
inference_graph.get_tensor_by_name('detection_masks:0') - inference_graph.get_tensor_by_name('num_detections:0') - - def test_export_model_with_detection_only_nodes(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=True) - output_directory = os.path.join(tmp_dir, 'output') - inference_graph_path = os.path.join(output_directory, - 'frozen_inference_graph.pb') - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel(add_detection_masks=False) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - exporter.export_inference_graph( - input_type='image_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - inference_graph = self._load_inference_graph(inference_graph_path) - with self.test_session(graph=inference_graph): - inference_graph.get_tensor_by_name('image_tensor:0') - inference_graph.get_tensor_by_name('detection_boxes:0') - inference_graph.get_tensor_by_name('detection_scores:0') - inference_graph.get_tensor_by_name('detection_classes:0') - inference_graph.get_tensor_by_name('num_detections:0') - with self.assertRaises(KeyError): - inference_graph.get_tensor_by_name('detection_keypoints:0') - inference_graph.get_tensor_by_name('detection_masks:0') - - def test_export_and_run_inference_with_image_tensor(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=True) - output_directory = os.path.join(tmp_dir, 'output') - inference_graph_path = os.path.join(output_directory, - 'frozen_inference_graph.pb') - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel( - add_detection_keypoints=True, add_detection_masks=True) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='image_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - - inference_graph = self._load_inference_graph(inference_graph_path) - with self.test_session(graph=inference_graph) as sess: - image_tensor = inference_graph.get_tensor_by_name('image_tensor:0') - boxes = inference_graph.get_tensor_by_name('detection_boxes:0') - scores = inference_graph.get_tensor_by_name('detection_scores:0') - classes = inference_graph.get_tensor_by_name('detection_classes:0') - keypoints = inference_graph.get_tensor_by_name('detection_keypoints:0') - masks = inference_graph.get_tensor_by_name('detection_masks:0') - num_detections = inference_graph.get_tensor_by_name('num_detections:0') - (boxes_np, scores_np, classes_np, keypoints_np, masks_np, - num_detections_np) = sess.run( - [boxes, scores, classes, keypoints, masks, num_detections], - feed_dict={image_tensor: np.ones((2, 4, 4, 3)).astype(np.uint8)}) - self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.8, 0.8]], - [[0.5, 0.5, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0]]]) - self.assertAllClose(scores_np, [[0.7, 0.6], - [0.9, 0.0]]) - self.assertAllClose(classes_np, [[1, 2], - [2, 1]]) - self.assertAllClose(keypoints_np, np.arange(48).reshape([2, 2, 6, 2])) - self.assertAllClose(masks_np, 
np.arange(64).reshape([2, 2, 4, 4])) - self.assertAllClose(num_detections_np, [2, 1]) - - def _create_encoded_image_string(self, image_array_np, encoding_format): - od_graph = tf.Graph() - with od_graph.as_default(): - if encoding_format == 'jpg': - encoded_string = tf.image.encode_jpeg(image_array_np) - elif encoding_format == 'png': - encoded_string = tf.image.encode_png(image_array_np) - else: - raise ValueError('Supports only the following formats: `jpg`, `png`') - with self.test_session(graph=od_graph): - return encoded_string.eval() - - def test_export_and_run_inference_with_encoded_image_string_tensor(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=True) - output_directory = os.path.join(tmp_dir, 'output') - inference_graph_path = os.path.join(output_directory, - 'frozen_inference_graph.pb') - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel( - add_detection_keypoints=True, add_detection_masks=True) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='encoded_image_string_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - - inference_graph = self._load_inference_graph(inference_graph_path) - jpg_image_str = self._create_encoded_image_string( - np.ones((4, 4, 3)).astype(np.uint8), 'jpg') - png_image_str = self._create_encoded_image_string( - np.ones((4, 4, 3)).astype(np.uint8), 'png') - with self.test_session(graph=inference_graph) as sess: - image_str_tensor = inference_graph.get_tensor_by_name( - 'encoded_image_string_tensor:0') - boxes = inference_graph.get_tensor_by_name('detection_boxes:0') - scores = inference_graph.get_tensor_by_name('detection_scores:0') - classes = inference_graph.get_tensor_by_name('detection_classes:0') - keypoints = inference_graph.get_tensor_by_name('detection_keypoints:0') - masks = inference_graph.get_tensor_by_name('detection_masks:0') - num_detections = inference_graph.get_tensor_by_name('num_detections:0') - for image_str in [jpg_image_str, png_image_str]: - image_str_batch_np = np.hstack([image_str]* 2) - (boxes_np, scores_np, classes_np, keypoints_np, masks_np, - num_detections_np) = sess.run( - [boxes, scores, classes, keypoints, masks, num_detections], - feed_dict={image_str_tensor: image_str_batch_np}) - self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.8, 0.8]], - [[0.5, 0.5, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0]]]) - self.assertAllClose(scores_np, [[0.7, 0.6], - [0.9, 0.0]]) - self.assertAllClose(classes_np, [[1, 2], - [2, 1]]) - self.assertAllClose(keypoints_np, np.arange(48).reshape([2, 2, 6, 2])) - self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) - self.assertAllClose(num_detections_np, [2, 1]) - - def test_raise_runtime_error_on_images_with_different_sizes(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=True) - output_directory = os.path.join(tmp_dir, 'output') - inference_graph_path = os.path.join(output_directory, - 'frozen_inference_graph.pb') - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - 
mock_builder.return_value = FakeModel( - add_detection_keypoints=True, add_detection_masks=True) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='encoded_image_string_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - - inference_graph = self._load_inference_graph(inference_graph_path) - large_image = self._create_encoded_image_string( - np.ones((4, 4, 3)).astype(np.uint8), 'jpg') - small_image = self._create_encoded_image_string( - np.ones((2, 2, 3)).astype(np.uint8), 'jpg') - - image_str_batch_np = np.hstack([large_image, small_image]) - with self.test_session(graph=inference_graph) as sess: - image_str_tensor = inference_graph.get_tensor_by_name( - 'encoded_image_string_tensor:0') - boxes = inference_graph.get_tensor_by_name('detection_boxes:0') - scores = inference_graph.get_tensor_by_name('detection_scores:0') - classes = inference_graph.get_tensor_by_name('detection_classes:0') - keypoints = inference_graph.get_tensor_by_name('detection_keypoints:0') - masks = inference_graph.get_tensor_by_name('detection_masks:0') - num_detections = inference_graph.get_tensor_by_name('num_detections:0') - with self.assertRaisesRegexp(tf.errors.InvalidArgumentError, - 'TensorArray.*shape'): - sess.run( - [boxes, scores, classes, keypoints, masks, num_detections], - feed_dict={image_str_tensor: image_str_batch_np}) - - def test_export_and_run_inference_with_tf_example(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=True) - output_directory = os.path.join(tmp_dir, 'output') - inference_graph_path = os.path.join(output_directory, - 'frozen_inference_graph.pb') - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel( - add_detection_keypoints=True, add_detection_masks=True) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='tf_example', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - - inference_graph = self._load_inference_graph(inference_graph_path) - tf_example_np = np.expand_dims(self._create_tf_example( - np.ones((4, 4, 3)).astype(np.uint8)), axis=0) - with self.test_session(graph=inference_graph) as sess: - tf_example = inference_graph.get_tensor_by_name('tf_example:0') - boxes = inference_graph.get_tensor_by_name('detection_boxes:0') - scores = inference_graph.get_tensor_by_name('detection_scores:0') - classes = inference_graph.get_tensor_by_name('detection_classes:0') - keypoints = inference_graph.get_tensor_by_name('detection_keypoints:0') - masks = inference_graph.get_tensor_by_name('detection_masks:0') - num_detections = inference_graph.get_tensor_by_name('num_detections:0') - (boxes_np, scores_np, classes_np, keypoints_np, masks_np, - num_detections_np) = sess.run( - [boxes, scores, classes, keypoints, masks, num_detections], - feed_dict={tf_example: tf_example_np}) - self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.8, 0.8]], - [[0.5, 0.5, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0]]]) - self.assertAllClose(scores_np, [[0.7, 0.6], - [0.9, 0.0]]) - self.assertAllClose(classes_np, 
[[1, 2], - [2, 1]]) - self.assertAllClose(keypoints_np, np.arange(48).reshape([2, 2, 6, 2])) - self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) - self.assertAllClose(num_detections_np, [2, 1]) - - def test_write_frozen_graph(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=True) - output_directory = os.path.join(tmp_dir, 'output') - inference_graph_path = os.path.join(output_directory, - 'frozen_inference_graph.pb') - tf.gfile.MakeDirs(output_directory) - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel( - add_detection_keypoints=True, add_detection_masks=True) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - detection_model = model_builder.build(pipeline_config.model, - is_training=False) - outputs, _ = exporter._build_detection_graph( - input_type='tf_example', - detection_model=detection_model, - input_shape=None, - output_collection_name='inference_op', - graph_hook_fn=None) - output_node_names = ','.join(outputs.keys()) - saver = tf.train.Saver() - input_saver_def = saver.as_saver_def() - frozen_graph_def = exporter.freeze_graph_with_def_protos( - input_graph_def=tf.get_default_graph().as_graph_def(), - input_saver_def=input_saver_def, - input_checkpoint=trained_checkpoint_prefix, - output_node_names=output_node_names, - restore_op_name='save/restore_all', - filename_tensor_name='save/Const:0', - clear_devices=True, - initializer_nodes='') - exporter.write_frozen_graph(inference_graph_path, frozen_graph_def) - - inference_graph = self._load_inference_graph(inference_graph_path) - tf_example_np = np.expand_dims(self._create_tf_example( - np.ones((4, 4, 3)).astype(np.uint8)), axis=0) - with self.test_session(graph=inference_graph) as sess: - tf_example = inference_graph.get_tensor_by_name('tf_example:0') - boxes = inference_graph.get_tensor_by_name('detection_boxes:0') - scores = inference_graph.get_tensor_by_name('detection_scores:0') - classes = inference_graph.get_tensor_by_name('detection_classes:0') - keypoints = inference_graph.get_tensor_by_name('detection_keypoints:0') - masks = inference_graph.get_tensor_by_name('detection_masks:0') - num_detections = inference_graph.get_tensor_by_name('num_detections:0') - (boxes_np, scores_np, classes_np, keypoints_np, masks_np, - num_detections_np) = sess.run( - [boxes, scores, classes, keypoints, masks, num_detections], - feed_dict={tf_example: tf_example_np}) - self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.8, 0.8]], - [[0.5, 0.5, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0]]]) - self.assertAllClose(scores_np, [[0.7, 0.6], - [0.9, 0.0]]) - self.assertAllClose(classes_np, [[1, 2], - [2, 1]]) - self.assertAllClose(keypoints_np, np.arange(48).reshape([2, 2, 6, 2])) - self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) - self.assertAllClose(num_detections_np, [2, 1]) - - def test_export_graph_saves_pipeline_file(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=True) - output_directory = os.path.join(tmp_dir, 'output') - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel() - pipeline_config = 
pipeline_pb2.TrainEvalPipelineConfig() - exporter.export_inference_graph( - input_type='image_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - expected_pipeline_path = os.path.join( - output_directory, 'pipeline.config') - self.assertTrue(os.path.exists(expected_pipeline_path)) - - written_pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - with tf.gfile.GFile(expected_pipeline_path, 'r') as f: - proto_str = f.read() - text_format.Merge(proto_str, written_pipeline_config) - self.assertProtoEquals(pipeline_config, written_pipeline_config) - - def test_export_saved_model_and_run_inference(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=False) - output_directory = os.path.join(tmp_dir, 'output') - saved_model_path = os.path.join(output_directory, 'saved_model') - - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel( - add_detection_keypoints=True, add_detection_masks=True) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='tf_example', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - - tf_example_np = np.hstack([self._create_tf_example( - np.ones((4, 4, 3)).astype(np.uint8))] * 2) - with tf.Graph().as_default() as od_graph: - with self.test_session(graph=od_graph) as sess: - meta_graph = tf.saved_model.loader.load( - sess, [tf.saved_model.tag_constants.SERVING], saved_model_path) - - signature = meta_graph.signature_def['serving_default'] - input_tensor_name = signature.inputs['inputs'].name - tf_example = od_graph.get_tensor_by_name(input_tensor_name) - - boxes = od_graph.get_tensor_by_name( - signature.outputs['detection_boxes'].name) - scores = od_graph.get_tensor_by_name( - signature.outputs['detection_scores'].name) - classes = od_graph.get_tensor_by_name( - signature.outputs['detection_classes'].name) - keypoints = od_graph.get_tensor_by_name( - signature.outputs['detection_keypoints'].name) - masks = od_graph.get_tensor_by_name( - signature.outputs['detection_masks'].name) - num_detections = od_graph.get_tensor_by_name( - signature.outputs['num_detections'].name) - - (boxes_np, scores_np, classes_np, keypoints_np, masks_np, - num_detections_np) = sess.run( - [boxes, scores, classes, keypoints, masks, num_detections], - feed_dict={tf_example: tf_example_np}) - self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.8, 0.8]], - [[0.5, 0.5, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0]]]) - self.assertAllClose(scores_np, [[0.7, 0.6], - [0.9, 0.0]]) - self.assertAllClose(classes_np, [[1, 2], - [2, 1]]) - self.assertAllClose(keypoints_np, np.arange(48).reshape([2, 2, 6, 2])) - self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) - self.assertAllClose(num_detections_np, [2, 1]) - - def test_write_saved_model(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=False) - output_directory = os.path.join(tmp_dir, 'output') - saved_model_path = os.path.join(output_directory, 'saved_model') - tf.gfile.MakeDirs(output_directory) - with 
mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel( - add_detection_keypoints=True, add_detection_masks=True) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - detection_model = model_builder.build(pipeline_config.model, - is_training=False) - outputs, placeholder_tensor = exporter._build_detection_graph( - input_type='tf_example', - detection_model=detection_model, - input_shape=None, - output_collection_name='inference_op', - graph_hook_fn=None) - output_node_names = ','.join(outputs.keys()) - saver = tf.train.Saver() - input_saver_def = saver.as_saver_def() - frozen_graph_def = exporter.freeze_graph_with_def_protos( - input_graph_def=tf.get_default_graph().as_graph_def(), - input_saver_def=input_saver_def, - input_checkpoint=trained_checkpoint_prefix, - output_node_names=output_node_names, - restore_op_name='save/restore_all', - filename_tensor_name='save/Const:0', - clear_devices=True, - initializer_nodes='') - exporter.write_saved_model( - saved_model_path=saved_model_path, - frozen_graph_def=frozen_graph_def, - inputs=placeholder_tensor, - outputs=outputs) - - tf_example_np = np.hstack([self._create_tf_example( - np.ones((4, 4, 3)).astype(np.uint8))] * 2) - with tf.Graph().as_default() as od_graph: - with self.test_session(graph=od_graph) as sess: - meta_graph = tf.saved_model.loader.load( - sess, [tf.saved_model.tag_constants.SERVING], saved_model_path) - - signature = meta_graph.signature_def['serving_default'] - input_tensor_name = signature.inputs['inputs'].name - tf_example = od_graph.get_tensor_by_name(input_tensor_name) - - boxes = od_graph.get_tensor_by_name( - signature.outputs['detection_boxes'].name) - scores = od_graph.get_tensor_by_name( - signature.outputs['detection_scores'].name) - classes = od_graph.get_tensor_by_name( - signature.outputs['detection_classes'].name) - keypoints = od_graph.get_tensor_by_name( - signature.outputs['detection_keypoints'].name) - masks = od_graph.get_tensor_by_name( - signature.outputs['detection_masks'].name) - num_detections = od_graph.get_tensor_by_name( - signature.outputs['num_detections'].name) - - (boxes_np, scores_np, classes_np, keypoints_np, masks_np, - num_detections_np) = sess.run( - [boxes, scores, classes, keypoints, masks, num_detections], - feed_dict={tf_example: tf_example_np}) - self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.8, 0.8]], - [[0.5, 0.5, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0]]]) - self.assertAllClose(scores_np, [[0.7, 0.6], - [0.9, 0.0]]) - self.assertAllClose(classes_np, [[1, 2], - [2, 1]]) - self.assertAllClose(keypoints_np, np.arange(48).reshape([2, 2, 6, 2])) - self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) - self.assertAllClose(num_detections_np, [2, 1]) - - def test_export_checkpoint_and_run_inference(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=False) - output_directory = os.path.join(tmp_dir, 'output') - model_path = os.path.join(output_directory, 'model.ckpt') - meta_graph_path = model_path + '.meta' - - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel( - add_detection_keypoints=True, add_detection_masks=True) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - 
pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='tf_example', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - - tf_example_np = np.hstack([self._create_tf_example( - np.ones((4, 4, 3)).astype(np.uint8))] * 2) - with tf.Graph().as_default() as od_graph: - with self.test_session(graph=od_graph) as sess: - new_saver = tf.train.import_meta_graph(meta_graph_path) - new_saver.restore(sess, model_path) - - tf_example = od_graph.get_tensor_by_name('tf_example:0') - boxes = od_graph.get_tensor_by_name('detection_boxes:0') - scores = od_graph.get_tensor_by_name('detection_scores:0') - classes = od_graph.get_tensor_by_name('detection_classes:0') - keypoints = od_graph.get_tensor_by_name('detection_keypoints:0') - masks = od_graph.get_tensor_by_name('detection_masks:0') - num_detections = od_graph.get_tensor_by_name('num_detections:0') - (boxes_np, scores_np, classes_np, keypoints_np, masks_np, - num_detections_np) = sess.run( - [boxes, scores, classes, keypoints, masks, num_detections], - feed_dict={tf_example: tf_example_np}) - self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.8, 0.8]], - [[0.5, 0.5, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0]]]) - self.assertAllClose(scores_np, [[0.7, 0.6], - [0.9, 0.0]]) - self.assertAllClose(classes_np, [[1, 2], - [2, 1]]) - self.assertAllClose(keypoints_np, np.arange(48).reshape([2, 2, 6, 2])) - self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) - self.assertAllClose(num_detections_np, [2, 1]) - - def test_write_graph_and_checkpoint(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=False) - output_directory = os.path.join(tmp_dir, 'output') - model_path = os.path.join(output_directory, 'model.ckpt') - meta_graph_path = model_path + '.meta' - tf.gfile.MakeDirs(output_directory) - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel( - add_detection_keypoints=True, add_detection_masks=True) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - detection_model = model_builder.build(pipeline_config.model, - is_training=False) - exporter._build_detection_graph( - input_type='tf_example', - detection_model=detection_model, - input_shape=None, - output_collection_name='inference_op', - graph_hook_fn=None) - saver = tf.train.Saver() - input_saver_def = saver.as_saver_def() - exporter.write_graph_and_checkpoint( - inference_graph_def=tf.get_default_graph().as_graph_def(), - model_path=model_path, - input_saver_def=input_saver_def, - trained_checkpoint_prefix=trained_checkpoint_prefix) - - tf_example_np = np.hstack([self._create_tf_example( - np.ones((4, 4, 3)).astype(np.uint8))] * 2) - with tf.Graph().as_default() as od_graph: - with self.test_session(graph=od_graph) as sess: - new_saver = tf.train.import_meta_graph(meta_graph_path) - new_saver.restore(sess, model_path) - - tf_example = od_graph.get_tensor_by_name('tf_example:0') - boxes = od_graph.get_tensor_by_name('detection_boxes:0') - scores = od_graph.get_tensor_by_name('detection_scores:0') - classes = od_graph.get_tensor_by_name('detection_classes:0') - keypoints = od_graph.get_tensor_by_name('detection_keypoints:0') - masks = od_graph.get_tensor_by_name('detection_masks:0') - 
num_detections = od_graph.get_tensor_by_name('num_detections:0') - (boxes_np, scores_np, classes_np, keypoints_np, masks_np, - num_detections_np) = sess.run( - [boxes, scores, classes, keypoints, masks, num_detections], - feed_dict={tf_example: tf_example_np}) - self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.8, 0.8]], - [[0.5, 0.5, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0]]]) - self.assertAllClose(scores_np, [[0.7, 0.6], - [0.9, 0.0]]) - self.assertAllClose(classes_np, [[1, 2], - [2, 1]]) - self.assertAllClose(keypoints_np, np.arange(48).reshape([2, 2, 6, 2])) - self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) - self.assertAllClose(num_detections_np, [2, 1]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/configuring_jobs.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/configuring_jobs.md deleted file mode 100644 index 9b042625b21591c79f0de98dea275491218f1877..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/configuring_jobs.md +++ /dev/null @@ -1,162 +0,0 @@ -# Configuring the Object Detection Training Pipeline - -## Overview - -The Tensorflow Object Detection API uses protobuf files to configure the -training and evaluation process. The schema for the training pipeline can be -found in object_detection/protos/pipeline.proto. At a high level, the config -file is split into 5 parts: - -1. The `model` configuration. This defines what type of model will be trained -(ie. meta-architecture, feature extractor). -2. The `train_config`, which decides what parameters should be used to train -model parameters (ie. SGD parameters, input preprocessing and feature extractor -initialization values). -3. The `eval_config`, which determines what set of metrics will be reported for -evaluation (currently we only support the PASCAL VOC metrics). -4. The `train_input_config`, which defines what dataset the model should be -trained on. -5. The `eval_input_config`, which defines what dataset the model will be -evaluated on. Typically this should be different than the training input -dataset. - -A skeleton configuration file is shown below: - -``` -model { -(... Add model config here...) -} - -train_config : { -(... Add train_config here...) -} - -train_input_reader: { -(... Add train_input configuration here...) -} - -eval_config: { -} - -eval_input_reader: { -(... Add eval_input configuration here...) -} -``` - -## Picking Model Parameters - -There are a large number of model parameters to configure. The best settings -will depend on your given application. Faster R-CNN models are better suited to -cases where high accuracy is desired and latency is of lower priority. -Conversely, if processing time is the most important factor, SSD models are -recommended. Read [our paper](https://arxiv.org/abs/1611.10012) for a more -detailed discussion on the speed vs accuracy tradeoff. - -To help new users get started, sample model configurations have been provided -in the object_detection/samples/configs folder. The contents of these -configuration files can be pasted into `model` field of the skeleton -configuration. Users should note that the `num_classes` field should be changed -to a value suited for the dataset the user is training on. - -## Defining Inputs - -The Tensorflow Object Detection API accepts inputs in the TFRecord file format. -Users must specify the locations of both the training and evaluation files. 
-Additionally, users should also specify a label map, which defines the mapping
-between class ids and class names. The label map should be identical between
-training and evaluation datasets.
-
-An example input configuration looks as follows:
-
-```
-tf_record_input_reader {
-  input_path: "/usr/home/username/data/train.record"
-}
-label_map_path: "/usr/home/username/data/label_map.pbtxt"
-```
-
-Users should substitute the `input_path` and `label_map_path` arguments and
-insert the input configuration into the `train_input_reader` and
-`eval_input_reader` fields in the skeleton configuration. Note that the paths
-can also point to Google Cloud Storage buckets (ie.
-"gs://project_bucket/train.record") for use on Google Cloud.
-
-## Configuring the Trainer
-
-The `train_config` defines three parts of the training process:
-
-1. Model parameter initialization.
-2. Input preprocessing.
-3. SGD parameters.
-
-A sample `train_config` is below:
-
-```
-batch_size: 1
-optimizer {
-  momentum_optimizer: {
-    learning_rate: {
-      manual_step_learning_rate {
-        initial_learning_rate: 0.0002
-        schedule {
-          step: 0
-          learning_rate: .0002
-        }
-        schedule {
-          step: 900000
-          learning_rate: .00002
-        }
-        schedule {
-          step: 1200000
-          learning_rate: .000002
-        }
-      }
-    }
-    momentum_optimizer_value: 0.9
-  }
-  use_moving_average: false
-}
-fine_tune_checkpoint: "/usr/home/username/tmp/model.ckpt-#####"
-from_detection_checkpoint: true
-gradient_clipping_by_norm: 10.0
-data_augmentation_options {
-  random_horizontal_flip {
-  }
-}
-```
-
-### Model Parameter Initialization
-
-While optional, it is highly recommended that users utilize other object
-detection checkpoints. Training an object detector from scratch can take days.
-To speed up the training process, it is recommended that users re-use the
-feature extractor parameters from a pre-existing object classification or
-detection checkpoint. `train_config` provides two fields to specify
-pre-existing checkpoints: `fine_tune_checkpoint` and
-`from_detection_checkpoint`. `fine_tune_checkpoint` should provide a path to
-the pre-existing checkpoint
-(ie: "/usr/home/username/checkpoint/model.ckpt-#####").
-`from_detection_checkpoint` is a boolean value. If false, the checkpoint is
-assumed to be from an object classification model. Note that starting from a
-detection checkpoint will usually result in a faster training job than
-starting from a classification checkpoint.
-
-The list of provided checkpoints can be found [here](detection_model_zoo.md).
-
-### Input Preprocessing
-
-The `data_augmentation_options` in `train_config` can be used to specify
-how training data can be modified. This field is optional.
-
-### SGD Parameters
-
-The remaining parameters in `train_config` are hyperparameters for gradient
-descent. Please note that the optimal learning rates provided in these
-configuration files may depend on the specifics of the training setup (e.g.
-number of workers, GPU type).
-
-## Configuring the Evaluator
-
-Currently evaluation is fixed to generating metrics as defined by the PASCAL VOC
-challenge. The parameters for `eval_config` are set to reasonable defaults and
-typically do not need to be configured.
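-
-Finally, the pipeline config can also be read and modified programmatically,
-which is convenient for scripted experiments. A minimal sketch (paths are
-placeholders, and a Faster R-CNN model config is assumed):
-
-```
-import tensorflow as tf
-from google.protobuf import text_format
-from object_detection.protos import pipeline_pb2
-
-# Parse the text-format pipeline config from disk.
-pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
-with tf.gfile.GFile('/path/to/pipeline.config', 'r') as f:
-  text_format.Merge(f.read(), pipeline_config)
-
-# Override fields, e.g. the number of classes for a custom dataset
-# (`faster_rcnn` here assumes a Faster R-CNN model config).
-pipeline_config.model.faster_rcnn.num_classes = 81
-
-# Write the modified config back out in text format.
-with tf.gfile.Open('/path/to/pipeline_modified.config', 'wb') as f:
-  f.write(text_format.MessageToString(pipeline_config))
-```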
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/defining_your_own_model.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/defining_your_own_model.md deleted file mode 100644 index 865f6af169bfe35a41765d91d36bbcfbac0a937a..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/defining_your_own_model.md +++ /dev/null @@ -1,137 +0,0 @@ -# So you want to create a new model! - -In this section, we discuss some of the abstractions that we use -for defining detection models. If you would like to define a new model -architecture for detection and use it in the Tensorflow Detection API, -then this section should also serve as a high level guide to the files that you -will need to edit to get your new model working. - -## DetectionModels (`object_detection/core/model.py`) - -In order to be trained, evaluated, and exported for serving using our -provided binaries, all models under the Tensorflow Object Detection API must -implement the `DetectionModel` interface (see the full definition in `object_detection/core/model.py`). In particular, -each of these models are responsible for implementing 5 functions: - -* `preprocess`: Run any preprocessing (e.g., scaling/shifting/reshaping) of - input values that is necessary prior to running the detector on an input - image. -* `predict`: Produce “raw” prediction tensors that can be passed to loss or - postprocess functions. -* `postprocess`: Convert predicted output tensors to final detections. -* `loss`: Compute scalar loss tensors with respect to provided groundtruth. -* `restore`: Load a checkpoint into the Tensorflow graph. - -Given a `DetectionModel` at training time, we pass each image batch through -the following sequence of functions to compute a loss which can be optimized via -SGD: - -``` -inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor) -``` - -And at eval time, we pass each image batch through the following sequence of -functions to produce a set of detections: - -``` -inputs (images tensor) -> preprocess -> predict -> postprocess -> - outputs (boxes tensor, scores tensor, classes tensor, num_detections tensor) -``` - -Some conventions to be aware of: - -* `DetectionModel`s should make no assumptions about the input size or aspect - ratio --- they are responsible for doing any resize/reshaping necessary - (see docstring for the `preprocess` function). -* Output classes are always integers in the range `[0, num_classes)`. - Any mapping of these integers to semantic labels is to be handled outside - of this class. We never explicitly emit a “background class” --- thus 0 is - the first non-background class and any logic of predicting and removing - implicit background classes must be handled internally by the implementation. -* Detected boxes are to be interpreted as being in - `[y_min, x_min, y_max, x_max]` format and normalized relative to the - image window. -* We do not specifically assume any kind of probabilistic interpretation of the - scores --- the only important thing is their relative ordering. Thus - implementations of the postprocess function are free to output logits, - probabilities, calibrated probabilities, or anything else. - -## Defining a new Faster R-CNN or SSD Feature Extractor - -In most cases, you probably will not implement a `DetectionModel` from scratch ---- instead you might create a new feature extractor to be used by one of the -SSD or Faster R-CNN meta-architectures. 
(We think of meta-architectures as
-classes that define entire families of models using the `DetectionModel`
-abstraction.)
-
-Note: For the following discussion to make sense, we recommend first becoming
-familiar with the [Faster R-CNN](https://arxiv.org/abs/1506.01497) paper.
-
-Let’s now imagine that you have invented a brand new network architecture
-(say, “InceptionV100”) for classification and want to see how InceptionV100
-would behave as a feature extractor for detection (say, with Faster R-CNN).
-A similar procedure would hold for SSD models, but we’ll discuss Faster R-CNN.
-
-To use InceptionV100, we will have to define a new
-`FasterRCNNFeatureExtractor` and pass it to our `FasterRCNNMetaArch`
-constructor as input. See
-`object_detection/meta_architectures/faster_rcnn_meta_arch.py` for the
-definitions of `FasterRCNNFeatureExtractor` and `FasterRCNNMetaArch`.
-A `FasterRCNNFeatureExtractor` must define a few
-functions:
-
-* `preprocess`: Run any preprocessing of input values that is necessary prior
-  to running the detector on an input image.
-* `_extract_proposal_features`: Extract first stage Region Proposal Network
-  (RPN) features.
-* `_extract_box_classifier_features`: Extract second stage Box Classifier
-  features.
-* `restore_from_classification_checkpoint_fn`: Load a checkpoint into the
-  Tensorflow graph.
-
-See the `object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py`
-definition as one example. Some remarks:
-
-* We typically initialize the weights of this feature extractor
-  using those from the
-  [Slim Resnet-101 classification checkpoint](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models),
-  and we know
-  that images were preprocessed when training this checkpoint
-  by subtracting a channel mean from each input
-  image. Thus, we implement the preprocess function to replicate the same
-  channel mean subtraction behavior.
-* The “full” resnet classification network defined in slim is cut into two
-  parts --- all but the last “resnet block” is put into the
-  `_extract_proposal_features` function and the final block is separately
-  defined in the `_extract_box_classifier_features` function. In general,
-  some experimentation may be required to decide on an optimal layer at
-  which to “cut” your feature extractor into these two pieces for Faster R-CNN.
-
-## Register your model for configuration
-
-Assuming that your new feature extractor does not require nonstandard
-configuration, ideally you will only need to change the
-“feature_extractor.type” fields in your configuration protos to point to the
-new feature extractor. For our API to understand this new type, though, you
-will first have to register your new feature extractor with the model builder
-(`object_detection/builders/model_builder.py`),
-whose job is to create models from config protos.
-
-Registration is simple --- just add a pointer to the new Feature Extractor
-class that you have defined in one of the SSD or Faster R-CNN Feature
-Extractor Class maps at the top of the
-`object_detection/builders/model_builder.py` file.
-We recommend adding a test in `object_detection/builders/model_builder_test.py`
-to make sure that parsing your proto will work as expected.
-
-## Taking your new model for a spin
-
-After registration you are ready to go with your model! Some final tips:
-
-* To save time debugging, try running your configuration file locally first
-  (both training and evaluation).
-* Do a sweep of learning rates to figure out which learning rate is best - for your model. -* A small but often important detail: you may find it necessary to disable - batchnorm training (that is, load the batch norm parameters from the - classification checkpoint, but do not update them during gradient descent). diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/detection_model_zoo.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/detection_model_zoo.md deleted file mode 100644 index 37cea8ad8dd8ede25f4fef09d0fbc1f09d0354e9..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/detection_model_zoo.md +++ /dev/null @@ -1,116 +0,0 @@ -# Tensorflow detection model zoo - -We provide a collection of detection models pre-trained on the [COCO -dataset](http://mscoco.org), the [Kitti dataset](http://www.cvlibs.net/datasets/kitti/), and the -[Open Images dataset](https://github.com/openimages/dataset). These models can -be useful for -out-of-the-box inference if you are interested in categories already in COCO -(e.g., humans, cars, etc) or in Open Images (e.g., -surfboard, jacuzzi, etc). They are also useful for initializing your models when -training on novel datasets. - -In the table below, we list each such pre-trained model including: - -* a model name that corresponds to a config file that was used to train this - model in the `samples/configs` directory, -* a download link to a tar.gz file containing the pre-trained model, -* model speed --- we report running time in ms per 600x600 image (including all - pre and post-processing), but please be - aware that these timings depend highly on one's specific hardware - configuration (these timings were performed using an Nvidia - GeForce GTX TITAN X card) and should be treated more as relative timings in - many cases. Also note that desktop GPU timing does not always reflect mobile - run time. For example Mobilenet V2 is faster on mobile devices than Mobilenet - V1, but is slightly slower on desktop GPU. -* detector performance on subset of the COCO validation set or Open Images test split as measured by the dataset-specific mAP measure. - Here, higher is better, and we only report bounding box mAP rounded to the - nearest integer. -* Output types (`Boxes`, and `Masks` if applicable ) - -You can un-tar each tar.gz file via, e.g.,: - -``` -tar -xzvf ssd_mobilenet_v1_coco.tar.gz -``` - -Inside the un-tar'ed directory, you will find: - -* a graph proto (`graph.pbtxt`) -* a checkpoint - (`model.ckpt.data-00000-of-00001`, `model.ckpt.index`, `model.ckpt.meta`) -* a frozen graph proto with weights baked into the graph as constants - (`frozen_inference_graph.pb`) to be used for out of the box inference - (try this out in the Jupyter notebook!) -* a config file (`pipeline.config`) which was used to generate the graph. These - directly correspond to a config file in the - [samples/configs](https://github.com/tensorflow/models/tree/master/research/object_detection/samples/configs)) directory but often with a modified score threshold. In the case - of the heavier Faster R-CNN models, we also provide a version of the model - that uses a highly reduced number of proposals for speed. - -Some remarks on frozen inference graphs: - -* If you try to evaluate the frozen graph, you may find performance numbers for - some of the models to be slightly lower than what we report in the below - tables. 
This is because we discard detections with scores below a - threshold (typically 0.3) when creating the frozen graph. This corresponds - effectively to picking a point on the precision recall curve of - a detector (and discarding the part past that point), which negatively impacts - standard mAP metrics. -* Our frozen inference graphs are generated using the - [v1.5.0](https://github.com/tensorflow/tensorflow/tree/v1.5.0) - release version of Tensorflow and we do not guarantee that these will work - with other versions; this being said, each frozen inference graph can be - regenerated using your current version of Tensorflow by re-running the - [exporter](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/exporting_models.md), - pointing it at the model directory as well as the config file inside of it. - - -## COCO-trained models {#coco-models} - -| Model name | Speed (ms) | COCO mAP[^1] | Outputs | -| ------------ | :--------------: | :--------------: | :-------------: | -| [ssd_mobilenet_v1_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2017_11_17.tar.gz) | 30 | 21 | Boxes | -| [ssd_mobilenet_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz) | 31 | 22 | Boxes | -| [ssdlite_mobilenet_v2_coco](http://download.tensorflow.org/models/object_detection/ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz) | 27 | 22 | Boxes | -| [ssd_inception_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2017_11_17.tar.gz) | 42 | 24 | Boxes | -| [faster_rcnn_inception_v2_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz) | 58 | 28 | Boxes | -| [faster_rcnn_resnet50_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2018_01_28.tar.gz) | 89 | 30 | Boxes | -| [faster_rcnn_resnet50_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_lowproposals_coco_2018_01_28.tar.gz) | 64 | | Boxes | -| [rfcn_resnet101_coco](http://download.tensorflow.org/models/object_detection/rfcn_resnet101_coco_2018_01_28.tar.gz) | 92 | 30 | Boxes | -| [faster_rcnn_resnet101_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_2018_01_28.tar.gz) | 106 | 32 | Boxes | -| [faster_rcnn_resnet101_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_lowproposals_coco_2018_01_28.tar.gz) | 82 | | Boxes | -| [faster_rcnn_inception_resnet_v2_atrous_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_coco_2018_01_28.tar.gz) | 620 | 37 | Boxes | -| [faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco_2018_01_28.tar.gz) | 241 | | Boxes | -| [faster_rcnn_nas](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_coco_2018_01_28.tar.gz) | 1833 | 43 | Boxes | -| [faster_rcnn_nas_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_lowproposals_coco_2018_01_28.tar.gz) | 540 | | Boxes | -| [mask_rcnn_inception_resnet_v2_atrous_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_resnet_v2_atrous_coco_2018_01_28.tar.gz) | 771 | 36 | Masks | -| 
[mask_rcnn_inception_v2_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz) | 79 | 25 | Masks |
-| [mask_rcnn_resnet101_atrous_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_resnet101_atrous_coco_2018_01_28.tar.gz) | 470 | 33 | Masks |
-| [mask_rcnn_resnet50_atrous_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_resnet50_atrous_coco_2018_01_28.tar.gz) | 343 | 29 | Masks |
-
-
-
-## Kitti-trained models {#kitti-models}
-
-Model name | Speed (ms) | Pascal mAP@0.5 | Outputs
------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :---: | :-------------: | :-----:
-[faster_rcnn_resnet101_kitti](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_kitti_2018_01_28.tar.gz) | 79 | 87 | Boxes
-
-## Open Images-trained models {#open-images-models}
-
-Model name | Speed (ms) | Open Images mAP@0.5[^2] | Outputs
------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :---: | :-------------: | :-----:
-[faster_rcnn_inception_resnet_v2_atrous_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_2018_01_28.tar.gz) | 727 | 37 | Boxes
-[faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2018_01_28.tar.gz) | 347 | | Boxes
-
-
-## AVA v2.1 trained models {#ava-models}
-
-Model name | Speed (ms) | Pascal mAP@0.5 | Outputs
------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :---: | :-------------: | :-----:
-[faster_rcnn_resnet101_ava_v2.1](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_ava_v2.1_2018_04_30.tar.gz) | 93 | 11 | Boxes
-
-
-[^1]: See [MSCOCO evaluation protocol](http://cocodataset.org/#detections-eval).
-[^2]: This is PASCAL mAP with a slightly different way of computing true positives: see [Open Images evaluation protocol](evaluation_protocols.md#open-images).
-
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/evaluation_protocols.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/evaluation_protocols.md
deleted file mode 100644
index ec960058bcebd3313bd60a992b818ff256a951f7..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/evaluation_protocols.md
+++ /dev/null
@@ -1,129 +0,0 @@
-# Supported object detection evaluation protocols
-
-The Tensorflow Object Detection API currently supports three evaluation protocols,
-which can be configured in `EvalConfig` by setting `metrics_set` to the
-corresponding value.
-
-## PASCAL VOC 2010 detection metric
-
-`EvalConfig.metrics_set='pascal_voc_detection_metrics'`
-
-The commonly used mAP metric for evaluating the quality of object detectors,
-computed according to the protocol of the PASCAL VOC Challenge 2010-2012. The
-protocol is available
-[here](http://host.robots.ox.ac.uk/pascal/VOC/voc2010/devkit_doc_08-May-2010.pdf).
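-
-For concreteness, selecting a protocol is a one-line change in the `eval_config`
-message of a pipeline config. A minimal, illustrative fragment (all other fields
-elided):
-
-```
-eval_config: {
-  # Evaluate with the PASCAL VOC 2010 protocol described above.
-  metrics_set: ['pascal_voc_detection_metrics']
-}
-```
-
-The same pattern applies to every protocol on this page: substitute the
-`metrics_set` string given in the corresponding section.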
-
-## Weighted PASCAL VOC detection metric
-
-`EvalConfig.metrics_set='weighted_pascal_voc_detection_metrics'`
-
-The weighted PASCAL metric computes the mean average precision as the average
-precision when treating all classes as a single class. In comparison, the
-PASCAL metric computes the mean average precision as the mean of the
-per-class average precisions.
-
-For example, suppose the test set consists of two classes, "cat" and "dog", and there
-are ten times more boxes of "cat" than those of "dog". According to the PASCAL VOC
-2010 metric, performance on each of the two classes would contribute equally
-towards the final mAP value, while for the Weighted PASCAL VOC metric the final
-mAP value will be influenced by the frequency of each class.
-
-## PASCAL VOC 2010 instance segmentation metric
-
-`EvalConfig.metrics_set='pascal_voc_instance_segmentation_metrics'`
-
-Similar to the PASCAL VOC 2010 detection metric, but computes the intersection over
-union based on the object masks instead of object boxes.
-
-## Weighted PASCAL VOC instance segmentation metric
-
-`EvalConfig.metrics_set='weighted_pascal_voc_instance_segmentation_metrics'`
-
-Similar to the weighted PASCAL VOC 2010 detection metric, but computes the
-intersection over union based on the object masks instead of object boxes.
-
-## Open Images V2 detection metric
-
-`EvalConfig.metrics_set='open_images_V2_detection_metrics'`
-
-This metric was originally defined for evaluating detector performance on the [Open
-Images V2 dataset](https://github.com/openimages/dataset) and is fairly similar
-to the PASCAL VOC 2010 metric mentioned above. It computes interpolated average
-precision (AP) for each class and averages it among all classes (mAP).
-
-The difference from the PASCAL VOC 2010 metric is the following: Open Images
-annotations contain `group-of` ground-truth boxes (see [Open Images data
-description](https://github.com/openimages/dataset#annotations-human-bboxcsv)),
-which are treated differently for the purpose of deciding whether detections are
-"true positives", "ignored", or "false positives". Here we define these three
-cases:
-
-A detection is a "true positive" if there is a non-group-of ground-truth box,
-such that:
-
-* The detection box and the ground-truth box are of the same class, and
-  intersection-over-union (IoU) between the detection box and the ground-truth
-  box is greater than the IoU threshold (default value 0.5). \
-  Illustration of handling non-group-of boxes: \
-  ![alt
-  groupof_case_eval](img/nongroupof_case_eval.png "illustration of handling non-group-of boxes: yellow box - ground truth bounding box; green box - true positive; red boxes - false positives.")
-
-  * yellow box - ground-truth box;
-  * green box - true positive;
-  * red boxes - false positives.
-
-* This is the highest scoring detection for this ground truth box that
-  satisfies the criteria above.
-
-A detection is "ignored" if it is not a true positive, and there is a `group-of`
-ground-truth box such that:
-
-* The detection box and the ground-truth box are of the same class, and the
-  area of intersection between the detection box and the ground-truth box
-  divided by the area of the detection is greater than 0.5. This is intended
-  to measure whether the detection box is approximately inside the group-of
-  ground-truth box.
\
-  Illustration of handling `group-of` boxes: \
-  ![alt
-  groupof_case_eval](img/groupof_case_eval.png "illustration of handling group-of boxes: yellow box - ground truth bounding box; grey boxes - two detections of cars, which are ignored; red box - false positive.")
-
-  * yellow box - ground-truth box;
-  * grey boxes - two detections of cars, which are ignored;
-  * red box - false positive.
-
-A detection is a "false positive" if it is neither a "true positive" nor
-"ignored".
-
-Precision and recall are defined as:
-
-* Precision = number-of-true-positives/(number-of-true-positives + number-of-false-positives)
-* Recall = number-of-true-positives/number-of-non-group-of-boxes
-
-Note that detections ignored as firing on a `group-of` ground-truth box do not
-contribute to the number of true positives.
-
-The labels in Open Images are organized in a
-[hierarchy](https://storage.googleapis.com/openimages/2017_07/bbox_labels_vis/bbox_labels_vis.html).
-Ground-truth bounding-boxes are annotated with the most specific class available
-in the hierarchy. For example, "car" has two children "limousine" and "van". Any
-other kind of car is annotated as "car" (for example, a sedan). Given this
-convention, the evaluation software treats all classes independently, ignoring
-the hierarchy. To achieve high performance values, object detectors should
-output bounding-boxes labelled in the same manner.
-
-## OID Challenge Object Detection Metric 2018
-
-`EvalConfig.metrics_set='oid_challenge_object_detection_metrics'`
-
-The metric for the OID Challenge 2018, Object Detection
-track. The description is provided on the [Open Images Challenge
-website](https://storage.googleapis.com/openimages/web/challenge.html).
-
-## OID Challenge Visual Relationship Detection Metric 2018
-
-The metric for the OID Challenge 2018, Visual
-Relationship Detection track. The description is provided on the [Open Images
-Challenge
-website](https://storage.googleapis.com/openimages/web/challenge.html). Note:
-this is currently a stand-alone metric that can be used only through the
-`metrics/oid_vrd_challenge_evaluation.py` util.
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/exporting_models.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/exporting_models.md
deleted file mode 100644
index d4735b978d82c0d239a42f80121218b6b68f0759..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/exporting_models.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# Exporting a trained model for inference
-
-After your model has been trained, you should export it to a Tensorflow
-graph proto.
A checkpoint will typically consist of three files:
-
-* model.ckpt-${CHECKPOINT_NUMBER}.data-00000-of-00001
-* model.ckpt-${CHECKPOINT_NUMBER}.index
-* model.ckpt-${CHECKPOINT_NUMBER}.meta
-
-After you've identified a candidate checkpoint to export, run the following
-command from tensorflow/models/research:
-
-``` bash
-# From tensorflow/models/research/
-python object_detection/export_inference_graph.py \
-    --input_type image_tensor \
-    --pipeline_config_path ${PIPELINE_CONFIG_PATH} \
-    --trained_checkpoint_prefix ${TRAIN_PATH} \
-    --output_directory ${EXPORT_DIR}
-```
-
-Afterwards, you should see the directory ${EXPORT_DIR} containing the following:
-
-* output_inference_graph.pb, the frozen graph format of the exported model
-* saved_model/, a directory containing the saved model format of the exported model
-* model.ckpt.*, the model checkpoints used for exporting
-* checkpoint, a file specifying which checkpoint files to restore
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/faq.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/faq.md
deleted file mode 100644
index 2f4097f0d06eedc9ab87c8a3958559450b1c1f00..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/faq.md
+++ /dev/null
@@ -1,19 +0,0 @@
-# Frequently Asked Questions
-
-## Q: AttributeError: 'module' object has no attribute 'BackupHandler'
-A: This BackupHandler (tf.contrib.slim.tfexample_decoder.BackupHandler) was
-introduced in Tensorflow 1.5.0, so running with earlier versions may cause this
-issue. It has now been replaced by
-object_detection.data_decoders.tf_example_decoder.BackupHandler. If you see
-this issue, you should be able to resolve it by syncing your fork to HEAD.
-The same applies to LookupTensor.
-
-## Q: AttributeError: 'module' object has no attribute 'LookupTensor'
-A: Similar to BackupHandler, syncing your fork to HEAD should make it work.
-
-## Q: Why can't I get the inference time as reported in the model zoo?
-A: The inference time reported in the model zoo is the mean time over hundreds of
-test images on an internal machine. As mentioned in
-[Tensorflow detection model zoo](detection_model_zoo.md), this speed depends
-highly on one's specific hardware configuration and should be treated more as a
-relative timing.
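-
-Since both `AttributeError` questions above are tied to the Tensorflow version, a
-quick first step when debugging is to confirm which version you are actually
-running:
-
-``` bash
-python -c "import tensorflow as tf; print(tf.__version__)"
-```
-
-If this prints a version older than 1.5.0, either upgrade Tensorflow or sync your
-fork to HEAD as described above.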
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/dogs_detections_output.jpg b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/dogs_detections_output.jpg deleted file mode 100644 index 9e88a7010fa90f5c4a74f6caee78f5c975f77e40..0000000000000000000000000000000000000000 Binary files a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/dogs_detections_output.jpg and /dev/null differ diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/example_cat.jpg b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/example_cat.jpg deleted file mode 100644 index 74c7ef4b0849ce1b1f3b8061f172cb98ce06ef5e..0000000000000000000000000000000000000000 Binary files a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/example_cat.jpg and /dev/null differ diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/groupof_case_eval.png b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/groupof_case_eval.png deleted file mode 100644 index 5abc9b6984fb5816ca4f2e6f40e38ec6e6ea9cfc..0000000000000000000000000000000000000000 Binary files a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/groupof_case_eval.png and /dev/null differ diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/kites_detections_output.jpg b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/kites_detections_output.jpg deleted file mode 100644 index 7c0f3364deda6614b5bf6fdddad7e7a578f0f6eb..0000000000000000000000000000000000000000 Binary files a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/kites_detections_output.jpg and /dev/null differ diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/kites_with_segment_overlay.png b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/kites_with_segment_overlay.png deleted file mode 100644 index a52e57de193e53edbb1a49643e8c77609abdc79d..0000000000000000000000000000000000000000 Binary files a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/kites_with_segment_overlay.png and /dev/null differ diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/nongroupof_case_eval.png b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/nongroupof_case_eval.png deleted file mode 100644 index cbb76f493adfa725cd0b2ab323f89fdfc57a57ec..0000000000000000000000000000000000000000 Binary files a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/nongroupof_case_eval.png and /dev/null differ diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/oid_bus_72e19c28aac34ed8.jpg b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/oid_bus_72e19c28aac34ed8.jpg deleted file mode 100644 index 1e9412ad545c0a1e1e7dcfa35a168c2a61cf2012..0000000000000000000000000000000000000000 Binary files a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/oid_bus_72e19c28aac34ed8.jpg and /dev/null differ diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/oid_monkey_3b4168c89cecbc5b.jpg b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/oid_monkey_3b4168c89cecbc5b.jpg deleted file mode 100644 index 46b1fb282a428fe1169a7ff1d30e963bc085e733..0000000000000000000000000000000000000000 Binary files 
a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/oid_monkey_3b4168c89cecbc5b.jpg and /dev/null differ
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/oxford_pet.png b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/oxford_pet.png
deleted file mode 100644
index ddac415f5ef079f8d6fde8dd4c9838735fd96325..0000000000000000000000000000000000000000
Binary files a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/oxford_pet.png and /dev/null differ
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/tensorboard.png b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/tensorboard.png
deleted file mode 100644
index fbcdbeb38cf5594681c0e206a08b6d06bd1e86a9..0000000000000000000000000000000000000000
Binary files a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/tensorboard.png and /dev/null differ
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/tensorboard2.png b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/tensorboard2.png
deleted file mode 100644
index 97ad22daa11870ecebbbe7cadfb2d8bb30d738f6..0000000000000000000000000000000000000000
Binary files a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/tensorboard2.png and /dev/null differ
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/tf-od-api-logo.png b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/tf-od-api-logo.png
deleted file mode 100644
index 9fa9cc9dba228c1effabfa5c1474052ed8bad3fd..0000000000000000000000000000000000000000
Binary files a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/img/tf-od-api-logo.png and /dev/null differ
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/installation.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/installation.md
deleted file mode 100644
index 27786c7dfbe86e1f1bb9d06db316011e8a8a3ae3..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/installation.md
+++ /dev/null
@@ -1,104 +0,0 @@
-# Installation
-
-## Dependencies
-
-Tensorflow Object Detection API depends on the following libraries:
-
-* Protobuf 3+
-* Python-tk
-* Pillow 1.0
-* lxml
-* tf Slim (which is included in the "tensorflow/models/research/" checkout)
-* Jupyter notebook
-* Matplotlib
-* Tensorflow
-* Cython
-* cocoapi
-
-For detailed steps to install Tensorflow, follow the [Tensorflow installation
-instructions](https://www.tensorflow.org/install/). A typical user can install
-Tensorflow using one of the following commands:
-
-``` bash
-# For CPU
-pip install tensorflow
-# For GPU
-pip install tensorflow-gpu
-```
-
-The remaining libraries can be installed on Ubuntu 16.04 using apt-get:
-
-``` bash
-sudo apt-get install protobuf-compiler python-pil python-lxml python-tk
-sudo pip install Cython
-sudo pip install jupyter
-sudo pip install matplotlib
-```
-
-Alternatively, users can install dependencies using pip:
-
-``` bash
-sudo pip install Cython
-sudo pip install pillow
-sudo pip install lxml
-sudo pip install jupyter
-sudo pip install matplotlib
-```
-
-## COCO API installation
-
-Download the
-cocoapi and
-copy the pycocotools subfolder to the tensorflow/models/research directory if
-you are interested in using COCO evaluation metrics.
The default metrics are
-based on those used in Pascal VOC evaluation. To use the COCO object detection
-metrics, add `metrics_set: "coco_detection_metrics"` to the `eval_config` message
-in the config file. To use the COCO instance segmentation metrics, add
-`metrics_set: "coco_mask_metrics"` to the `eval_config` message in the config
-file.
-
-```bash
-git clone https://github.com/cocodataset/cocoapi.git
-cd cocoapi/PythonAPI
-make
-cp -r pycocotools <path_to_tensorflow>/models/research/
-```
-
-## Protobuf Compilation
-
-The Tensorflow Object Detection API uses Protobufs to configure model and
-training parameters. Before the framework can be used, the Protobuf libraries
-must be compiled. This should be done by running the following command from
-the tensorflow/models/research/ directory:
-
-
-``` bash
-# From tensorflow/models/research/
-protoc object_detection/protos/*.proto --python_out=.
-```
-
-## Add Libraries to PYTHONPATH
-
-When running locally, the tensorflow/models/research/ and slim directories
-should be appended to PYTHONPATH. This can be done by running the following from
-tensorflow/models/research/:
-
-
-``` bash
-# From tensorflow/models/research/
-export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
-```
-
-Note: This command needs to run from every new terminal you start. If you wish
-to avoid running this manually, you can add it as a new line to the end of your
-~/.bashrc file, replacing \`pwd\` with the absolute path of
-tensorflow/models/research on your system.
-
-# Testing the Installation
-
-You can test that you have correctly installed the Tensorflow Object Detection
-API by running the following command:
-
-```bash
-python object_detection/builders/model_builder_test.py
-```
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/instance_segmentation.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/instance_segmentation.md
deleted file mode 100644
index 8ebf7d8c3d7329b95b81fbb14d272f6b0134e138..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/instance_segmentation.md
+++ /dev/null
@@ -1,105 +0,0 @@
-## Run an Instance Segmentation Model
-
-For some applications it isn't sufficient to localize an object with a
-simple bounding box. For instance, you might want to segment an object region
-once it is detected. This class of problems is called **instance segmentation**.
-
-![](img/kites_with_segment_overlay.png)
-
-### Materializing data for instance segmentation {#materializing-instance-seg}
-
-Instance segmentation is an extension of object detection, where a binary mask
-(i.e. object vs. background) is associated with every bounding box. This allows
-for more fine-grained information about the extent of the object within the box.
-To train an instance segmentation model, a groundtruth mask must be supplied for
-every groundtruth bounding box. In addition to the proto fields listed in the
-section titled [Using your own dataset](using_your_own_dataset.md), one must
-also supply `image/object/mask`, which can either be a repeated list of
-single-channel encoded PNG strings, or a single dense 3D binary tensor where
-masks corresponding to each object are stacked along the first dimension. Each
-is described in more detail below.
-
-#### PNG Instance Segmentation Masks
-
-Instance segmentation masks can be supplied as serialized PNG images.
-
-```shell
-image/object/mask = ["\x89PNG\r\n\x1A\n\x00\x00\x00\rIHDR\...", ...]
-```
-
-These masks are whole-image masks, one for each object instance. The spatial
-dimensions of each mask must agree with the image. Each mask has only a single
-channel, and the pixel values are either 0 (background) or 1 (object mask).
-**PNG masks are the preferred parameterization since they offer considerable
-space savings compared to dense numerical masks.**
-
-#### Dense Numerical Instance Segmentation Masks
-
-Masks can also be specified via a dense numerical tensor.
-
-```shell
-image/object/mask = [0.0, 0.0, 1.0, 1.0, 0.0, ...]
-```
-
-For an image with dimensions `H` x `W` and `num_boxes` groundtruth boxes, the
-mask corresponds to a [`num_boxes`, `H`, `W`] float32 tensor, flattened into a
-single vector of shape `num_boxes` * `H` * `W`. In TensorFlow, examples are read
-in row-major format, so the elements are organized as:
-
-```shell
-... mask 0 row 0 ... mask 0 row 1 ... // ... mask 0 row H-1 ... mask 1 row 0 ...
-```
-
-where each row has W contiguous binary values.
-
-To see example tf-records with mask labels, see the examples under the
-[Preparing Inputs](preparing_inputs.md) section.
-
-### Pre-existing config files
-
-We provide four instance segmentation config files that you can use to train
-your own models:
-
-1. mask_rcnn_inception_resnet_v2_atrous_coco
-1. mask_rcnn_resnet101_atrous_coco
-1. mask_rcnn_resnet50_atrous_coco
-1. mask_rcnn_inception_v2_coco
-
-For more details, see the [detection model zoo](detection_model_zoo.md).
-
-### Updating a Faster R-CNN config file
-
-Currently, the only supported instance segmentation model is [Mask
-R-CNN](https://arxiv.org/abs/1703.06870), which requires Faster R-CNN as the
-backbone object detector.
-
-Once you have a baseline Faster R-CNN pipeline configuration, you can make the
-following modifications in order to convert it into a Mask R-CNN model.
-
-1. Within `train_input_reader` and `eval_input_reader`, set
-   `load_instance_masks` to `True`. If using PNG masks, set `mask_type` to
-   `PNG_MASKS`, otherwise you can leave it as the default `NUMERICAL_MASKS`.
-1. Within the `faster_rcnn` config, use a `MaskRCNNBoxPredictor` as the
-   `second_stage_box_predictor`.
-1. Within the `MaskRCNNBoxPredictor` message, set `predict_instance_masks` to
-   `True`. You must also define `conv_hyperparams`.
-1. Within the `faster_rcnn` message, set `number_of_stages` to `3`.
-1. Add instance segmentation metrics to the set of metrics:
-   `'coco_mask_metrics'`.
-1. Update the `input_path`s to point at your data (a condensed config sketch
-   follows below).
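-
-Put together, the modifications above look roughly like the following (a
-condensed, illustrative sketch rather than a complete pipeline config; every
-field shown here also appears in the sample configs, and all unrelated Faster
-R-CNN fields are elided):
-
-```
-model {
-  faster_rcnn {
-    number_of_stages: 3  # the third stage enables mask prediction
-    second_stage_box_predictor {
-      mask_rcnn_box_predictor {
-        predict_instance_masks: true
-        conv_hyperparams {
-          # hyperparameters for the mask branch convolutions go here
-        }
-      }
-    }
-  }
-}
-train_input_reader: {
-  load_instance_masks: true
-  mask_type: PNG_MASKS  # if using PNG masks
-}
-eval_config: {
-  metrics_set: ['coco_detection_metrics', 'coco_mask_metrics']
-}
-```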
- -Please refer to the section on [Running the pets dataset](running_pets.md) for -additional details. - -> Note: The mask prediction branch consists of a sequence of convolution layers. -> You can set the number of convolution layers and their depth as follows: -> -> 1. Within the `MaskRCNNBoxPredictor` message, set the -> `mask_prediction_conv_depth` to your value of interest. The default value -> is 256. If you set it to `0` (recommended), the depth is computed -> automatically based on the number of classes in the dataset. -> 1. Within the `MaskRCNNBoxPredictor` message, set the -> `mask_prediction_num_conv_layers` to your value of interest. The default -> value is 2. diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/oid_inference_and_evaluation.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/oid_inference_and_evaluation.md deleted file mode 100644 index 93aedf2918f26101bf341821bb939d3eb9ec31db..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/oid_inference_and_evaluation.md +++ /dev/null @@ -1,255 +0,0 @@ -# Inference and evaluation on the Open Images dataset - -This page presents a tutorial for running object detector inference and -evaluation measure computations on the [Open Images -dataset](https://github.com/openimages/dataset), using tools from the -[TensorFlow Object Detection -API](https://github.com/tensorflow/models/tree/master/research/object_detection). -It shows how to download the images and annotations for the validation and test -sets of Open Images; how to package the downloaded data in a format understood -by the Object Detection API; where to find a trained object detector model for -Open Images; how to run inference; and how to compute evaluation measures on the -inferred detections. - -Inferred detections will look like the following: - -![](img/oid_bus_72e19c28aac34ed8.jpg){height="300"} -![](img/oid_monkey_3b4168c89cecbc5b.jpg){height="300"} - -On the validation set of Open Images, this tutorial requires 27GB of free disk -space and the inference step takes approximately 9 hours on a single NVIDIA -Tesla P100 GPU. On the test set -- 75GB and 27 hours respectively. All other -steps require less than two hours in total on both sets. - -## Installing TensorFlow, the Object Detection API, and Google Cloud SDK - -Please run through the [installation instructions](installation.md) to install -TensorFlow and all its dependencies. Ensure the Protobuf libraries are compiled -and the library directories are added to `PYTHONPATH`. You will also need to -`pip` install `pandas` and `contextlib2`. - -Some of the data used in this tutorial lives in Google Cloud buckets. To access -it, you will have to [install the Google Cloud -SDK](https://cloud.google.com/sdk/downloads) on your workstation or laptop. - -## Preparing the Open Images validation and test sets - -In order to run inference and subsequent evaluation measure computations, we -require a dataset of images and ground truth boxes, packaged as TFRecords of -TFExamples. To create such a dataset for Open Images, you will need to first -download ground truth boxes from the [Open Images -website](https://github.com/openimages/dataset): - -```bash -# From tensorflow/models/research -mkdir oid -cd oid -wget https://storage.googleapis.com/openimages/2017_07/annotations_human_bbox_2017_07.tar.gz -tar -xvf annotations_human_bbox_2017_07.tar.gz -``` - -Next, download the images. 
In this tutorial, we will use lower resolution images
-provided by [CVDF](http://www.cvdfoundation.org). Please follow the instructions
-on [CVDF's Open Images repository
-page](https://github.com/cvdfoundation/open-images-dataset) in order to gain
-access to the cloud bucket with the images. Then run:
-
-```bash
-# From tensorflow/models/research/oid
-SPLIT=validation # Set SPLIT to "test" to download the images in the test set
-mkdir raw_images_${SPLIT}
-gsutil -m rsync -r gs://open-images-dataset/$SPLIT raw_images_${SPLIT}
-```
-
-Another option for downloading the images is to follow the URLs contained in the
-[image URLs and metadata CSV
-files](https://storage.googleapis.com/openimages/2017_07/images_2017_07.tar.gz)
-on the Open Images website.
-
-At this point, your `tensorflow/models/research/oid` directory should appear as
-follows:
-
-```lang-none
-|-- 2017_07
-|   |-- test
-|   |   `-- annotations-human-bbox.csv
-|   |-- train
-|   |   `-- annotations-human-bbox.csv
-|   `-- validation
-|       `-- annotations-human-bbox.csv
-|-- raw_images_validation (if you downloaded the validation split)
-|   `-- ... (41,620 files matching regex "[0-9a-f]{16}.jpg")
-|-- raw_images_test (if you downloaded the test split)
-|   `-- ... (125,436 files matching regex "[0-9a-f]{16}.jpg")
-`-- annotations_human_bbox_2017_07.tar.gz
-```
-
-Next, package the data into TFRecords of TFExamples by running:
-
-```bash
-# From tensorflow/models/research/oid
-SPLIT=validation # Set SPLIT to "test" to create TFRecords for the test split
-mkdir ${SPLIT}_tfrecords
-
-PYTHONPATH=$PYTHONPATH:$(readlink -f ..) \
-python -m object_detection.dataset_tools.create_oid_tf_record \
-    --input_box_annotations_csv 2017_07/$SPLIT/annotations-human-bbox.csv \
-    --input_images_directory raw_images_${SPLIT} \
-    --input_label_map ../object_detection/data/oid_bbox_trainable_label_map.pbtxt \
-    --output_tf_record_path_prefix ${SPLIT}_tfrecords/$SPLIT.tfrecord \
-    --num_shards=100
-```
-
-This results in 100 TFRecord files (shards), written to
-`oid/${SPLIT}_tfrecords`, with filenames matching
-`${SPLIT}.tfrecord-000[0-9][0-9]-of-00100`. Each shard contains approximately
-the same number of images and is de facto a representative random sample of the
-input data. [This enables](#accelerating_inference) a straightforward work
-division scheme for distributing inference and also for approximate measure
-computations on subsets of the validation and test sets.
-
-## Inferring detections
-
-Inference requires a trained object detection model. In this tutorial we will
-use a model from the [detection model zoo](detection_model_zoo.md), which can
-be downloaded and unpacked by running the commands below. More information about
-the model, such as its architecture and how it was trained, is available in the
-[model zoo page](detection_model_zoo.md).
-
-```bash
-# From tensorflow/models/research/oid
-wget http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_14_10_2017.tar.gz
-tar -zxvf faster_rcnn_inception_resnet_v2_atrous_oid_14_10_2017.tar.gz
-```
-
-At this point, data is packed into TFRecords and we have an object detector
-model. We can run inference using:
-
-```bash
-# From tensorflow/models/research/oid
-SPLIT=validation # or test
-TF_RECORD_FILES=$(ls -1 ${SPLIT}_tfrecords/* | tr '\n' ',')
-
-PYTHONPATH=$PYTHONPATH:$(readlink -f ..)
\
-python -m object_detection.inference.infer_detections \
-    --input_tfrecord_paths=$TF_RECORD_FILES \
-    --output_tfrecord_path=${SPLIT}_detections.tfrecord-00000-of-00001 \
-    --inference_graph=faster_rcnn_inception_resnet_v2_atrous_oid/frozen_inference_graph.pb \
-    --discard_image_pixels
-```
-
-Inference preserves all fields of the input TFExamples, and adds new fields to
-store the inferred detections. This allows [computing evaluation
-measures](#compute_evaluation_measures) on the output TFRecord alone, as ground
-truth boxes are preserved as well. Since measure computations don't require
-access to the images, `infer_detections` can optionally discard them with the
-`--discard_image_pixels` flag. Discarding the images drastically reduces the
-size of the output TFRecord.
-
-### Accelerating inference {#accelerating_inference}
-
-Running inference on the whole validation or test set can take a long time to
-complete due to the large number of images present in these sets (41,620 and
-125,436 respectively). For quick but approximate evaluation, inference and the
-subsequent measure computations can be run on a small number of shards. To run,
-for example, on 2% of all the data, it is enough to set `TF_RECORD_FILES` as
-shown below before running `infer_detections`:
-
-```bash
-TF_RECORD_FILES=$(ls ${SPLIT}_tfrecords/${SPLIT}.tfrecord-0000[0-1]-of-00100 | tr '\n' ',')
-```
-
-Please note that computing evaluation measures on a small subset of the data
-introduces variance and bias, since some classes of objects won't be seen during
-evaluation. In the example above, this leads to 13.2% higher mAP on the first
-two shards of the validation set compared to the mAP for the full set ([see mAP
-results](#expected-maps)).
-
-Another way to accelerate inference is to run it in parallel on multiple
-TensorFlow devices on possibly multiple machines. The script below uses
-[tmux](https://github.com/tmux/tmux/wiki) to run a separate `infer_detections`
-process for each GPU on a different partition of the input data.
-
-```bash
-# From tensorflow/models/research/oid
-SPLIT=validation # or test
-NUM_GPUS=4
-NUM_SHARDS=100
-
-tmux new-session -d -s "inference"
-function tmux_start { tmux new-window -d -n "inference:GPU$1" "${*:2}; exec bash"; }
-for gpu_index in $(seq 0 $(($NUM_GPUS-1))); do
-  start_shard=$(( $gpu_index * $NUM_SHARDS / $NUM_GPUS ))
-  end_shard=$(( ($gpu_index + 1) * $NUM_SHARDS / $NUM_GPUS - 1))
-  TF_RECORD_FILES=$(seq -s, -f "${SPLIT}_tfrecords/${SPLIT}.tfrecord-%05.0f-of-$(printf '%05d' $NUM_SHARDS)" $start_shard $end_shard)
-  tmux_start ${gpu_index} \
-  PYTHONPATH=$PYTHONPATH:$(readlink -f ..) CUDA_VISIBLE_DEVICES=$gpu_index \
-  python -m object_detection.inference.infer_detections \
-    --input_tfrecord_paths=$TF_RECORD_FILES \
-    --output_tfrecord_path=${SPLIT}_detections.tfrecord-$(printf "%05d" $gpu_index)-of-$(printf "%05d" $NUM_GPUS) \
-    --inference_graph=faster_rcnn_inception_resnet_v2_atrous_oid/frozen_inference_graph.pb \
-    --discard_image_pixels
-done
-```
-
-For example, with `NUM_GPUS=4` and `NUM_SHARDS=100`, GPU 0 processes shards
-0-24, GPU 1 processes shards 25-49, and so on.
-
-After all `infer_detections` processes finish, `tensorflow/models/research/oid`
-will contain one output TFRecord from each process, with name matching
-`validation_detections.tfrecord-0000[0-3]-of-00004`.
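-
-Before moving on to evaluation, it can be worth checking that all shards were
-written. Below is a small, illustrative sanity check using the TF 1.x record
-reader; the filename pattern matches the parallel run above, so adjust it if
-you used the single-process command or the test split instead:
-
-```bash
-# From tensorflow/models/research/oid
-python - <<'EOF'
-import glob
-import tensorflow as tf
-
-# Count detection records across all output shards.
-total = 0
-for path in sorted(glob.glob('validation_detections.tfrecord-*')):
-    count = sum(1 for _ in tf.python_io.tf_record_iterator(path))
-    print(path, count)
-    total += count
-print('total detection records:', total)
-EOF
-```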
-
-## Computing evaluation measures {#compute_evaluation_measures}
-
-To compute evaluation measures on the inferred detections, you first need to
-create the appropriate configuration files:
-
-```bash
-# From tensorflow/models/research/oid
-SPLIT=validation # or test
-NUM_SHARDS=1 # Set to NUM_GPUS if using the parallel evaluation script above
-
-mkdir -p ${SPLIT}_eval_metrics
-
-echo "
-label_map_path: '../object_detection/data/oid_bbox_trainable_label_map.pbtxt'
-tf_record_input_reader: { input_path: '${SPLIT}_detections.tfrecord@${NUM_SHARDS}' }
-" > ${SPLIT}_eval_metrics/${SPLIT}_input_config.pbtxt
-
-echo "
-metrics_set: 'open_images_V2_detection_metrics'
-" > ${SPLIT}_eval_metrics/${SPLIT}_eval_config.pbtxt
-```
-
-And then run:
-
-```bash
-# From tensorflow/models/research/oid
-SPLIT=validation # or test
-
-PYTHONPATH=$PYTHONPATH:$(readlink -f ..) \
-python -m object_detection.metrics.offline_eval_map_corloc \
-    --eval_dir=${SPLIT}_eval_metrics \
-    --eval_config_path=${SPLIT}_eval_metrics/${SPLIT}_eval_config.pbtxt \
-    --input_config_path=${SPLIT}_eval_metrics/${SPLIT}_input_config.pbtxt
-```
-
-The first configuration file contains an `object_detection.protos.InputReader`
-message that describes the location of the necessary input files. The second
-file contains an `object_detection.protos.EvalConfig` message that describes the
-evaluation metric. For more information about these protos, see the corresponding
-source files.
-
-### Expected mAPs {#expected-maps}
-
-The result of running `offline_eval_map_corloc` is a CSV file located at
-`${SPLIT}_eval_metrics/metrics.csv`. With the above configuration, the file will
-contain average precision at IoU≥0.5 for each of the classes present in the
-dataset. It will also contain the mAP@IoU≥0.5. Both the per-class average
-precisions and the mAP are computed according to the [Open Images evaluation
-protocol](evaluation_protocols.md#open-images). The expected mAPs for the validation and
-test sets of Open Images in this case are:
-
-Set        | Fraction of data | Images  | mAP@IoU≥0.5
----------: | :--------------: | :-----: | -----------
-validation | everything       | 41,620  | 39.2%
-validation | first 2 shards   | 884     | 52.4%
-test       | everything       | 125,436 | 37.7%
-test       | first 2 shards   | 2,476   | 50.8%
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/preparing_inputs.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/preparing_inputs.md
deleted file mode 100644
index d9d290d2928c1a163e250e4bfc3d05e01cb12d99..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/preparing_inputs.md
+++ /dev/null
@@ -1,57 +0,0 @@
-# Preparing Inputs
-
-Tensorflow Object Detection API reads data using the TFRecord file format. Two
-sample scripts (`create_pascal_tf_record.py` and `create_pet_tf_record.py`) are
-provided to convert from the PASCAL VOC dataset and the Oxford-IIIT Pet dataset to
-TFRecords.
-
-## Generating the PASCAL VOC TFRecord files.
-
-The raw 2012 PASCAL VOC data set is located
-[here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar).
-
-To download, extract and convert it to TFRecords, run the following commands:
-
-```bash
-# From tensorflow/models/research/
-wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
-tar -xvf VOCtrainval_11-May-2012.tar
-python object_detection/dataset_tools/create_pascal_tf_record.py \
-    --label_map_path=object_detection/data/pascal_label_map.pbtxt \
-    --data_dir=VOCdevkit --year=VOC2012 --set=train \
-    --output_path=pascal_train.record
-python object_detection/dataset_tools/create_pascal_tf_record.py \
-    --label_map_path=object_detection/data/pascal_label_map.pbtxt \
-    --data_dir=VOCdevkit --year=VOC2012 --set=val \
-    --output_path=pascal_val.record
-```
-
-You should end up with two TFRecord files named `pascal_train.record` and
-`pascal_val.record` in the `tensorflow/models/research/` directory.
-
-The label map for the PASCAL VOC data set can be found at
-`object_detection/data/pascal_label_map.pbtxt`.
-
-## Generating the Oxford-IIIT Pet TFRecord files.
-
-The Oxford-IIIT Pet data set is located
-[here](http://www.robots.ox.ac.uk/~vgg/data/pets/). To download, extract and
-convert it to TFRecords, run the following commands:
-
-```bash
-# From tensorflow/models/research/
-wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
-wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz
-tar -xvf annotations.tar.gz
-tar -xvf images.tar.gz
-python object_detection/dataset_tools/create_pet_tf_record.py \
-    --label_map_path=object_detection/data/pet_label_map.pbtxt \
-    --data_dir=`pwd` \
-    --output_dir=`pwd`
-```
-
-You should end up with two TFRecord files named `pet_train.record` and
-`pet_val.record` in the `tensorflow/models/research/` directory.
-
-The label map for the Pet dataset can be found at
-`object_detection/data/pet_label_map.pbtxt`.
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/running_locally.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/running_locally.md
deleted file mode 100644
index b143a9b7b5bebe7a70363ac3ea118504d7eb75d8..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/running_locally.md
+++ /dev/null
@@ -1,81 +0,0 @@
-# Running Locally
-
-This page walks through the steps required to train an object detection model
-on a local machine. It assumes the reader has completed the
-following prerequisites:
-
-1. The Tensorflow Object Detection API has been installed as documented in the
-[installation instructions](installation.md). This includes installing library
-dependencies, compiling the configuration protobufs and setting up the Python
-environment.
-2. A valid data set has been created. See [this page](preparing_inputs.md) for
-instructions on how to generate a dataset for the PASCAL VOC challenge or the
-Oxford-IIIT Pet dataset.
-3. An Object Detection pipeline configuration has been written. See
-[this page](configuring_jobs.md) for details on how to write a pipeline
-configuration. A quick sanity check for these prerequisites is sketched below.
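-
-An illustrative way to verify the three prerequisites before launching any jobs
-(the record names come from the Oxford-IIIT Pet example above, and
-`models/model/pipeline.config` is a hypothetical path; substitute your own):
-
-```bash
-# From tensorflow/models/research/
-# 1. The API is installed and the protos are compiled
-#    (this import fails if protoc was never run).
-python -c "from object_detection.protos import pipeline_pb2"
-# 2. The dataset files exist.
-ls pet_train.record pet_val.record
-# 3. The pipeline configuration is where you expect it.
-ls models/model/pipeline.config
-```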
-
-## Recommended Directory Structure for Training and Evaluation
-
-```
-+data
-  -label_map file
-  -train TFRecord file
-  -eval TFRecord file
-+models
-  + model
-    -pipeline config file
-    +train
-    +eval
-```
-
-## Running the Training Job
-
-A local training job can be run with the following command:
-
-```bash
-# From the tensorflow/models/research/ directory
-python object_detection/train.py \
-    --logtostderr \
-    --pipeline_config_path=${PATH_TO_YOUR_PIPELINE_CONFIG} \
-    --train_dir=${PATH_TO_TRAIN_DIR}
-```
-
-where `${PATH_TO_YOUR_PIPELINE_CONFIG}` points to the pipeline config and
-`${PATH_TO_TRAIN_DIR}` points to the directory in which training checkpoints
-and events will be written. By default, the training job will
-run indefinitely until the user kills it.
-
-## Running the Evaluation Job
-
-Evaluation is run as a separate job. The eval job will periodically poll the
-train directory for new checkpoints and evaluate them on a test dataset. The
-job can be run using the following command:
-
-```bash
-# From the tensorflow/models/research/ directory
-python object_detection/eval.py \
-    --logtostderr \
-    --pipeline_config_path=${PATH_TO_YOUR_PIPELINE_CONFIG} \
-    --checkpoint_dir=${PATH_TO_TRAIN_DIR} \
-    --eval_dir=${PATH_TO_EVAL_DIR}
-```
-
-where `${PATH_TO_YOUR_PIPELINE_CONFIG}` points to the pipeline config,
-`${PATH_TO_TRAIN_DIR}` points to the directory in which training checkpoints
-were saved (same as the training job) and `${PATH_TO_EVAL_DIR}` points to the
-directory in which evaluation events will be saved. As with the training job,
-the eval job runs until terminated by default.
-
-## Running Tensorboard
-
-Progress for training and eval jobs can be inspected using Tensorboard. If
-using the recommended directory structure, Tensorboard can be run using the
-following command:
-
-```bash
-tensorboard --logdir=${PATH_TO_MODEL_DIRECTORY}
-```
-
-where `${PATH_TO_MODEL_DIRECTORY}` points to the directory that contains the
-train and eval directories. Please note it may take Tensorboard a couple of
-minutes to populate with data.
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/running_notebook.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/running_notebook.md
deleted file mode 100644
index c2b8ad1876258d023a997d1166b0d269c2f10f48..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/running_notebook.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# Quick Start: Jupyter notebook for off-the-shelf inference
-
-If you'd like to hit the ground running and run detection on a few example
-images right out of the box, we recommend trying out the Jupyter notebook demo.
-To run the Jupyter notebook, run the following command from
-`tensorflow/models/research/object_detection`:
-
-```
-# From tensorflow/models/research/object_detection
-jupyter notebook
-```
-
-The notebook should open in your favorite web browser. Click the
-[`object_detection_tutorial.ipynb`](../object_detection_tutorial.ipynb) link to
-open the demo.
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/running_on_cloud.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/running_on_cloud.md
deleted file mode 100644
index b96e2419f1e34bf4e3765334160d100a2fff2bfc..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/running_on_cloud.md
+++ /dev/null
@@ -1,132 +0,0 @@
-# Running on Google Cloud Platform
-
-The Tensorflow Object Detection API supports distributed training on Google
-Cloud ML Engine. This section documents instructions on how to train and
-evaluate your model using Cloud ML. The reader should complete the following
-prerequisites:
-
-1. The reader has created and configured a project on Google Cloud Platform.
-See [the Cloud ML quick start guide](https://cloud.google.com/ml-engine/docs/quickstarts/command-line).
-2. The reader has installed the Tensorflow Object Detection API as documented
-in the [installation instructions](installation.md).
-3. The reader has a valid data set and stored it in a Google Cloud Storage
-bucket. See [this page](preparing_inputs.md) for instructions on how to generate
-a dataset for the PASCAL VOC challenge or the Oxford-IIIT Pet dataset.
-4. The reader has configured a valid Object Detection pipeline, and stored it
-in a Google Cloud Storage bucket. See [this page](configuring_jobs.md) for
-details on how to write a pipeline configuration.
-
-Additionally, it is recommended users test their job by running training and
-evaluation jobs for a few iterations
-[locally on their own machines](running_locally.md).
-
-## Packaging
-
-In order to run the Tensorflow Object Detection API on Cloud ML, it must be
-packaged (along with its TF-Slim dependency). The required packages can be
-created with the following commands:
-
-``` bash
-# From tensorflow/models/research/
-python setup.py sdist
-(cd slim && python setup.py sdist)
-```
-
-This will create python packages in dist/object_detection-0.1.tar.gz and
-slim/dist/slim-0.1.tar.gz.
-
-## Running a Multiworker Training Job
-
-Google Cloud ML requires a YAML configuration file for a multiworker training
-job using GPUs. A sample YAML file is given below:
-
-```
-trainingInput:
-  runtimeVersion: "1.2"
-  scaleTier: CUSTOM
-  masterType: standard_gpu
-  workerCount: 9
-  workerType: standard_gpu
-  parameterServerCount: 3
-  parameterServerType: standard
-
-
-```
-
-Please keep the following guidelines in mind when writing the YAML
-configuration:
-
-* A job with n workers will have n + 1 training machines (n workers + 1 master).
-* The number of parameter servers used should be an odd number to prevent
-  a parameter server from storing only weight variables or only bias variables
-  (due to round-robin parameter scheduling).
-* The learning rate in the training config should be decreased when using a
-  larger number of workers. Some experimentation is required to find the
-  optimal learning rate.
-
-The YAML file should be saved on the local machine (not on GCP).
Once it has
-been written, a user can start a training job on Cloud ML Engine using the
-following command:
-
-``` bash
-# From tensorflow/models/research/
-gcloud ml-engine jobs submit training object_detection_`date +%s` \
-    --runtime-version 1.2 \
-    --job-dir=gs://${TRAIN_DIR} \
-    --packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz \
-    --module-name object_detection.train \
-    --region us-central1 \
-    --config ${PATH_TO_LOCAL_YAML_FILE} \
-    -- \
-    --train_dir=gs://${TRAIN_DIR} \
-    --pipeline_config_path=gs://${PIPELINE_CONFIG_PATH}
-```
-
-Where `${PATH_TO_LOCAL_YAML_FILE}` is the local path to the YAML configuration,
-`gs://${TRAIN_DIR}` specifies the directory on Google Cloud Storage where the
-training checkpoints and events will be written, and
-`gs://${PIPELINE_CONFIG_PATH}` points to the pipeline configuration stored on
-Google Cloud Storage.
-
-Users can monitor the progress of their training job on the [ML Engine
-Dashboard](https://console.cloud.google.com/mlengine/jobs).
-
-Note: This sample is supported for use with the 1.2 runtime version.
-
-## Running an Evaluation Job on Cloud
-
-Evaluation jobs run on a single machine, so it is not necessary to write a YAML
-configuration for evaluation. Run the following command to start the evaluation
-job:
-
-``` bash
-gcloud ml-engine jobs submit training object_detection_eval_`date +%s` \
-    --runtime-version 1.2 \
-    --job-dir=gs://${TRAIN_DIR} \
-    --packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz \
-    --module-name object_detection.eval \
-    --region us-central1 \
-    --scale-tier BASIC_GPU \
-    -- \
-    --checkpoint_dir=gs://${TRAIN_DIR} \
-    --eval_dir=gs://${EVAL_DIR} \
-    --pipeline_config_path=gs://${PIPELINE_CONFIG_PATH}
-```
-
-Where `gs://${TRAIN_DIR}` points to the directory on Google Cloud Storage where
-training checkpoints are saved (same as the training job), `gs://${EVAL_DIR}`
-points to where evaluation events will be saved on Google Cloud Storage and
-`gs://${PIPELINE_CONFIG_PATH}` points to where the pipeline configuration is
-stored on Google Cloud Storage.
-
-## Running Tensorboard
-
-You can run Tensorboard locally on your own machine to view the progress of your
-training and eval jobs on Google Cloud ML. Run the following command to start
-Tensorboard:
-
-``` bash
-tensorboard --logdir=gs://${YOUR_CLOUD_BUCKET}
-```
-
-Note it may take Tensorboard a few minutes to populate with results.
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/running_pets.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/running_pets.md
deleted file mode 100644
index d8ccac4132e24af978b223860ff79d34f2e8dd26..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/running_pets.md
+++ /dev/null
@@ -1,336 +0,0 @@
-# Quick Start: Distributed Training on the Oxford-IIIT Pets Dataset on Google Cloud
-
-This page is a walkthrough for training an object detector using the Tensorflow
-Object Detection API. In this tutorial, we'll be training on the Oxford-IIIT Pets
-dataset to build a system to detect various breeds of cats and dogs. The output
-of the detector will look like the following:
-
-![](img/oxford_pet.png)
-
-## Setting up a Project on Google Cloud
-
-To accelerate the process, we'll run training and evaluation on [Google Cloud
-ML Engine](https://cloud.google.com/ml-engine/) to leverage multiple GPUs.
To
-begin, you will have to set up Google Cloud via the following steps (if you have
-already done this, feel free to skip to the next section):
-
-1. [Create a GCP project](https://cloud.google.com/resource-manager/docs/creating-managing-projects).
-2. [Install the Google Cloud SDK](https://cloud.google.com/sdk/downloads) on
-your workstation or laptop.
-This will provide the tools you need to upload files to Google Cloud Storage and
-start ML training jobs.
-3. [Enable the ML Engine
-APIs](https://console.cloud.google.com/flows/enableapi?apiid=ml.googleapis.com,compute_component&_ga=1.73374291.1570145678.1496689256).
-By default, a new GCP project does not enable APIs to start ML Engine training
-jobs. Use the above link to explicitly enable them.
-4. [Set up a Google Cloud Storage (GCS)
-bucket](https://cloud.google.com/storage/docs/creating-buckets). ML Engine
-training jobs can only access files on a Google Cloud Storage bucket. In this
-tutorial, we'll be required to upload our dataset and configuration to GCS.
-
-Please remember the name of your GCS bucket, as we will reference it multiple
-times in this document. Substitute `${YOUR_GCS_BUCKET}` with the name of
-your bucket. For your convenience, you should define the
-environment variable below:
-
-``` bash
-export YOUR_GCS_BUCKET=${YOUR_GCS_BUCKET}
-```
-
-It is also possible to run locally by following
-[the running locally instructions](running_locally.md).
-
-## Installing Tensorflow and the Tensorflow Object Detection API
-
-Please run through the [installation instructions](installation.md) to install
-Tensorflow and all its dependencies. Ensure the Protobuf libraries are
-compiled and the library directories are added to `PYTHONPATH`.
-
-## Getting the Oxford-IIIT Pets Dataset and Uploading it to Google Cloud Storage
-
-In order to train a detector, we require a dataset of images, bounding boxes and
-classifications. For this demo, we'll use the Oxford-IIIT Pets dataset. The raw
-dataset for Oxford-IIIT Pets lives
-[here](http://www.robots.ox.ac.uk/~vgg/data/pets/). You will need to download
-both the image dataset [`images.tar.gz`](http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz)
-and the groundtruth data [`annotations.tar.gz`](http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz)
-to the `tensorflow/models/research/` directory and unzip them. This may take
-some time.
-
-``` bash
-# From tensorflow/models/research/
-wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
-wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz
-tar -xvf images.tar.gz
-tar -xvf annotations.tar.gz
-```
-
-After downloading the tarballs, your `tensorflow/models/research/` directory
-should appear as follows:
-
-```lang-none
-- images.tar.gz
-- annotations.tar.gz
-+ images/
-+ annotations/
-+ object_detection/
-... other files and directories
-```
-
-The Tensorflow Object Detection API expects data to be in the TFRecord format,
-so we'll now run the `create_pet_tf_record` script to convert from the raw
-Oxford-IIIT Pet dataset into TFRecords. Run the following commands from the
-`tensorflow/models/research/` directory:
-
-``` bash
-# From tensorflow/models/research/
-python object_detection/dataset_tools/create_pet_tf_record.py \
-    --label_map_path=object_detection/data/pet_label_map.pbtxt \
-    --data_dir=`pwd` \
-    --output_dir=`pwd`
-```
-
-Note: It is normal to see some warnings when running this script. You may ignore
-them.
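-
-Optionally, you can spot-check the script's output before moving on. A small,
-illustrative sketch using the TF 1.x record iterator (it assumes the records
-were written to the current directory, as in the command above):
-
-``` bash
-# From tensorflow/models/research/
-python - <<'EOF'
-import tensorflow as tf
-
-# Read the first example from the generated training set and list its
-# feature keys (image/encoded, image/object/bbox/*, and so on).
-iterator = tf.python_io.tf_record_iterator('pet_train.record')
-example = tf.train.Example.FromString(next(iterator))
-print(sorted(example.features.feature.keys()))
-EOF
-```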
-
-Two TFRecord files named `pet_train.record` and `pet_val.record` should be
-generated in the `tensorflow/models/research/` directory.
-
-Now that the data has been generated, we'll need to upload it to Google Cloud
-Storage so the data can be accessed by ML Engine. Run the following command to
-copy the files into your GCS bucket (substituting `${YOUR_GCS_BUCKET}`):
-
-``` bash
-# From tensorflow/models/research/
-gsutil cp pet_train.record gs://${YOUR_GCS_BUCKET}/data/pet_train.record
-gsutil cp pet_val.record gs://${YOUR_GCS_BUCKET}/data/pet_val.record
-gsutil cp object_detection/data/pet_label_map.pbtxt gs://${YOUR_GCS_BUCKET}/data/pet_label_map.pbtxt
-```
-
-Please remember the path where you upload the data to, as we will need this
-information when configuring the pipeline in a following step.
-
-## Downloading a COCO-pretrained Model for Transfer Learning
-
-Training a state-of-the-art object detector from scratch can take days, even
-when using multiple GPUs! In order to speed up training, we'll take an object
-detector trained on a different dataset (COCO), and reuse some of its
-parameters to initialize our new model.
-
-Download our [COCO-pretrained Faster R-CNN with Resnet-101
-model](http://storage.googleapis.com/download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_11_06_2017.tar.gz).
-Unpack the tarball and copy the `model.ckpt*` files into your GCS
-Bucket.
-
-``` bash
-wget http://storage.googleapis.com/download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_11_06_2017.tar.gz
-tar -xvf faster_rcnn_resnet101_coco_11_06_2017.tar.gz
-gsutil cp faster_rcnn_resnet101_coco_11_06_2017/model.ckpt.* gs://${YOUR_GCS_BUCKET}/data/
-```
-
-Remember the path where you uploaded the model checkpoint to, as we will need it
-in the following step.
-
-## Configuring the Object Detection Pipeline
-
-In the Tensorflow Object Detection API, the model parameters, training
-parameters and eval parameters are all defined by a config file. More details
-can be found [here](configuring_jobs.md). For this tutorial, we will use some
-predefined templates provided with the source code. In the
-`object_detection/samples/configs` folder, there are skeleton object_detection
-configuration files. We will use `faster_rcnn_resnet101_pets.config` as a
-starting point for configuring the pipeline. Open the file with your favourite
-text editor.
-
-We'll need to configure some paths in order for the template to work. Search the
-file for instances of `PATH_TO_BE_CONFIGURED` and replace them with the
-appropriate value (typically `gs://${YOUR_GCS_BUCKET}/data/`). Afterwards,
-upload your edited file onto GCS, making note of the path it was uploaded to
-(we'll need it when starting the training/eval jobs).
-
-``` bash
-# From tensorflow/models/research/
-
-# Edit the faster_rcnn_resnet101_pets.config template. Please note that there
-# are multiple places where PATH_TO_BE_CONFIGURED needs to be set.
-sed -i "s|PATH_TO_BE_CONFIGURED|"gs://${YOUR_GCS_BUCKET}"/data|g" \
-    object_detection/samples/configs/faster_rcnn_resnet101_pets.config
-
-# Copy edited template to cloud.
-
-# Copy edited template to cloud.
-gsutil cp object_detection/samples/configs/faster_rcnn_resnet101_pets.config \
-    gs://${YOUR_GCS_BUCKET}/data/faster_rcnn_resnet101_pets.config
-```
-
-## Checking Your Google Cloud Storage Bucket
-
-At this point in the tutorial, you should have uploaded the training/validation
-datasets (including the label map), our COCO-pretrained Faster R-CNN fine-tune
-checkpoint and your job configuration to your Google Cloud Storage bucket. Your
-bucket should look like the following:
-
-```lang-none
-+ ${YOUR_GCS_BUCKET}/
-  + data/
-    - faster_rcnn_resnet101_pets.config
-    - model.ckpt.index
-    - model.ckpt.meta
-    - model.ckpt.data-00000-of-00001
-    - pet_label_map.pbtxt
-    - pet_train.record
-    - pet_val.record
-```
-
-You can inspect your bucket using the [Google Cloud Storage
-browser](https://console.cloud.google.com/storage/browser).
-
-## Starting Training and Evaluation Jobs on Google Cloud ML Engine
-
-Before we can start a job on Google Cloud ML Engine, we must:
-
-1. Package the Tensorflow Object Detection code.
-2. Write a cluster configuration for our Google Cloud ML job.
-
-To package the Tensorflow Object Detection code, run the following commands from
-the `tensorflow/models/research/` directory:
-
-``` bash
-# From tensorflow/models/research/
-python setup.py sdist
-(cd slim && python setup.py sdist)
-```
-
-You should see two tar.gz files created at `dist/object_detection-0.1.tar.gz`
-and `slim/dist/slim-0.1.tar.gz`.
-
-For the training Cloud ML job, we'll configure the cluster to use 10 training
-machines (1 master + 9 workers) and three parameter servers. The configuration
-file can be found at `object_detection/samples/cloud/cloud.yml`.
-
-Note: This sample is supported for use with the 1.2 runtime version.
-
-To start training, execute the following command from the
-`tensorflow/models/research/` directory:
-
-``` bash
-# From tensorflow/models/research/
-gcloud ml-engine jobs submit training `whoami`_object_detection_`date +%s` \
-    --runtime-version 1.2 \
-    --job-dir=gs://${YOUR_GCS_BUCKET}/train \
-    --packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz \
-    --module-name object_detection.train \
-    --region us-central1 \
-    --config object_detection/samples/cloud/cloud.yml \
-    -- \
-    --train_dir=gs://${YOUR_GCS_BUCKET}/train \
-    --pipeline_config_path=gs://${YOUR_GCS_BUCKET}/data/faster_rcnn_resnet101_pets.config
-```
-
-Once training has started, we can run an evaluation job concurrently:
-
-``` bash
-# From tensorflow/models/research/
-gcloud ml-engine jobs submit training `whoami`_object_detection_eval_`date +%s` \
-    --runtime-version 1.2 \
-    --job-dir=gs://${YOUR_GCS_BUCKET}/train \
-    --packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz \
-    --module-name object_detection.eval \
-    --region us-central1 \
-    --scale-tier BASIC_GPU \
-    -- \
-    --checkpoint_dir=gs://${YOUR_GCS_BUCKET}/train \
-    --eval_dir=gs://${YOUR_GCS_BUCKET}/eval \
-    --pipeline_config_path=gs://${YOUR_GCS_BUCKET}/data/faster_rcnn_resnet101_pets.config
-```
-
-Note: Even though we're running an evaluation job, the `gcloud ml-engine jobs
-submit training` command is correct. ML Engine does not distinguish between
-training and evaluation jobs.
-
-Users can monitor and stop training and evaluation jobs on the [ML Engine
-Dashboard](https://console.cloud.google.com/mlengine/jobs).
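-
-If you prefer the command line to the dashboard, the same `gcloud` SDK used to
-submit the jobs can also query and stop them. The snippet below assumes
-`${JOB_NAME}` holds the job name printed when you submitted the job (the
-pattern built from `whoami` and `date` above); the subcommands shown are the
-standard `gcloud ml-engine jobs` operations, but consult
-`gcloud ml-engine jobs --help` if your SDK version differs.
-
-``` bash
-# List recent jobs and their current states.
-gcloud ml-engine jobs list
-
-# Inspect a single job, or stop it once it has converged.
-gcloud ml-engine jobs describe ${JOB_NAME}
-gcloud ml-engine jobs cancel ${JOB_NAME}
-```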
-
-## Monitoring Progress with Tensorboard
-
-You can monitor progress of the training and eval jobs by running Tensorboard
-on your local machine:
-
-``` bash
-# This command needs to be run once to allow your local machine to access your
-# GCS bucket.
-gcloud auth application-default login
-
-tensorboard --logdir=gs://${YOUR_GCS_BUCKET}
-```
-
-Once Tensorboard is running, navigate to `localhost:6006` from your favourite
-web browser. You should see something similar to the following:
-
-![](img/tensorboard.png)
-
-You will also want to click on the images tab to see example detections made by
-the model while it trains. After about an hour and a half of training, you can
-expect to see something like this:
-
-![](img/tensorboard2.png)
-
-Note: It takes roughly 10 minutes for a job to get started on ML Engine, and
-roughly an hour for the system to evaluate the validation dataset. It may take
-some time to populate the dashboards. If you do not see any entries after half
-an hour, check the logs from the [ML Engine
-Dashboard](https://console.cloud.google.com/mlengine/jobs). Note that by default
-the training jobs are configured to run for much longer than is necessary for
-convergence. To save money, we recommend killing your jobs once you've seen
-that they've converged.
-
-## Exporting the Tensorflow Graph
-
-After your model has been trained, you should export it to a Tensorflow graph
-proto. First, you need to identify a candidate checkpoint to export. You can
-search your bucket using the [Google Cloud Storage
-Browser](https://console.cloud.google.com/storage/browser). The file should be
-stored under `${YOUR_GCS_BUCKET}/train`. The checkpoint will typically consist
-of three files:
-
-* `model.ckpt-${CHECKPOINT_NUMBER}.data-00000-of-00001`
-* `model.ckpt-${CHECKPOINT_NUMBER}.index`
-* `model.ckpt-${CHECKPOINT_NUMBER}.meta`
-
-After you've identified a candidate checkpoint to export, run the following
-command from `tensorflow/models/research/`:
-
-``` bash
-# From tensorflow/models/research/
-gsutil cp gs://${YOUR_GCS_BUCKET}/train/model.ckpt-${CHECKPOINT_NUMBER}.* .
-python object_detection/export_inference_graph.py \
-    --input_type image_tensor \
-    --pipeline_config_path object_detection/samples/configs/faster_rcnn_resnet101_pets.config \
-    --trained_checkpoint_prefix model.ckpt-${CHECKPOINT_NUMBER} \
-    --output_directory exported_graphs
-```
-
-Afterwards, you should see a directory named `exported_graphs` containing the
-SavedModel and frozen graph.
-
-## Configuring the Instance Segmentation Pipeline
-
-Mask prediction can be turned on for an object detection config by adding
-`predict_instance_masks: true` within the `MaskRCNNBoxPredictor`. Other
-parameters, such as the mask size, the number of convolutions in the mask
-layer, and the convolution hyperparameters, can be defined as well (see the
-fragment below). We will use `mask_rcnn_resnet101_pets.config` as a starting
-point for configuring the instance segmentation pipeline. Everything mentioned
-above about object detection holds true for instance segmentation as well:
-training details aside, an instance segmentation model is simply an object
-detection model with an additional head that predicts an object mask inside
-each predicted box. Please refer to the section on [Running an Instance
-Segmentation Model](instance_segmentation.md) for instructions on how to
-configure a model that predicts masks in addition to object bounding boxes.
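-
-For concreteness, such a predictor block can look like the following. This is
-a minimal illustrative fragment: the field names follow the API's
-`box_predictor` proto, while the numeric values are placeholders to be tuned
-for your own model rather than recommended settings.
-
-```
-second_stage_box_predictor {
-  mask_rcnn_box_predictor {
-    predict_instance_masks: true
-    # Resolution of the predicted masks.
-    mask_height: 15
-    mask_width: 15
-    mask_prediction_num_conv_layers: 2
-    conv_hyperparams {
-      op: CONV
-      regularizer { l2_regularizer { weight: 0.0 } }
-      initializer { truncated_normal_initializer { stddev: 0.01 } }
-    }
-  }
-}
-```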
-
-## What's Next
-
-Congratulations, you have now trained an object detector for various cats and
-dogs! There are several things you can do now:
-
-1. [Test your exported model using the provided Jupyter notebook.](running_notebook.md)
-2. [Experiment with different model configurations.](configuring_jobs.md)
-3. Train an object detector using your own data.
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/using_your_own_dataset.md b/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/using_your_own_dataset.md
deleted file mode 100644
index 397e394ca41ef55695bdfbb8b0e52a138978e798..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/g3doc/using_your_own_dataset.md
+++ /dev/null
@@ -1,161 +0,0 @@
-# Preparing Inputs
-
-To use your own dataset in the Tensorflow Object Detection API, you must convert
-it into the [TFRecord file format](https://www.tensorflow.org/api_guides/python/python_io#tfrecords_format_details).
-This document outlines how to write a script to generate the TFRecord file.
-
-## Label Maps
-
-Each dataset is required to have a label map associated with it. This label map
-defines a mapping from string class names to integer class IDs. The label map
-should be a `StringIntLabelMap` text protobuf. Sample label maps can be found in
-`object_detection/data`. Label maps should always start from ID 1.
-
-## Dataset Requirements
-
-For every example in your dataset, you should have the following information:
-
-1. An RGB image for the dataset encoded as jpeg or png.
-2. A list of bounding boxes for the image. Each bounding box should contain:
-    1. Bounding box coordinates (with the origin in the top left corner)
-       defined by 4 floating point numbers [ymin, xmin, ymax, xmax]. Note that
-       we store the _normalized_ coordinates (x / width, y / height) in the
-       TFRecord dataset.
-    2. The class of the object in the bounding box.
-
-## Example Image
-
-Consider the following image:
-
-![Example Image](img/example_cat.jpg "Example Image")
-
-with the following label map:
-
-```
-item {
-  id: 1
-  name: 'Cat'
-}
-
-
-item {
-  id: 2
-  name: 'Dog'
-}
-```
-
-We can generate a tf.Example proto for this image using the following code:
-
-```python
-
-def create_cat_tf_example(encoded_cat_image_data):
-  """Creates a tf.Example proto from sample cat image.
-
-  Args:
-    encoded_cat_image_data: The jpg encoded data of the cat image.
-
-  Returns:
-    example: The created tf.Example.
- """ - - height = 1032.0 - width = 1200.0 - filename = 'example_cat.jpg' - image_format = b'jpg' - - xmins = [322.0 / 1200.0] - xmaxs = [1062.0 / 1200.0] - ymins = [174.0 / 1032.0] - ymaxs = [761.0 / 1032.0] - classes_text = ['Cat'] - classes = [1] - - tf_example = tf.train.Example(features=tf.train.Features(feature={ - 'image/height': dataset_util.int64_feature(height), - 'image/width': dataset_util.int64_feature(width), - 'image/filename': dataset_util.bytes_feature(filename), - 'image/source_id': dataset_util.bytes_feature(filename), - 'image/encoded': dataset_util.bytes_feature(encoded_image_data), - 'image/format': dataset_util.bytes_feature(image_format), - 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), - 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), - 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), - 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), - 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), - 'image/object/class/label': dataset_util.int64_list_feature(classes), - })) - return tf_example -``` - -## Conversion Script Outline - -A typical conversion script will look like the following: - -```python - -import tensorflow as tf - -from object_detection.utils import dataset_util - - -flags = tf.app.flags -flags.DEFINE_string('output_path', '', 'Path to output TFRecord') -FLAGS = flags.FLAGS - - -def create_tf_example(example): - # TODO(user): Populate the following variables from your example. - height = None # Image height - width = None # Image width - filename = None # Filename of the image. Empty if image is not from file - encoded_image_data = None # Encoded image bytes - image_format = None # b'jpeg' or b'png' - - xmins = [] # List of normalized left x coordinates in bounding box (1 per box) - xmaxs = [] # List of normalized right x coordinates in bounding box - # (1 per box) - ymins = [] # List of normalized top y coordinates in bounding box (1 per box) - ymaxs = [] # List of normalized bottom y coordinates in bounding box - # (1 per box) - classes_text = [] # List of string class name of bounding box (1 per box) - classes = [] # List of integer class id of bounding box (1 per box) - - tf_example = tf.train.Example(features=tf.train.Features(feature={ - 'image/height': dataset_util.int64_feature(height), - 'image/width': dataset_util.int64_feature(width), - 'image/filename': dataset_util.bytes_feature(filename), - 'image/source_id': dataset_util.bytes_feature(filename), - 'image/encoded': dataset_util.bytes_feature(encoded_image_data), - 'image/format': dataset_util.bytes_feature(image_format), - 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), - 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), - 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), - 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), - 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), - 'image/object/class/label': dataset_util.int64_list_feature(classes), - })) - return tf_example - - -def main(_): - writer = tf.python_io.TFRecordWriter(FLAGS.output_path) - - # TODO(user): Write code to read in your dataset to examples variable - - for example in examples: - tf_example = create_tf_example(example) - writer.write(tf_example.SerializeToString()) - - writer.close() - - -if __name__ == '__main__': - tf.app.run() - -``` - -Note: You may notice additional fields in some other datasets. 
They are -currently unused by the API and are optional. - -Note: Please refer to the section on [Running an Instance Segmentation -Model](instance_segmentation.md) for instructions on how to configure a model -that predicts masks in addition to object bounding boxes. diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/inference/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/inference/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/inference/detection_inference.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/inference/detection_inference.py deleted file mode 100644 index dc66686ff1f496935ad2cd05ee5f969fa1306a8d..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/inference/detection_inference.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utility functions for detection inference.""" -from __future__ import division - -import tensorflow as tf - -from object_detection.core import standard_fields - - -def build_input(tfrecord_paths): - """Builds the graph's input. - - Args: - tfrecord_paths: List of paths to the input TFRecords - - Returns: - serialized_example_tensor: The next serialized example. String scalar Tensor - image_tensor: The decoded image of the example. Uint8 tensor, - shape=[1, None, None,3] - """ - filename_queue = tf.train.string_input_producer( - tfrecord_paths, shuffle=False, num_epochs=1) - - tf_record_reader = tf.TFRecordReader() - _, serialized_example_tensor = tf_record_reader.read(filename_queue) - features = tf.parse_single_example( - serialized_example_tensor, - features={ - standard_fields.TfExampleFields.image_encoded: - tf.FixedLenFeature([], tf.string), - }) - encoded_image = features[standard_fields.TfExampleFields.image_encoded] - image_tensor = tf.image.decode_image(encoded_image, channels=3) - image_tensor.set_shape([None, None, 3]) - image_tensor = tf.expand_dims(image_tensor, 0) - - return serialized_example_tensor, image_tensor - - -def build_inference_graph(image_tensor, inference_graph_path): - """Loads the inference graph and connects it to the input image. - - Args: - image_tensor: The input image. uint8 tensor, shape=[1, None, None, 3] - inference_graph_path: Path to the inference graph with embedded weights - - Returns: - detected_boxes_tensor: Detected boxes. Float tensor, - shape=[num_detections, 4] - detected_scores_tensor: Detected scores. Float tensor, - shape=[num_detections] - detected_labels_tensor: Detected labels. 
Int64 tensor, - shape=[num_detections] - """ - with tf.gfile.Open(inference_graph_path, 'r') as graph_def_file: - graph_content = graph_def_file.read() - graph_def = tf.GraphDef() - graph_def.MergeFromString(graph_content) - - tf.import_graph_def( - graph_def, name='', input_map={'image_tensor': image_tensor}) - - g = tf.get_default_graph() - - num_detections_tensor = tf.squeeze( - g.get_tensor_by_name('num_detections:0'), 0) - num_detections_tensor = tf.cast(num_detections_tensor, tf.int32) - - detected_boxes_tensor = tf.squeeze( - g.get_tensor_by_name('detection_boxes:0'), 0) - detected_boxes_tensor = detected_boxes_tensor[:num_detections_tensor] - - detected_scores_tensor = tf.squeeze( - g.get_tensor_by_name('detection_scores:0'), 0) - detected_scores_tensor = detected_scores_tensor[:num_detections_tensor] - - detected_labels_tensor = tf.squeeze( - g.get_tensor_by_name('detection_classes:0'), 0) - detected_labels_tensor = tf.cast(detected_labels_tensor, tf.int64) - detected_labels_tensor = detected_labels_tensor[:num_detections_tensor] - - return detected_boxes_tensor, detected_scores_tensor, detected_labels_tensor - - -def infer_detections_and_add_to_example( - serialized_example_tensor, detected_boxes_tensor, detected_scores_tensor, - detected_labels_tensor, discard_image_pixels): - """Runs the supplied tensors and adds the inferred detections to the example. - - Args: - serialized_example_tensor: Serialized TF example. Scalar string tensor - detected_boxes_tensor: Detected boxes. Float tensor, - shape=[num_detections, 4] - detected_scores_tensor: Detected scores. Float tensor, - shape=[num_detections] - detected_labels_tensor: Detected labels. Int64 tensor, - shape=[num_detections] - discard_image_pixels: If true, discards the image from the result - Returns: - The de-serialized TF example augmented with the inferred detections. - """ - tf_example = tf.train.Example() - (serialized_example, detected_boxes, detected_scores, - detected_classes) = tf.get_default_session().run([ - serialized_example_tensor, detected_boxes_tensor, detected_scores_tensor, - detected_labels_tensor - ]) - detected_boxes = detected_boxes.T - - tf_example.ParseFromString(serialized_example) - feature = tf_example.features.feature - feature[standard_fields.TfExampleFields. - detection_score].float_list.value[:] = detected_scores - feature[standard_fields.TfExampleFields. - detection_bbox_ymin].float_list.value[:] = detected_boxes[0] - feature[standard_fields.TfExampleFields. - detection_bbox_xmin].float_list.value[:] = detected_boxes[1] - feature[standard_fields.TfExampleFields. - detection_bbox_ymax].float_list.value[:] = detected_boxes[2] - feature[standard_fields.TfExampleFields. - detection_bbox_xmax].float_list.value[:] = detected_boxes[3] - feature[standard_fields.TfExampleFields. - detection_class_label].int64_list.value[:] = detected_classes - - if discard_image_pixels: - del feature[standard_fields.TfExampleFields.image_encoded] - - return tf_example diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/inference/detection_inference_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/inference/detection_inference_test.py deleted file mode 100644 index eabb6b474d672a48139cb4cdeebd388a4d5c4fca..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/inference/detection_inference_test.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Tests for detection_inference.py.""" - -import os -import StringIO - -import numpy as np -from PIL import Image -import tensorflow as tf - -from object_detection.core import standard_fields -from object_detection.inference import detection_inference -from object_detection.utils import dataset_util - - -def get_mock_tfrecord_path(): - return os.path.join(tf.test.get_temp_dir(), 'mock.tfrec') - - -def create_mock_tfrecord(): - pil_image = Image.fromarray(np.array([[[123, 0, 0]]], dtype=np.uint8), 'RGB') - image_output_stream = StringIO.StringIO() - pil_image.save(image_output_stream, format='png') - encoded_image = image_output_stream.getvalue() - - feature_map = { - 'test_field': - dataset_util.float_list_feature([1, 2, 3, 4]), - standard_fields.TfExampleFields.image_encoded: - dataset_util.bytes_feature(encoded_image), - } - - tf_example = tf.train.Example(features=tf.train.Features(feature=feature_map)) - with tf.python_io.TFRecordWriter(get_mock_tfrecord_path()) as writer: - writer.write(tf_example.SerializeToString()) - - -def get_mock_graph_path(): - return os.path.join(tf.test.get_temp_dir(), 'mock_graph.pb') - - -def create_mock_graph(): - g = tf.Graph() - with g.as_default(): - in_image_tensor = tf.placeholder( - tf.uint8, shape=[1, None, None, 3], name='image_tensor') - tf.constant([2.0], name='num_detections') - tf.constant( - [[[0, 0.8, 0.7, 1], [0.1, 0.2, 0.8, 0.9], [0.2, 0.3, 0.4, 0.5]]], - name='detection_boxes') - tf.constant([[0.1, 0.2, 0.3]], name='detection_scores') - tf.identity( - tf.constant([[1.0, 2.0, 3.0]]) * - tf.reduce_sum(tf.cast(in_image_tensor, dtype=tf.float32)), - name='detection_classes') - graph_def = g.as_graph_def() - - with tf.gfile.Open(get_mock_graph_path(), 'w') as fl: - fl.write(graph_def.SerializeToString()) - - -class InferDetectionsTests(tf.test.TestCase): - - def test_simple(self): - create_mock_graph() - create_mock_tfrecord() - - serialized_example_tensor, image_tensor = detection_inference.build_input( - [get_mock_tfrecord_path()]) - self.assertAllEqual(image_tensor.get_shape().as_list(), [1, None, None, 3]) - - (detected_boxes_tensor, detected_scores_tensor, - detected_labels_tensor) = detection_inference.build_inference_graph( - image_tensor, get_mock_graph_path()) - - with self.test_session(use_gpu=False) as sess: - sess.run(tf.global_variables_initializer()) - sess.run(tf.local_variables_initializer()) - tf.train.start_queue_runners() - - tf_example = detection_inference.infer_detections_and_add_to_example( - serialized_example_tensor, detected_boxes_tensor, - detected_scores_tensor, detected_labels_tensor, False) - - self.assertProtoEquals(r""" - features { - feature { - key: "image/detection/bbox/ymin" - value { float_list { value: [0.0, 0.1] } } } - feature { - key: "image/detection/bbox/xmin" - value { float_list { value: [0.8, 0.2] } } } - feature { - key: "image/detection/bbox/ymax" - value { float_list { 
value: [0.7, 0.8] } } } - feature { - key: "image/detection/bbox/xmax" - value { float_list { value: [1.0, 0.9] } } } - feature { - key: "image/detection/label" - value { int64_list { value: [123, 246] } } } - feature { - key: "image/detection/score" - value { float_list { value: [0.1, 0.2] } } } - feature { - key: "image/encoded" - value { bytes_list { value: - "\211PNG\r\n\032\n\000\000\000\rIHDR\000\000\000\001\000\000" - "\000\001\010\002\000\000\000\220wS\336\000\000\000\022IDATx" - "\234b\250f`\000\000\000\000\377\377\003\000\001u\000|gO\242" - "\213\000\000\000\000IEND\256B`\202" } } } - feature { - key: "test_field" - value { float_list { value: [1.0, 2.0, 3.0, 4.0] } } } } - """, tf_example) - - def test_discard_image(self): - create_mock_graph() - create_mock_tfrecord() - - serialized_example_tensor, image_tensor = detection_inference.build_input( - [get_mock_tfrecord_path()]) - (detected_boxes_tensor, detected_scores_tensor, - detected_labels_tensor) = detection_inference.build_inference_graph( - image_tensor, get_mock_graph_path()) - - with self.test_session(use_gpu=False) as sess: - sess.run(tf.global_variables_initializer()) - sess.run(tf.local_variables_initializer()) - tf.train.start_queue_runners() - - tf_example = detection_inference.infer_detections_and_add_to_example( - serialized_example_tensor, detected_boxes_tensor, - detected_scores_tensor, detected_labels_tensor, True) - - self.assertProtoEquals(r""" - features { - feature { - key: "image/detection/bbox/ymin" - value { float_list { value: [0.0, 0.1] } } } - feature { - key: "image/detection/bbox/xmin" - value { float_list { value: [0.8, 0.2] } } } - feature { - key: "image/detection/bbox/ymax" - value { float_list { value: [0.7, 0.8] } } } - feature { - key: "image/detection/bbox/xmax" - value { float_list { value: [1.0, 0.9] } } } - feature { - key: "image/detection/label" - value { int64_list { value: [123, 246] } } } - feature { - key: "image/detection/score" - value { float_list { value: [0.1, 0.2] } } } - feature { - key: "test_field" - value { float_list { value: [1.0, 2.0, 3.0, 4.0] } } } } - """, tf_example) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/inference/infer_detections.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/inference/infer_detections.py deleted file mode 100644 index a251009ef0d415e08395be038dbc4ed42d804ff7..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/inference/infer_detections.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Infers detections on a TFRecord of TFExamples given an inference graph. 
- -Example usage: - ./infer_detections \ - --input_tfrecord_paths=/path/to/input/tfrecord1,/path/to/input/tfrecord2 \ - --output_tfrecord_path_prefix=/path/to/output/detections.tfrecord \ - --inference_graph=/path/to/frozen_weights_inference_graph.pb - -The output is a TFRecord of TFExamples. Each TFExample from the input is first -augmented with detections from the inference graph and then copied to the -output. - -The input and output nodes of the inference graph are expected to have the same -types, shapes, and semantics, as the input and output nodes of graphs produced -by export_inference_graph.py, when run with --input_type=image_tensor. - -The script can also discard the image pixels in the output. This greatly -reduces the output size and can potentially accelerate reading data in -subsequent processing steps that don't require the images (e.g. computing -metrics). -""" - -import itertools -import tensorflow as tf -from object_detection.inference import detection_inference - -tf.flags.DEFINE_string('input_tfrecord_paths', None, - 'A comma separated list of paths to input TFRecords.') -tf.flags.DEFINE_string('output_tfrecord_path', None, - 'Path to the output TFRecord.') -tf.flags.DEFINE_string('inference_graph', None, - 'Path to the inference graph with embedded weights.') -tf.flags.DEFINE_boolean('discard_image_pixels', False, - 'Discards the images in the output TFExamples. This' - ' significantly reduces the output size and is useful' - ' if the subsequent tools don\'t need access to the' - ' images (e.g. when computing evaluation measures).') - -FLAGS = tf.flags.FLAGS - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - - required_flags = ['input_tfrecord_paths', 'output_tfrecord_path', - 'inference_graph'] - for flag_name in required_flags: - if not getattr(FLAGS, flag_name): - raise ValueError('Flag --{} is required'.format(flag_name)) - - with tf.Session() as sess: - input_tfrecord_paths = [ - v for v in FLAGS.input_tfrecord_paths.split(',') if v] - tf.logging.info('Reading input from %d files', len(input_tfrecord_paths)) - serialized_example_tensor, image_tensor = detection_inference.build_input( - input_tfrecord_paths) - tf.logging.info('Reading graph and building model...') - (detected_boxes_tensor, detected_scores_tensor, - detected_labels_tensor) = detection_inference.build_inference_graph( - image_tensor, FLAGS.inference_graph) - - tf.logging.info('Running inference and writing output to {}'.format( - FLAGS.output_tfrecord_path)) - sess.run(tf.local_variables_initializer()) - tf.train.start_queue_runners() - with tf.python_io.TFRecordWriter( - FLAGS.output_tfrecord_path) as tf_record_writer: - try: - for counter in itertools.count(): - tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 10, - counter) - tf_example = detection_inference.infer_detections_and_add_to_example( - serialized_example_tensor, detected_boxes_tensor, - detected_scores_tensor, detected_labels_tensor, - FLAGS.discard_image_pixels) - tf_record_writer.write(tf_example.SerializeToString()) - except tf.errors.OutOfRangeError: - tf.logging.info('Finished processing records') - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/inputs.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/inputs.py deleted file mode 100644 index c9b3ae259d7deb1bea92b15f2bb3bdd5aa58ffb0..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/inputs.py +++ /dev/null @@ 
-1,440 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Model input function for tf-learn object detection model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools - -import tensorflow as tf -from object_detection.builders import dataset_builder -from object_detection.builders import image_resizer_builder -from object_detection.builders import model_builder -from object_detection.builders import preprocessor_builder -from object_detection.core import preprocessor -from object_detection.core import standard_fields as fields -from object_detection.data_decoders import tf_example_decoder -from object_detection.protos import eval_pb2 -from object_detection.protos import input_reader_pb2 -from object_detection.protos import model_pb2 -from object_detection.protos import train_pb2 -from object_detection.utils import config_util -from object_detection.utils import dataset_util -from object_detection.utils import ops as util_ops - -HASH_KEY = 'hash' -HASH_BINS = 1 << 31 -SERVING_FED_EXAMPLE_KEY = 'serialized_example' - -# A map of names to methods that help build the input pipeline. -INPUT_BUILDER_UTIL_MAP = { - 'dataset_build': dataset_builder.build, -} - - -def transform_input_data(tensor_dict, - model_preprocess_fn, - image_resizer_fn, - num_classes, - data_augmentation_fn=None, - merge_multiple_boxes=False, - retain_original_image=False): - """A single function that is responsible for all input data transformations. - - Data transformation functions are applied in the following order. - 1. If key fields.InputDataFields.image_additional_channels is present in - tensor_dict, the additional channels will be merged into - fields.InputDataFields.image. - 2. data_augmentation_fn (optional): applied on tensor_dict. - 3. model_preprocess_fn: applied only on image tensor in tensor_dict. - 4. image_resizer_fn: applied on original image and instance mask tensor in - tensor_dict. - 5. one_hot_encoding: applied to classes tensor in tensor_dict. - 6. merge_multiple_boxes (optional): when groundtruth boxes are exactly the - same they can be merged into a single box with an associated k-hot class - label. - - Args: - tensor_dict: dictionary containing input tensors keyed by - fields.InputDataFields. - model_preprocess_fn: model's preprocess function to apply on image tensor. - This function must take in a 4-D float tensor and return a 4-D preprocess - float tensor and a tensor containing the true image shape. - image_resizer_fn: image resizer function to apply on groundtruth instance - `masks. This function must take a 3-D float tensor of an image and a 3-D - tensor of instance masks and return a resized version of these along with - the true shapes. - num_classes: number of max classes to one-hot (or k-hot) encode the class - labels. 
- data_augmentation_fn: (optional) data augmentation function to apply on - input `tensor_dict`. - merge_multiple_boxes: (optional) whether to merge multiple groundtruth boxes - and classes for a given image if the boxes are exactly the same. - retain_original_image: (optional) whether to retain original image in the - output dictionary. - - Returns: - A dictionary keyed by fields.InputDataFields containing the tensors obtained - after applying all the transformations. - """ - if fields.InputDataFields.image_additional_channels in tensor_dict: - channels = tensor_dict[fields.InputDataFields.image_additional_channels] - tensor_dict[fields.InputDataFields.image] = tf.concat( - [tensor_dict[fields.InputDataFields.image], channels], axis=2) - - if retain_original_image: - tensor_dict[fields.InputDataFields.original_image] = tf.cast( - tensor_dict[fields.InputDataFields.image], tf.uint8) - - # Apply data augmentation ops. - if data_augmentation_fn is not None: - tensor_dict = data_augmentation_fn(tensor_dict) - - # Apply model preprocessing ops and resize instance masks. - image = tensor_dict[fields.InputDataFields.image] - preprocessed_resized_image, true_image_shape = model_preprocess_fn( - tf.expand_dims(tf.to_float(image), axis=0)) - tensor_dict[fields.InputDataFields.image] = tf.squeeze( - preprocessed_resized_image, axis=0) - tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze( - true_image_shape, axis=0) - if fields.InputDataFields.groundtruth_instance_masks in tensor_dict: - masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks] - _, resized_masks, _ = image_resizer_fn(image, masks) - tensor_dict[fields.InputDataFields. - groundtruth_instance_masks] = resized_masks - - # Transform groundtruth classes to one hot encodings. - label_offset = 1 - zero_indexed_groundtruth_classes = tensor_dict[ - fields.InputDataFields.groundtruth_classes] - label_offset - tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot( - zero_indexed_groundtruth_classes, num_classes) - - if merge_multiple_boxes: - merged_boxes, merged_classes, _ = util_ops.merge_boxes_with_multiple_labels( - tensor_dict[fields.InputDataFields.groundtruth_boxes], - zero_indexed_groundtruth_classes, num_classes) - tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes - tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes - - return tensor_dict - - -def augment_input_data(tensor_dict, data_augmentation_options): - """Applies data augmentation ops to input tensors. - - Args: - tensor_dict: A dictionary of input tensors keyed by fields.InputDataFields. - data_augmentation_options: A list of tuples, where each tuple contains a - function and a dictionary that contains arguments and their values. - Usually, this is the output of core/preprocessor.build. - - Returns: - A dictionary of tensors obtained by applying data augmentation ops to the - input tensor dictionary. 
- """ - tensor_dict[fields.InputDataFields.image] = tf.expand_dims( - tf.to_float(tensor_dict[fields.InputDataFields.image]), 0) - - include_instance_masks = (fields.InputDataFields.groundtruth_instance_masks - in tensor_dict) - include_keypoints = (fields.InputDataFields.groundtruth_keypoints - in tensor_dict) - tensor_dict = preprocessor.preprocess( - tensor_dict, data_augmentation_options, - func_arg_map=preprocessor.get_default_func_arg_map( - include_instance_masks=include_instance_masks, - include_keypoints=include_keypoints)) - tensor_dict[fields.InputDataFields.image] = tf.squeeze( - tensor_dict[fields.InputDataFields.image], axis=0) - return tensor_dict - - -def _get_labels_dict(input_dict): - """Extracts labels dict from input dict.""" - required_label_keys = [ - fields.InputDataFields.num_groundtruth_boxes, - fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_classes, - fields.InputDataFields.groundtruth_weights - ] - labels_dict = {} - for key in required_label_keys: - labels_dict[key] = input_dict[key] - - optional_label_keys = [ - fields.InputDataFields.groundtruth_keypoints, - fields.InputDataFields.groundtruth_instance_masks, - fields.InputDataFields.groundtruth_area, - fields.InputDataFields.groundtruth_is_crowd, - fields.InputDataFields.groundtruth_difficult - ] - - for key in optional_label_keys: - if key in input_dict: - labels_dict[key] = input_dict[key] - if fields.InputDataFields.groundtruth_difficult in labels_dict: - labels_dict[fields.InputDataFields.groundtruth_difficult] = tf.cast( - labels_dict[fields.InputDataFields.groundtruth_difficult], tf.int32) - return labels_dict - - -def _get_features_dict(input_dict): - """Extracts features dict from input dict.""" - hash_from_source_id = tf.string_to_hash_bucket_fast( - input_dict[fields.InputDataFields.source_id], HASH_BINS) - features = { - fields.InputDataFields.image: - input_dict[fields.InputDataFields.image], - HASH_KEY: tf.cast(hash_from_source_id, tf.int32), - fields.InputDataFields.true_image_shape: - input_dict[fields.InputDataFields.true_image_shape] - } - if fields.InputDataFields.original_image in input_dict: - features[fields.InputDataFields.original_image] = input_dict[ - fields.InputDataFields.original_image] - return features - - -def create_train_input_fn(train_config, train_input_config, - model_config): - """Creates a train `input` function for `Estimator`. - - Args: - train_config: A train_pb2.TrainConfig. - train_input_config: An input_reader_pb2.InputReader. - model_config: A model_pb2.DetectionModel. - - Returns: - `input_fn` for `Estimator` in TRAIN mode. - """ - - def _train_input_fn(params=None): - """Returns `features` and `labels` tensor dictionaries for training. - - Args: - params: Parameter dictionary passed from the estimator. - - Returns: - features: Dictionary of feature tensors. - features[fields.InputDataFields.image] is a [batch_size, H, W, C] - float32 tensor with preprocessed images. - features[HASH_KEY] is a [batch_size] int32 tensor representing unique - identifiers for the images. - features[fields.InputDataFields.true_image_shape] is a [batch_size, 3] - int32 tensor representing the true image shapes, as preprocessed - images could be padded. - features[fields.InputDataFields.original_image] (optional) is a - [batch_size, H, W, C] float32 tensor with original images. - labels: Dictionary of groundtruth tensors. - labels[fields.InputDataFields.num_groundtruth_boxes] is a [batch_size] - int32 tensor indicating the number of groundtruth boxes. 
- labels[fields.InputDataFields.groundtruth_boxes] is a - [batch_size, num_boxes, 4] float32 tensor containing the corners of - the groundtruth boxes. - labels[fields.InputDataFields.groundtruth_classes] is a - [batch_size, num_boxes, num_classes] float32 one-hot tensor of - classes. - labels[fields.InputDataFields.groundtruth_weights] is a - [batch_size, num_boxes] float32 tensor containing groundtruth weights - for the boxes. - -- Optional -- - labels[fields.InputDataFields.groundtruth_instance_masks] is a - [batch_size, num_boxes, H, W] float32 tensor containing only binary - values, which represent instance masks for objects. - labels[fields.InputDataFields.groundtruth_keypoints] is a - [batch_size, num_boxes, num_keypoints, 2] float32 tensor containing - keypoints for each box. - - Raises: - TypeError: if the `train_config`, `train_input_config` or `model_config` - are not of the correct type. - """ - if not isinstance(train_config, train_pb2.TrainConfig): - raise TypeError('For training mode, the `train_config` must be a ' - 'train_pb2.TrainConfig.') - if not isinstance(train_input_config, input_reader_pb2.InputReader): - raise TypeError('The `train_input_config` must be a ' - 'input_reader_pb2.InputReader.') - if not isinstance(model_config, model_pb2.DetectionModel): - raise TypeError('The `model_config` must be a ' - 'model_pb2.DetectionModel.') - - data_augmentation_options = [ - preprocessor_builder.build(step) - for step in train_config.data_augmentation_options - ] - data_augmentation_fn = functools.partial( - augment_input_data, data_augmentation_options=data_augmentation_options) - - model = model_builder.build(model_config, is_training=True) - image_resizer_config = config_util.get_image_resizer_config(model_config) - image_resizer_fn = image_resizer_builder.build(image_resizer_config) - - transform_data_fn = functools.partial( - transform_input_data, model_preprocess_fn=model.preprocess, - image_resizer_fn=image_resizer_fn, - num_classes=config_util.get_number_of_classes(model_config), - data_augmentation_fn=data_augmentation_fn, - retain_original_image=train_config.retain_original_images) - dataset = INPUT_BUILDER_UTIL_MAP['dataset_build']( - train_input_config, - transform_input_data_fn=transform_data_fn, - batch_size=params['batch_size'] if params else train_config.batch_size, - max_num_boxes=train_config.max_number_of_boxes, - num_classes=config_util.get_number_of_classes(model_config), - spatial_image_shape=config_util.get_spatial_image_size( - image_resizer_config)) - input_dict = dataset_util.make_initializable_iterator(dataset).get_next() - return (_get_features_dict(input_dict), _get_labels_dict(input_dict)) - - return _train_input_fn - - -def create_eval_input_fn(eval_config, eval_input_config, model_config): - """Creates an eval `input` function for `Estimator`. - - Args: - eval_config: An eval_pb2.EvalConfig. - eval_input_config: An input_reader_pb2.InputReader. - model_config: A model_pb2.DetectionModel. - - Returns: - `input_fn` for `Estimator` in EVAL mode. - """ - - def _eval_input_fn(params=None): - """Returns `features` and `labels` tensor dictionaries for evaluation. - - Args: - params: Parameter dictionary passed from the estimator. - - Returns: - features: Dictionary of feature tensors. - features[fields.InputDataFields.image] is a [1, H, W, C] float32 tensor - with preprocessed images. - features[HASH_KEY] is a [1] int32 tensor representing unique - identifiers for the images. 
-      features[fields.InputDataFields.true_image_shape] is a [1, 3]
-        int32 tensor representing the true image shapes, as preprocessed
-        images could be padded.
-      features[fields.InputDataFields.original_image] is a [1, H', W', C]
-        float32 tensor with the original image.
-      labels: Dictionary of groundtruth tensors.
-      labels[fields.InputDataFields.groundtruth_boxes] is a [1, num_boxes, 4]
-        float32 tensor containing the corners of the groundtruth boxes.
-      labels[fields.InputDataFields.groundtruth_classes] is a
-        [num_boxes, num_classes] float32 one-hot tensor of classes.
-      labels[fields.InputDataFields.groundtruth_area] is a [1, num_boxes]
-        float32 tensor containing object areas.
-      labels[fields.InputDataFields.groundtruth_is_crowd] is a [1, num_boxes]
-        bool tensor indicating if the boxes enclose a crowd.
-      labels[fields.InputDataFields.groundtruth_difficult] is a [1, num_boxes]
-        int32 tensor indicating if the boxes represent difficult instances.
-      -- Optional --
-      labels[fields.InputDataFields.groundtruth_instance_masks] is a
-        [1, num_boxes, H, W] float32 tensor containing only binary values,
-        which represent instance masks for objects.
-
-    Raises:
-      TypeError: if the `eval_config`, `eval_input_config` or `model_config`
-        are not of the correct type.
-    """
-    params = params or {}
-    if not isinstance(eval_config, eval_pb2.EvalConfig):
-      raise TypeError('For eval mode, the `eval_config` must be an '
-                      'eval_pb2.EvalConfig.')
-    if not isinstance(eval_input_config, input_reader_pb2.InputReader):
-      raise TypeError('The `eval_input_config` must be an '
-                      'input_reader_pb2.InputReader.')
-    if not isinstance(model_config, model_pb2.DetectionModel):
-      raise TypeError('The `model_config` must be a '
-                      'model_pb2.DetectionModel.')
-
-    num_classes = config_util.get_number_of_classes(model_config)
-    model = model_builder.build(model_config, is_training=False)
-    image_resizer_config = config_util.get_image_resizer_config(model_config)
-    image_resizer_fn = image_resizer_builder.build(image_resizer_config)
-
-    transform_data_fn = functools.partial(
-        transform_input_data, model_preprocess_fn=model.preprocess,
-        image_resizer_fn=image_resizer_fn,
-        num_classes=num_classes,
-        data_augmentation_fn=None,
-        retain_original_image=eval_config.retain_original_images)
-    dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
-        eval_input_config,
-        transform_input_data_fn=transform_data_fn,
-        batch_size=params.get('batch_size', 1),
-        num_classes=config_util.get_number_of_classes(model_config),
-        spatial_image_shape=config_util.get_spatial_image_size(
-            image_resizer_config))
-    input_dict = dataset_util.make_initializable_iterator(dataset).get_next()
-
-    return (_get_features_dict(input_dict), _get_labels_dict(input_dict))
-
-  return _eval_input_fn
-
-
-def create_predict_input_fn(model_config):
-  """Creates a predict `input` function for `Estimator`.
-
-  Args:
-    model_config: A model_pb2.DetectionModel.
-
-  Returns:
-    `input_fn` for `Estimator` in PREDICT mode.
-  """
-
-  def _predict_input_fn(params=None):
-    """Decodes serialized tf.Examples and returns `ServingInputReceiver`.
-
-    Args:
-      params: Parameter dictionary passed from the estimator.
-
-    Returns:
-      `ServingInputReceiver`.
- """ - del params - example = tf.placeholder(dtype=tf.string, shape=[], name='input_feature') - - num_classes = config_util.get_number_of_classes(model_config) - model = model_builder.build(model_config, is_training=False) - image_resizer_config = config_util.get_image_resizer_config(model_config) - image_resizer_fn = image_resizer_builder.build(image_resizer_config) - - transform_fn = functools.partial( - transform_input_data, model_preprocess_fn=model.preprocess, - image_resizer_fn=image_resizer_fn, - num_classes=num_classes, - data_augmentation_fn=None) - - decoder = tf_example_decoder.TfExampleDecoder(load_instance_masks=False) - input_dict = transform_fn(decoder.decode(example)) - images = tf.to_float(input_dict[fields.InputDataFields.image]) - images = tf.expand_dims(images, axis=0) - true_image_shape = tf.expand_dims( - input_dict[fields.InputDataFields.true_image_shape], axis=0) - - return tf.estimator.export.ServingInputReceiver( - features={ - fields.InputDataFields.image: images, - fields.InputDataFields.true_image_shape: true_image_shape}, - receiver_tensors={SERVING_FED_EXAMPLE_KEY: example}) - - return _predict_input_fn diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/inputs_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/inputs_test.py deleted file mode 100644 index 8326633597922a568c768d70c64ceea1d90156fa..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/inputs_test.py +++ /dev/null @@ -1,601 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for object_detection.tflearn.inputs.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools -import os - -import numpy as np -import tensorflow as tf - -from object_detection import inputs -from object_detection.core import preprocessor -from object_detection.core import standard_fields as fields -from object_detection.utils import config_util - -FLAGS = tf.flags.FLAGS - - -def _get_configs_for_model(model_name): - """Returns configurations for model.""" - fname = os.path.join(tf.resource_loader.get_data_files_path(), - 'samples/configs/' + model_name + '.config') - label_map_path = os.path.join(tf.resource_loader.get_data_files_path(), - 'data/pet_label_map.pbtxt') - data_path = os.path.join(tf.resource_loader.get_data_files_path(), - 'test_data/pets_examples.record') - configs = config_util.get_configs_from_pipeline_file(fname) - return config_util.merge_external_params_with_configs( - configs, - train_input_path=data_path, - eval_input_path=data_path, - label_map_path=label_map_path) - - -class InputsTest(tf.test.TestCase): - - def test_faster_rcnn_resnet50_train_input(self): - """Tests the training input function for FasterRcnnResnet50.""" - configs = _get_configs_for_model('faster_rcnn_resnet50_pets') - configs['train_config'].unpad_groundtruth_tensors = True - model_config = configs['model'] - model_config.faster_rcnn.num_classes = 37 - train_input_fn = inputs.create_train_input_fn( - configs['train_config'], configs['train_input_config'], model_config) - features, labels = train_input_fn() - - self.assertAllEqual([1, None, None, 3], - features[fields.InputDataFields.image].shape.as_list()) - self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype) - self.assertAllEqual([1], - features[inputs.HASH_KEY].shape.as_list()) - self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype) - self.assertAllEqual( - [1, 50, 4], - labels[fields.InputDataFields.groundtruth_boxes].shape.as_list()) - self.assertEqual(tf.float32, - labels[fields.InputDataFields.groundtruth_boxes].dtype) - self.assertAllEqual( - [1, 50, model_config.faster_rcnn.num_classes], - labels[fields.InputDataFields.groundtruth_classes].shape.as_list()) - self.assertEqual(tf.float32, - labels[fields.InputDataFields.groundtruth_classes].dtype) - self.assertAllEqual( - [1, 50], - labels[fields.InputDataFields.groundtruth_weights].shape.as_list()) - self.assertEqual(tf.float32, - labels[fields.InputDataFields.groundtruth_weights].dtype) - - def test_faster_rcnn_resnet50_eval_input(self): - """Tests the eval input function for FasterRcnnResnet50.""" - configs = _get_configs_for_model('faster_rcnn_resnet50_pets') - model_config = configs['model'] - model_config.faster_rcnn.num_classes = 37 - eval_input_fn = inputs.create_eval_input_fn( - configs['eval_config'], configs['eval_input_config'], model_config) - features, labels = eval_input_fn() - - self.assertAllEqual([1, None, None, 3], - features[fields.InputDataFields.image].shape.as_list()) - self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype) - self.assertAllEqual( - [1, None, None, 3], - features[fields.InputDataFields.original_image].shape.as_list()) - self.assertEqual(tf.uint8, - features[fields.InputDataFields.original_image].dtype) - self.assertAllEqual([1], features[inputs.HASH_KEY].shape.as_list()) - self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype) - 
self.assertAllEqual( - [1, None, 4], - labels[fields.InputDataFields.groundtruth_boxes].shape.as_list()) - self.assertEqual(tf.float32, - labels[fields.InputDataFields.groundtruth_boxes].dtype) - self.assertAllEqual( - [1, None, model_config.faster_rcnn.num_classes], - labels[fields.InputDataFields.groundtruth_classes].shape.as_list()) - self.assertEqual(tf.float32, - labels[fields.InputDataFields.groundtruth_classes].dtype) - self.assertAllEqual( - [1, None], - labels[fields.InputDataFields.groundtruth_area].shape.as_list()) - self.assertEqual(tf.float32, - labels[fields.InputDataFields.groundtruth_area].dtype) - self.assertAllEqual( - [1, None], - labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list()) - self.assertEqual( - tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype) - self.assertAllEqual( - [1, None], - labels[fields.InputDataFields.groundtruth_difficult].shape.as_list()) - self.assertEqual( - tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype) - - def test_ssd_inceptionV2_train_input(self): - """Tests the training input function for SSDInceptionV2.""" - configs = _get_configs_for_model('ssd_inception_v2_pets') - model_config = configs['model'] - model_config.ssd.num_classes = 37 - batch_size = configs['train_config'].batch_size - train_input_fn = inputs.create_train_input_fn( - configs['train_config'], configs['train_input_config'], model_config) - features, labels = train_input_fn() - - self.assertAllEqual([batch_size, 300, 300, 3], - features[fields.InputDataFields.image].shape.as_list()) - self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype) - self.assertAllEqual([batch_size], - features[inputs.HASH_KEY].shape.as_list()) - self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype) - self.assertAllEqual( - [batch_size], - labels[fields.InputDataFields.num_groundtruth_boxes].shape.as_list()) - self.assertEqual(tf.int32, - labels[fields.InputDataFields.num_groundtruth_boxes].dtype) - self.assertAllEqual( - [batch_size, 50, 4], - labels[fields.InputDataFields.groundtruth_boxes].shape.as_list()) - self.assertEqual(tf.float32, - labels[fields.InputDataFields.groundtruth_boxes].dtype) - self.assertAllEqual( - [batch_size, 50, model_config.ssd.num_classes], - labels[fields.InputDataFields.groundtruth_classes].shape.as_list()) - self.assertEqual(tf.float32, - labels[fields.InputDataFields.groundtruth_classes].dtype) - self.assertAllEqual( - [batch_size, 50], - labels[fields.InputDataFields.groundtruth_weights].shape.as_list()) - self.assertEqual(tf.float32, - labels[fields.InputDataFields.groundtruth_weights].dtype) - - def test_ssd_inceptionV2_eval_input(self): - """Tests the eval input function for SSDInceptionV2.""" - configs = _get_configs_for_model('ssd_inception_v2_pets') - model_config = configs['model'] - model_config.ssd.num_classes = 37 - eval_input_fn = inputs.create_eval_input_fn( - configs['eval_config'], configs['eval_input_config'], model_config) - features, labels = eval_input_fn() - - self.assertAllEqual([1, 300, 300, 3], - features[fields.InputDataFields.image].shape.as_list()) - self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype) - self.assertAllEqual( - [1, None, None, 3], - features[fields.InputDataFields.original_image].shape.as_list()) - self.assertEqual(tf.uint8, - features[fields.InputDataFields.original_image].dtype) - self.assertAllEqual([1], features[inputs.HASH_KEY].shape.as_list()) - self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype) - 
self.assertAllEqual( - [1, None, 4], - labels[fields.InputDataFields.groundtruth_boxes].shape.as_list()) - self.assertEqual(tf.float32, - labels[fields.InputDataFields.groundtruth_boxes].dtype) - self.assertAllEqual( - [1, None, model_config.ssd.num_classes], - labels[fields.InputDataFields.groundtruth_classes].shape.as_list()) - self.assertEqual(tf.float32, - labels[fields.InputDataFields.groundtruth_classes].dtype) - self.assertAllEqual( - [1, None], - labels[fields.InputDataFields.groundtruth_area].shape.as_list()) - self.assertEqual(tf.float32, - labels[fields.InputDataFields.groundtruth_area].dtype) - self.assertAllEqual( - [1, None], - labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list()) - self.assertEqual( - tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype) - self.assertAllEqual( - [1, None], - labels[fields.InputDataFields.groundtruth_difficult].shape.as_list()) - self.assertEqual( - tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype) - - def test_predict_input(self): - """Tests the predict input function.""" - configs = _get_configs_for_model('ssd_inception_v2_pets') - predict_input_fn = inputs.create_predict_input_fn( - model_config=configs['model']) - serving_input_receiver = predict_input_fn() - - image = serving_input_receiver.features[fields.InputDataFields.image] - receiver_tensors = serving_input_receiver.receiver_tensors[ - inputs.SERVING_FED_EXAMPLE_KEY] - self.assertEqual([1, 300, 300, 3], image.shape.as_list()) - self.assertEqual(tf.float32, image.dtype) - self.assertEqual(tf.string, receiver_tensors.dtype) - - def test_error_with_bad_train_config(self): - """Tests that a TypeError is raised with improper train config.""" - configs = _get_configs_for_model('ssd_inception_v2_pets') - configs['model'].ssd.num_classes = 37 - train_input_fn = inputs.create_train_input_fn( - train_config=configs['eval_config'], # Expecting `TrainConfig`. - train_input_config=configs['train_input_config'], - model_config=configs['model']) - with self.assertRaises(TypeError): - train_input_fn() - - def test_error_with_bad_train_input_config(self): - """Tests that a TypeError is raised with improper train input config.""" - configs = _get_configs_for_model('ssd_inception_v2_pets') - configs['model'].ssd.num_classes = 37 - train_input_fn = inputs.create_train_input_fn( - train_config=configs['train_config'], - train_input_config=configs['model'], # Expecting `InputReader`. - model_config=configs['model']) - with self.assertRaises(TypeError): - train_input_fn() - - def test_error_with_bad_train_model_config(self): - """Tests that a TypeError is raised with improper train model config.""" - configs = _get_configs_for_model('ssd_inception_v2_pets') - configs['model'].ssd.num_classes = 37 - train_input_fn = inputs.create_train_input_fn( - train_config=configs['train_config'], - train_input_config=configs['train_input_config'], - model_config=configs['train_config']) # Expecting `DetectionModel`. - with self.assertRaises(TypeError): - train_input_fn() - - def test_error_with_bad_eval_config(self): - """Tests that a TypeError is raised with improper eval config.""" - configs = _get_configs_for_model('ssd_inception_v2_pets') - configs['model'].ssd.num_classes = 37 - eval_input_fn = inputs.create_eval_input_fn( - eval_config=configs['train_config'], # Expecting `EvalConfig`. 
- eval_input_config=configs['eval_input_config'], - model_config=configs['model']) - with self.assertRaises(TypeError): - eval_input_fn() - - def test_error_with_bad_eval_input_config(self): - """Tests that a TypeError is raised with improper eval input config.""" - configs = _get_configs_for_model('ssd_inception_v2_pets') - configs['model'].ssd.num_classes = 37 - eval_input_fn = inputs.create_eval_input_fn( - eval_config=configs['eval_config'], - eval_input_config=configs['model'], # Expecting `InputReader`. - model_config=configs['model']) - with self.assertRaises(TypeError): - eval_input_fn() - - def test_error_with_bad_eval_model_config(self): - """Tests that a TypeError is raised with improper eval model config.""" - configs = _get_configs_for_model('ssd_inception_v2_pets') - configs['model'].ssd.num_classes = 37 - eval_input_fn = inputs.create_eval_input_fn( - eval_config=configs['eval_config'], - eval_input_config=configs['eval_input_config'], - model_config=configs['eval_config']) # Expecting `DetectionModel`. - with self.assertRaises(TypeError): - eval_input_fn() - - -class DataAugmentationFnTest(tf.test.TestCase): - - def test_apply_image_and_box_augmentation(self): - data_augmentation_options = [ - (preprocessor.resize_image, { - 'new_height': 20, - 'new_width': 20, - 'method': tf.image.ResizeMethod.NEAREST_NEIGHBOR - }), - (preprocessor.scale_boxes_to_pixel_coordinates, {}), - ] - data_augmentation_fn = functools.partial( - inputs.augment_input_data, - data_augmentation_options=data_augmentation_options) - tensor_dict = { - fields.InputDataFields.image: - tf.constant(np.random.rand(10, 10, 3).astype(np.float32)), - fields.InputDataFields.groundtruth_boxes: - tf.constant(np.array([[.5, .5, 1., 1.]], np.float32)) - } - augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict) - with self.test_session() as sess: - augmented_tensor_dict_out = sess.run(augmented_tensor_dict) - - self.assertAllEqual( - augmented_tensor_dict_out[fields.InputDataFields.image].shape, - [20, 20, 3] - ) - self.assertAllClose( - augmented_tensor_dict_out[fields.InputDataFields.groundtruth_boxes], - [[10, 10, 20, 20]] - ) - - def test_include_masks_in_data_augmentation(self): - data_augmentation_options = [ - (preprocessor.resize_image, { - 'new_height': 20, - 'new_width': 20, - 'method': tf.image.ResizeMethod.NEAREST_NEIGHBOR - }) - ] - data_augmentation_fn = functools.partial( - inputs.augment_input_data, - data_augmentation_options=data_augmentation_options) - tensor_dict = { - fields.InputDataFields.image: - tf.constant(np.random.rand(10, 10, 3).astype(np.float32)), - fields.InputDataFields.groundtruth_instance_masks: - tf.constant(np.zeros([2, 10, 10], np.uint8)) - } - augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict) - with self.test_session() as sess: - augmented_tensor_dict_out = sess.run(augmented_tensor_dict) - - self.assertAllEqual( - augmented_tensor_dict_out[fields.InputDataFields.image].shape, - [20, 20, 3]) - self.assertAllEqual(augmented_tensor_dict_out[ - fields.InputDataFields.groundtruth_instance_masks].shape, [2, 20, 20]) - - def test_include_keypoints_in_data_augmentation(self): - data_augmentation_options = [ - (preprocessor.resize_image, { - 'new_height': 20, - 'new_width': 20, - 'method': tf.image.ResizeMethod.NEAREST_NEIGHBOR - }), - (preprocessor.scale_boxes_to_pixel_coordinates, {}), - ] - data_augmentation_fn = functools.partial( - inputs.augment_input_data, - data_augmentation_options=data_augmentation_options) - tensor_dict = { - 
fields.InputDataFields.image: - tf.constant(np.random.rand(10, 10, 3).astype(np.float32)), - fields.InputDataFields.groundtruth_boxes: - tf.constant(np.array([[.5, .5, 1., 1.]], np.float32)), - fields.InputDataFields.groundtruth_keypoints: - tf.constant(np.array([[[0.5, 1.0], [0.5, 0.5]]], np.float32)) - } - augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict) - with self.test_session() as sess: - augmented_tensor_dict_out = sess.run(augmented_tensor_dict) - - self.assertAllEqual( - augmented_tensor_dict_out[fields.InputDataFields.image].shape, - [20, 20, 3] - ) - self.assertAllClose( - augmented_tensor_dict_out[fields.InputDataFields.groundtruth_boxes], - [[10, 10, 20, 20]] - ) - self.assertAllClose( - augmented_tensor_dict_out[fields.InputDataFields.groundtruth_keypoints], - [[[10, 20], [10, 10]]] - ) - - -def _fake_model_preprocessor_fn(image): - return (image, tf.expand_dims(tf.shape(image)[1:], axis=0)) - - -def _fake_image_resizer_fn(image, mask): - return (image, mask, tf.shape(image)) - - -class DataTransformationFnTest(tf.test.TestCase): - - def test_combine_additional_channels_if_present(self): - image = np.random.rand(4, 4, 3).astype(np.float32) - additional_channels = np.random.rand(4, 4, 2).astype(np.float32) - tensor_dict = { - fields.InputDataFields.image: - tf.constant(image), - fields.InputDataFields.image_additional_channels: - tf.constant(additional_channels), - fields.InputDataFields.groundtruth_classes: - tf.constant(np.array([1, 1], np.int32)) - } - - input_transformation_fn = functools.partial( - inputs.transform_input_data, - model_preprocess_fn=_fake_model_preprocessor_fn, - image_resizer_fn=_fake_image_resizer_fn, - num_classes=1) - with self.test_session() as sess: - transformed_inputs = sess.run( - input_transformation_fn(tensor_dict=tensor_dict)) - self.assertAllEqual(transformed_inputs[fields.InputDataFields.image].dtype, - tf.float32) - self.assertAllEqual(transformed_inputs[fields.InputDataFields.image].shape, - [4, 4, 5]) - self.assertAllClose(transformed_inputs[fields.InputDataFields.image], - np.concatenate((image, additional_channels), axis=2)) - - def test_returns_correct_class_label_encodings(self): - tensor_dict = { - fields.InputDataFields.image: - tf.constant(np.random.rand(4, 4, 3).astype(np.float32)), - fields.InputDataFields.groundtruth_boxes: - tf.constant(np.array([[0, 0, 1, 1], [.5, .5, 1, 1]], np.float32)), - fields.InputDataFields.groundtruth_classes: - tf.constant(np.array([3, 1], np.int32)) - } - num_classes = 3 - input_transformation_fn = functools.partial( - inputs.transform_input_data, - model_preprocess_fn=_fake_model_preprocessor_fn, - image_resizer_fn=_fake_image_resizer_fn, - num_classes=num_classes) - with self.test_session() as sess: - transformed_inputs = sess.run( - input_transformation_fn(tensor_dict=tensor_dict)) - - self.assertAllClose( - transformed_inputs[fields.InputDataFields.groundtruth_classes], - [[0, 0, 1], [1, 0, 0]]) - - def test_returns_correct_merged_boxes(self): - tensor_dict = { - fields.InputDataFields.image: - tf.constant(np.random.rand(4, 4, 3).astype(np.float32)), - fields.InputDataFields.groundtruth_boxes: - tf.constant(np.array([[.5, .5, 1, 1], [.5, .5, 1, 1]], np.float32)), - fields.InputDataFields.groundtruth_classes: - tf.constant(np.array([3, 1], np.int32)) - } - - num_classes = 3 - input_transformation_fn = functools.partial( - inputs.transform_input_data, - model_preprocess_fn=_fake_model_preprocessor_fn, - image_resizer_fn=_fake_image_resizer_fn, - num_classes=num_classes, - 
merge_multiple_boxes=True) - - with self.test_session() as sess: - transformed_inputs = sess.run( - input_transformation_fn(tensor_dict=tensor_dict)) - self.assertAllClose( - transformed_inputs[fields.InputDataFields.groundtruth_boxes], - [[.5, .5, 1., 1.]]) - self.assertAllClose( - transformed_inputs[fields.InputDataFields.groundtruth_classes], - [[1, 0, 1]]) - - def test_returns_resized_masks(self): - tensor_dict = { - fields.InputDataFields.image: - tf.constant(np.random.rand(4, 4, 3).astype(np.float32)), - fields.InputDataFields.groundtruth_instance_masks: - tf.constant(np.random.rand(2, 4, 4).astype(np.float32)), - fields.InputDataFields.groundtruth_classes: - tf.constant(np.array([3, 1], np.int32)) - } - def fake_image_resizer_fn(image, masks=None): - resized_image = tf.image.resize_images(image, [8, 8]) - results = [resized_image] - if masks is not None: - resized_masks = tf.transpose( - tf.image.resize_images(tf.transpose(masks, [1, 2, 0]), [8, 8]), - [2, 0, 1]) - results.append(resized_masks) - results.append(tf.shape(resized_image)) - return results - - num_classes = 3 - input_transformation_fn = functools.partial( - inputs.transform_input_data, - model_preprocess_fn=_fake_model_preprocessor_fn, - image_resizer_fn=fake_image_resizer_fn, - num_classes=num_classes, - retain_original_image=True) - with self.test_session() as sess: - transformed_inputs = sess.run( - input_transformation_fn(tensor_dict=tensor_dict)) - self.assertAllEqual(transformed_inputs[ - fields.InputDataFields.original_image].dtype, tf.uint8) - self.assertAllEqual(transformed_inputs[ - fields.InputDataFields.original_image].shape, [4, 4, 3]) - self.assertAllEqual(transformed_inputs[ - fields.InputDataFields.groundtruth_instance_masks].shape, [2, 8, 8]) - - def test_applies_model_preprocess_fn_to_image_tensor(self): - np_image = np.random.randint(256, size=(4, 4, 3)) - tensor_dict = { - fields.InputDataFields.image: - tf.constant(np_image), - fields.InputDataFields.groundtruth_classes: - tf.constant(np.array([3, 1], np.int32)) - } - def fake_model_preprocessor_fn(image): - return (image / 255., tf.expand_dims(tf.shape(image)[1:], axis=0)) - - num_classes = 3 - input_transformation_fn = functools.partial( - inputs.transform_input_data, - model_preprocess_fn=fake_model_preprocessor_fn, - image_resizer_fn=_fake_image_resizer_fn, - num_classes=num_classes) - - with self.test_session() as sess: - transformed_inputs = sess.run( - input_transformation_fn(tensor_dict=tensor_dict)) - self.assertAllClose(transformed_inputs[fields.InputDataFields.image], - np_image / 255.) - self.assertAllClose(transformed_inputs[fields.InputDataFields. 
- true_image_shape], - [4, 4, 3]) - - def test_applies_data_augmentation_fn_to_tensor_dict(self): - np_image = np.random.randint(256, size=(4, 4, 3)) - tensor_dict = { - fields.InputDataFields.image: - tf.constant(np_image), - fields.InputDataFields.groundtruth_classes: - tf.constant(np.array([3, 1], np.int32)) - } - def add_one_data_augmentation_fn(tensor_dict): - return {key: value + 1 for key, value in tensor_dict.items()} - - num_classes = 4 - input_transformation_fn = functools.partial( - inputs.transform_input_data, - model_preprocess_fn=_fake_model_preprocessor_fn, - image_resizer_fn=_fake_image_resizer_fn, - num_classes=num_classes, - data_augmentation_fn=add_one_data_augmentation_fn) - with self.test_session() as sess: - augmented_tensor_dict = sess.run( - input_transformation_fn(tensor_dict=tensor_dict)) - - self.assertAllEqual(augmented_tensor_dict[fields.InputDataFields.image], - np_image + 1) - self.assertAllEqual( - augmented_tensor_dict[fields.InputDataFields.groundtruth_classes], - [[0, 0, 0, 1], [0, 1, 0, 0]]) - - def test_applies_data_augmentation_fn_before_model_preprocess_fn(self): - np_image = np.random.randint(256, size=(4, 4, 3)) - tensor_dict = { - fields.InputDataFields.image: - tf.constant(np_image), - fields.InputDataFields.groundtruth_classes: - tf.constant(np.array([3, 1], np.int32)) - } - def mul_two_model_preprocessor_fn(image): - return (image * 2, tf.expand_dims(tf.shape(image)[1:], axis=0)) - def add_five_to_image_data_augmentation_fn(tensor_dict): - tensor_dict[fields.InputDataFields.image] += 5 - return tensor_dict - - num_classes = 4 - input_transformation_fn = functools.partial( - inputs.transform_input_data, - model_preprocess_fn=mul_two_model_preprocessor_fn, - image_resizer_fn=_fake_image_resizer_fn, - num_classes=num_classes, - data_augmentation_fn=add_five_to_image_data_augmentation_fn) - with self.test_session() as sess: - augmented_tensor_dict = sess.run( - input_transformation_fn(tensor_dict=tensor_dict)) - - self.assertAllEqual(augmented_tensor_dict[fields.InputDataFields.image], - (np_image + 5) * 2) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/matchers/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/matchers/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/matchers/argmax_matcher.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/matchers/argmax_matcher.py deleted file mode 100644 index d397ff41ac560180cacebfe906b092582b8e2fa6..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/matchers/argmax_matcher.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ==============================================================================
-
-"""Argmax matcher implementation.
-
-This class takes a similarity matrix and matches columns to rows based on the
-maximum value per column. One can specify an unmatched_threshold below which
-a column's best match is treated as a negative training example, and a
-matched_threshold that the column's maximum similarity must reach for it to be
-matched to a row; matches that fall between the two thresholds are ignored
-(generally resulting in neither a positive nor a negative training example).
-
-This matcher is used in Fast(er)-RCNN.
-
-Note: matchers are used in TargetAssigners. There is a create_target_assigner
-factory function for popular implementations.
-"""
-import tensorflow as tf
-
-from object_detection.core import matcher
-from object_detection.utils import shape_utils
-
-
-class ArgMaxMatcher(matcher.Matcher):
-  """Matcher based on highest value.
-
-  This class computes matches from a similarity matrix. Each column is matched
-  to a single row.
-
-  To support object detection target assignment this class enables setting both
-  matched_threshold (upper threshold) and unmatched_threshold (lower threshold)
-  defining three categories of similarity which determine whether examples are
-  positive, negative, or ignored:
-  (1) similarity >= matched_threshold: Highest similarity. Matched/Positive!
-  (2) matched_threshold > similarity >= unmatched_threshold: Medium similarity.
-        Depending on negatives_lower_than_unmatched, this is either
-        Unmatched/Negative OR Ignore.
-  (3) unmatched_threshold > similarity: Lowest similarity. Depending on flag
-        negatives_lower_than_unmatched, either Unmatched/Negative OR Ignore.
-  For ignored matches this class sets the values in the Match object to -2.
-  """
-
-  def __init__(self,
-               matched_threshold,
-               unmatched_threshold=None,
-               negatives_lower_than_unmatched=True,
-               force_match_for_each_row=False,
-               use_matmul_gather=False):
-    """Construct ArgMaxMatcher.
-
-    Args:
-      matched_threshold: Threshold for positive matches. Positive if
-        sim >= matched_threshold, where sim is the maximum value of the
-        similarity matrix for a given column. Set to None for no threshold.
-      unmatched_threshold: Threshold for negative matches. Negative if
-        sim < unmatched_threshold. Defaults to matched_threshold
-        when set to None.
-      negatives_lower_than_unmatched: Boolean which defaults to True. If True
-        then negative matches are the ones below the unmatched_threshold,
-        whereas ignored matches are in between the matched and unmatched
-        threshold. If False, then negative matches are in between the matched
-        and unmatched threshold, and everything lower than unmatched is ignored.
-      force_match_for_each_row: If True, ensures that each row is matched to
-        at least one column (which is not guaranteed otherwise if the
-        matched_threshold is high). Defaults to False. See
-        argmax_matcher_test.testMatcherForceMatch() for an example.
-      use_matmul_gather: Force constructed match objects to use matrix
-        multiplication based gather instead of standard tf.gather.
-        (Default: False).
-
-    Raises:
-      ValueError: if unmatched_threshold is set but matched_threshold is not
-        set, or if unmatched_threshold > matched_threshold.
- """ - super(ArgMaxMatcher, self).__init__(use_matmul_gather=use_matmul_gather) - if (matched_threshold is None) and (unmatched_threshold is not None): - raise ValueError('Need to also define matched_threshold when' - 'unmatched_threshold is defined') - self._matched_threshold = matched_threshold - if unmatched_threshold is None: - self._unmatched_threshold = matched_threshold - else: - if unmatched_threshold > matched_threshold: - raise ValueError('unmatched_threshold needs to be smaller or equal' - 'to matched_threshold') - self._unmatched_threshold = unmatched_threshold - if not negatives_lower_than_unmatched: - if self._unmatched_threshold == self._matched_threshold: - raise ValueError('When negatives are in between matched and ' - 'unmatched thresholds, these cannot be of equal ' - 'value. matched: %s, unmatched: %s', - self._matched_threshold, self._unmatched_threshold) - self._force_match_for_each_row = force_match_for_each_row - self._negatives_lower_than_unmatched = negatives_lower_than_unmatched - - def _match(self, similarity_matrix): - """Tries to match each column of the similarity matrix to a row. - - Args: - similarity_matrix: tensor of shape [N, M] representing any similarity - metric. - - Returns: - Match object with corresponding matches for each of M columns. - """ - - def _match_when_rows_are_empty(): - """Performs matching when the rows of similarity matrix are empty. - - When the rows are empty, all detections are false positives. So we return - a tensor of -1's to indicate that the columns do not match to any rows. - - Returns: - matches: int32 tensor indicating the row each column matches to. - """ - similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape( - similarity_matrix) - return -1 * tf.ones([similarity_matrix_shape[1]], dtype=tf.int32) - - def _match_when_rows_are_non_empty(): - """Performs matching when the rows of similarity matrix are non empty. - - Returns: - matches: int32 tensor indicating the row each column matches to. 
- """ - # Matches for each column - matches = tf.argmax(similarity_matrix, 0, output_type=tf.int32) - - # Deal with matched and unmatched threshold - if self._matched_threshold is not None: - # Get logical indices of ignored and unmatched columns as tf.int64 - matched_vals = tf.reduce_max(similarity_matrix, 0) - below_unmatched_threshold = tf.greater(self._unmatched_threshold, - matched_vals) - between_thresholds = tf.logical_and( - tf.greater_equal(matched_vals, self._unmatched_threshold), - tf.greater(self._matched_threshold, matched_vals)) - - if self._negatives_lower_than_unmatched: - matches = self._set_values_using_indicator(matches, - below_unmatched_threshold, - -1) - matches = self._set_values_using_indicator(matches, - between_thresholds, - -2) - else: - matches = self._set_values_using_indicator(matches, - below_unmatched_threshold, - -2) - matches = self._set_values_using_indicator(matches, - between_thresholds, - -1) - - if self._force_match_for_each_row: - similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape( - similarity_matrix) - force_match_column_ids = tf.argmax(similarity_matrix, 1, - output_type=tf.int32) - force_match_column_indicators = tf.one_hot( - force_match_column_ids, depth=similarity_matrix_shape[1]) - force_match_row_ids = tf.argmax(force_match_column_indicators, 0, - output_type=tf.int32) - force_match_column_mask = tf.cast( - tf.reduce_max(force_match_column_indicators, 0), tf.bool) - final_matches = tf.where(force_match_column_mask, - force_match_row_ids, matches) - return final_matches - else: - return matches - - if similarity_matrix.shape.is_fully_defined(): - if similarity_matrix.shape[0].value == 0: - return _match_when_rows_are_empty() - else: - return _match_when_rows_are_non_empty() - else: - return tf.cond( - tf.greater(tf.shape(similarity_matrix)[0], 0), - _match_when_rows_are_non_empty, _match_when_rows_are_empty) - - def _set_values_using_indicator(self, x, indicator, val): - """Set the indicated fields of x to val. - - Args: - x: tensor. - indicator: boolean with same shape as x. - val: scalar with value to set. - - Returns: - modified tensor. - """ - indicator = tf.cast(indicator, x.dtype) - return tf.add(tf.multiply(x, 1 - indicator), val * indicator) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/matchers/argmax_matcher_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/matchers/argmax_matcher_test.py deleted file mode 100644 index 694bebdc72a5d0d7794cb7c845c97964f4dda517..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/matchers/argmax_matcher_test.py +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.matchers.argmax_matcher.""" - -import numpy as np -import tensorflow as tf - -from object_detection.matchers import argmax_matcher -from object_detection.utils import test_case - - -class ArgMaxMatcherTest(test_case.TestCase): - - def test_return_correct_matches_with_default_thresholds(self): - - def graph_fn(similarity_matrix): - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=None) - match = matcher.match(similarity_matrix) - matched_cols = match.matched_column_indicator() - unmatched_cols = match.unmatched_column_indicator() - match_results = match.match_results - return (matched_cols, unmatched_cols, match_results) - - similarity = np.array([[1., 1, 1, 3, 1], - [2, -1, 2, 0, 4], - [3, 0, -1, 0, 0]], dtype=np.float32) - expected_matched_rows = np.array([2, 0, 1, 0, 1]) - (res_matched_cols, res_unmatched_cols, - res_match_results) = self.execute(graph_fn, [similarity]) - - self.assertAllEqual(res_match_results[res_matched_cols], - expected_matched_rows) - self.assertAllEqual(np.nonzero(res_matched_cols)[0], [0, 1, 2, 3, 4]) - self.assertFalse(np.all(res_unmatched_cols)) - - def test_return_correct_matches_with_empty_rows(self): - - def graph_fn(similarity_matrix): - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=None) - match = matcher.match(similarity_matrix) - return match.unmatched_column_indicator() - similarity = 0.2 * np.ones([0, 5], dtype=np.float32) - res_unmatched_cols = self.execute(graph_fn, [similarity]) - self.assertAllEqual(np.nonzero(res_unmatched_cols)[0], np.arange(5)) - - def test_return_correct_matches_with_matched_threshold(self): - - def graph_fn(similarity): - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3.) - match = matcher.match(similarity) - matched_cols = match.matched_column_indicator() - unmatched_cols = match.unmatched_column_indicator() - match_results = match.match_results - return (matched_cols, unmatched_cols, match_results) - - similarity = np.array([[1, 1, 1, 3, 1], - [2, -1, 2, 0, 4], - [3, 0, -1, 0, 0]], dtype=np.float32) - expected_matched_cols = np.array([0, 3, 4]) - expected_matched_rows = np.array([2, 0, 1]) - expected_unmatched_cols = np.array([1, 2]) - - (res_matched_cols, res_unmatched_cols, - match_results) = self.execute(graph_fn, [similarity]) - self.assertAllEqual(match_results[res_matched_cols], expected_matched_rows) - self.assertAllEqual(np.nonzero(res_matched_cols)[0], expected_matched_cols) - self.assertAllEqual(np.nonzero(res_unmatched_cols)[0], - expected_unmatched_cols) - - def test_return_correct_matches_with_matched_and_unmatched_threshold(self): - - def graph_fn(similarity): - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3., - unmatched_threshold=2.) 
- match = matcher.match(similarity) - matched_cols = match.matched_column_indicator() - unmatched_cols = match.unmatched_column_indicator() - match_results = match.match_results - return (matched_cols, unmatched_cols, match_results) - - similarity = np.array([[1, 1, 1, 3, 1], - [2, -1, 2, 0, 4], - [3, 0, -1, 0, 0]], dtype=np.float32) - expected_matched_cols = np.array([0, 3, 4]) - expected_matched_rows = np.array([2, 0, 1]) - expected_unmatched_cols = np.array([1]) # col 2 has too high maximum val - - (res_matched_cols, res_unmatched_cols, - match_results) = self.execute(graph_fn, [similarity]) - self.assertAllEqual(match_results[res_matched_cols], expected_matched_rows) - self.assertAllEqual(np.nonzero(res_matched_cols)[0], expected_matched_cols) - self.assertAllEqual(np.nonzero(res_unmatched_cols)[0], - expected_unmatched_cols) - - def test_return_correct_matches_negatives_lower_than_unmatched_false(self): - - def graph_fn(similarity): - matcher = argmax_matcher.ArgMaxMatcher( - matched_threshold=3., - unmatched_threshold=2., - negatives_lower_than_unmatched=False) - match = matcher.match(similarity) - matched_cols = match.matched_column_indicator() - unmatched_cols = match.unmatched_column_indicator() - match_results = match.match_results - return (matched_cols, unmatched_cols, match_results) - - similarity = np.array([[1, 1, 1, 3, 1], - [2, -1, 2, 0, 4], - [3, 0, -1, 0, 0]], dtype=np.float32) - expected_matched_cols = np.array([0, 3, 4]) - expected_matched_rows = np.array([2, 0, 1]) - expected_unmatched_cols = np.array([2]) # col 1 has too low maximum val - - (res_matched_cols, res_unmatched_cols, - match_results) = self.execute(graph_fn, [similarity]) - self.assertAllEqual(match_results[res_matched_cols], expected_matched_rows) - self.assertAllEqual(np.nonzero(res_matched_cols)[0], expected_matched_cols) - self.assertAllEqual(np.nonzero(res_unmatched_cols)[0], - expected_unmatched_cols) - - def test_return_correct_matches_unmatched_row_not_using_force_match(self): - - def graph_fn(similarity): - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3., - unmatched_threshold=2.) 
-      match = matcher.match(similarity)
-      matched_cols = match.matched_column_indicator()
-      unmatched_cols = match.unmatched_column_indicator()
-      match_results = match.match_results
-      return (matched_cols, unmatched_cols, match_results)
-
-    similarity = np.array([[1, 1, 1, 3, 1],
-                           [-1, 0, -2, -2, -1],
-                           [3, 0, -1, 2, 0]], dtype=np.float32)
-    expected_matched_cols = np.array([0, 3])
-    expected_matched_rows = np.array([2, 0])
-    expected_unmatched_cols = np.array([1, 2, 4])
-
-    (res_matched_cols, res_unmatched_cols,
-     match_results) = self.execute(graph_fn, [similarity])
-    self.assertAllEqual(match_results[res_matched_cols], expected_matched_rows)
-    self.assertAllEqual(np.nonzero(res_matched_cols)[0], expected_matched_cols)
-    self.assertAllEqual(np.nonzero(res_unmatched_cols)[0],
-                        expected_unmatched_cols)
-
-  def test_return_correct_matches_unmatched_row_while_using_force_match(self):
-    def graph_fn(similarity):
-      matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3.,
-                                             unmatched_threshold=2.,
-                                             force_match_for_each_row=True)
-      match = matcher.match(similarity)
-      matched_cols = match.matched_column_indicator()
-      unmatched_cols = match.unmatched_column_indicator()
-      match_results = match.match_results
-      return (matched_cols, unmatched_cols, match_results)
-
-    similarity = np.array([[1, 1, 1, 3, 1],
-                           [-1, 0, -2, -2, -1],
-                           [3, 0, -1, 2, 0]], dtype=np.float32)
-    expected_matched_cols = np.array([0, 1, 3])
-    expected_matched_rows = np.array([2, 1, 0])
-    # Cols 2 and 4 have max vals below the unmatched threshold.
-    expected_unmatched_cols = np.array([2, 4])
-
-    (res_matched_cols, res_unmatched_cols,
-     match_results) = self.execute(graph_fn, [similarity])
-    self.assertAllEqual(match_results[res_matched_cols], expected_matched_rows)
-    self.assertAllEqual(np.nonzero(res_matched_cols)[0], expected_matched_cols)
-    self.assertAllEqual(np.nonzero(res_unmatched_cols)[0],
-                        expected_unmatched_cols)
-
-  def test_valid_arguments_corner_case(self):
-    argmax_matcher.ArgMaxMatcher(matched_threshold=1,
-                                 unmatched_threshold=1)
-
-  def test_invalid_arguments_corner_case_negatives_lower_than_thres_false(self):
-    with self.assertRaises(ValueError):
-      argmax_matcher.ArgMaxMatcher(matched_threshold=1,
-                                   unmatched_threshold=1,
-                                   negatives_lower_than_unmatched=False)
-
-  def test_invalid_arguments_no_matched_threshold(self):
-    with self.assertRaises(ValueError):
-      argmax_matcher.ArgMaxMatcher(matched_threshold=None,
-                                   unmatched_threshold=4)
-
-  def test_invalid_arguments_unmatched_thres_larger_than_matched_thres(self):
-    with self.assertRaises(ValueError):
-      argmax_matcher.ArgMaxMatcher(matched_threshold=1,
-                                   unmatched_threshold=2)
-
-
-if __name__ == '__main__':
-  tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/matchers/bipartite_matcher.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/matchers/bipartite_matcher.py
deleted file mode 100644
index a1bb0b849ae77569af7d14beaeb159bedbd972bd..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/matchers/bipartite_matcher.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Bipartite matcher implementation."""
-
-import tensorflow as tf
-
-from tensorflow.contrib.image.python.ops import image_ops
-from object_detection.core import matcher
-
-
-class GreedyBipartiteMatcher(matcher.Matcher):
-  """Wraps a TensorFlow greedy bipartite matcher."""
-
-  def __init__(self, use_matmul_gather=False):
-    """Constructs a Matcher.
-
-    Args:
-      use_matmul_gather: Force constructed match objects to use matrix
-        multiplication based gather instead of standard tf.gather.
-        (Default: False).
-    """
-    super(GreedyBipartiteMatcher, self).__init__(
-        use_matmul_gather=use_matmul_gather)
-
-  def _match(self, similarity_matrix, num_valid_rows=-1):
-    """Greedily bipartite-matches a collection of rows and columns.
-
-    TODO(rathodv): Add a num_valid_columns option to match only that many
-    columns with all the rows.
-
-    Args:
-      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
-        where higher values mean more similar.
-      num_valid_rows: A scalar or a 1-D tensor with one element describing the
-        number of valid rows of similarity_matrix to consider for the bipartite
-        matching. If set to be negative, then all rows from similarity_matrix
-        are used.
-
-    Returns:
-      match_results: int32 tensor of shape [M] with match_results[i]=-1
-        meaning that column i is not matched and otherwise that it is matched
-        to row match_results[i].
-    """
-    # Convert the similarity matrix to a distance matrix, since
-    # image_ops.bipartite_match finds minimum-distance matches.
-    distance_matrix = -1 * similarity_matrix
-    _, match_results = image_ops.bipartite_match(
-        distance_matrix, num_valid_rows)
-    match_results = tf.reshape(match_results, [-1])
-    match_results = tf.cast(match_results, tf.int32)
-    return match_results
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/matchers/bipartite_matcher_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/matchers/bipartite_matcher_test.py
deleted file mode 100644
index 2ee45a80dfafc82b6ee4965a28719b9840296591..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/matchers/bipartite_matcher_test.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
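[Editor's note: an illustrative NumPy sketch of the greedy bipartite matching
semantics wrapped above, consistent with the expectations in the tests that
follow; it is an assumption about the contrib op's behaviour, not its actual
implementation, and the function name is hypothetical.]

import numpy as np

def greedy_bipartite_match_sketch(similarity, num_valid_rows=-1):
  sim = np.asarray(similarity, dtype=np.float32)
  if num_valid_rows >= 0:
    sim = sim[:num_valid_rows]  # only the first num_valid_rows rows compete
  match_results = -np.ones(sim.shape[1], dtype=np.int32)
  used_rows = set()
  # Visit candidate (row, column) pairs from most to least similar and pair
  # them greedily; columns left unpaired keep -1.
  for flat_index in np.argsort(-sim, axis=None):
    row, col = np.unravel_index(flat_index, sim.shape)
    if row not in used_rows and match_results[col] == -1:
      used_rows.add(row)
      match_results[col] = row
  return match_results

similarity = [[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]]
print(greedy_bipartite_match_sketch(similarity, num_valid_rows=2))  # [-1 1 0]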
-# ==============================================================================
-
-"""Tests for object_detection.matchers.bipartite_matcher."""
-
-import tensorflow as tf
-
-from object_detection.matchers import bipartite_matcher
-
-
-class GreedyBipartiteMatcherTest(tf.test.TestCase):
-
-  def test_get_expected_matches_when_all_rows_are_valid(self):
-    similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = 2
-    expected_match_results = [-1, 1, 0]
-
-    matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
-    with self.test_session() as sess:
-      match_results_out = sess.run(match._match_results)
-      self.assertAllEqual(match_results_out, expected_match_results)
-
-  def test_get_expected_matches_with_valid_rows_set_to_minus_one(self):
-    similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = -1
-    expected_match_results = [-1, 1, 0]
-
-    matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
-    with self.test_session() as sess:
-      match_results_out = sess.run(match._match_results)
-      self.assertAllEqual(match_results_out, expected_match_results)
-
-  def test_get_no_matches_with_zero_valid_rows(self):
-    similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = 0
-    expected_match_results = [-1, -1, -1]
-
-    matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
-    with self.test_session() as sess:
-      match_results_out = sess.run(match._match_results)
-      self.assertAllEqual(match_results_out, expected_match_results)
-
-  def test_get_expected_matches_with_only_one_valid_row(self):
-    similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = 1
-    expected_match_results = [-1, -1, 0]
-
-    matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
-    with self.test_session() as sess:
-      match_results_out = sess.run(match._match_results)
-      self.assertAllEqual(match_results_out, expected_match_results)
-
-
-if __name__ == '__main__':
-  tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/faster_rcnn_meta_arch.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/faster_rcnn_meta_arch.py
deleted file mode 100644
index 6315d4dfeb3afc8581c5da7b99866c0b1e23d47a..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/faster_rcnn_meta_arch.py
+++ /dev/null
@@ -1,2028 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Faster R-CNN meta-architecture definition.
-
-General TensorFlow implementation of Faster R-CNN detection models.
-
-See Faster R-CNN: Ren, Shaoqing, et al.
-"Faster R-CNN: Towards real-time object detection with region proposal
-networks." Advances in neural information processing systems. 2015.
-
-We allow for three modes: number_of_stages={1, 2, 3}. In case of 1 stage,
-all of the user-facing methods (e.g., predict, postprocess, loss) can be used
-as if the model consisted only of the RPN, returning class-agnostic proposals
-(these can be thought of as approximate detections with no associated class
-information). In case of 2 stages, proposals are computed, then passed
-through a second stage "box classifier" to yield (multi-class) detections.
-Finally, in the case of 3 stages, which is only used during eval, proposals
-are computed, then passed through a second stage "box classifier" that will
-compute refined boxes and classes; features are then pooled from the refined,
-non-maximum suppressed boxes and passed through the box classifier again. If
-the number of stages is 3 during training, it is reduced to 2 automatically.
-
-Implementations of Faster R-CNN models must define a new
-FasterRCNNFeatureExtractor and override three methods: `preprocess`,
-`_extract_proposal_features` (the first stage of the model), and
-`_extract_box_classifier_features` (the second stage of the model). Optionally,
-the `restore_fn` method can be overridden. See tests for an example.
-
-A few important notes:
-+ Batching conventions: We support batched inference and training where
-all images within a batch have the same resolution. Batch sizes are determined
-dynamically via the shape of the input tensors (rather than being specified
-directly, e.g., in the model constructor).
-
-A complication is that due to non-max suppression, we are not guaranteed to get
-the same number of proposals from the first stage RPN (region proposal network)
-for each image (though in practice, we should often get the same number of
-proposals). For this reason we pad to a max number of proposals per image
-within a batch. This `self.max_num_proposals` property is set to the
-`first_stage_max_proposals` parameter at inference time and the
-`second_stage_batch_size` at training time since we subsample the batch to
-be sent through the box classifier during training.
-
-For the second stage of the pipeline, we arrange the proposals for all images
-within the batch along a single batch dimension. For example, the input to
-_extract_box_classifier_features is a tensor of shape
-`[total_num_proposals, crop_height, crop_width, depth]` where
-total_num_proposals is batch_size * self.max_num_proposals. (And note that per
-the above comment, a subset of these entries correspond to zero paddings.)
-
-+ Coordinate representations:
-Following the API (see model.DetectionModel definition), our outputs after
-postprocessing operations are always normalized boxes; however, internally, we
-sometimes convert to absolute coordinates --- e.g.
for loss computation. In particular,
-anchors and proposal_boxes are both represented as absolute coordinates.
-
-Images are resized in the `preprocess` method.
-
-The Faster R-CNN meta architecture has two post-processing methods,
-`_postprocess_rpn`, which is applied after the first stage, and
-`_postprocess_box_classifier`, which is applied after the second stage. There
-are three different ways post-processing can happen depending on the
-number_of_stages configured in the meta architecture:
-
-1. When number_of_stages is 1:
-   `_postprocess_rpn` is run as part of the `postprocess` method where
-   true_image_shapes is used to clip proposals, perform non-max suppression and
-   normalize them.
-2. When number_of_stages is 2:
-   `_postprocess_rpn` is run as part of the `_predict_second_stage` method where
-   `resized_image_shapes` is used to clip proposals, perform non-max suppression
-   and normalize them. In this case the `postprocess` method skips
-   `_postprocess_rpn` and only runs `_postprocess_box_classifier` using
-   `true_image_shapes` to clip detections, perform non-max suppression and
-   normalize them.
-3. When number_of_stages is 3:
-   `_postprocess_rpn` is run as part of the `_predict_second_stage` using
-   `resized_image_shapes` to clip proposals, perform non-max suppression and
-   normalize them. Subsequently, `_postprocess_box_classifier` is run as part of
-   `_predict_third_stage` using `true_image_shapes` to clip detections, perform
-   non-max suppression and normalize them. In this case, the `postprocess`
-   method skips both `_postprocess_rpn` and `_postprocess_box_classifier`.
-"""
-from abc import abstractmethod
-from functools import partial
-import tensorflow as tf
-
-from object_detection.anchor_generators import grid_anchor_generator
-from object_detection.core import balanced_positive_negative_sampler as sampler
-from object_detection.core import box_list
-from object_detection.core import box_list_ops
-from object_detection.core import box_predictor
-from object_detection.core import losses
-from object_detection.core import model
-from object_detection.core import post_processing
-from object_detection.core import standard_fields as fields
-from object_detection.core import target_assigner
-from object_detection.utils import ops
-from object_detection.utils import shape_utils
-
-slim = tf.contrib.slim
-
-
-class FasterRCNNFeatureExtractor(object):
-  """Faster R-CNN Feature Extractor definition."""
-
-  def __init__(self,
-               is_training,
-               first_stage_features_stride,
-               batch_norm_trainable=False,
-               reuse_weights=None,
-               weight_decay=0.0):
-    """Constructor.
-
-    Args:
-      is_training: A boolean indicating whether the training version of the
-        computation graph should be constructed.
-      first_stage_features_stride: Output stride of extracted RPN feature map.
-      batch_norm_trainable: Whether to update batch norm parameters during
-        training or not. When training with a relatively large batch size
-        (e.g. 8), it could be desirable to enable batch norm updates.
-      reuse_weights: Whether to reuse variables. Default is None.
-      weight_decay: float weight decay for feature extractor (default: 0.0).
- """ - self._is_training = is_training - self._first_stage_features_stride = first_stage_features_stride - self._train_batch_norm = (batch_norm_trainable and is_training) - self._reuse_weights = reuse_weights - self._weight_decay = weight_decay - - @abstractmethod - def preprocess(self, resized_inputs): - """Feature-extractor specific preprocessing (minus image resizing).""" - pass - - def extract_proposal_features(self, preprocessed_inputs, scope): - """Extracts first stage RPN features. - - This function is responsible for extracting feature maps from preprocessed - images. These features are used by the region proposal network (RPN) to - predict proposals. - - Args: - preprocessed_inputs: A [batch, height, width, channels] float tensor - representing a batch of images. - scope: A scope name. - - Returns: - rpn_feature_map: A tensor with shape [batch, height, width, depth] - activations: A dictionary mapping activation tensor names to tensors. - """ - with tf.variable_scope(scope, values=[preprocessed_inputs]): - return self._extract_proposal_features(preprocessed_inputs, scope) - - @abstractmethod - def _extract_proposal_features(self, preprocessed_inputs, scope): - """Extracts first stage RPN features, to be overridden.""" - pass - - def extract_box_classifier_features(self, proposal_feature_maps, scope): - """Extracts second stage box classifier features. - - Args: - proposal_feature_maps: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, crop_height, crop_width, depth] - representing the feature map cropped to each proposal. - scope: A scope name. - - Returns: - proposal_classifier_features: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, height, width, depth] - representing box classifier features for each proposal. - """ - with tf.variable_scope( - scope, values=[proposal_feature_maps], reuse=tf.AUTO_REUSE): - return self._extract_box_classifier_features(proposal_feature_maps, scope) - - @abstractmethod - def _extract_box_classifier_features(self, proposal_feature_maps, scope): - """Extracts second stage box classifier features, to be overridden.""" - pass - - def restore_from_classification_checkpoint_fn( - self, - first_stage_feature_extractor_scope, - second_stage_feature_extractor_scope): - """Returns a map of variables to load from a foreign checkpoint. - - Args: - first_stage_feature_extractor_scope: A scope name for the first stage - feature extractor. - second_stage_feature_extractor_scope: A scope name for the second stage - feature extractor. - - Returns: - A dict mapping variable names (to load from a checkpoint) to variables in - the model graph. 
- """ - variables_to_restore = {} - for variable in tf.global_variables(): - for scope_name in [first_stage_feature_extractor_scope, - second_stage_feature_extractor_scope]: - if variable.op.name.startswith(scope_name): - var_name = variable.op.name.replace(scope_name + '/', '') - variables_to_restore[var_name] = variable - return variables_to_restore - - -class FasterRCNNMetaArch(model.DetectionModel): - """Faster R-CNN Meta-architecture definition.""" - - def __init__(self, - is_training, - num_classes, - image_resizer_fn, - feature_extractor, - number_of_stages, - first_stage_anchor_generator, - first_stage_atrous_rate, - first_stage_box_predictor_arg_scope_fn, - first_stage_box_predictor_kernel_size, - first_stage_box_predictor_depth, - first_stage_minibatch_size, - first_stage_positive_balance_fraction, - first_stage_nms_score_threshold, - first_stage_nms_iou_threshold, - first_stage_max_proposals, - first_stage_localization_loss_weight, - first_stage_objectness_loss_weight, - initial_crop_size, - maxpool_kernel_size, - maxpool_stride, - second_stage_mask_rcnn_box_predictor, - second_stage_batch_size, - second_stage_balance_fraction, - second_stage_non_max_suppression_fn, - second_stage_score_conversion_fn, - second_stage_localization_loss_weight, - second_stage_classification_loss_weight, - second_stage_classification_loss, - second_stage_mask_prediction_loss_weight=1.0, - hard_example_miner=None, - parallel_iterations=16, - add_summaries=True): - """FasterRCNNMetaArch Constructor. - - Args: - is_training: A boolean indicating whether the training version of the - computation graph should be constructed. - num_classes: Number of classes. Note that num_classes *does not* - include the background category, so if groundtruth labels take values - in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the - assigned classification targets can range from {0,... K}). - image_resizer_fn: A callable for image resizing. This callable - takes a rank-3 image tensor of shape [height, width, channels] - (corresponding to a single image), an optional rank-3 instance mask - tensor of shape [num_masks, height, width] and returns a resized rank-3 - image tensor, a resized mask tensor if one was provided in the input. In - addition this callable must also return a 1-D tensor of the form - [height, width, channels] containing the size of the true image, as the - image resizer can perform zero padding. See protos/image_resizer.proto. - feature_extractor: A FasterRCNNFeatureExtractor object. - number_of_stages: An integer values taking values in {1, 2, 3}. If - 1, the function will construct only the Region Proposal Network (RPN) - part of the model. If 2, the function will perform box refinement and - other auxiliary predictions all in the second stage. If 3, it will - extract features from refined boxes and perform the auxiliary - predictions on the non-maximum suppressed refined boxes. - If is_training is true and the value of number_of_stages is 3, it is - reduced to 2 since all the model heads are trained in parallel in second - stage during training. - first_stage_anchor_generator: An anchor_generator.AnchorGenerator object - (note that currently we only support - grid_anchor_generator.GridAnchorGenerator objects) - first_stage_atrous_rate: A single integer indicating the atrous rate for - the single convolution op which is applied to the `rpn_features_to_crop` - tensor to obtain a tensor to be used for box prediction. 
Some feature - extractors optionally allow for producing feature maps computed at - denser resolutions. The atrous rate is used to compensate for the - denser feature maps by using an effectively larger receptive field. - (This should typically be set to 1). - first_stage_box_predictor_arg_scope_fn: A function to construct tf-slim - arg_scope for conv2d, separable_conv2d and fully_connected ops for the - RPN box predictor. - first_stage_box_predictor_kernel_size: Kernel size to use for the - convolution op just prior to RPN box predictions. - first_stage_box_predictor_depth: Output depth for the convolution op - just prior to RPN box predictions. - first_stage_minibatch_size: The "batch size" to use for computing the - objectness and location loss of the region proposal network. This - "batch size" refers to the number of anchors selected as contributing - to the loss function for any given image within the image batch and is - only called "batch_size" due to terminology from the Faster R-CNN paper. - first_stage_positive_balance_fraction: Fraction of positive examples - per image for the RPN. The recommended value for Faster RCNN is 0.5. - first_stage_nms_score_threshold: Score threshold for non max suppression - for the Region Proposal Network (RPN). This value is expected to be in - [0, 1] as it is applied directly after a softmax transformation. The - recommended value for Faster R-CNN is 0. - first_stage_nms_iou_threshold: The Intersection Over Union (IOU) threshold - for performing Non-Max Suppression (NMS) on the boxes predicted by the - Region Proposal Network (RPN). - first_stage_max_proposals: Maximum number of boxes to retain after - performing Non-Max Suppression (NMS) on the boxes predicted by the - Region Proposal Network (RPN). - first_stage_localization_loss_weight: A float - first_stage_objectness_loss_weight: A float - initial_crop_size: A single integer indicating the output size - (width and height are set to be the same) of the initial bilinear - interpolation based cropping during ROI pooling. - maxpool_kernel_size: A single integer indicating the kernel size of the - max pool op on the cropped feature map during ROI pooling. - maxpool_stride: A single integer indicating the stride of the max pool - op on the cropped feature map during ROI pooling. - second_stage_mask_rcnn_box_predictor: Mask R-CNN box predictor to use for - the second stage. - second_stage_batch_size: The batch size used for computing the - classification and refined location loss of the box classifier. This - "batch size" refers to the number of proposals selected as contributing - to the loss function for any given image within the image batch and is - only called "batch_size" due to terminology from the Faster R-CNN paper. - second_stage_balance_fraction: Fraction of positive examples to use - per image for the box classifier. The recommended value for Faster RCNN - is 0.25. - second_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression - callable that takes `boxes`, `scores`, optional `clip_window` and - optional (kwarg) `mask` inputs (with all other inputs already set) - and returns a dictionary containing tensors with keys: - `detection_boxes`, `detection_scores`, `detection_classes`, - `num_detections`, and (optionally) `detection_masks`. See - `post_processing.batch_multiclass_non_max_suppression` for the type and - shape of these tensors. - second_stage_score_conversion_fn: Callable elementwise nonlinearity - (that takes tensors as inputs and returns tensors). 
This is usually - used to convert logits to probabilities. - second_stage_localization_loss_weight: A float indicating the scale factor - for second stage localization loss. - second_stage_classification_loss_weight: A float indicating the scale - factor for second stage classification loss. - second_stage_classification_loss: Classification loss used by the second - stage classifier. Either losses.WeightedSigmoidClassificationLoss or - losses.WeightedSoftmaxClassificationLoss. - second_stage_mask_prediction_loss_weight: A float indicating the scale - factor for second stage mask prediction loss. This is applicable only if - second stage box predictor is configured to predict masks. - hard_example_miner: A losses.HardExampleMiner object (can be None). - parallel_iterations: (Optional) The number of iterations allowed to run - in parallel for calls to tf.map_fn. - add_summaries: boolean (default: True) controlling whether summary ops - should be added to tensorflow graph. - - Raises: - ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` at - training time. - ValueError: If first_stage_anchor_generator is not of type - grid_anchor_generator.GridAnchorGenerator. - """ - # TODO(rathodv): add_summaries is currently unused. Respect that directive - # in the future. - super(FasterRCNNMetaArch, self).__init__(num_classes=num_classes) - - if is_training and second_stage_batch_size > first_stage_max_proposals: - raise ValueError('second_stage_batch_size should be no greater than ' - 'first_stage_max_proposals.') - if not isinstance(first_stage_anchor_generator, - grid_anchor_generator.GridAnchorGenerator): - raise ValueError('first_stage_anchor_generator must be of type ' - 'grid_anchor_generator.GridAnchorGenerator.') - - self._is_training = is_training - self._image_resizer_fn = image_resizer_fn - self._feature_extractor = feature_extractor - self._number_of_stages = number_of_stages - - # The first class is reserved as background. 
- unmatched_cls_target = tf.constant( - [1] + self._num_classes * [0], dtype=tf.float32) - self._proposal_target_assigner = target_assigner.create_target_assigner( - 'FasterRCNN', 'proposal') - self._detector_target_assigner = target_assigner.create_target_assigner( - 'FasterRCNN', 'detection', unmatched_cls_target=unmatched_cls_target) - # Both proposal and detector target assigners use the same box coder - self._box_coder = self._proposal_target_assigner.box_coder - - # (First stage) Region proposal network parameters - self._first_stage_anchor_generator = first_stage_anchor_generator - self._first_stage_atrous_rate = first_stage_atrous_rate - self._first_stage_box_predictor_arg_scope_fn = ( - first_stage_box_predictor_arg_scope_fn) - self._first_stage_box_predictor_kernel_size = ( - first_stage_box_predictor_kernel_size) - self._first_stage_box_predictor_depth = first_stage_box_predictor_depth - self._first_stage_minibatch_size = first_stage_minibatch_size - self._first_stage_sampler = sampler.BalancedPositiveNegativeSampler( - positive_fraction=first_stage_positive_balance_fraction) - self._first_stage_box_predictor = box_predictor.ConvolutionalBoxPredictor( - self._is_training, num_classes=1, - conv_hyperparams_fn=self._first_stage_box_predictor_arg_scope_fn, - min_depth=0, max_depth=0, num_layers_before_predictor=0, - use_dropout=False, dropout_keep_prob=1.0, kernel_size=1, - box_code_size=self._box_coder.code_size) - - self._first_stage_nms_score_threshold = first_stage_nms_score_threshold - self._first_stage_nms_iou_threshold = first_stage_nms_iou_threshold - self._first_stage_max_proposals = first_stage_max_proposals - - self._first_stage_localization_loss = ( - losses.WeightedSmoothL1LocalizationLoss()) - self._first_stage_objectness_loss = ( - losses.WeightedSoftmaxClassificationLoss()) - self._first_stage_loc_loss_weight = first_stage_localization_loss_weight - self._first_stage_obj_loss_weight = first_stage_objectness_loss_weight - - # Per-region cropping parameters - self._initial_crop_size = initial_crop_size - self._maxpool_kernel_size = maxpool_kernel_size - self._maxpool_stride = maxpool_stride - - self._mask_rcnn_box_predictor = second_stage_mask_rcnn_box_predictor - - self._second_stage_batch_size = second_stage_batch_size - self._second_stage_sampler = sampler.BalancedPositiveNegativeSampler( - positive_fraction=second_stage_balance_fraction) - - self._second_stage_nms_fn = second_stage_non_max_suppression_fn - self._second_stage_score_conversion_fn = second_stage_score_conversion_fn - - self._second_stage_localization_loss = ( - losses.WeightedSmoothL1LocalizationLoss()) - self._second_stage_classification_loss = second_stage_classification_loss - self._second_stage_mask_loss = ( - losses.WeightedSigmoidClassificationLoss()) - self._second_stage_loc_loss_weight = second_stage_localization_loss_weight - self._second_stage_cls_loss_weight = second_stage_classification_loss_weight - self._second_stage_mask_loss_weight = ( - second_stage_mask_prediction_loss_weight) - self._hard_example_miner = hard_example_miner - self._parallel_iterations = parallel_iterations - - if self._number_of_stages <= 0 or self._number_of_stages > 3: - raise ValueError('Number of stages should be a value in {1, 2, 3}.') - - @property - def first_stage_feature_extractor_scope(self): - return 'FirstStageFeatureExtractor' - - @property - def second_stage_feature_extractor_scope(self): - return 'SecondStageFeatureExtractor' - - @property - def first_stage_box_predictor_scope(self): - return 
'FirstStageBoxPredictor' - - @property - def second_stage_box_predictor_scope(self): - return 'SecondStageBoxPredictor' - - @property - def max_num_proposals(self): - """Max number of proposals (to pad to) for each image in the input batch. - - At training time, this is set to be the `second_stage_batch_size` if hard - example miner is not configured, else it is set to - `first_stage_max_proposals`. At inference time, this is always set to - `first_stage_max_proposals`. - - Returns: - A positive integer. - """ - if self._is_training and not self._hard_example_miner: - return self._second_stage_batch_size - return self._first_stage_max_proposals - - @property - def anchors(self): - if not self._anchors: - raise RuntimeError('anchors have not been constructed yet!') - if not isinstance(self._anchors, box_list.BoxList): - raise RuntimeError('anchors should be a BoxList object, but is not.') - return self._anchors - - def preprocess(self, inputs): - """Feature-extractor specific preprocessing. - - See base class. - - For Faster R-CNN, we perform image resizing in the base class --- each - class subclassing FasterRCNNMetaArch is responsible for any additional - preprocessing (e.g., scaling pixel values to be in [-1, 1]). - - Args: - inputs: a [batch, height_in, width_in, channels] float tensor representing - a batch of images with values between 0 and 255.0. - - Returns: - preprocessed_inputs: a [batch, height_out, width_out, channels] float - tensor representing a batch of images. - true_image_shapes: int32 tensor of shape [batch, 3] where each row is - of the form [height, width, channels] indicating the shapes - of true images in the resized images, as resized images can be padded - with zeros. - Raises: - ValueError: if inputs tensor does not have type tf.float32 - """ - if inputs.dtype is not tf.float32: - raise ValueError('`preprocess` expects a tf.float32 tensor') - with tf.name_scope('Preprocessor'): - outputs = shape_utils.static_or_dynamic_map_fn( - self._image_resizer_fn, - elems=inputs, - dtype=[tf.float32, tf.int32], - parallel_iterations=self._parallel_iterations) - resized_inputs = outputs[0] - true_image_shapes = outputs[1] - return (self._feature_extractor.preprocess(resized_inputs), - true_image_shapes) - - def _compute_clip_window(self, image_shapes): - """Computes clip window for non max suppression based on image shapes. - - This function assumes that the clip window's left top corner is at (0, 0). - - Args: - image_shapes: A 2-D int32 tensor of shape [batch_size, 3] containing - shapes of images in the batch. Each row represents [height, width, - channels] of an image. - - Returns: - A 2-D float32 tensor of shape [batch_size, 4] containing the clip window - for each image in the form [ymin, xmin, ymax, xmax]. - """ - clip_heights = image_shapes[:, 0] - clip_widths = image_shapes[:, 1] - clip_window = tf.to_float(tf.stack([tf.zeros_like(clip_heights), - tf.zeros_like(clip_heights), - clip_heights, clip_widths], axis=1)) - return clip_window - - def predict(self, preprocessed_inputs, true_image_shapes): - """Predicts unpostprocessed tensors from input tensor. - - This function takes an input batch of images and runs it through the - forward pass of the network to yield "raw" un-postprocessed predictions. - If `number_of_stages` is 1, this function only returns first stage - RPN predictions (un-postprocessed). Otherwise it returns both - first stage RPN predictions as well as second stage box classifier - predictions. - - Other remarks: - + Anchor pruning vs. 
clipping: following the recommendation of the Faster - R-CNN paper, we prune anchors that venture outside the image window at - training time and clip anchors to the image window at inference time. - + Proposal padding: as described at the top of the file, proposals are - padded to self._max_num_proposals and flattened so that proposals from all - images within the input batch are arranged along the same batch dimension. - - Args: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - true_image_shapes: int32 tensor of shape [batch, 3] where each row is - of the form [height, width, channels] indicating the shapes - of true images in the resized images, as resized images can be padded - with zeros. - - Returns: - prediction_dict: a dictionary holding "raw" prediction tensors: - 1) rpn_box_predictor_features: A 4-D float32 tensor with shape - [batch_size, height, width, depth] to be used for predicting proposal - boxes and corresponding objectness scores. - 2) rpn_features_to_crop: A 4-D float32 tensor with shape - [batch_size, height, width, depth] representing image features to crop - using the proposal boxes predicted by the RPN. - 3) image_shape: a 1-D tensor of shape [4] representing the input - image shape. - 4) rpn_box_encodings: 3-D float tensor of shape - [batch_size, num_anchors, self._box_coder.code_size] containing - predicted boxes. - 5) rpn_objectness_predictions_with_background: 3-D float tensor of shape - [batch_size, num_anchors, 2] containing class - predictions (logits) for each of the anchors. Note that this - tensor *includes* background class predictions (at class index 0). - 6) anchors: A 2-D tensor of shape [num_anchors, 4] representing anchors - for the first stage RPN (in absolute coordinates). Note that - `num_anchors` can differ depending on whether the model is created in - training or inference mode. - - (and if number_of_stages > 1): - 7) refined_box_encodings: a 3-D tensor with shape - [total_num_proposals, num_classes, self._box_coder.code_size] - representing predicted (final) refined box encodings, where - total_num_proposals=batch_size*self._max_num_proposals. If using - a shared box across classes the shape will instead be - [total_num_proposals, 1, self._box_coder.code_size]. - 8) class_predictions_with_background: a 2-D tensor with shape - [total_num_proposals, num_classes + 1] containing class - predictions (logits) for each of the proposals, where - total_num_proposals=batch_size*self._max_num_proposals. - Note that this tensor *includes* background class predictions - (at class index 0). - 9) num_proposals: An int32 tensor of shape [batch_size] representing the - number of proposals generated by the RPN. `num_proposals` allows us - to keep track of which entries are to be treated as zero paddings and - which are not since we always pad the number of proposals to be - `self.max_num_proposals` for each image. - 10) proposal_boxes: A float32 tensor of shape - [batch_size, self.max_num_proposals, 4] representing - decoded proposal bounding boxes in absolute coordinates. - 11) mask_predictions: (optional) a 4-D tensor with shape - [total_num_padded_proposals, num_classes, mask_height, mask_width] - containing instance mask predictions. - - Raises: - ValueError: If `predict` is called before `preprocess`.
- """ - (rpn_box_predictor_features, rpn_features_to_crop, anchors_boxlist, - image_shape) = self._extract_rpn_feature_maps(preprocessed_inputs) - (rpn_box_encodings, rpn_objectness_predictions_with_background - ) = self._predict_rpn_proposals(rpn_box_predictor_features) - - # The Faster R-CNN paper recommends pruning anchors that venture outside - # the image window at training time and clipping at inference time. - clip_window = tf.to_float(tf.stack([0, 0, image_shape[1], image_shape[2]])) - if self._is_training: - (rpn_box_encodings, rpn_objectness_predictions_with_background, - anchors_boxlist) = self._remove_invalid_anchors_and_predictions( - rpn_box_encodings, rpn_objectness_predictions_with_background, - anchors_boxlist, clip_window) - else: - anchors_boxlist = box_list_ops.clip_to_window( - anchors_boxlist, clip_window) - - self._anchors = anchors_boxlist - prediction_dict = { - 'rpn_box_predictor_features': rpn_box_predictor_features, - 'rpn_features_to_crop': rpn_features_to_crop, - 'image_shape': image_shape, - 'rpn_box_encodings': rpn_box_encodings, - 'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'anchors': self._anchors.get() - } - - if self._number_of_stages >= 2: - prediction_dict.update(self._predict_second_stage( - rpn_box_encodings, - rpn_objectness_predictions_with_background, - rpn_features_to_crop, - self._anchors.get(), image_shape, true_image_shapes)) - - if self._number_of_stages == 3: - prediction_dict = self._predict_third_stage( - prediction_dict, true_image_shapes) - - return prediction_dict - - def _image_batch_shape_2d(self, image_batch_shape_1d): - """Takes a 1-D image batch shape tensor and converts it to a 2-D tensor. - - Example: - If 1-D image batch shape tensor is [2, 300, 300, 3]. The corresponding 2-D - image batch tensor would be [[300, 300, 3], [300, 300, 3]] - - Args: - image_batch_shape_1d: 1-D tensor of the form [batch_size, height, - width, channels]. - - Returns: - image_batch_shape_2d: 2-D tensor of shape [batch_size, 3] were each row is - of the form [height, width, channels]. - """ - return tf.tile(tf.expand_dims(image_batch_shape_1d[1:], 0), - [image_batch_shape_1d[0], 1]) - - def _predict_second_stage(self, rpn_box_encodings, - rpn_objectness_predictions_with_background, - rpn_features_to_crop, - anchors, - image_shape, - true_image_shapes): - """Predicts the output tensors from second stage of Faster R-CNN. - - Args: - rpn_box_encodings: 4-D float tensor of shape - [batch_size, num_valid_anchors, self._box_coder.code_size] containing - predicted boxes. - rpn_objectness_predictions_with_background: 2-D float tensor of shape - [batch_size, num_valid_anchors, 2] containing class - predictions (logits) for each of the anchors. Note that this - tensor *includes* background class predictions (at class index 0). - rpn_features_to_crop: A 4-D float32 tensor with shape - [batch_size, height, width, depth] representing image features to crop - using the proposal boxes predicted by the RPN. - anchors: 2-D float tensor of shape - [num_anchors, self._box_coder.code_size]. - image_shape: A 1D int32 tensors of size [4] containing the image shape. - true_image_shapes: int32 tensor of shape [batch, 3] where each row is - of the form [height, width, channels] indicating the shapes - of true images in the resized images, as resized images can be padded - with zeros. 
- - Returns: - prediction_dict: a dictionary holding "raw" prediction tensors: - 1) refined_box_encodings: a 3-D tensor with shape - [total_num_proposals, num_classes, self._box_coder.code_size] - representing predicted (final) refined box encodings, where - total_num_proposals=batch_size*self._max_num_proposals. If using a - shared box across classes the shape will instead be - [total_num_proposals, 1, self._box_coder.code_size]. - 2) class_predictions_with_background: a 2-D tensor with shape - [total_num_proposals, num_classes + 1] containing class - predictions (logits) for each of the proposals, where - total_num_proposals=batch_size*self._max_num_proposals. - Note that this tensor *includes* background class predictions - (at class index 0). - 3) num_proposals: An int32 tensor of shape [batch_size] representing the - number of proposals generated by the RPN. `num_proposals` allows us - to keep track of which entries are to be treated as zero paddings and - which are not since we always pad the number of proposals to be - `self.max_num_proposals` for each image. - 4) proposal_boxes: A float32 tensor of shape - [batch_size, self.max_num_proposals, 4] representing - decoded proposal bounding boxes in absolute coordinates. - 5) proposal_boxes_normalized: A float32 tensor of shape - [batch_size, self.max_num_proposals, 4] representing decoded proposal - bounding boxes in normalized coordinates. Can be used to override the - boxes proposed by the RPN, thus enabling one to extract features and - get box classification and prediction for externally selected areas - of the image. - 6) box_classifier_features: a 4-D float32 tensor representing the - features for each proposal. - """ - image_shape_2d = self._image_batch_shape_2d(image_shape) - proposal_boxes_normalized, _, num_proposals = self._postprocess_rpn( - rpn_box_encodings, rpn_objectness_predictions_with_background, - anchors, image_shape_2d, true_image_shapes) - - flattened_proposal_feature_maps = ( - self._compute_second_stage_input_feature_maps( - rpn_features_to_crop, proposal_boxes_normalized)) - - box_classifier_features = ( - self._feature_extractor.extract_box_classifier_features( - flattened_proposal_feature_maps, - scope=self.second_stage_feature_extractor_scope)) - - box_predictions = self._mask_rcnn_box_predictor.predict( - [box_classifier_features], - num_predictions_per_location=[1], - scope=self.second_stage_box_predictor_scope, - predict_boxes_and_classes=True) - - refined_box_encodings = tf.squeeze( - box_predictions[box_predictor.BOX_ENCODINGS], - axis=1, name='all_refined_box_encodings') - class_predictions_with_background = tf.squeeze( - box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], - axis=1, name='all_class_predictions_with_background') - - absolute_proposal_boxes = ops.normalized_to_image_coordinates( - proposal_boxes_normalized, image_shape, self._parallel_iterations) - - prediction_dict = { - 'refined_box_encodings': refined_box_encodings, - 'class_predictions_with_background': - class_predictions_with_background, - 'num_proposals': num_proposals, - 'proposal_boxes': absolute_proposal_boxes, - 'box_classifier_features': box_classifier_features, - 'proposal_boxes_normalized': proposal_boxes_normalized, - } - - return prediction_dict - - def _predict_third_stage(self, prediction_dict, image_shapes): - """Predicts non-box, non-class outputs using refined detections.
- - For training, masks are predicted directly on the box_classifier_features, - which are region features from the initial anchor boxes. - For inference, this happens after calling the post-processing stage, such - that masks are only calculated for the top-scored boxes. - - Args: - prediction_dict: a dictionary holding "raw" prediction tensors: - 1) refined_box_encodings: a 3-D tensor with shape - [total_num_proposals, num_classes, self._box_coder.code_size] - representing predicted (final) refined box encodings, where - total_num_proposals=batch_size*self._max_num_proposals. If using a - shared box across classes the shape will instead be - [total_num_proposals, 1, self._box_coder.code_size]. - 2) class_predictions_with_background: a 2-D tensor with shape - [total_num_proposals, num_classes + 1] containing class - predictions (logits) for each of the proposals, where - total_num_proposals=batch_size*self._max_num_proposals. - Note that this tensor *includes* background class predictions - (at class index 0). - 3) num_proposals: An int32 tensor of shape [batch_size] representing the - number of proposals generated by the RPN. `num_proposals` allows us - to keep track of which entries are to be treated as zero paddings and - which are not since we always pad the number of proposals to be - `self.max_num_proposals` for each image. - 4) proposal_boxes: A float32 tensor of shape - [batch_size, self.max_num_proposals, 4] representing - decoded proposal bounding boxes in absolute coordinates. - 5) box_classifier_features: a 4-D float32 tensor representing the - features for each proposal. - image_shapes: A 2-D int32 tensor of shape [batch_size, 3] containing - shapes of images in the batch. - - Returns: - prediction_dict: a dictionary that, in addition to the input predictions, - also holds the following: - 1) mask_predictions: a 4-D tensor with shape - [batch_size, max_detection, mask_height, mask_width] containing - instance mask predictions.
- """ - if self._is_training: - curr_box_classifier_features = prediction_dict['box_classifier_features'] - detection_classes = prediction_dict['class_predictions_with_background'] - mask_predictions = self._mask_rcnn_box_predictor.predict( - [curr_box_classifier_features], - num_predictions_per_location=[1], - scope=self.second_stage_box_predictor_scope, - predict_boxes_and_classes=False, - predict_auxiliary_outputs=True) - prediction_dict['mask_predictions'] = tf.squeeze(mask_predictions[ - box_predictor.MASK_PREDICTIONS], axis=1) - else: - detections_dict = self._postprocess_box_classifier( - prediction_dict['refined_box_encodings'], - prediction_dict['class_predictions_with_background'], - prediction_dict['proposal_boxes'], - prediction_dict['num_proposals'], - image_shapes) - prediction_dict.update(detections_dict) - detection_boxes = detections_dict[ - fields.DetectionResultFields.detection_boxes] - detection_classes = detections_dict[ - fields.DetectionResultFields.detection_classes] - rpn_features_to_crop = prediction_dict['rpn_features_to_crop'] - batch_size = tf.shape(detection_boxes)[0] - max_detection = tf.shape(detection_boxes)[1] - flattened_detected_feature_maps = ( - self._compute_second_stage_input_feature_maps( - rpn_features_to_crop, detection_boxes)) - curr_box_classifier_features = ( - self._feature_extractor.extract_box_classifier_features( - flattened_detected_feature_maps, - scope=self.second_stage_feature_extractor_scope)) - - mask_predictions = self._mask_rcnn_box_predictor.predict( - [curr_box_classifier_features], - num_predictions_per_location=[1], - scope=self.second_stage_box_predictor_scope, - predict_boxes_and_classes=False, - predict_auxiliary_outputs=True) - - detection_masks = tf.squeeze(mask_predictions[ - box_predictor.MASK_PREDICTIONS], axis=1) - - _, num_classes, mask_height, mask_width = ( - detection_masks.get_shape().as_list()) - _, max_detection = detection_classes.get_shape().as_list() - if num_classes > 1: - detection_masks = self._gather_instance_masks( - detection_masks, detection_classes) - - prediction_dict[fields.DetectionResultFields.detection_masks] = ( - tf.reshape(detection_masks, - [batch_size, max_detection, mask_height, mask_width])) - - return prediction_dict - - def _gather_instance_masks(self, instance_masks, classes): - """Gathers the masks that correspond to classes. - - Args: - instance_masks: A 4-D float32 tensor with shape - [K, num_classes, mask_height, mask_width]. - classes: A 2-D int32 tensor with shape [batch_size, max_detection]. - - Returns: - masks: a 3-D float32 tensor with shape [K, mask_height, mask_width]. - """ - _, num_classes, height, width = instance_masks.get_shape().as_list() - k = tf.shape(instance_masks)[0] - instance_masks = tf.reshape(instance_masks, [-1, height, width]) - classes = tf.to_int32(tf.reshape(classes, [-1])) - gather_idx = tf.range(k) * num_classes + classes - return tf.gather(instance_masks, gather_idx) - - def _extract_rpn_feature_maps(self, preprocessed_inputs): - """Extracts RPN features. - - This function extracts two feature maps: a feature map to be directly - fed to a box predictor (to predict location and objectness scores for - proposals) and a feature map from which to crop regions which will then - be sent to the second stage box classifier. - - Args: - preprocessed_inputs: a [batch, height, width, channels] image tensor. 
- - Returns: - rpn_box_predictor_features: A 4-D float32 tensor with shape - [batch, height, width, depth] to be used for predicting proposal boxes - and corresponding objectness scores. - rpn_features_to_crop: A 4-D float32 tensor with shape - [batch, height, width, depth] representing image features to crop using - the proposal boxes. - anchors: A BoxList representing anchors (for the RPN) in - absolute coordinates. - image_shape: A 1-D tensor representing the input image shape. - """ - image_shape = tf.shape(preprocessed_inputs) - rpn_features_to_crop, _ = self._feature_extractor.extract_proposal_features( - preprocessed_inputs, scope=self.first_stage_feature_extractor_scope) - - feature_map_shape = tf.shape(rpn_features_to_crop) - anchors = box_list_ops.concatenate( - self._first_stage_anchor_generator.generate([(feature_map_shape[1], - feature_map_shape[2])])) - with slim.arg_scope(self._first_stage_box_predictor_arg_scope_fn()): - kernel_size = self._first_stage_box_predictor_kernel_size - rpn_box_predictor_features = slim.conv2d( - rpn_features_to_crop, - self._first_stage_box_predictor_depth, - kernel_size=[kernel_size, kernel_size], - rate=self._first_stage_atrous_rate, - activation_fn=tf.nn.relu6) - return (rpn_box_predictor_features, rpn_features_to_crop, - anchors, image_shape) - - def _predict_rpn_proposals(self, rpn_box_predictor_features): - """Adds box predictors to RPN feature map to predict proposals. - - Note that the resulting tensors will not have been postprocessed. - - Args: - rpn_box_predictor_features: A 4-D float32 tensor with shape - [batch, height, width, depth] to be used for predicting proposal boxes - and corresponding objectness scores. - - Returns: - box_encodings: 3-D float tensor of shape - [batch_size, num_anchors, self._box_coder.code_size] containing - predicted boxes. - objectness_predictions_with_background: 3-D float tensor of shape - [batch_size, num_anchors, 2] containing class - predictions (logits) for each of the anchors. Note that this - tensor *includes* background class predictions (at class index 0). - - Raises: - RuntimeError: if the anchor generator generates anchors corresponding to - multiple feature maps. We currently assume that a single feature map - is generated for the RPN. - """ - num_anchors_per_location = ( - self._first_stage_anchor_generator.num_anchors_per_location()) - if len(num_anchors_per_location) != 1: - raise RuntimeError('anchor_generator is expected to generate anchors ' - 'corresponding to a single feature map.') - box_predictions = self._first_stage_box_predictor.predict( - [rpn_box_predictor_features], - num_anchors_per_location, - scope=self.first_stage_box_predictor_scope) - - box_encodings = tf.concat( - box_predictions[box_predictor.BOX_ENCODINGS], axis=1) - objectness_predictions_with_background = tf.concat( - box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], - axis=1) - return (tf.squeeze(box_encodings, axis=2), - objectness_predictions_with_background) - - def _remove_invalid_anchors_and_predictions( - self, - box_encodings, - objectness_predictions_with_background, - anchors_boxlist, - clip_window): - """Removes anchors that (partially) fall outside an image. - - Also removes associated box encodings and objectness predictions. - - Args: - box_encodings: 3-D float tensor of shape - [batch_size, num_anchors, self._box_coder.code_size] containing - predicted boxes.
- objectness_predictions_with_background: 3-D float tensor of shape - [batch_size, num_anchors, 2] containing class - predictions (logits) for each of the anchors. Note that this - tensor *includes* background class predictions (at class index 0). - anchors_boxlist: A BoxList representing num_anchors anchors (for the RPN) - in absolute coordinates. - clip_window: a 1-D tensor representing the [ymin, xmin, ymax, xmax] - extent of the window to clip/prune to. - - Returns: - box_encodings: 3-D float tensor of shape - [batch_size, num_valid_anchors, self._box_coder.code_size] containing - predicted boxes, where num_valid_anchors <= num_anchors. - objectness_predictions_with_background: 3-D float tensor of shape - [batch_size, num_valid_anchors, 2] containing class - predictions (logits) for each of the anchors, where - num_valid_anchors <= num_anchors. Note that this - tensor *includes* background class predictions (at class index 0). - anchors: A BoxList representing num_valid_anchors anchors (for the RPN) in - absolute coordinates. - """ - pruned_anchors_boxlist, keep_indices = box_list_ops.prune_outside_window( - anchors_boxlist, clip_window) - def _batch_gather_kept_indices(predictions_tensor): - return shape_utils.static_or_dynamic_map_fn( - partial(tf.gather, indices=keep_indices), - elems=predictions_tensor, - dtype=tf.float32, - parallel_iterations=self._parallel_iterations, - back_prop=True) - return (_batch_gather_kept_indices(box_encodings), - _batch_gather_kept_indices(objectness_predictions_with_background), - pruned_anchors_boxlist) - - def _flatten_first_two_dimensions(self, inputs): - """Flattens `K-d` tensor along batch dimension to be a `(K-1)-d` tensor. - - Converts `inputs` with shape [A, B, ..., depth] into a tensor of shape - [A * B, ..., depth]. - - Args: - inputs: A float tensor with shape [A, B, ..., depth]. Note that the first - two and last dimensions must be statically defined. - - Returns: - A float tensor with shape [A * B, ..., depth] (where the first and last - dimensions are statically defined). - """ - combined_shape = shape_utils.combined_static_and_dynamic_shape(inputs) - flattened_shape = tf.stack([combined_shape[0] * combined_shape[1]] + - combined_shape[2:]) - return tf.reshape(inputs, flattened_shape) - - def postprocess(self, prediction_dict, true_image_shapes): - """Converts prediction tensors to final detections. - - This function converts raw prediction tensors to final detection results. - See base class for output format conventions. Note also that by default, - scores are to be interpreted as logits, but if a score_converter is used, - then scores are remapped (and may thus have a different interpretation). - - If number_of_stages=1, the returned results represent proposals from the - first stage RPN and are padded to have self.max_num_proposals for each - image; otherwise, the results can be interpreted as multiclass detections - from the full two-stage model and are padded to self._max_detections. - - Args: - prediction_dict: a dictionary holding prediction tensors (see the - documentation for the predict method). If number_of_stages=1, we - expect prediction_dict to contain `rpn_box_encodings`, - `rpn_objectness_predictions_with_background`, `rpn_features_to_crop`, - and `anchors` fields. Otherwise we expect prediction_dict to - additionally contain `refined_box_encodings`, - `class_predictions_with_background`, `num_proposals`, - `proposal_boxes` and, optionally, `mask_predictions` fields.
- true_image_shapes: int32 tensor of shape [batch, 3] where each row is - of the form [height, width, channels] indicating the shapes - of true images in the resized images, as resized images can be padded - with zeros. - - Returns: - detections: a dictionary containing the following fields: - detection_boxes: [batch, max_detections, 4] - detection_scores: [batch, max_detections] - detection_classes: [batch, max_detections] - (this entry is only created if number_of_stages > 1) - num_detections: [batch] - - Raises: - ValueError: If `predict` is called before `preprocess`. - """ - - with tf.name_scope('FirstStagePostprocessor'): - if self._number_of_stages == 1: - proposal_boxes, proposal_scores, num_proposals = self._postprocess_rpn( - prediction_dict['rpn_box_encodings'], - prediction_dict['rpn_objectness_predictions_with_background'], - prediction_dict['anchors'], - true_image_shapes, - true_image_shapes) - return { - fields.DetectionResultFields.detection_boxes: proposal_boxes, - fields.DetectionResultFields.detection_scores: proposal_scores, - fields.DetectionResultFields.num_detections: - tf.to_float(num_proposals), - } - - # TODO(jrru): Remove mask_predictions from _post_process_box_classifier. - with tf.name_scope('SecondStagePostprocessor'): - if (self._number_of_stages == 2 or - (self._number_of_stages == 3 and self._is_training)): - mask_predictions = prediction_dict.get(box_predictor.MASK_PREDICTIONS) - detections_dict = self._postprocess_box_classifier( - prediction_dict['refined_box_encodings'], - prediction_dict['class_predictions_with_background'], - prediction_dict['proposal_boxes'], - prediction_dict['num_proposals'], - true_image_shapes, - mask_predictions=mask_predictions) - return detections_dict - - if self._number_of_stages == 3: - # Post processing is already performed in 3rd stage. We need to transfer - # postprocessed tensors from `prediction_dict` to `detections_dict`. - detections_dict = {} - for key in prediction_dict: - if key == fields.DetectionResultFields.detection_masks: - detections_dict[key] = tf.sigmoid(prediction_dict[key]) - elif 'detection' in key: - detections_dict[key] = prediction_dict[key] - return detections_dict - - def _postprocess_rpn(self, - rpn_box_encodings_batch, - rpn_objectness_predictions_with_background_batch, - anchors, - image_shapes, - true_image_shapes): - """Converts first stage prediction tensors from the RPN to proposals. - - This function decodes the raw RPN predictions and runs non-max suppression - on the result. - - Note that the behavior of this function is slightly modified during - training --- specifically, we stop the gradient from passing through the - proposal boxes and we only return a balanced sampled subset of proposals - with size `second_stage_batch_size`. - - Args: - rpn_box_encodings_batch: A 3-D float32 tensor of shape - [batch_size, num_anchors, self._box_coder.code_size] containing - predicted proposal box encodings. - rpn_objectness_predictions_with_background_batch: A 3-D float tensor of - shape [batch_size, num_anchors, 2] containing objectness predictions - (logits) for each of the anchors with 0 corresponding to background - and 1 corresponding to object. - anchors: A 2-D tensor of shape [num_anchors, 4] representing anchors - for the first stage RPN. Note that `num_anchors` can differ depending - on whether the model is created in training or inference mode. - image_shapes: A 2-D tensor of shape [batch, 3] containing the shapes of - images in the batch.
- true_image_shapes: int32 tensor of shape [batch, 3] where each row is - of the form [height, width, channels] indicating the shapes - of true images in the resized images, as resized images can be padded - with zeros. - - Returns: - proposal_boxes: A float tensor with shape - [batch_size, max_num_proposals, 4] representing the (potentially zero - padded) proposal boxes for all images in the batch. These boxes are - represented as normalized coordinates. - proposal_scores: A float tensor with shape - [batch_size, max_num_proposals] representing the (potentially zero - padded) proposal objectness scores for all images in the batch. - num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch] - representing the number of proposals predicted for each image in - the batch. - """ - rpn_box_encodings_batch = tf.expand_dims(rpn_box_encodings_batch, axis=2) - rpn_encodings_shape = shape_utils.combined_static_and_dynamic_shape( - rpn_box_encodings_batch) - tiled_anchor_boxes = tf.tile( - tf.expand_dims(anchors, 0), [rpn_encodings_shape[0], 1, 1]) - proposal_boxes = self._batch_decode_boxes(rpn_box_encodings_batch, - tiled_anchor_boxes) - proposal_boxes = tf.squeeze(proposal_boxes, axis=2) - rpn_objectness_softmax_without_background = tf.nn.softmax( - rpn_objectness_predictions_with_background_batch)[:, :, 1] - clip_window = self._compute_clip_window(image_shapes) - (proposal_boxes, proposal_scores, _, _, _, - num_proposals) = post_processing.batch_multiclass_non_max_suppression( - tf.expand_dims(proposal_boxes, axis=2), - tf.expand_dims(rpn_objectness_softmax_without_background, - axis=2), - self._first_stage_nms_score_threshold, - self._first_stage_nms_iou_threshold, - self._first_stage_max_proposals, - self._first_stage_max_proposals, - clip_window=clip_window) - if self._is_training: - proposal_boxes = tf.stop_gradient(proposal_boxes) - if not self._hard_example_miner: - (groundtruth_boxlists, groundtruth_classes_with_background_list, _, - _) = self._format_groundtruth_data(true_image_shapes) - (proposal_boxes, proposal_scores, - num_proposals) = self._unpad_proposals_and_sample_box_classifier_batch( - proposal_boxes, proposal_scores, num_proposals, - groundtruth_boxlists, groundtruth_classes_with_background_list) - # normalize proposal boxes - def normalize_boxes(args): - proposal_boxes_per_image = args[0] - image_shape = args[1] - normalized_boxes_per_image = box_list_ops.to_normalized_coordinates( - box_list.BoxList(proposal_boxes_per_image), image_shape[0], - image_shape[1], check_range=False).get() - return normalized_boxes_per_image - normalized_proposal_boxes = shape_utils.static_or_dynamic_map_fn( - normalize_boxes, elems=[proposal_boxes, image_shapes], dtype=tf.float32) - return normalized_proposal_boxes, proposal_scores, num_proposals - - def _unpad_proposals_and_sample_box_classifier_batch( - self, - proposal_boxes, - proposal_scores, - num_proposals, - groundtruth_boxlists, - groundtruth_classes_with_background_list): - """Unpads proposals and samples a minibatch for second stage. - - Args: - proposal_boxes: A float tensor with shape - [batch_size, num_proposals, 4] representing the (potentially zero - padded) proposal boxes for all images in the batch. These boxes are - represented in absolute coordinates. - proposal_scores: A float tensor with shape - [batch_size, num_proposals] representing the (potentially zero - padded) proposal objectness scores for all images in the batch. - num_proposals: A Tensor of type `int32`. 
A 1-D tensor of shape [batch] - representing the number of proposals predicted for each image in - the batch. - groundtruth_boxlists: A list of BoxLists containing (absolute) coordinates - of the groundtruth boxes. - groundtruth_classes_with_background_list: A list of 2-D one-hot - (or k-hot) tensors of shape [num_boxes, num_classes+1] containing the - class targets with the 0th index assumed to map to the background class. - - Returns: - proposal_boxes: A float tensor with shape - [batch_size, second_stage_batch_size, 4] representing the (potentially - zero padded) proposal boxes for all images in the batch. These boxes - are represented in absolute coordinates. - proposal_scores: A float tensor with shape - [batch_size, second_stage_batch_size] representing the (potentially zero - padded) proposal objectness scores for all images in the batch. - num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch] - representing the number of proposals predicted for each image in - the batch. - """ - single_image_proposal_box_sample = [] - single_image_proposal_score_sample = [] - single_image_num_proposals_sample = [] - for (single_image_proposal_boxes, - single_image_proposal_scores, - single_image_num_proposals, - single_image_groundtruth_boxlist, - single_image_groundtruth_classes_with_background) in zip( - tf.unstack(proposal_boxes), - tf.unstack(proposal_scores), - tf.unstack(num_proposals), - groundtruth_boxlists, - groundtruth_classes_with_background_list): - static_shape = single_image_proposal_boxes.get_shape() - sliced_static_shape = tf.TensorShape([tf.Dimension(None), - static_shape.dims[-1]]) - single_image_proposal_boxes = tf.slice( - single_image_proposal_boxes, - [0, 0], - [single_image_num_proposals, -1]) - single_image_proposal_boxes.set_shape(sliced_static_shape) - - single_image_proposal_scores = tf.slice(single_image_proposal_scores, - [0], - [single_image_num_proposals]) - single_image_boxlist = box_list.BoxList(single_image_proposal_boxes) - single_image_boxlist.add_field(fields.BoxListFields.scores, - single_image_proposal_scores) - sampled_boxlist = self._sample_box_classifier_minibatch( - single_image_boxlist, - single_image_groundtruth_boxlist, - single_image_groundtruth_classes_with_background) - sampled_padded_boxlist = box_list_ops.pad_or_clip_box_list( - sampled_boxlist, - num_boxes=self._second_stage_batch_size) - single_image_num_proposals_sample.append(tf.minimum( - sampled_boxlist.num_boxes(), - self._second_stage_batch_size)) - bb = sampled_padded_boxlist.get() - single_image_proposal_box_sample.append(bb) - single_image_proposal_score_sample.append( - sampled_padded_boxlist.get_field(fields.BoxListFields.scores)) - return (tf.stack(single_image_proposal_box_sample), - tf.stack(single_image_proposal_score_sample), - tf.stack(single_image_num_proposals_sample)) - - def _format_groundtruth_data(self, true_image_shapes): - """Helper function for preparing groundtruth data for target assignment. - - In order to be consistent with the model.DetectionModel interface, - groundtruth boxes are specified in normalized coordinates and classes are - specified as label indices with no assumed background category. To prepare - for target assignment, we: - 1) convert boxes to absolute coordinates, - 2) add a background class at class index 0, and - 3) resize groundtruth instance masks, if available, to match - image_shape.
- - Args: - true_image_shapes: int32 tensor of shape [batch, 3] where each row is - of the form [height, width, channels] indicating the shapes - of true images in the resized images, as resized images can be padded - with zeros. - - Returns: - groundtruth_boxlists: A list of BoxLists containing (absolute) coordinates - of the groundtruth boxes. - groundtruth_classes_with_background_list: A list of 2-D one-hot - (or k-hot) tensors of shape [num_boxes, num_classes+1] containing the - class targets with the 0th index assumed to map to the background class. - groundtruth_masks_list: If present, a list of 3-D tf.float32 tensors of - shape [num_boxes, image_height, image_width] containing instance masks. - This is set to None if no masks exist in the provided groundtruth. - """ - groundtruth_boxlists = [ - box_list_ops.to_absolute_coordinates( - box_list.BoxList(boxes), true_image_shapes[i, 0], - true_image_shapes[i, 1]) - for i, boxes in enumerate( - self.groundtruth_lists(fields.BoxListFields.boxes)) - ] - groundtruth_classes_with_background_list = [ - tf.to_float( - tf.pad(one_hot_encoding, [[0, 0], [1, 0]], mode='CONSTANT')) - for one_hot_encoding in self.groundtruth_lists( - fields.BoxListFields.classes)] - - groundtruth_masks_list = self._groundtruth_lists.get( - fields.BoxListFields.masks) - if groundtruth_masks_list is not None: - resized_masks_list = [] - for mask in groundtruth_masks_list: - _, resized_mask, _ = self._image_resizer_fn( - # Reuse the given `image_resizer_fn` to resize groundtruth masks. - # `mask` tensor for an image is of the shape [num_masks, - # image_height, image_width]. Below we create a dummy image of - # the shape [image_height, image_width, 1] to use with - # `image_resizer_fn`. - image=tf.zeros(tf.stack([tf.shape(mask)[1], tf.shape(mask)[2], 1])), - masks=mask) - resized_masks_list.append(resized_mask) - - groundtruth_masks_list = resized_masks_list - groundtruth_weights_list = None - if self.groundtruth_has_field(fields.BoxListFields.weights): - groundtruth_weights_list = self.groundtruth_lists( - fields.BoxListFields.weights) - - return (groundtruth_boxlists, groundtruth_classes_with_background_list, - groundtruth_masks_list, groundtruth_weights_list) - - def _sample_box_classifier_minibatch(self, - proposal_boxlist, - groundtruth_boxlist, - groundtruth_classes_with_background): - """Samples a mini-batch of proposals to be sent to the box classifier. - - Helper function for self._postprocess_rpn. - - Args: - proposal_boxlist: A BoxList containing K proposal boxes in absolute - coordinates. - groundtruth_boxlist: A BoxList containing N groundtruth object boxes in - absolute coordinates. - groundtruth_classes_with_background: A tensor with shape - `[N, self.num_classes + 1]` representing groundtruth classes. The - classes are assumed to be k-hot encoded, and include background as the - zero-th class. - - Returns: - a BoxList containing the sampled proposals. - """ - (cls_targets, cls_weights, _, _, _) = self._detector_target_assigner.assign( - proposal_boxlist, groundtruth_boxlist, - groundtruth_classes_with_background) - # Selects all boxes as candidates if none of them is selected according - # to cls_weights. This could happen as boxes within certain IOU ranges - # are ignored. If triggered, the selected boxes will still be ignored - # during loss computation.
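- # For example, if cls_weights is [0., 0., 0.] (every proposal ignored), - # tf.reduce_sum(cls_weights) is 0, so 1. is added to every entry below and - # all proposals become candidates for the sampler.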
- cls_weights += tf.to_float(tf.equal(tf.reduce_sum(cls_weights), 0)) - positive_indicator = tf.greater(tf.argmax(cls_targets, axis=1), 0) - sampled_indices = self._second_stage_sampler.subsample( - tf.cast(cls_weights, tf.bool), - self._second_stage_batch_size, - positive_indicator) - return box_list_ops.boolean_mask(proposal_boxlist, sampled_indices) - - def _compute_second_stage_input_feature_maps(self, features_to_crop, - proposal_boxes_normalized): - """Crops proposal regions from the feature map for a batch of images. - - Helper function for self._postprocess_rpn. This function calls - `tf.image.crop_and_resize` to create the feature map to be passed to the - second stage box classifier for each proposal. - - Args: - features_to_crop: A float32 tensor with shape - [batch_size, height, width, depth] - proposal_boxes_normalized: A float32 tensor with shape [batch_size, - num_proposals, box_code_size] containing proposal boxes in - normalized coordinates. - - Returns: - A float32 tensor with shape [K, new_height, new_width, depth]. - """ - def get_box_inds(proposals): - proposals_shape = proposals.get_shape().as_list() - if any(dim is None for dim in proposals_shape): - proposals_shape = tf.shape(proposals) - ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32) - multiplier = tf.expand_dims( - tf.range(start=0, limit=proposals_shape[0]), 1) - return tf.reshape(ones_mat * multiplier, [-1]) - - cropped_regions = tf.image.crop_and_resize( - features_to_crop, - self._flatten_first_two_dimensions(proposal_boxes_normalized), - get_box_inds(proposal_boxes_normalized), - (self._initial_crop_size, self._initial_crop_size)) - return slim.max_pool2d( - cropped_regions, - [self._maxpool_kernel_size, self._maxpool_kernel_size], - stride=self._maxpool_stride) - - def _postprocess_box_classifier(self, - refined_box_encodings, - class_predictions_with_background, - proposal_boxes, - num_proposals, - image_shapes, - mask_predictions=None): - """Converts predictions from the second stage box classifier to detections. - - Args: - refined_box_encodings: a 3-D float tensor with shape - [total_num_padded_proposals, num_classes, self._box_coder.code_size] - representing predicted (final) refined box encodings. If using a shared - box across classes the shape will instead be - [total_num_padded_proposals, 1, self._box_coder.code_size]. - class_predictions_with_background: a 2-D float tensor with shape - [total_num_padded_proposals, num_classes + 1] containing class - predictions (logits) for each of the proposals. Note that this tensor - *includes* background class predictions (at class index 0). - proposal_boxes: a 3-D float tensor with shape - [batch_size, self.max_num_proposals, 4] representing decoded proposal - bounding boxes in absolute coordinates. - num_proposals: a 1-D int32 tensor of shape [batch] representing the number - of proposals predicted for each image in the batch. - image_shapes: a 2-D int32 tensor containing shapes of the input images in - the batch. - mask_predictions: (optional) a 4-D float tensor with shape - [total_num_padded_proposals, num_classes, mask_height, mask_width] - containing instance mask prediction logits. - - Returns: - A dictionary containing: - `detection_boxes`: [batch, max_detections, 4] - `detection_scores`: [batch, max_detections] - `detection_classes`: [batch, max_detections] - `num_detections`: [batch] - `detection_masks`: - (optional) [batch, max_detections, mask_height, mask_width]. Note - that a pixel-wise sigmoid score converter is applied to the detection - masks.
- """ - refined_box_encodings_batch = tf.reshape( - refined_box_encodings, - [-1, - self.max_num_proposals, - refined_box_encodings.shape[1], - self._box_coder.code_size]) - class_predictions_with_background_batch = tf.reshape( - class_predictions_with_background, - [-1, self.max_num_proposals, self.num_classes + 1] - ) - refined_decoded_boxes_batch = self._batch_decode_boxes( - refined_box_encodings_batch, proposal_boxes) - class_predictions_with_background_batch = ( - self._second_stage_score_conversion_fn( - class_predictions_with_background_batch)) - class_predictions_batch = tf.reshape( - tf.slice(class_predictions_with_background_batch, - [0, 0, 1], [-1, -1, -1]), - [-1, self.max_num_proposals, self.num_classes]) - clip_window = self._compute_clip_window(image_shapes) - mask_predictions_batch = None - if mask_predictions is not None: - mask_height = mask_predictions.shape[2].value - mask_width = mask_predictions.shape[3].value - mask_predictions = tf.sigmoid(mask_predictions) - mask_predictions_batch = tf.reshape( - mask_predictions, [-1, self.max_num_proposals, - self.num_classes, mask_height, mask_width]) - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, _, - num_detections) = self._second_stage_nms_fn( - refined_decoded_boxes_batch, - class_predictions_batch, - clip_window=clip_window, - change_coordinate_frame=True, - num_valid_boxes=num_proposals, - masks=mask_predictions_batch) - detections = { - fields.DetectionResultFields.detection_boxes: nmsed_boxes, - fields.DetectionResultFields.detection_scores: nmsed_scores, - fields.DetectionResultFields.detection_classes: nmsed_classes, - fields.DetectionResultFields.num_detections: tf.to_float(num_detections) - } - if nmsed_masks is not None: - detections[fields.DetectionResultFields.detection_masks] = nmsed_masks - return detections - - def _batch_decode_boxes(self, box_encodings, anchor_boxes): - """Decodes box encodings with respect to the anchor boxes. - - Args: - box_encodings: a 4-D tensor with shape - [batch_size, num_anchors, num_classes, self._box_coder.code_size] - representing box encodings. - anchor_boxes: [batch_size, num_anchors, self._box_coder.code_size] - representing decoded bounding boxes. If using a shared box across - classes the shape will instead be - [total_num_proposals, 1, self._box_coder.code_size]. - - Returns: - decoded_boxes: a - [batch_size, num_anchors, num_classes, self._box_coder.code_size] - float tensor representing bounding box predictions (for each image in - batch, proposal and class). If using a shared box across classes the - shape will instead be - [batch_size, num_anchors, 1, self._box_coder.code_size]. - """ - combined_shape = shape_utils.combined_static_and_dynamic_shape( - box_encodings) - num_classes = combined_shape[2] - tiled_anchor_boxes = tf.tile( - tf.expand_dims(anchor_boxes, 2), [1, 1, num_classes, 1]) - tiled_anchors_boxlist = box_list.BoxList( - tf.reshape(tiled_anchor_boxes, [-1, 4])) - decoded_boxes = self._box_coder.decode( - tf.reshape(box_encodings, [-1, self._box_coder.code_size]), - tiled_anchors_boxlist) - return tf.reshape(decoded_boxes.get(), - tf.stack([combined_shape[0], combined_shape[1], - num_classes, 4])) - - def loss(self, prediction_dict, true_image_shapes, scope=None): - """Compute scalar loss tensors given prediction tensors. - - If number_of_stages=1, only RPN related losses are computed (i.e., - `rpn_localization_loss` and `rpn_objectness_loss`). Otherwise all - losses are computed. 
- - Args: - prediction_dict: a dictionary holding prediction tensors (see the - documentation for the predict method). If number_of_stages=1, we - expect prediction_dict to contain `rpn_box_encodings`, - `rpn_objectness_predictions_with_background`, `rpn_features_to_crop`, - `image_shape`, and `anchors` fields. Otherwise we expect - prediction_dict to additionally contain `refined_box_encodings`, - `class_predictions_with_background`, `num_proposals`, and - `proposal_boxes` fields. - true_image_shapes: int32 tensor of shape [batch, 3] where each row is - of the form [height, width, channels] indicating the shapes - of true images in the resized images, as resized images can be padded - with zeros. - scope: Optional scope name. - - Returns: - a dictionary mapping loss keys (`first_stage_localization_loss`, - `first_stage_objectness_loss`, `second_stage_localization_loss`, - `second_stage_classification_loss`) to scalar tensors representing - corresponding loss values. - """ - with tf.name_scope(scope, 'Loss', prediction_dict.values()): - (groundtruth_boxlists, groundtruth_classes_with_background_list, - groundtruth_masks_list, groundtruth_weights_list - ) = self._format_groundtruth_data(true_image_shapes) - loss_dict = self._loss_rpn( - prediction_dict['rpn_box_encodings'], - prediction_dict['rpn_objectness_predictions_with_background'], - prediction_dict['anchors'], groundtruth_boxlists, - groundtruth_classes_with_background_list, groundtruth_weights_list) - if self._number_of_stages > 1: - loss_dict.update( - self._loss_box_classifier( - prediction_dict['refined_box_encodings'], - prediction_dict['class_predictions_with_background'], - prediction_dict['proposal_boxes'], - prediction_dict['num_proposals'], - groundtruth_boxlists, - groundtruth_classes_with_background_list, - groundtruth_weights_list, - prediction_dict['image_shape'], - prediction_dict.get('mask_predictions'), - groundtruth_masks_list, - )) - return loss_dict - - def _loss_rpn(self, rpn_box_encodings, - rpn_objectness_predictions_with_background, anchors, - groundtruth_boxlists, groundtruth_classes_with_background_list, - groundtruth_weights_list): - """Computes scalar RPN loss tensors. - - Uses self._proposal_target_assigner to obtain regression and classification - targets for the first stage RPN, samples a "minibatch" of anchors to - participate in the loss computation, and returns the RPN losses. - - Args: - rpn_box_encodings: A 3-D float tensor of shape - [batch_size, num_anchors, self._box_coder.code_size] containing - predicted proposal box encodings. - rpn_objectness_predictions_with_background: A 3-D float tensor of shape - [batch_size, num_anchors, 2] containing objectness predictions - (logits) for each of the anchors with 0 corresponding to background - and 1 corresponding to object. - anchors: A 2-D tensor of shape [num_anchors, 4] representing anchors - for the first stage RPN. Note that `num_anchors` can differ depending - on whether the model is created in training or inference mode. - groundtruth_boxlists: A list of BoxLists containing coordinates of the - groundtruth boxes. - groundtruth_classes_with_background_list: A list of 2-D one-hot - (or k-hot) tensors of shape [num_boxes, num_classes+1] containing the - class targets with the 0th index assumed to map to the background class. - groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape - [num_boxes] containing weights for groundtruth boxes.
- - Returns: - a dictionary mapping loss keys (`first_stage_localization_loss`, - `first_stage_objectness_loss`) to scalar tensors representing - corresponding loss values. - """ - with tf.name_scope('RPNLoss'): - (batch_cls_targets, batch_cls_weights, batch_reg_targets, - batch_reg_weights, _) = target_assigner.batch_assign_targets( - self._proposal_target_assigner, box_list.BoxList(anchors), - groundtruth_boxlists, - len(groundtruth_boxlists) * [None], groundtruth_weights_list) - batch_cls_targets = tf.squeeze(batch_cls_targets, axis=2) - - def _minibatch_subsample_fn(inputs): - cls_targets, cls_weights = inputs - return self._first_stage_sampler.subsample( - tf.cast(cls_weights, tf.bool), - self._first_stage_minibatch_size, tf.cast(cls_targets, tf.bool)) - batch_sampled_indices = tf.to_float(shape_utils.static_or_dynamic_map_fn( - _minibatch_subsample_fn, - [batch_cls_targets, batch_cls_weights], - dtype=tf.bool, - parallel_iterations=self._parallel_iterations, - back_prop=True)) - - # Normalize by number of examples in sampled minibatch - normalizer = tf.reduce_sum(batch_sampled_indices, axis=1) - batch_one_hot_targets = tf.one_hot( - tf.to_int32(batch_cls_targets), depth=2) - sampled_reg_indices = tf.multiply(batch_sampled_indices, - batch_reg_weights) - - localization_losses = self._first_stage_localization_loss( - rpn_box_encodings, batch_reg_targets, weights=sampled_reg_indices) - objectness_losses = self._first_stage_objectness_loss( - rpn_objectness_predictions_with_background, - batch_one_hot_targets, weights=batch_sampled_indices) - localization_loss = tf.reduce_mean( - tf.reduce_sum(localization_losses, axis=1) / normalizer) - objectness_loss = tf.reduce_mean( - tf.reduce_sum(objectness_losses, axis=1) / normalizer) - - localization_loss = tf.multiply(self._first_stage_loc_loss_weight, - localization_loss, - name='localization_loss') - objectness_loss = tf.multiply(self._first_stage_obj_loss_weight, - objectness_loss, name='objectness_loss') - loss_dict = {localization_loss.op.name: localization_loss, - objectness_loss.op.name: objectness_loss} - return loss_dict - - def _loss_box_classifier(self, - refined_box_encodings, - class_predictions_with_background, - proposal_boxes, - num_proposals, - groundtruth_boxlists, - groundtruth_classes_with_background_list, - groundtruth_weights_list, - image_shape, - prediction_masks=None, - groundtruth_masks_list=None): - """Computes scalar box classifier loss tensors. - - Uses self._detector_target_assigner to obtain regression and classification - targets for the second stage box classifier, optionally performs - hard mining, and returns losses. All losses are computed independently - for each image and then averaged across the batch. - Please note that for boxes and masks with multiple labels, the box - regression and mask prediction losses are only computed for one label. - - This function assumes that the proposal boxes in the "padded" regions are - actually zero (and thus should not be matched to). - - - Args: - refined_box_encodings: a 3-D tensor with shape - [total_num_proposals, num_classes, box_coder.code_size] representing - predicted (final) refined box encodings. If using a shared box across - classes this will instead have shape - [total_num_proposals, 1, box_coder.code_size]. - class_predictions_with_background: a 2-D tensor with shape - [total_num_proposals, num_classes + 1] containing class - predictions (logits) for each of the anchors. Note that this tensor - *includes* background class predictions (at class index 0). 
- proposal_boxes: [batch_size, self.max_num_proposals, 4] representing - decoded proposal bounding boxes. - num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch] - representing the number of proposals predicted for each image in - the batch. - groundtruth_boxlists: a list of BoxLists containing coordinates of the - groundtruth boxes. - groundtruth_classes_with_background_list: a list of 2-D one-hot - (or k-hot) tensors of shape [num_boxes, num_classes + 1] containing the - class targets with the 0th index assumed to map to the background class. - groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape - [num_boxes] containing weights for groundtruth boxes. - image_shape: a 1-D tensor of shape [4] representing the image shape. - prediction_masks: an optional 4-D tensor with shape [total_num_proposals, - num_classes, mask_height, mask_width] containing the instance masks for - each box. - groundtruth_masks_list: an optional list of 3-D tensors of shape - [num_boxes, image_height, image_width] containing the instance masks for - each of the boxes. - - Returns: - a dictionary mapping loss keys ('second_stage_localization_loss', - 'second_stage_classification_loss') to scalar tensors representing - corresponding loss values. - - Raises: - ValueError: if `predict_instance_masks` in - second_stage_mask_rcnn_box_predictor is True and - `groundtruth_masks_list` is not provided. - """ - with tf.name_scope('BoxClassifierLoss'): - paddings_indicator = self._padded_batched_proposals_indicator( - num_proposals, self.max_num_proposals) - proposal_boxlists = [ - box_list.BoxList(proposal_boxes_single_image) - for proposal_boxes_single_image in tf.unstack(proposal_boxes)] - batch_size = len(proposal_boxlists) - - num_proposals_or_one = tf.to_float(tf.expand_dims( - tf.maximum(num_proposals, tf.ones_like(num_proposals)), 1)) - normalizer = tf.tile(num_proposals_or_one, - [1, self.max_num_proposals]) * batch_size - - (batch_cls_targets_with_background, batch_cls_weights, batch_reg_targets, - batch_reg_weights, _) = target_assigner.batch_assign_targets( - self._detector_target_assigner, proposal_boxlists, - groundtruth_boxlists, groundtruth_classes_with_background_list, - groundtruth_weights_list) - - class_predictions_with_background = tf.reshape( - class_predictions_with_background, - [batch_size, self.max_num_proposals, -1]) - - flat_cls_targets_with_background = tf.reshape( - batch_cls_targets_with_background, - [batch_size * self.max_num_proposals, -1]) - one_hot_flat_cls_targets_with_background = tf.argmax( - flat_cls_targets_with_background, axis=1) - one_hot_flat_cls_targets_with_background = tf.one_hot( - one_hot_flat_cls_targets_with_background, - flat_cls_targets_with_background.get_shape()[1]) - - # If using a shared box across classes, use it directly. - if refined_box_encodings.shape[1] == 1: - reshaped_refined_box_encodings = tf.reshape( - refined_box_encodings, - [batch_size, self.max_num_proposals, self._box_coder.code_size]) - # For anchors with multiple labels, pick refined_location_encodings - # for just one class to avoid over-counting for regression loss and - # (optionally) mask loss.
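- # For example, with num_classes=2 and code_size=4, a proposal whose - # background-padded one-hot target is [0, 0, 1] keeps only its class-2 - # encoding row: tf.boolean_mask below reduces the padded - # [total_num_proposals, 3, 4] encodings to [total_num_proposals, 4].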
- else:
- # We only predict refined location encodings for the non-background
- # classes, but we pad them here to make the tensor compatible with the
- # class predictions.
- refined_box_encodings_with_background = tf.pad(
- refined_box_encodings, [[0, 0], [1, 0], [0, 0]])
- refined_box_encodings_masked_by_class_targets = tf.boolean_mask(
- refined_box_encodings_with_background,
- tf.greater(one_hot_flat_cls_targets_with_background, 0))
- reshaped_refined_box_encodings = tf.reshape(
- refined_box_encodings_masked_by_class_targets,
- [batch_size, self.max_num_proposals, self._box_coder.code_size])
-
- second_stage_loc_losses = self._second_stage_localization_loss(
- reshaped_refined_box_encodings,
- batch_reg_targets, weights=batch_reg_weights) / normalizer
- second_stage_cls_losses = ops.reduce_sum_trailing_dimensions(
- self._second_stage_classification_loss(
- class_predictions_with_background,
- batch_cls_targets_with_background,
- weights=batch_cls_weights),
- ndims=2) / normalizer
-
- second_stage_loc_loss = tf.reduce_sum(
- tf.boolean_mask(second_stage_loc_losses, paddings_indicator))
- second_stage_cls_loss = tf.reduce_sum(
- tf.boolean_mask(second_stage_cls_losses, paddings_indicator))
-
- if self._hard_example_miner:
- (second_stage_loc_loss, second_stage_cls_loss
- ) = self._unpad_proposals_and_apply_hard_mining(
- proposal_boxlists, second_stage_loc_losses,
- second_stage_cls_losses, num_proposals)
- localization_loss = tf.multiply(self._second_stage_loc_loss_weight,
- second_stage_loc_loss,
- name='localization_loss')
-
- classification_loss = tf.multiply(self._second_stage_cls_loss_weight,
- second_stage_cls_loss,
- name='classification_loss')
-
- loss_dict = {localization_loss.op.name: localization_loss,
- classification_loss.op.name: classification_loss}
- second_stage_mask_loss = None
- if prediction_masks is not None:
- if groundtruth_masks_list is None:
- raise ValueError('Groundtruth instance masks not provided. '
- 'Please configure input reader.')
-
- # Create a new target assigner that matches the proposals to groundtruth
- # and returns the mask targets.
- # TODO(rathodv): Move `unmatched_cls_target` from constructor to assign
- # function. This will enable reuse of a single target assigner for both
- # class targets and mask targets.
- mask_target_assigner = target_assigner.create_target_assigner(
- 'FasterRCNN', 'detection',
- unmatched_cls_target=tf.zeros(image_shape[1:3], dtype=tf.float32))
- (batch_mask_targets, _, _,
- batch_mask_target_weights, _) = target_assigner.batch_assign_targets(
- mask_target_assigner, proposal_boxlists, groundtruth_boxlists,
- groundtruth_masks_list, groundtruth_weights_list)
-
- # Pad the prediction_masks to add zeros for the background class, so that
- # they stay consistent with the class predictions.
- if prediction_masks.get_shape().as_list()[1] == 1:
- # Class-agnostic masks or masks for one-class prediction. Logic for
- # both cases is the same since background predictions are ignored
- # through the batch_mask_target_weights.
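- # For example, a class-agnostic predictor emits masks of shape
- # [total_num_proposals, 1, mask_height, mask_width]; the matched class
- # enters only through batch_mask_target_weights, so no per-class
- # gather is needed in this branch.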
- prediction_masks_masked_by_class_targets = prediction_masks - else: - prediction_masks_with_background = tf.pad( - prediction_masks, [[0, 0], [1, 0], [0, 0], [0, 0]]) - prediction_masks_masked_by_class_targets = tf.boolean_mask( - prediction_masks_with_background, - tf.greater(one_hot_flat_cls_targets_with_background, 0)) - - mask_height = prediction_masks.shape[2].value - mask_width = prediction_masks.shape[3].value - reshaped_prediction_masks = tf.reshape( - prediction_masks_masked_by_class_targets, - [batch_size, -1, mask_height * mask_width]) - - batch_mask_targets_shape = tf.shape(batch_mask_targets) - flat_gt_masks = tf.reshape(batch_mask_targets, - [-1, batch_mask_targets_shape[2], - batch_mask_targets_shape[3]]) - - # Use normalized proposals to crop mask targets from image masks. - flat_normalized_proposals = box_list_ops.to_normalized_coordinates( - box_list.BoxList(tf.reshape(proposal_boxes, [-1, 4])), - image_shape[1], image_shape[2]).get() - - flat_cropped_gt_mask = tf.image.crop_and_resize( - tf.expand_dims(flat_gt_masks, -1), - flat_normalized_proposals, - tf.range(flat_normalized_proposals.shape[0].value), - [mask_height, mask_width]) - - batch_cropped_gt_mask = tf.reshape( - flat_cropped_gt_mask, - [batch_size, -1, mask_height * mask_width]) - - second_stage_mask_losses = ops.reduce_sum_trailing_dimensions( - self._second_stage_mask_loss( - reshaped_prediction_masks, - batch_cropped_gt_mask, - weights=batch_mask_target_weights), - ndims=2) / ( - mask_height * mask_width * tf.maximum( - tf.reduce_sum( - batch_mask_target_weights, axis=1, keep_dims=True - ), tf.ones((batch_size, 1)))) - second_stage_mask_loss = tf.reduce_sum( - tf.boolean_mask(second_stage_mask_losses, paddings_indicator)) - - if second_stage_mask_loss is not None: - mask_loss = tf.multiply(self._second_stage_mask_loss_weight, - second_stage_mask_loss, name='mask_loss') - loss_dict[mask_loss.op.name] = mask_loss - return loss_dict - - def _padded_batched_proposals_indicator(self, - num_proposals, - max_num_proposals): - """Creates indicator matrix of non-pad elements of padded batch proposals. - - Args: - num_proposals: Tensor of type tf.int32 with shape [batch_size]. - max_num_proposals: Maximum number of proposals per image (integer). - - Returns: - A Tensor of type tf.bool with shape [batch_size, max_num_proposals]. - """ - batch_size = tf.size(num_proposals) - tiled_num_proposals = tf.tile( - tf.expand_dims(num_proposals, 1), [1, max_num_proposals]) - tiled_proposal_index = tf.tile( - tf.expand_dims(tf.range(max_num_proposals), 0), [batch_size, 1]) - return tf.greater(tiled_num_proposals, tiled_proposal_index) - - def _unpad_proposals_and_apply_hard_mining(self, - proposal_boxlists, - second_stage_loc_losses, - second_stage_cls_losses, - num_proposals): - """Unpads proposals and applies hard mining. - - Args: - proposal_boxlists: A list of `batch_size` BoxLists each representing - `self.max_num_proposals` representing decoded proposal bounding boxes - for each image. - second_stage_loc_losses: A Tensor of type `float32`. A tensor of shape - `[batch_size, self.max_num_proposals]` representing per-anchor - second stage localization loss values. - second_stage_cls_losses: A Tensor of type `float32`. A tensor of shape - `[batch_size, self.max_num_proposals]` representing per-anchor - second stage classification loss values. - num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch] - representing the number of proposals predicted for each image in - the batch. 
- - Returns: - second_stage_loc_loss: A scalar float32 tensor representing the second - stage localization loss. - second_stage_cls_loss: A scalar float32 tensor representing the second - stage classification loss. - """ - for (proposal_boxlist, single_image_loc_loss, single_image_cls_loss, - single_image_num_proposals) in zip( - proposal_boxlists, - tf.unstack(second_stage_loc_losses), - tf.unstack(second_stage_cls_losses), - tf.unstack(num_proposals)): - proposal_boxlist = box_list.BoxList( - tf.slice(proposal_boxlist.get(), - [0, 0], [single_image_num_proposals, -1])) - single_image_loc_loss = tf.slice(single_image_loc_loss, - [0], [single_image_num_proposals]) - single_image_cls_loss = tf.slice(single_image_cls_loss, - [0], [single_image_num_proposals]) - return self._hard_example_miner( - location_losses=tf.expand_dims(single_image_loc_loss, 0), - cls_losses=tf.expand_dims(single_image_cls_loss, 0), - decoded_boxlist_list=[proposal_boxlist]) - - def restore_map(self, - fine_tune_checkpoint_type='detection', - load_all_detection_checkpoint_vars=False): - """Returns a map of variables to load from a foreign checkpoint. - - See parent class for details. - - Args: - fine_tune_checkpoint_type: whether to restore from a full detection - checkpoint (with compatible variable names) or to restore from a - classification checkpoint for initialization prior to training. - Valid values: `detection`, `classification`. Default 'detection'. - load_all_detection_checkpoint_vars: whether to load all variables (when - `fine_tune_checkpoint_type` is `detection`). If False, only variables - within the feature extractor scopes are included. Default False. - - Returns: - A dict mapping variable names (to load from a checkpoint) to variables in - the model graph. - Raises: - ValueError: if fine_tune_checkpoint_type is neither `classification` - nor `detection`. - """ - if fine_tune_checkpoint_type not in ['detection', 'classification']: - raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format( - fine_tune_checkpoint_type)) - if fine_tune_checkpoint_type == 'classification': - return self._feature_extractor.restore_from_classification_checkpoint_fn( - self.first_stage_feature_extractor_scope, - self.second_stage_feature_extractor_scope) - - variables_to_restore = tf.global_variables() - variables_to_restore.append(slim.get_or_create_global_step()) - # Only load feature extractor variables to be consistent with loading from - # a classification checkpoint. - include_patterns = None - if not load_all_detection_checkpoint_vars: - include_patterns = [ - self.first_stage_feature_extractor_scope, - self.second_stage_feature_extractor_scope - ] - feature_extractor_variables = tf.contrib.framework.filter_variables( - variables_to_restore, include_patterns=include_patterns) - return {var.op.name: var for var in feature_extractor_variables} diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py deleted file mode 100644 index 93bd34c915c848d0d44013a059031c14bdbff60c..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py +++ /dev/null @@ -1,370 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.meta_architectures.faster_rcnn_meta_arch.""" - -from absl.testing import parameterized -import numpy as np -import tensorflow as tf - -from object_detection.meta_architectures import faster_rcnn_meta_arch_test_lib - - -class FasterRCNNMetaArchTest( - faster_rcnn_meta_arch_test_lib.FasterRCNNMetaArchTestBase, - parameterized.TestCase): - - def test_postprocess_second_stage_only_inference_mode_with_masks(self): - model = self._build_model( - is_training=False, number_of_stages=2, second_stage_batch_size=6) - - batch_size = 2 - total_num_padded_proposals = batch_size * model.max_num_proposals - proposal_boxes = tf.constant( - [[[1, 1, 2, 3], - [0, 0, 1, 1], - [.5, .5, .6, .6], - 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]], - [[2, 3, 6, 8], - [1, 2, 5, 3], - 4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]], dtype=tf.float32) - num_proposals = tf.constant([3, 2], dtype=tf.int32) - refined_box_encodings = tf.zeros( - [total_num_padded_proposals, model.num_classes, 4], dtype=tf.float32) - class_predictions_with_background = tf.ones( - [total_num_padded_proposals, model.num_classes+1], dtype=tf.float32) - image_shape = tf.constant([batch_size, 36, 48, 3], dtype=tf.int32) - - mask_height = 2 - mask_width = 2 - mask_predictions = 30. 
* tf.ones(
- [total_num_padded_proposals, model.num_classes,
- mask_height, mask_width], dtype=tf.float32)
- exp_detection_masks = np.array([[[[1, 1], [1, 1]],
- [[1, 1], [1, 1]],
- [[1, 1], [1, 1]],
- [[1, 1], [1, 1]],
- [[1, 1], [1, 1]]],
- [[[1, 1], [1, 1]],
- [[1, 1], [1, 1]],
- [[1, 1], [1, 1]],
- [[1, 1], [1, 1]],
- [[0, 0], [0, 0]]]])
-
- _, true_image_shapes = model.preprocess(tf.zeros(image_shape))
- detections = model.postprocess({
- 'refined_box_encodings': refined_box_encodings,
- 'class_predictions_with_background': class_predictions_with_background,
- 'num_proposals': num_proposals,
- 'proposal_boxes': proposal_boxes,
- 'image_shape': image_shape,
- 'mask_predictions': mask_predictions
- }, true_image_shapes)
- with self.test_session() as sess:
- detections_out = sess.run(detections)
- self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4])
- self.assertAllClose(detections_out['detection_scores'],
- [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]])
- self.assertAllClose(detections_out['detection_classes'],
- [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]])
- self.assertAllClose(detections_out['num_detections'], [5, 4])
- self.assertAllClose(detections_out['detection_masks'],
- exp_detection_masks)
- self.assertLessEqual(np.amax(detections_out['detection_masks']), 1.0)
- self.assertGreaterEqual(np.amin(detections_out['detection_masks']), 0.0)
-
- def test_postprocess_second_stage_only_inference_mode_with_shared_boxes(self):
- model = self._build_model(
- is_training=False, number_of_stages=2, second_stage_batch_size=6)
-
- batch_size = 2
- total_num_padded_proposals = batch_size * model.max_num_proposals
- proposal_boxes = tf.constant(
- [[[1, 1, 2, 3],
- [0, 0, 1, 1],
- [.5, .5, .6, .6],
- 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]],
- [[2, 3, 6, 8],
- [1, 2, 5, 3],
- 4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]], dtype=tf.float32)
- num_proposals = tf.constant([3, 2], dtype=tf.int32)
-
- # Here a single box prediction is shared across all classes instead of
- # one box per class.
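- # Shape sketch: with batch_size = 2 and max_num_proposals = 8,
- # total_num_padded_proposals = 16, so the shared-box encodings below are
- # [16, 1, 4] rather than the per-class [16, 2, 4] used in the previous
- # test.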
- refined_box_encodings = tf.zeros( - [total_num_padded_proposals, 1, 4], dtype=tf.float32) - class_predictions_with_background = tf.ones( - [total_num_padded_proposals, model.num_classes+1], dtype=tf.float32) - image_shape = tf.constant([batch_size, 36, 48, 3], dtype=tf.int32) - - _, true_image_shapes = model.preprocess(tf.zeros(image_shape)) - detections = model.postprocess({ - 'refined_box_encodings': refined_box_encodings, - 'class_predictions_with_background': class_predictions_with_background, - 'num_proposals': num_proposals, - 'proposal_boxes': proposal_boxes, - 'image_shape': image_shape, - }, true_image_shapes) - with self.test_session() as sess: - detections_out = sess.run(detections) - self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4]) - self.assertAllClose(detections_out['detection_scores'], - [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]]) - self.assertAllClose(detections_out['detection_classes'], - [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]]) - self.assertAllClose(detections_out['num_detections'], [5, 4]) - - @parameterized.parameters( - {'masks_are_class_agnostic': False}, - {'masks_are_class_agnostic': True}, - ) - def test_predict_correct_shapes_in_inference_mode_three_stages_with_masks( - self, masks_are_class_agnostic): - batch_size = 2 - image_size = 10 - max_num_proposals = 8 - initial_crop_size = 3 - maxpool_stride = 1 - - input_shapes = [(batch_size, image_size, image_size, 3), - (None, image_size, image_size, 3), - (batch_size, None, None, 3), - (None, None, None, 3)] - expected_num_anchors = image_size * image_size * 3 * 3 - expected_shapes = { - 'rpn_box_predictor_features': - (2, image_size, image_size, 512), - 'rpn_features_to_crop': (2, image_size, image_size, 3), - 'image_shape': (4,), - 'rpn_box_encodings': (2, expected_num_anchors, 4), - 'rpn_objectness_predictions_with_background': - (2, expected_num_anchors, 2), - 'anchors': (expected_num_anchors, 4), - 'refined_box_encodings': (2 * max_num_proposals, 2, 4), - 'class_predictions_with_background': (2 * max_num_proposals, 2 + 1), - 'num_proposals': (2,), - 'proposal_boxes': (2, max_num_proposals, 4), - 'proposal_boxes_normalized': (2, max_num_proposals, 4), - 'box_classifier_features': - self._get_box_classifier_features_shape(image_size, - batch_size, - max_num_proposals, - initial_crop_size, - maxpool_stride, - 3) - } - - for input_shape in input_shapes: - test_graph = tf.Graph() - with test_graph.as_default(): - model = self._build_model( - is_training=False, - number_of_stages=3, - second_stage_batch_size=2, - predict_masks=True, - masks_are_class_agnostic=masks_are_class_agnostic) - preprocessed_inputs = tf.placeholder(tf.float32, shape=input_shape) - _, true_image_shapes = model.preprocess(preprocessed_inputs) - result_tensor_dict = model.predict(preprocessed_inputs, - true_image_shapes) - init_op = tf.global_variables_initializer() - with self.test_session(graph=test_graph) as sess: - sess.run(init_op) - tensor_dict_out = sess.run(result_tensor_dict, feed_dict={ - preprocessed_inputs: - np.zeros((batch_size, image_size, image_size, 3))}) - self.assertEqual( - set(tensor_dict_out.keys()), - set(expected_shapes.keys()).union( - set([ - 'detection_boxes', 'detection_scores', 'detection_classes', - 'detection_masks', 'num_detections' - ]))) - for key in expected_shapes: - self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key]) - self.assertAllEqual(tensor_dict_out['detection_boxes'].shape, [2, 5, 4]) - self.assertAllEqual(tensor_dict_out['detection_masks'].shape, - [2, 5, 14, 14]) - 
self.assertAllEqual(tensor_dict_out['detection_classes'].shape, [2, 5]) - self.assertAllEqual(tensor_dict_out['detection_scores'].shape, [2, 5]) - self.assertAllEqual(tensor_dict_out['num_detections'].shape, [2]) - - @parameterized.parameters( - {'masks_are_class_agnostic': False}, - {'masks_are_class_agnostic': True}, - ) - def test_predict_gives_correct_shapes_in_train_mode_both_stages_with_masks( - self, masks_are_class_agnostic): - test_graph = tf.Graph() - with test_graph.as_default(): - model = self._build_model( - is_training=True, - number_of_stages=3, - second_stage_batch_size=7, - predict_masks=True, - masks_are_class_agnostic=masks_are_class_agnostic) - batch_size = 2 - image_size = 10 - max_num_proposals = 7 - initial_crop_size = 3 - maxpool_stride = 1 - - image_shape = (batch_size, image_size, image_size, 3) - preprocessed_inputs = tf.zeros(image_shape, dtype=tf.float32) - groundtruth_boxes_list = [ - tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32), - tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32) - ] - groundtruth_classes_list = [ - tf.constant([[1, 0], [0, 1]], dtype=tf.float32), - tf.constant([[1, 0], [1, 0]], dtype=tf.float32) - ] - _, true_image_shapes = model.preprocess(tf.zeros(image_shape)) - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list) - - result_tensor_dict = model.predict(preprocessed_inputs, true_image_shapes) - mask_shape_1 = 1 if masks_are_class_agnostic else model._num_classes - expected_shapes = { - 'rpn_box_predictor_features': (2, image_size, image_size, 512), - 'rpn_features_to_crop': (2, image_size, image_size, 3), - 'image_shape': (4,), - 'refined_box_encodings': (2 * max_num_proposals, 2, 4), - 'class_predictions_with_background': (2 * max_num_proposals, 2 + 1), - 'num_proposals': (2,), - 'proposal_boxes': (2, max_num_proposals, 4), - 'proposal_boxes_normalized': (2, max_num_proposals, 4), - 'box_classifier_features': - self._get_box_classifier_features_shape( - image_size, batch_size, max_num_proposals, initial_crop_size, - maxpool_stride, 3), - 'mask_predictions': (2 * max_num_proposals, mask_shape_1, 14, 14) - } - - init_op = tf.global_variables_initializer() - with self.test_session(graph=test_graph) as sess: - sess.run(init_op) - tensor_dict_out = sess.run(result_tensor_dict) - self.assertEqual( - set(tensor_dict_out.keys()), - set(expected_shapes.keys()).union( - set([ - 'rpn_box_encodings', - 'rpn_objectness_predictions_with_background', - 'anchors', - ]))) - for key in expected_shapes: - self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key]) - - anchors_shape_out = tensor_dict_out['anchors'].shape - self.assertEqual(2, len(anchors_shape_out)) - self.assertEqual(4, anchors_shape_out[1]) - num_anchors_out = anchors_shape_out[0] - self.assertAllEqual(tensor_dict_out['rpn_box_encodings'].shape, - (2, num_anchors_out, 4)) - self.assertAllEqual( - tensor_dict_out['rpn_objectness_predictions_with_background'].shape, - (2, num_anchors_out, 2)) - - def test_postprocess_third_stage_only_inference_mode(self): - num_proposals_shapes = [(2), (None)] - refined_box_encodings_shapes = [(16, 2, 4), (None, 2, 4)] - class_predictions_with_background_shapes = [(16, 3), (None, 3)] - proposal_boxes_shapes = [(2, 8, 4), (None, 8, 4)] - batch_size = 2 - image_shape = np.array((2, 36, 48, 3), dtype=np.int32) - for (num_proposals_shape, refined_box_encoding_shape, - class_predictions_with_background_shape, - proposal_boxes_shape) in zip(num_proposals_shapes, - refined_box_encodings_shapes, - 
class_predictions_with_background_shapes,
- proposal_boxes_shapes):
- tf_graph = tf.Graph()
- with tf_graph.as_default():
- model = self._build_model(
- is_training=False, number_of_stages=3,
- second_stage_batch_size=6, predict_masks=True)
- total_num_padded_proposals = batch_size * model.max_num_proposals
- proposal_boxes = np.array(
- [[[1, 1, 2, 3],
- [0, 0, 1, 1],
- [.5, .5, .6, .6],
- 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]],
- [[2, 3, 6, 8],
- [1, 2, 5, 3],
- 4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]])
- num_proposals = np.array([3, 2], dtype=np.int32)
- refined_box_encodings = np.zeros(
- [total_num_padded_proposals, model.num_classes, 4])
- class_predictions_with_background = np.ones(
- [total_num_padded_proposals, model.num_classes+1])
-
- num_proposals_placeholder = tf.placeholder(tf.int32,
- shape=num_proposals_shape)
- refined_box_encodings_placeholder = tf.placeholder(
- tf.float32, shape=refined_box_encoding_shape)
- class_predictions_with_background_placeholder = tf.placeholder(
- tf.float32, shape=class_predictions_with_background_shape)
- proposal_boxes_placeholder = tf.placeholder(
- tf.float32, shape=proposal_boxes_shape)
- image_shape_placeholder = tf.placeholder(tf.int32, shape=(4))
- _, true_image_shapes = model.preprocess(
- tf.zeros(image_shape_placeholder))
- detections = model.postprocess({
- 'refined_box_encodings': refined_box_encodings_placeholder,
- 'class_predictions_with_background':
- class_predictions_with_background_placeholder,
- 'num_proposals': num_proposals_placeholder,
- 'proposal_boxes': proposal_boxes_placeholder,
- 'image_shape': image_shape_placeholder,
- 'detection_boxes': tf.zeros([2, 5, 4]),
- 'detection_masks': tf.zeros([2, 5, 14, 14]),
- 'detection_scores': tf.zeros([2, 5]),
- 'detection_classes': tf.zeros([2, 5]),
- 'num_detections': tf.zeros([2]),
- }, true_image_shapes)
- with self.test_session(graph=tf_graph) as sess:
- detections_out = sess.run(
- detections,
- feed_dict={
- refined_box_encodings_placeholder: refined_box_encodings,
- class_predictions_with_background_placeholder:
- class_predictions_with_background,
- num_proposals_placeholder: num_proposals,
- proposal_boxes_placeholder: proposal_boxes,
- image_shape_placeholder: image_shape
- })
- self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4])
- self.assertAllEqual(detections_out['detection_masks'].shape,
- [2, 5, 14, 14])
- self.assertAllEqual(detections_out['detection_scores'].shape, [2, 5])
- self.assertAllEqual(detections_out['detection_classes'].shape, [2, 5])
- self.assertAllEqual(detections_out['num_detections'].shape, [2])
- self.assertLessEqual(np.amax(detections_out['detection_masks']), 1.0)
- self.assertGreaterEqual(np.amin(detections_out['detection_masks']), 0.0)
-
- def _get_box_classifier_features_shape(self,
- image_size,
- batch_size,
- max_num_proposals,
- initial_crop_size,
- maxpool_stride,
- num_features):
- return (batch_size * max_num_proposals,
- initial_crop_size/maxpool_stride,
- initial_crop_size/maxpool_stride,
- num_features)
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py
deleted file mode 100644
index 00c7054006b2a7d63f3bf77fa74e5dc4e170501e..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py
+++
/dev/null @@ -1,1512 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.meta_architectures.faster_rcnn_meta_arch."""
-import numpy as np
-import tensorflow as tf
-from google.protobuf import text_format
-from object_detection.anchor_generators import grid_anchor_generator
-from object_detection.builders import box_predictor_builder
-from object_detection.builders import hyperparams_builder
-from object_detection.builders import post_processing_builder
-from object_detection.core import losses
-from object_detection.meta_architectures import faster_rcnn_meta_arch
-from object_detection.protos import box_predictor_pb2
-from object_detection.protos import hyperparams_pb2
-from object_detection.protos import post_processing_pb2
-from object_detection.utils import test_utils
-
-slim = tf.contrib.slim
-BOX_CODE_SIZE = 4
-
-
-class FakeFasterRCNNFeatureExtractor(
- faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
- """Fake feature extractor to use in tests."""
-
- def __init__(self):
- super(FakeFasterRCNNFeatureExtractor, self).__init__(
- is_training=False,
- first_stage_features_stride=32,
- reuse_weights=None,
- weight_decay=0.0)
-
- def preprocess(self, resized_inputs):
- return tf.identity(resized_inputs)
-
- def _extract_proposal_features(self, preprocessed_inputs, scope):
- with tf.variable_scope('mock_model'):
- proposal_features = 0 * slim.conv2d(
- preprocessed_inputs, num_outputs=3, kernel_size=1, scope='layer1')
- return proposal_features, {}
-
- def _extract_box_classifier_features(self, proposal_feature_maps, scope):
- with tf.variable_scope('mock_model'):
- return 0 * slim.conv2d(proposal_feature_maps,
- num_outputs=3, kernel_size=1, scope='layer2')
-
-
-class FasterRCNNMetaArchTestBase(tf.test.TestCase):
- """Base class to test Faster R-CNN and R-FCN meta architectures."""
-
- def _build_arg_scope_with_hyperparams(self,
- hyperparams_text_proto,
- is_training):
- hyperparams = hyperparams_pb2.Hyperparams()
- text_format.Merge(hyperparams_text_proto, hyperparams)
- return hyperparams_builder.build(hyperparams, is_training=is_training)
-
- def _get_second_stage_box_predictor_text_proto(self):
- box_predictor_text_proto = """
- mask_rcnn_box_predictor {
- fc_hyperparams {
- op: FC
- activation: NONE
- regularizer {
- l2_regularizer {
- weight: 0.0005
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- }
- """
- return box_predictor_text_proto
-
- def _add_mask_to_second_stage_box_predictor_text_proto(
- self, masks_are_class_agnostic=False):
- agnostic = 'true' if masks_are_class_agnostic else 'false'
- box_predictor_text_proto = """
- mask_rcnn_box_predictor {
- predict_instance_masks: true
- masks_are_class_agnostic: """ + agnostic + """
- mask_height: 14
- mask_width: 14
- conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - } - """ - return box_predictor_text_proto - - def _get_second_stage_box_predictor(self, num_classes, is_training, - predict_masks, masks_are_class_agnostic): - box_predictor_proto = box_predictor_pb2.BoxPredictor() - text_format.Merge(self._get_second_stage_box_predictor_text_proto(), - box_predictor_proto) - if predict_masks: - text_format.Merge( - self._add_mask_to_second_stage_box_predictor_text_proto( - masks_are_class_agnostic), - box_predictor_proto) - - return box_predictor_builder.build( - hyperparams_builder.build, - box_predictor_proto, - num_classes=num_classes, - is_training=is_training) - - def _get_model(self, box_predictor, **common_kwargs): - return faster_rcnn_meta_arch.FasterRCNNMetaArch( - initial_crop_size=3, - maxpool_kernel_size=1, - maxpool_stride=1, - second_stage_mask_rcnn_box_predictor=box_predictor, - **common_kwargs) - - def _build_model(self, - is_training, - number_of_stages, - second_stage_batch_size, - first_stage_max_proposals=8, - num_classes=2, - hard_mining=False, - softmax_second_stage_classification_loss=True, - predict_masks=False, - pad_to_max_dimension=None, - masks_are_class_agnostic=False): - - def image_resizer_fn(image, masks=None): - """Fake image resizer function.""" - resized_inputs = [] - resized_image = tf.identity(image) - if pad_to_max_dimension is not None: - resized_image = tf.image.pad_to_bounding_box(image, 0, 0, - pad_to_max_dimension, - pad_to_max_dimension) - resized_inputs.append(resized_image) - if masks is not None: - resized_masks = tf.identity(masks) - if pad_to_max_dimension is not None: - resized_masks = tf.image.pad_to_bounding_box(tf.transpose(masks, - [1, 2, 0]), - 0, 0, - pad_to_max_dimension, - pad_to_max_dimension) - resized_masks = tf.transpose(resized_masks, [2, 0, 1]) - resized_inputs.append(resized_masks) - resized_inputs.append(tf.shape(image)) - return resized_inputs - - # anchors in this test are designed so that a subset of anchors are inside - # the image and a subset of anchors are outside. 
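- # Worked count for reference: 3 scales x 3 aspect ratios with a (1, 1)
- # anchor stride give 9 anchors per feature-map cell, so the 10x10 inputs
- # used in the shape tests yield 10 * 10 * 3 * 3 = 900 anchors
- # (`expected_num_anchors` there).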
- first_stage_anchor_scales = (0.001, 0.005, 0.1) - first_stage_anchor_aspect_ratios = (0.5, 1.0, 2.0) - first_stage_anchor_strides = (1, 1) - first_stage_anchor_generator = grid_anchor_generator.GridAnchorGenerator( - first_stage_anchor_scales, - first_stage_anchor_aspect_ratios, - anchor_stride=first_stage_anchor_strides) - - fake_feature_extractor = FakeFasterRCNNFeatureExtractor() - - first_stage_box_predictor_hyperparams_text_proto = """ - op: CONV - activation: RELU - regularizer { - l2_regularizer { - weight: 0.00004 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.03 - } - } - """ - first_stage_box_predictor_arg_scope_fn = ( - self._build_arg_scope_with_hyperparams( - first_stage_box_predictor_hyperparams_text_proto, is_training)) - - first_stage_box_predictor_kernel_size = 3 - first_stage_atrous_rate = 1 - first_stage_box_predictor_depth = 512 - first_stage_minibatch_size = 3 - first_stage_positive_balance_fraction = .5 - - first_stage_nms_score_threshold = -1.0 - first_stage_nms_iou_threshold = 1.0 - first_stage_max_proposals = first_stage_max_proposals - - first_stage_localization_loss_weight = 1.0 - first_stage_objectness_loss_weight = 1.0 - - post_processing_text_proto = """ - batch_non_max_suppression { - score_threshold: -20.0 - iou_threshold: 1.0 - max_detections_per_class: 5 - max_total_detections: 5 - } - """ - post_processing_config = post_processing_pb2.PostProcessing() - text_format.Merge(post_processing_text_proto, post_processing_config) - second_stage_non_max_suppression_fn, _ = post_processing_builder.build( - post_processing_config) - second_stage_balance_fraction = 1.0 - - second_stage_score_conversion_fn = tf.identity - second_stage_localization_loss_weight = 1.0 - second_stage_classification_loss_weight = 1.0 - if softmax_second_stage_classification_loss: - second_stage_classification_loss = ( - losses.WeightedSoftmaxClassificationLoss()) - else: - second_stage_classification_loss = ( - losses.WeightedSigmoidClassificationLoss()) - - hard_example_miner = None - if hard_mining: - hard_example_miner = losses.HardExampleMiner( - num_hard_examples=1, - iou_threshold=0.99, - loss_type='both', - cls_loss_weight=second_stage_classification_loss_weight, - loc_loss_weight=second_stage_localization_loss_weight, - max_negatives_per_positive=None) - - common_kwargs = { - 'is_training': is_training, - 'num_classes': num_classes, - 'image_resizer_fn': image_resizer_fn, - 'feature_extractor': fake_feature_extractor, - 'number_of_stages': number_of_stages, - 'first_stage_anchor_generator': first_stage_anchor_generator, - 'first_stage_atrous_rate': first_stage_atrous_rate, - 'first_stage_box_predictor_arg_scope_fn': - first_stage_box_predictor_arg_scope_fn, - 'first_stage_box_predictor_kernel_size': - first_stage_box_predictor_kernel_size, - 'first_stage_box_predictor_depth': first_stage_box_predictor_depth, - 'first_stage_minibatch_size': first_stage_minibatch_size, - 'first_stage_positive_balance_fraction': - first_stage_positive_balance_fraction, - 'first_stage_nms_score_threshold': first_stage_nms_score_threshold, - 'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold, - 'first_stage_max_proposals': first_stage_max_proposals, - 'first_stage_localization_loss_weight': - first_stage_localization_loss_weight, - 'first_stage_objectness_loss_weight': - first_stage_objectness_loss_weight, - 'second_stage_batch_size': second_stage_batch_size, - 'second_stage_balance_fraction': second_stage_balance_fraction, - 'second_stage_non_max_suppression_fn': - 
second_stage_non_max_suppression_fn, - 'second_stage_score_conversion_fn': second_stage_score_conversion_fn, - 'second_stage_localization_loss_weight': - second_stage_localization_loss_weight, - 'second_stage_classification_loss_weight': - second_stage_classification_loss_weight, - 'second_stage_classification_loss': - second_stage_classification_loss, - 'hard_example_miner': hard_example_miner} - - return self._get_model( - self._get_second_stage_box_predictor( - num_classes=num_classes, - is_training=is_training, - predict_masks=predict_masks, - masks_are_class_agnostic=masks_are_class_agnostic), **common_kwargs) - - def test_predict_gives_correct_shapes_in_inference_mode_first_stage_only( - self): - test_graph = tf.Graph() - with test_graph.as_default(): - model = self._build_model( - is_training=False, number_of_stages=1, second_stage_batch_size=2) - batch_size = 2 - height = 10 - width = 12 - input_image_shape = (batch_size, height, width, 3) - - _, true_image_shapes = model.preprocess(tf.zeros(input_image_shape)) - preprocessed_inputs = tf.placeholder( - dtype=tf.float32, shape=(batch_size, None, None, 3)) - prediction_dict = model.predict(preprocessed_inputs, true_image_shapes) - - # In inference mode, anchors are clipped to the image window, but not - # pruned. Since MockFasterRCNN.extract_proposal_features returns a - # tensor with the same shape as its input, the expected number of anchors - # is height * width * the number of anchors per location (i.e. 3x3). - expected_num_anchors = height * width * 3 * 3 - expected_output_keys = set([ - 'rpn_box_predictor_features', 'rpn_features_to_crop', 'image_shape', - 'rpn_box_encodings', 'rpn_objectness_predictions_with_background', - 'anchors']) - expected_output_shapes = { - 'rpn_box_predictor_features': (batch_size, height, width, 512), - 'rpn_features_to_crop': (batch_size, height, width, 3), - 'rpn_box_encodings': (batch_size, expected_num_anchors, 4), - 'rpn_objectness_predictions_with_background': - (batch_size, expected_num_anchors, 2), - 'anchors': (expected_num_anchors, 4) - } - - init_op = tf.global_variables_initializer() - with self.test_session(graph=test_graph) as sess: - sess.run(init_op) - prediction_out = sess.run(prediction_dict, - feed_dict={ - preprocessed_inputs: - np.zeros(input_image_shape) - }) - - self.assertEqual(set(prediction_out.keys()), expected_output_keys) - - self.assertAllEqual(prediction_out['image_shape'], input_image_shape) - for output_key, expected_shape in expected_output_shapes.items(): - self.assertAllEqual(prediction_out[output_key].shape, expected_shape) - - # Check that anchors are clipped to window. 
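- # Equivalent numpy statement of the invariant (illustrative): with boxes
- # in [ymin, xmin, ymax, xmax] order and window [0, 0, height, width],
- # np.clip(anchors, 0, [height, width, height, width]) should leave
- # `anchors` unchanged; the bounds are verified coordinate-wise below.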
- anchors = prediction_out['anchors'] - self.assertTrue(np.all(np.greater_equal(anchors, 0))) - self.assertTrue(np.all(np.less_equal(anchors[:, 0], height))) - self.assertTrue(np.all(np.less_equal(anchors[:, 1], width))) - self.assertTrue(np.all(np.less_equal(anchors[:, 2], height))) - self.assertTrue(np.all(np.less_equal(anchors[:, 3], width))) - - def test_predict_gives_valid_anchors_in_training_mode_first_stage_only(self): - test_graph = tf.Graph() - with test_graph.as_default(): - model = self._build_model( - is_training=True, number_of_stages=1, second_stage_batch_size=2) - batch_size = 2 - height = 10 - width = 12 - input_image_shape = (batch_size, height, width, 3) - _, true_image_shapes = model.preprocess(tf.zeros(input_image_shape)) - preprocessed_inputs = tf.placeholder( - dtype=tf.float32, shape=(batch_size, None, None, 3)) - prediction_dict = model.predict(preprocessed_inputs, true_image_shapes) - - expected_output_keys = set([ - 'rpn_box_predictor_features', 'rpn_features_to_crop', 'image_shape', - 'rpn_box_encodings', 'rpn_objectness_predictions_with_background', - 'anchors']) - # At training time, anchors that exceed image bounds are pruned. Thus - # the `expected_num_anchors` in the above inference mode test is now - # a strict upper bound on the number of anchors. - num_anchors_strict_upper_bound = height * width * 3 * 3 - - init_op = tf.global_variables_initializer() - with self.test_session(graph=test_graph) as sess: - sess.run(init_op) - prediction_out = sess.run(prediction_dict, - feed_dict={ - preprocessed_inputs: - np.zeros(input_image_shape) - }) - - self.assertEqual(set(prediction_out.keys()), expected_output_keys) - self.assertAllEqual(prediction_out['image_shape'], input_image_shape) - - # Check that anchors have less than the upper bound and - # are clipped to window. 
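- # Unlike the inference-mode test above, which only clips, training mode
- # also prunes: anchors straddling the image boundary are removed
- # entirely, so the anchor count falls strictly below
- # height * width * 3 * 3.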
- anchors = prediction_out['anchors'] - self.assertTrue(len(anchors.shape) == 2 and anchors.shape[1] == 4) - num_anchors_out = anchors.shape[0] - self.assertTrue(num_anchors_out < num_anchors_strict_upper_bound) - - self.assertTrue(np.all(np.greater_equal(anchors, 0))) - self.assertTrue(np.all(np.less_equal(anchors[:, 0], height))) - self.assertTrue(np.all(np.less_equal(anchors[:, 1], width))) - self.assertTrue(np.all(np.less_equal(anchors[:, 2], height))) - self.assertTrue(np.all(np.less_equal(anchors[:, 3], width))) - - self.assertAllEqual(prediction_out['rpn_box_encodings'].shape, - (batch_size, num_anchors_out, 4)) - self.assertAllEqual( - prediction_out['rpn_objectness_predictions_with_background'].shape, - (batch_size, num_anchors_out, 2)) - - def test_predict_correct_shapes_in_inference_mode_two_stages(self): - batch_size = 2 - image_size = 10 - max_num_proposals = 8 - initial_crop_size = 3 - maxpool_stride = 1 - - input_shapes = [(batch_size, image_size, image_size, 3), - (None, image_size, image_size, 3), - (batch_size, None, None, 3), - (None, None, None, 3)] - expected_num_anchors = image_size * image_size * 3 * 3 - expected_shapes = { - 'rpn_box_predictor_features': - (2, image_size, image_size, 512), - 'rpn_features_to_crop': (2, image_size, image_size, 3), - 'image_shape': (4,), - 'rpn_box_encodings': (2, expected_num_anchors, 4), - 'rpn_objectness_predictions_with_background': - (2, expected_num_anchors, 2), - 'anchors': (expected_num_anchors, 4), - 'refined_box_encodings': (2 * max_num_proposals, 2, 4), - 'class_predictions_with_background': (2 * max_num_proposals, 2 + 1), - 'num_proposals': (2,), - 'proposal_boxes': (2, max_num_proposals, 4), - 'proposal_boxes_normalized': (2, max_num_proposals, 4), - 'box_classifier_features': - self._get_box_classifier_features_shape(image_size, - batch_size, - max_num_proposals, - initial_crop_size, - maxpool_stride, - 3) - } - - for input_shape in input_shapes: - test_graph = tf.Graph() - with test_graph.as_default(): - model = self._build_model( - is_training=False, - number_of_stages=2, - second_stage_batch_size=2, - predict_masks=False) - preprocessed_inputs = tf.placeholder(tf.float32, shape=input_shape) - _, true_image_shapes = model.preprocess(preprocessed_inputs) - result_tensor_dict = model.predict( - preprocessed_inputs, true_image_shapes) - init_op = tf.global_variables_initializer() - with self.test_session(graph=test_graph) as sess: - sess.run(init_op) - tensor_dict_out = sess.run(result_tensor_dict, feed_dict={ - preprocessed_inputs: - np.zeros((batch_size, image_size, image_size, 3))}) - self.assertEqual(set(tensor_dict_out.keys()), - set(expected_shapes.keys())) - for key in expected_shapes: - self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key]) - - def test_predict_gives_correct_shapes_in_train_mode_both_stages(self): - test_graph = tf.Graph() - with test_graph.as_default(): - model = self._build_model( - is_training=True, - number_of_stages=2, - second_stage_batch_size=7, - predict_masks=False) - - batch_size = 2 - image_size = 10 - max_num_proposals = 7 - initial_crop_size = 3 - maxpool_stride = 1 - - image_shape = (batch_size, image_size, image_size, 3) - preprocessed_inputs = tf.zeros(image_shape, dtype=tf.float32) - groundtruth_boxes_list = [ - tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32), - tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)] - groundtruth_classes_list = [ - tf.constant([[1, 0], [0, 1]], dtype=tf.float32), - tf.constant([[1, 0], [1, 0]], 
dtype=tf.float32)] - - _, true_image_shapes = model.preprocess(tf.zeros(image_shape)) - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list) - - result_tensor_dict = model.predict(preprocessed_inputs, true_image_shapes) - expected_shapes = { - 'rpn_box_predictor_features': - (2, image_size, image_size, 512), - 'rpn_features_to_crop': (2, image_size, image_size, 3), - 'image_shape': (4,), - 'refined_box_encodings': (2 * max_num_proposals, 2, 4), - 'class_predictions_with_background': (2 * max_num_proposals, 2 + 1), - 'num_proposals': (2,), - 'proposal_boxes': (2, max_num_proposals, 4), - 'proposal_boxes_normalized': (2, max_num_proposals, 4), - 'box_classifier_features': - self._get_box_classifier_features_shape(image_size, - batch_size, - max_num_proposals, - initial_crop_size, - maxpool_stride, - 3) - } - - init_op = tf.global_variables_initializer() - with self.test_session(graph=test_graph) as sess: - sess.run(init_op) - tensor_dict_out = sess.run(result_tensor_dict) - self.assertEqual(set(tensor_dict_out.keys()), - set(expected_shapes.keys()).union(set([ - 'rpn_box_encodings', - 'rpn_objectness_predictions_with_background', - 'anchors']))) - for key in expected_shapes: - self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key]) - - anchors_shape_out = tensor_dict_out['anchors'].shape - self.assertEqual(2, len(anchors_shape_out)) - self.assertEqual(4, anchors_shape_out[1]) - num_anchors_out = anchors_shape_out[0] - self.assertAllEqual(tensor_dict_out['rpn_box_encodings'].shape, - (2, num_anchors_out, 4)) - self.assertAllEqual( - tensor_dict_out['rpn_objectness_predictions_with_background'].shape, - (2, num_anchors_out, 2)) - - def _test_postprocess_first_stage_only_inference_mode( - self, pad_to_max_dimension=None): - model = self._build_model( - is_training=False, number_of_stages=1, second_stage_batch_size=6, - pad_to_max_dimension=pad_to_max_dimension) - batch_size = 2 - anchors = tf.constant( - [[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32]], dtype=tf.float32) - rpn_box_encodings = tf.zeros( - [batch_size, anchors.get_shape().as_list()[0], - BOX_CODE_SIZE], dtype=tf.float32) - # use different numbers for the objectness category to break ties in - # order of boxes returned by NMS - rpn_objectness_predictions_with_background = tf.constant([ - [[-10, 13], - [10, -10], - [10, -11], - [-10, 12]], - [[10, -10], - [-10, 13], - [-10, 12], - [10, -11]]], dtype=tf.float32) - rpn_features_to_crop = tf.ones((batch_size, 8, 8, 10), dtype=tf.float32) - image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32) - _, true_image_shapes = model.preprocess(tf.zeros(image_shape)) - proposals = model.postprocess({ - 'rpn_box_encodings': rpn_box_encodings, - 'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'rpn_features_to_crop': rpn_features_to_crop, - 'anchors': anchors}, true_image_shapes) - expected_proposal_boxes = [ - [[0, 0, .5, .5], [.5, .5, 1, 1], [0, .5, .5, 1], [.5, 0, 1.0, .5]] - + 4 * [4 * [0]], - [[0, .5, .5, 1], [.5, 0, 1.0, .5], [0, 0, .5, .5], [.5, .5, 1, 1]] - + 4 * [4 * [0]]] - expected_proposal_scores = [[1, 1, 0, 0, 0, 0, 0, 0], - [1, 1, 0, 0, 0, 0, 0, 0]] - expected_num_proposals = [4, 4] - - expected_output_keys = set(['detection_boxes', 'detection_scores', - 'num_detections']) - self.assertEqual(set(proposals.keys()), expected_output_keys) - with self.test_session() as sess: - proposals_out = sess.run(proposals) - 
self.assertAllClose(proposals_out['detection_boxes'], - expected_proposal_boxes) - self.assertAllClose(proposals_out['detection_scores'], - expected_proposal_scores) - self.assertAllEqual(proposals_out['num_detections'], - expected_num_proposals) - - def test_postprocess_first_stage_only_inference_mode(self): - self._test_postprocess_first_stage_only_inference_mode() - - def test_postprocess_first_stage_only_inference_mode_padded_image(self): - self._test_postprocess_first_stage_only_inference_mode( - pad_to_max_dimension=56) - - def _test_postprocess_first_stage_only_train_mode(self, - pad_to_max_dimension=None): - model = self._build_model( - is_training=True, number_of_stages=1, second_stage_batch_size=2, - pad_to_max_dimension=pad_to_max_dimension) - batch_size = 2 - anchors = tf.constant( - [[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32]], dtype=tf.float32) - rpn_box_encodings = tf.zeros( - [batch_size, anchors.get_shape().as_list()[0], - BOX_CODE_SIZE], dtype=tf.float32) - # use different numbers for the objectness category to break ties in - # order of boxes returned by NMS - rpn_objectness_predictions_with_background = tf.constant([ - [[-10, 13], - [-10, 12], - [-10, 11], - [-10, 10]], - [[-10, 13], - [-10, 12], - [-10, 11], - [-10, 10]]], dtype=tf.float32) - rpn_features_to_crop = tf.ones((batch_size, 8, 8, 10), dtype=tf.float32) - image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32) - groundtruth_boxes_list = [ - tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32), - tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)] - groundtruth_classes_list = [tf.constant([[1, 0], [0, 1]], dtype=tf.float32), - tf.constant([[1, 0], [1, 0]], dtype=tf.float32)] - - _, true_image_shapes = model.preprocess(tf.zeros(image_shape)) - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list) - proposals = model.postprocess({ - 'rpn_box_encodings': rpn_box_encodings, - 'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'rpn_features_to_crop': rpn_features_to_crop, - 'anchors': anchors}, true_image_shapes) - expected_proposal_boxes = [ - [[0, 0, .5, .5], [.5, .5, 1, 1]], [[0, .5, .5, 1], [.5, 0, 1, .5]]] - expected_proposal_scores = [[1, 1], - [1, 1]] - expected_num_proposals = [2, 2] - - expected_output_keys = set(['detection_boxes', 'detection_scores', - 'num_detections']) - self.assertEqual(set(proposals.keys()), expected_output_keys) - - with self.test_session() as sess: - proposals_out = sess.run(proposals) - for image_idx in range(batch_size): - self.assertTrue( - test_utils.first_rows_close_as_set( - proposals_out['detection_boxes'][image_idx].tolist(), - expected_proposal_boxes[image_idx])) - self.assertAllClose(proposals_out['detection_scores'], - expected_proposal_scores) - self.assertAllEqual(proposals_out['num_detections'], - expected_num_proposals) - - def test_postprocess_first_stage_only_train_mode(self): - self._test_postprocess_first_stage_only_train_mode() - - def test_postprocess_first_stage_only_train_mode_padded_image(self): - self._test_postprocess_first_stage_only_train_mode(pad_to_max_dimension=56) - - def _test_postprocess_second_stage_only_inference_mode( - self, pad_to_max_dimension=None): - num_proposals_shapes = [(2), (None,)] - refined_box_encodings_shapes = [(16, 2, 4), (None, 2, 4)] - class_predictions_with_background_shapes = [(16, 3), (None, 3)] - proposal_boxes_shapes = [(2, 8, 4), (None, 8, 4)] - batch_size = 2 - image_shape = np.array((2, 
36, 48, 3), dtype=np.int32) - for (num_proposals_shape, refined_box_encoding_shape, - class_predictions_with_background_shape, - proposal_boxes_shape) in zip(num_proposals_shapes, - refined_box_encodings_shapes, - class_predictions_with_background_shapes, - proposal_boxes_shapes): - tf_graph = tf.Graph() - with tf_graph.as_default(): - model = self._build_model( - is_training=False, number_of_stages=2, - second_stage_batch_size=6, - pad_to_max_dimension=pad_to_max_dimension) - _, true_image_shapes = model.preprocess(tf.zeros(image_shape)) - total_num_padded_proposals = batch_size * model.max_num_proposals - proposal_boxes = np.array( - [[[1, 1, 2, 3], - [0, 0, 1, 1], - [.5, .5, .6, .6], - 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]], - [[2, 3, 6, 8], - [1, 2, 5, 3], - 4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]]) - num_proposals = np.array([3, 2], dtype=np.int32) - refined_box_encodings = np.zeros( - [total_num_padded_proposals, model.num_classes, 4]) - class_predictions_with_background = np.ones( - [total_num_padded_proposals, model.num_classes+1]) - - num_proposals_placeholder = tf.placeholder(tf.int32, - shape=num_proposals_shape) - refined_box_encodings_placeholder = tf.placeholder( - tf.float32, shape=refined_box_encoding_shape) - class_predictions_with_background_placeholder = tf.placeholder( - tf.float32, shape=class_predictions_with_background_shape) - proposal_boxes_placeholder = tf.placeholder( - tf.float32, shape=proposal_boxes_shape) - image_shape_placeholder = tf.placeholder(tf.int32, shape=(4)) - - detections = model.postprocess({ - 'refined_box_encodings': refined_box_encodings_placeholder, - 'class_predictions_with_background': - class_predictions_with_background_placeholder, - 'num_proposals': num_proposals_placeholder, - 'proposal_boxes': proposal_boxes_placeholder, - }, true_image_shapes) - with self.test_session(graph=tf_graph) as sess: - detections_out = sess.run( - detections, - feed_dict={ - refined_box_encodings_placeholder: refined_box_encodings, - class_predictions_with_background_placeholder: - class_predictions_with_background, - num_proposals_placeholder: num_proposals, - proposal_boxes_placeholder: proposal_boxes, - image_shape_placeholder: image_shape - }) - self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4]) - self.assertAllClose(detections_out['detection_scores'], - [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]]) - self.assertAllClose(detections_out['detection_classes'], - [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]]) - self.assertAllClose(detections_out['num_detections'], [5, 4]) - - def test_postprocess_second_stage_only_inference_mode(self): - self._test_postprocess_second_stage_only_inference_mode() - - def test_postprocess_second_stage_only_inference_mode_padded_image(self): - self._test_postprocess_second_stage_only_inference_mode( - pad_to_max_dimension=56) - - def test_preprocess_preserves_input_shapes(self): - image_shapes = [(3, None, None, 3), - (None, 10, 10, 3), - (None, None, None, 3)] - for image_shape in image_shapes: - model = self._build_model( - is_training=False, number_of_stages=2, second_stage_batch_size=6) - image_placeholder = tf.placeholder(tf.float32, shape=image_shape) - preprocessed_inputs, _ = model.preprocess(image_placeholder) - self.assertAllEqual(preprocessed_inputs.shape.as_list(), image_shape) - - # TODO(rathodv): Split test into two - with and without masks. 
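- # Why the RPN losses in the test below come out to ~0: the groundtruth
- # boxes coincide exactly with anchors of the 32x32 image, so the
- # regression targets equal the all-zero box encodings, and every
- # objectness logit pair puts a margin of at least 20 on the correct
- # class, making the softmax cross-entropy ~log(1 + exp(-20)) ~= 0.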
- def test_loss_first_stage_only_mode(self): - model = self._build_model( - is_training=True, number_of_stages=1, second_stage_batch_size=6) - batch_size = 2 - anchors = tf.constant( - [[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32]], dtype=tf.float32) - - rpn_box_encodings = tf.zeros( - [batch_size, - anchors.get_shape().as_list()[0], - BOX_CODE_SIZE], dtype=tf.float32) - # use different numbers for the objectness category to break ties in - # order of boxes returned by NMS - rpn_objectness_predictions_with_background = tf.constant([ - [[-10, 13], - [10, -10], - [10, -11], - [-10, 12]], - [[10, -10], - [-10, 13], - [-10, 12], - [10, -11]]], dtype=tf.float32) - image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32) - - groundtruth_boxes_list = [ - tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32), - tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)] - groundtruth_classes_list = [tf.constant([[1, 0], [0, 1]], dtype=tf.float32), - tf.constant([[1, 0], [1, 0]], dtype=tf.float32)] - - prediction_dict = { - 'rpn_box_encodings': rpn_box_encodings, - 'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'image_shape': image_shape, - 'anchors': anchors - } - _, true_image_shapes = model.preprocess(tf.zeros(image_shape)) - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list) - loss_dict = model.loss(prediction_dict, true_image_shapes) - with self.test_session() as sess: - loss_dict_out = sess.run(loss_dict) - self.assertAllClose(loss_dict_out['Loss/RPNLoss/localization_loss'], 0) - self.assertAllClose(loss_dict_out['Loss/RPNLoss/objectness_loss'], 0) - self.assertTrue('Loss/BoxClassifierLoss/localization_loss' - not in loss_dict_out) - self.assertTrue('Loss/BoxClassifierLoss/classification_loss' - not in loss_dict_out) - - # TODO(rathodv): Split test into two - with and without masks. 
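- # The test below exercises all five loss terms at once (RPN localization
- # and objectness, second-stage localization and classification, and the
- # mask loss); every prediction is constructed at or near its optimum, so
- # each loss is expected to be ~0.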
- def test_loss_full(self): - model = self._build_model( - is_training=True, number_of_stages=2, second_stage_batch_size=6) - batch_size = 3 - anchors = tf.constant( - [[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32]], dtype=tf.float32) - rpn_box_encodings = tf.zeros( - [batch_size, - anchors.get_shape().as_list()[0], - BOX_CODE_SIZE], dtype=tf.float32) - # use different numbers for the objectness category to break ties in - # order of boxes returned by NMS - rpn_objectness_predictions_with_background = tf.constant( - [[[-10, 13], [10, -10], [10, -11], [-10, 12]], [[10, -10], [-10, 13], [ - -10, 12 - ], [10, -11]], [[10, -10], [-10, 13], [-10, 12], [10, -11]]], - dtype=tf.float32) - image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32) - - num_proposals = tf.constant([6, 6, 6], dtype=tf.int32) - proposal_boxes = tf.constant( - 3 * [[[0, 0, 16, 16], [0, 16, 16, 32], [16, 0, 32, 16], - [16, 16, 32, 32], [0, 0, 16, 16], [0, 16, 16, 32]]], - dtype=tf.float32) - refined_box_encodings = tf.zeros( - (batch_size * model.max_num_proposals, - model.num_classes, - BOX_CODE_SIZE), dtype=tf.float32) - class_predictions_with_background = tf.constant( - [ - [-10, 10, -10], # first image - [10, -10, -10], - [10, -10, -10], - [-10, -10, 10], - [-10, 10, -10], - [10, -10, -10], - [10, -10, -10], # second image - [-10, 10, -10], - [-10, 10, -10], - [10, -10, -10], - [10, -10, -10], - [-10, 10, -10], - [10, -10, -10], # third image - [-10, 10, -10], - [-10, 10, -10], - [10, -10, -10], - [10, -10, -10], - [-10, 10, -10] - ], - dtype=tf.float32) - - mask_predictions_logits = 20 * tf.ones((batch_size * - model.max_num_proposals, - model.num_classes, - 14, 14), - dtype=tf.float32) - - groundtruth_boxes_list = [ - tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32), - tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32), - tf.constant([[0, .5, .5, 1], [.5, 0, 1, 1]], dtype=tf.float32) - ] - groundtruth_classes_list = [ - tf.constant([[1, 0], [0, 1]], dtype=tf.float32), - tf.constant([[1, 0], [1, 0]], dtype=tf.float32), - tf.constant([[1, 0], [0, 1]], dtype=tf.float32) - ] - - # Set all elements of groundtruth mask to 1.0. In this case all proposal - # crops of the groundtruth masks should return a mask that covers the entire - # proposal. Thus, if mask_predictions_logits element values are all greater - # than 20, the loss should be zero. 
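- # Worked arithmetic for the claim above: sigmoid(20) = 1 / (1 + exp(-20))
- # ~= 1 - 2e-9, so the per-pixel cross-entropy against an all-ones target
- # is log(1 + exp(-20)) ~= 2e-9, i.e. zero up to assertAllClose tolerance.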
- groundtruth_masks_list = [ - tf.convert_to_tensor(np.ones((2, 32, 32)), dtype=tf.float32), - tf.convert_to_tensor(np.ones((2, 32, 32)), dtype=tf.float32), - tf.convert_to_tensor(np.ones((2, 32, 32)), dtype=tf.float32) - ] - groundtruth_weights_list = [ - tf.constant([1, 1], dtype=tf.float32), - tf.constant([1, 1], dtype=tf.float32), - tf.constant([1, 0], dtype=tf.float32) - ] - prediction_dict = { - 'rpn_box_encodings': rpn_box_encodings, - 'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'image_shape': image_shape, - 'anchors': anchors, - 'refined_box_encodings': refined_box_encodings, - 'class_predictions_with_background': class_predictions_with_background, - 'proposal_boxes': proposal_boxes, - 'num_proposals': num_proposals, - 'mask_predictions': mask_predictions_logits - } - _, true_image_shapes = model.preprocess(tf.zeros(image_shape)) - model.provide_groundtruth( - groundtruth_boxes_list, - groundtruth_classes_list, - groundtruth_masks_list, - groundtruth_weights_list=groundtruth_weights_list) - loss_dict = model.loss(prediction_dict, true_image_shapes) - - with self.test_session() as sess: - loss_dict_out = sess.run(loss_dict) - self.assertAllClose(loss_dict_out['Loss/RPNLoss/localization_loss'], 0) - self.assertAllClose(loss_dict_out['Loss/RPNLoss/objectness_loss'], 0) - self.assertAllClose(loss_dict_out[ - 'Loss/BoxClassifierLoss/localization_loss'], 0) - self.assertAllClose(loss_dict_out[ - 'Loss/BoxClassifierLoss/classification_loss'], 0) - self.assertAllClose(loss_dict_out['Loss/BoxClassifierLoss/mask_loss'], 0) - - def test_loss_full_zero_padded_proposals(self): - model = self._build_model( - is_training=True, number_of_stages=2, second_stage_batch_size=6) - batch_size = 1 - anchors = tf.constant( - [[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32]], dtype=tf.float32) - rpn_box_encodings = tf.zeros( - [batch_size, - anchors.get_shape().as_list()[0], - BOX_CODE_SIZE], dtype=tf.float32) - # use different numbers for the objectness category to break ties in - # order of boxes returned by NMS - rpn_objectness_predictions_with_background = tf.constant([ - [[-10, 13], - [10, -10], - [10, -11], - [10, -12]],], dtype=tf.float32) - image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32) - - # box_classifier_batch_size is 6, but here we assume that the number of - # actual proposals (not counting zero paddings) is fewer (3). - num_proposals = tf.constant([3], dtype=tf.int32) - proposal_boxes = tf.constant( - [[[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [0, 0, 0, 0], # begin paddings - [0, 0, 0, 0], - [0, 0, 0, 0]]], dtype=tf.float32) - - refined_box_encodings = tf.zeros( - (batch_size * model.max_num_proposals, - model.num_classes, - BOX_CODE_SIZE), dtype=tf.float32) - class_predictions_with_background = tf.constant( - [[-10, 10, -10], - [10, -10, -10], - [10, -10, -10], - [0, 0, 0], # begin paddings - [0, 0, 0], - [0, 0, 0]], dtype=tf.float32) - - mask_predictions_logits = 20 * tf.ones((batch_size * - model.max_num_proposals, - model.num_classes, - 14, 14), - dtype=tf.float32) - - groundtruth_boxes_list = [ - tf.constant([[0, 0, .5, .5]], dtype=tf.float32)] - groundtruth_classes_list = [tf.constant([[1, 0]], dtype=tf.float32)] - - # Set all elements of groundtruth mask to 1.0. In this case all proposal - # crops of the groundtruth masks should return a mask that covers the entire - # proposal. 
Thus, if mask_predictions_logits element values are all greater - # than 20, the loss should be zero. - groundtruth_masks_list = [tf.convert_to_tensor(np.ones((1, 32, 32)), - dtype=tf.float32)] - - prediction_dict = { - 'rpn_box_encodings': rpn_box_encodings, - 'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'image_shape': image_shape, - 'anchors': anchors, - 'refined_box_encodings': refined_box_encodings, - 'class_predictions_with_background': class_predictions_with_background, - 'proposal_boxes': proposal_boxes, - 'num_proposals': num_proposals, - 'mask_predictions': mask_predictions_logits - } - _, true_image_shapes = model.preprocess(tf.zeros(image_shape)) - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list, - groundtruth_masks_list) - loss_dict = model.loss(prediction_dict, true_image_shapes) - - with self.test_session() as sess: - loss_dict_out = sess.run(loss_dict) - self.assertAllClose(loss_dict_out['Loss/RPNLoss/localization_loss'], 0) - self.assertAllClose(loss_dict_out['Loss/RPNLoss/objectness_loss'], 0) - self.assertAllClose(loss_dict_out[ - 'Loss/BoxClassifierLoss/localization_loss'], 0) - self.assertAllClose(loss_dict_out[ - 'Loss/BoxClassifierLoss/classification_loss'], 0) - self.assertAllClose(loss_dict_out['Loss/BoxClassifierLoss/mask_loss'], 0) - - def test_loss_full_multiple_label_groundtruth(self): - model = self._build_model( - is_training=True, number_of_stages=2, second_stage_batch_size=6, - softmax_second_stage_classification_loss=False) - batch_size = 1 - anchors = tf.constant( - [[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32]], dtype=tf.float32) - rpn_box_encodings = tf.zeros( - [batch_size, - anchors.get_shape().as_list()[0], - BOX_CODE_SIZE], dtype=tf.float32) - # use different numbers for the objectness category to break ties in - # order of boxes returned by NMS - rpn_objectness_predictions_with_background = tf.constant([ - [[-10, 13], - [10, -10], - [10, -11], - [10, -12]],], dtype=tf.float32) - image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32) - - # box_classifier_batch_size is 6, but here we assume that the number of - # actual proposals (not counting zero paddings) is fewer (3). - num_proposals = tf.constant([3], dtype=tf.int32) - proposal_boxes = tf.constant( - [[[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [0, 0, 0, 0], # begin paddings - [0, 0, 0, 0], - [0, 0, 0, 0]]], dtype=tf.float32) - - # second_stage_localization_loss should only be computed for predictions - # that match groundtruth. For multiple label groundtruth boxes, the loss - # should only be computed once for the label with the smaller index. - refined_box_encodings = tf.constant( - [[[0, 0, 0, 0], [1, 1, -1, -1]], - [[1, 1, -1, -1], [1, 1, 1, 1]], - [[1, 1, -1, -1], [1, 1, 1, 1]], - [[1, 1, -1, -1], [1, 1, 1, 1]], - [[1, 1, -1, -1], [1, 1, 1, 1]], - [[1, 1, -1, -1], [1, 1, 1, 1]]], dtype=tf.float32) - class_predictions_with_background = tf.constant( - [[-100, 100, 100], - [100, -100, -100], - [100, -100, -100], - [0, 0, 0], # begin paddings - [0, 0, 0], - [0, 0, 0]], dtype=tf.float32) - - mask_predictions_logits = 20 * tf.ones((batch_size * - model.max_num_proposals, - model.num_classes, - 14, 14), - dtype=tf.float32) - - groundtruth_boxes_list = [ - tf.constant([[0, 0, .5, .5]], dtype=tf.float32)] - # Box contains two ground truth labels. - groundtruth_classes_list = [tf.constant([[1, 1]], dtype=tf.float32)] - - # Set all elements of groundtruth mask to 1.0. 
In this case all proposal - # crops of the groundtruth masks should return a mask that covers the entire - # proposal. Thus, if mask_predictions_logits element values are all greater - # than 20, the loss should be zero. - groundtruth_masks_list = [tf.convert_to_tensor(np.ones((1, 32, 32)), - dtype=tf.float32)] - - prediction_dict = { - 'rpn_box_encodings': rpn_box_encodings, - 'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'image_shape': image_shape, - 'anchors': anchors, - 'refined_box_encodings': refined_box_encodings, - 'class_predictions_with_background': class_predictions_with_background, - 'proposal_boxes': proposal_boxes, - 'num_proposals': num_proposals, - 'mask_predictions': mask_predictions_logits - } - _, true_image_shapes = model.preprocess(tf.zeros(image_shape)) - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list, - groundtruth_masks_list) - loss_dict = model.loss(prediction_dict, true_image_shapes) - - with self.test_session() as sess: - loss_dict_out = sess.run(loss_dict) - self.assertAllClose(loss_dict_out['Loss/RPNLoss/localization_loss'], 0) - self.assertAllClose(loss_dict_out['Loss/RPNLoss/objectness_loss'], 0) - self.assertAllClose(loss_dict_out[ - 'Loss/BoxClassifierLoss/localization_loss'], 0) - self.assertAllClose(loss_dict_out[ - 'Loss/BoxClassifierLoss/classification_loss'], 0) - self.assertAllClose(loss_dict_out['Loss/BoxClassifierLoss/mask_loss'], 0) - - def test_loss_full_zero_padded_proposals_nonzero_loss_with_two_images(self): - model = self._build_model( - is_training=True, number_of_stages=2, second_stage_batch_size=6) - batch_size = 2 - anchors = tf.constant( - [[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32]], dtype=tf.float32) - rpn_box_encodings = tf.zeros( - [batch_size, - anchors.get_shape().as_list()[0], - BOX_CODE_SIZE], dtype=tf.float32) - # use different numbers for the objectness category to break ties in - # order of boxes returned by NMS - rpn_objectness_predictions_with_background = tf.constant( - [[[-10, 13], - [10, -10], - [10, -11], - [10, -12]], - [[-10, 13], - [10, -10], - [10, -11], - [10, -12]]], dtype=tf.float32) - image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32) - - # box_classifier_batch_size is 6, but here we assume that the number of - # actual proposals (not counting zero paddings) is fewer. - num_proposals = tf.constant([3, 2], dtype=tf.int32) - proposal_boxes = tf.constant( - [[[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [0, 0, 0, 0], # begin paddings - [0, 0, 0, 0], - [0, 0, 0, 0]], - [[0, 0, 16, 16], - [0, 16, 16, 32], - [0, 0, 0, 0], # begin paddings - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]]], dtype=tf.float32) - - refined_box_encodings = tf.zeros( - (batch_size * model.max_num_proposals, - model.num_classes, - BOX_CODE_SIZE), dtype=tf.float32) - class_predictions_with_background = tf.constant( - [[-10, 10, -10], # first image - [10, -10, -10], - [10, -10, -10], - [0, 0, 0], # begin paddings - [0, 0, 0], - [0, 0, 0], - [-10, -10, 10], # second image - [10, -10, -10], - [0, 0, 0], # begin paddings - [0, 0, 0], - [0, 0, 0], - [0, 0, 0],], dtype=tf.float32) - - # The first groundtruth box is 4/5 of the anchor size in both directions - # experiencing a loss of: - # 2 * SmoothL1(5 * log(4/5)) / num_proposals - # = 2 * (abs(5 * log(4/5)) - .5) / 3 - # The second groundtruth box is identical to the prediction and thus - # experiences zero loss. - # Total average loss is (abs(5 * log(4/5)) - .5) / 3.
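# --- Editor's illustrative aside, not part of the original test file. ---
# Numeric check of the derivation above, assuming the smooth-L1 form used by
# the localization loss (0.5 * x**2 for |x| < 1, |x| - 0.5 otherwise) and the
# box coder's scale factor of 5 on the log-height/width residuals.
import numpy as np

def _smooth_l1(x):
  return 0.5 * x**2 if abs(x) < 1 else abs(x) - 0.5

_residual = 5 * np.log(4.0 / 5.0)                  # ~ -1.116, height and width
_first_image_loss = 2 * _smooth_l1(_residual) / 3  # two residual terms, 3 proposals
print(_first_image_loss / 2)  # ~0.2053 after averaging over the 2-image batch,
                              # matching exp_loc_loss defined just below.
# --- End aside. ---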
- groundtruth_boxes_list = [ - tf.constant([[0.05, 0.05, 0.45, 0.45]], dtype=tf.float32), - tf.constant([[0.0, 0.0, 0.5, 0.5]], dtype=tf.float32)] - groundtruth_classes_list = [tf.constant([[1, 0]], dtype=tf.float32), - tf.constant([[0, 1]], dtype=tf.float32)] - exp_loc_loss = (-5 * np.log(.8) - 0.5) / 3.0 - - prediction_dict = { - 'rpn_box_encodings': rpn_box_encodings, - 'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'image_shape': image_shape, - 'anchors': anchors, - 'refined_box_encodings': refined_box_encodings, - 'class_predictions_with_background': class_predictions_with_background, - 'proposal_boxes': proposal_boxes, - 'num_proposals': num_proposals - } - _, true_image_shapes = model.preprocess(tf.zeros(image_shape)) - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list) - loss_dict = model.loss(prediction_dict, true_image_shapes) - - with self.test_session() as sess: - loss_dict_out = sess.run(loss_dict) - self.assertAllClose(loss_dict_out['Loss/RPNLoss/localization_loss'], - exp_loc_loss) - self.assertAllClose(loss_dict_out['Loss/RPNLoss/objectness_loss'], 0) - self.assertAllClose(loss_dict_out[ - 'Loss/BoxClassifierLoss/localization_loss'], exp_loc_loss) - self.assertAllClose(loss_dict_out[ - 'Loss/BoxClassifierLoss/classification_loss'], 0) - - def test_loss_with_hard_mining(self): - model = self._build_model(is_training=True, - number_of_stages=2, - second_stage_batch_size=None, - first_stage_max_proposals=6, - hard_mining=True) - batch_size = 1 - anchors = tf.constant( - [[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32]], dtype=tf.float32) - rpn_box_encodings = tf.zeros( - [batch_size, - anchors.get_shape().as_list()[0], - BOX_CODE_SIZE], dtype=tf.float32) - # use different numbers for the objectness category to break ties in - # order of boxes returned by NMS - rpn_objectness_predictions_with_background = tf.constant( - [[[-10, 13], - [-10, 12], - [10, -11], - [10, -12]]], dtype=tf.float32) - image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32) - - # box_classifier_batch_size is 6, but here we assume that the number of - # actual proposals (not counting zero paddings) is fewer (3). - num_proposals = tf.constant([3], dtype=tf.int32) - proposal_boxes = tf.constant( - [[[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [0, 0, 0, 0], # begin paddings - [0, 0, 0, 0], - [0, 0, 0, 0]]], dtype=tf.float32) - - refined_box_encodings = tf.zeros( - (batch_size * model.max_num_proposals, - model.num_classes, - BOX_CODE_SIZE), dtype=tf.float32) - class_predictions_with_background = tf.constant( - [[-10, 10, -10], # first image - [-10, -10, 10], - [10, -10, -10], - [0, 0, 0], # begin paddings - [0, 0, 0], - [0, 0, 0]], dtype=tf.float32) - - # The first groundtruth box is 4/5 of the anchor size in both directions - # experiencing a loss of: - # 2 * SmoothL1(5 * log(4/5)) / num_proposals - # = 2 * (abs(5 * log(4/5)) - .5) / 3 - # The second groundtruth box is 46/50 of the anchor size in both directions - # experiencing a loss of: - # 2 * SmoothL1(5 * log(46/50)) / num_proposals - # = 2 * (.5 * (5 * log(.92))^2) / 3. - # Since the first groundtruth box experiences greater loss, and we have - # set num_hard_examples=1 in the HardMiner, the final localization loss - # corresponds to that of the first groundtruth box.
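# --- Editor's illustrative aside, not part of the original test file. ---
# Numeric check that the first box dominates, under the same smooth-L1 and
# scale-factor assumptions as the aside above.
import numpy as np

def _smooth_l1(x):
  return 0.5 * x**2 if abs(x) < 1 else abs(x) - 0.5

_loss_box1 = 2 * _smooth_l1(5 * np.log(4.0 / 5.0)) / 3    # ~0.4105
_loss_box2 = 2 * _smooth_l1(5 * np.log(46.0 / 50.0)) / 3  # ~0.0579
assert _loss_box1 > _loss_box2  # the miner (num_hard_examples=1) keeps box 1
# --- End aside. ---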
- groundtruth_boxes_list = [ - tf.constant([[0.05, 0.05, 0.45, 0.45], - [0.02, 0.52, 0.48, 0.98],], dtype=tf.float32)] - groundtruth_classes_list = [tf.constant([[1, 0], [0, 1]], dtype=tf.float32)] - exp_loc_loss = 2 * (-5 * np.log(.8) - 0.5) / 3.0 - - prediction_dict = { - 'rpn_box_encodings': rpn_box_encodings, - 'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'image_shape': image_shape, - 'anchors': anchors, - 'refined_box_encodings': refined_box_encodings, - 'class_predictions_with_background': class_predictions_with_background, - 'proposal_boxes': proposal_boxes, - 'num_proposals': num_proposals - } - _, true_image_shapes = model.preprocess(tf.zeros(image_shape)) - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list) - loss_dict = model.loss(prediction_dict, true_image_shapes) - - with self.test_session() as sess: - loss_dict_out = sess.run(loss_dict) - self.assertAllClose(loss_dict_out[ - 'Loss/BoxClassifierLoss/localization_loss'], exp_loc_loss) - self.assertAllClose(loss_dict_out[ - 'Loss/BoxClassifierLoss/classification_loss'], 0) - - def test_loss_full_with_shared_boxes(self): - model = self._build_model( - is_training=True, number_of_stages=2, second_stage_batch_size=6) - batch_size = 2 - anchors = tf.constant( - [[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32]], dtype=tf.float32) - rpn_box_encodings = tf.zeros( - [batch_size, - anchors.get_shape().as_list()[0], - BOX_CODE_SIZE], dtype=tf.float32) - # use different numbers for the objectness category to break ties in - # order of boxes returned by NMS - rpn_objectness_predictions_with_background = tf.constant([ - [[-10, 13], - [10, -10], - [10, -11], - [-10, 12]], - [[10, -10], - [-10, 13], - [-10, 12], - [10, -11]]], dtype=tf.float32) - image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32) - - num_proposals = tf.constant([6, 6], dtype=tf.int32) - proposal_boxes = tf.constant( - 2 * [[[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32], - [0, 0, 16, 16], - [0, 16, 16, 32]]], dtype=tf.float32) - refined_box_encodings = tf.zeros( - (batch_size * model.max_num_proposals, - 1, # one box shared among all the classes - BOX_CODE_SIZE), dtype=tf.float32) - class_predictions_with_background = tf.constant( - [[-10, 10, -10], # first image - [10, -10, -10], - [10, -10, -10], - [-10, -10, 10], - [-10, 10, -10], - [10, -10, -10], - [10, -10, -10], # second image - [-10, 10, -10], - [-10, 10, -10], - [10, -10, -10], - [10, -10, -10], - [-10, 10, -10]], dtype=tf.float32) - - mask_predictions_logits = 20 * tf.ones((batch_size * - model.max_num_proposals, - model.num_classes, - 14, 14), - dtype=tf.float32) - - groundtruth_boxes_list = [ - tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32), - tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)] - groundtruth_classes_list = [tf.constant([[1, 0], [0, 1]], dtype=tf.float32), - tf.constant([[1, 0], [1, 0]], dtype=tf.float32)] - - # Set all elements of groundtruth mask to 1.0. In this case all proposal - # crops of the groundtruth masks should return a mask that covers the entire - # proposal. Thus, if mask_predictions_logits element values are all greater - # than 20, the loss should be zero. 
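# --- Editor's illustrative aside, not part of the original test file. ---
# Why a constant logit of 20 gives a numerically zero mask loss against an
# all-ones target: per-pixel sigmoid cross-entropy with target 1 reduces to
# log(1 + exp(-logit)).
import numpy as np
print(np.log1p(np.exp(-20.0)))  # ~2.1e-09, zero within assertAllClose tolerance
# --- End aside. ---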
- groundtruth_masks_list = [tf.convert_to_tensor(np.ones((2, 32, 32)), - dtype=tf.float32), - tf.convert_to_tensor(np.ones((2, 32, 32)), - dtype=tf.float32)] - prediction_dict = { - 'rpn_box_encodings': rpn_box_encodings, - 'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'image_shape': image_shape, - 'anchors': anchors, - 'refined_box_encodings': refined_box_encodings, - 'class_predictions_with_background': class_predictions_with_background, - 'proposal_boxes': proposal_boxes, - 'num_proposals': num_proposals, - 'mask_predictions': mask_predictions_logits - } - _, true_image_shapes = model.preprocess(tf.zeros(image_shape)) - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list, - groundtruth_masks_list) - loss_dict = model.loss(prediction_dict, true_image_shapes) - - with self.test_session() as sess: - loss_dict_out = sess.run(loss_dict) - self.assertAllClose(loss_dict_out['Loss/RPNLoss/localization_loss'], 0) - self.assertAllClose(loss_dict_out['Loss/RPNLoss/objectness_loss'], 0) - self.assertAllClose(loss_dict_out[ - 'Loss/BoxClassifierLoss/localization_loss'], 0) - self.assertAllClose(loss_dict_out[ - 'Loss/BoxClassifierLoss/classification_loss'], 0) - self.assertAllClose(loss_dict_out['Loss/BoxClassifierLoss/mask_loss'], 0) - - def test_restore_map_for_classification_ckpt(self): - # Define mock tensorflow classification graph and save variables. - test_graph_classification = tf.Graph() - with test_graph_classification.as_default(): - image = tf.placeholder(dtype=tf.float32, shape=[1, 20, 20, 3]) - with tf.variable_scope('mock_model'): - net = slim.conv2d(image, num_outputs=3, kernel_size=1, scope='layer1') - slim.conv2d(net, num_outputs=3, kernel_size=1, scope='layer2') - - init_op = tf.global_variables_initializer() - saver = tf.train.Saver() - save_path = self.get_temp_dir() - with self.test_session(graph=test_graph_classification) as sess: - sess.run(init_op) - saved_model_path = saver.save(sess, save_path) - - # Create tensorflow detection graph and load variables from - # classification checkpoint. - test_graph_detection = tf.Graph() - with test_graph_detection.as_default(): - model = self._build_model( - is_training=False, number_of_stages=2, second_stage_batch_size=6) - - inputs_shape = (2, 20, 20, 3) - inputs = tf.to_float(tf.random_uniform( - inputs_shape, minval=0, maxval=255, dtype=tf.int32)) - preprocessed_inputs, true_image_shapes = model.preprocess(inputs) - prediction_dict = model.predict(preprocessed_inputs, true_image_shapes) - model.postprocess(prediction_dict, true_image_shapes) - var_map = model.restore_map(fine_tune_checkpoint_type='classification') - self.assertIsInstance(var_map, dict) - saver = tf.train.Saver(var_map) - with self.test_session(graph=test_graph_classification) as sess: - saver.restore(sess, saved_model_path) - for var in sess.run(tf.report_uninitialized_variables()): - self.assertNotIn(model.first_stage_feature_extractor_scope, var) - self.assertNotIn(model.second_stage_feature_extractor_scope, var) - - def test_restore_map_for_detection_ckpt(self): - # Define first detection graph and save variables. 
- test_graph_detection1 = tf.Graph() - with test_graph_detection1.as_default(): - model = self._build_model( - is_training=False, number_of_stages=2, second_stage_batch_size=6) - inputs_shape = (2, 20, 20, 3) - inputs = tf.to_float(tf.random_uniform( - inputs_shape, minval=0, maxval=255, dtype=tf.int32)) - preprocessed_inputs, true_image_shapes = model.preprocess(inputs) - prediction_dict = model.predict(preprocessed_inputs, true_image_shapes) - model.postprocess(prediction_dict, true_image_shapes) - another_variable = tf.Variable([17.0], name='another_variable') # pylint: disable=unused-variable - init_op = tf.global_variables_initializer() - saver = tf.train.Saver() - save_path = self.get_temp_dir() - with self.test_session(graph=test_graph_detection1) as sess: - sess.run(init_op) - saved_model_path = saver.save(sess, save_path) - - # Define second detection graph and restore variables. - test_graph_detection2 = tf.Graph() - with test_graph_detection2.as_default(): - model2 = self._build_model(is_training=False, number_of_stages=2, - second_stage_batch_size=6, num_classes=42) - - inputs_shape2 = (2, 20, 20, 3) - inputs2 = tf.to_float(tf.random_uniform( - inputs_shape2, minval=0, maxval=255, dtype=tf.int32)) - preprocessed_inputs2, true_image_shapes = model2.preprocess(inputs2) - prediction_dict2 = model2.predict(preprocessed_inputs2, true_image_shapes) - model2.postprocess(prediction_dict2, true_image_shapes) - another_variable = tf.Variable([17.0], name='another_variable') # pylint: disable=unused-variable - var_map = model2.restore_map(fine_tune_checkpoint_type='detection') - self.assertIsInstance(var_map, dict) - saver = tf.train.Saver(var_map) - with self.test_session(graph=test_graph_detection2) as sess: - saver.restore(sess, saved_model_path) - uninitialized_vars_list = sess.run(tf.report_uninitialized_variables()) - self.assertIn('another_variable', uninitialized_vars_list) - for var in uninitialized_vars_list: - self.assertNotIn(model2.first_stage_feature_extractor_scope, var) - self.assertNotIn(model2.second_stage_feature_extractor_scope, var) - - def test_load_all_det_checkpoint_vars(self): - test_graph_detection = tf.Graph() - with test_graph_detection.as_default(): - model = self._build_model( - is_training=False, - number_of_stages=2, - second_stage_batch_size=6, - num_classes=42) - - inputs_shape = (2, 20, 20, 3) - inputs = tf.to_float( - tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32)) - preprocessed_inputs, true_image_shapes = model.preprocess(inputs) - prediction_dict = model.predict(preprocessed_inputs, true_image_shapes) - model.postprocess(prediction_dict, true_image_shapes) - another_variable = tf.Variable([17.0], name='another_variable') # pylint: disable=unused-variable - var_map = model.restore_map( - fine_tune_checkpoint_type='detection', - load_all_detection_checkpoint_vars=True) - self.assertIsInstance(var_map, dict) - self.assertIn('another_variable', var_map) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/rfcn_meta_arch.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/rfcn_meta_arch.py deleted file mode 100644 index 3655449382a2d59009c251f5b7f571c2cc14a4f0..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/rfcn_meta_arch.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""R-FCN meta-architecture definition. - -R-FCN: Dai, Jifeng, et al. "R-FCN: Object Detection via Region-based -Fully Convolutional Networks." arXiv preprint arXiv:1605.06409 (2016). - -The R-FCN meta architecture is similar to Faster R-CNN and only differs in the -second stage. Hence this class inherits FasterRCNNMetaArch and overrides only -the `_predict_second_stage` method. - -Similar to Faster R-CNN we allow for two modes: number_of_stages=1 and -number_of_stages=2. In the former setting, all of the user facing methods -(e.g., predict, postprocess, loss) can be used as if the model consisted -only of the RPN, returning class agnostic proposals (these can be thought of as -approximate detections with no associated class information). In the latter -setting, proposals are computed, then passed through a second stage -"box classifier" to yield (multi-class) detections. - -Implementations of R-FCN models must define a new FasterRCNNFeatureExtractor and -override three methods: `preprocess`, `_extract_proposal_features` (the first -stage of the model), and `_extract_box_classifier_features` (the second stage of -the model). Optionally, the `restore_fn` method can be overridden. See tests -for an example. - -See notes in the documentation of Faster R-CNN meta-architecture as they all -apply here. -""" -import tensorflow as tf - -from object_detection.core import box_predictor -from object_detection.meta_architectures import faster_rcnn_meta_arch -from object_detection.utils import ops - - -class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): - """R-FCN Meta-architecture definition.""" - - def __init__(self, - is_training, - num_classes, - image_resizer_fn, - feature_extractor, - number_of_stages, - first_stage_anchor_generator, - first_stage_atrous_rate, - first_stage_box_predictor_arg_scope_fn, - first_stage_box_predictor_kernel_size, - first_stage_box_predictor_depth, - first_stage_minibatch_size, - first_stage_positive_balance_fraction, - first_stage_nms_score_threshold, - first_stage_nms_iou_threshold, - first_stage_max_proposals, - first_stage_localization_loss_weight, - first_stage_objectness_loss_weight, - second_stage_rfcn_box_predictor, - second_stage_batch_size, - second_stage_balance_fraction, - second_stage_non_max_suppression_fn, - second_stage_score_conversion_fn, - second_stage_localization_loss_weight, - second_stage_classification_loss_weight, - second_stage_classification_loss, - hard_example_miner, - parallel_iterations=16, - add_summaries=True): - """RFCNMetaArch Constructor. - - Args: - is_training: A boolean indicating whether the training version of the - computation graph should be constructed. - num_classes: Number of classes. 
Note that num_classes *does not* - include the background category, so if groundtruth labels take values - in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the - assigned classification targets can range from {0,... K}). - image_resizer_fn: A callable for image resizing. This callable always - takes a rank-3 image tensor (corresponding to a single image) and - returns a rank-3 image tensor, possibly with new spatial dimensions. - See builders/image_resizer_builder.py. - feature_extractor: A FasterRCNNFeatureExtractor object. - number_of_stages: Valid values are {1, 2}. If 1 will only construct the - Region Proposal Network (RPN) part of the model. - first_stage_anchor_generator: An anchor_generator.AnchorGenerator object - (note that currently we only support - grid_anchor_generator.GridAnchorGenerator objects) - first_stage_atrous_rate: A single integer indicating the atrous rate for - the single convolution op which is applied to the `rpn_features_to_crop` - tensor to obtain a tensor to be used for box prediction. Some feature - extractors optionally allow for producing feature maps computed at - denser resolutions. The atrous rate is used to compensate for the - denser feature maps by using an effectively larger receptive field. - (This should typically be set to 1). - first_stage_box_predictor_arg_scope_fn: A function to generate tf-slim - arg_scope for conv2d, separable_conv2d and fully_connected ops for the - RPN box predictor. - first_stage_box_predictor_kernel_size: Kernel size to use for the - convolution op just prior to RPN box predictions. - first_stage_box_predictor_depth: Output depth for the convolution op - just prior to RPN box predictions. - first_stage_minibatch_size: The "batch size" to use for computing the - objectness and location loss of the region proposal network. This - "batch size" refers to the number of anchors selected as contributing - to the loss function for any given image within the image batch and is - only called "batch_size" due to terminology from the Faster R-CNN paper. - first_stage_positive_balance_fraction: Fraction of positive examples - per image for the RPN. The recommended value for Faster RCNN is 0.5. - first_stage_nms_score_threshold: Score threshold for non max suppression - for the Region Proposal Network (RPN). This value is expected to be in - [0, 1] as it is applied directly after a softmax transformation. The - recommended value for Faster R-CNN is 0. - first_stage_nms_iou_threshold: The Intersection Over Union (IOU) threshold - for performing Non-Max Suppression (NMS) on the boxes predicted by the - Region Proposal Network (RPN). - first_stage_max_proposals: Maximum number of boxes to retain after - performing Non-Max Suppression (NMS) on the boxes predicted by the - Region Proposal Network (RPN). - first_stage_localization_loss_weight: A float - first_stage_objectness_loss_weight: A float - second_stage_rfcn_box_predictor: RFCN box predictor to use for - second stage. - second_stage_batch_size: The batch size used for computing the - classification and refined location loss of the box classifier. This - "batch size" refers to the number of proposals selected as contributing - to the loss function for any given image within the image batch and is - only called "batch_size" due to terminology from the Faster R-CNN paper. - second_stage_balance_fraction: Fraction of positive examples to use - per image for the box classifier. The recommended value for Faster RCNN - is 0.25. 
- second_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression - callable that takes `boxes`, `scores`, optional `clip_window` and - optional (kwarg) `mask` inputs (with all other inputs already set) - and returns a dictionary containing tensors with keys: - `detection_boxes`, `detection_scores`, `detection_classes`, - `num_detections`, and (optionally) `detection_masks`. See - `post_processing.batch_multiclass_non_max_suppression` for the type and - shape of these tensors. - second_stage_score_conversion_fn: Callable elementwise nonlinearity - (that takes tensors as inputs and returns tensors). This is usually - used to convert logits to probabilities. - second_stage_localization_loss_weight: A float - second_stage_classification_loss_weight: A float - second_stage_classification_loss: A string indicating which loss function - to use, supports 'softmax' and 'sigmoid'. - hard_example_miner: A losses.HardExampleMiner object (can be None). - parallel_iterations: (Optional) The number of iterations allowed to run - in parallel for calls to tf.map_fn. - add_summaries: boolean (default: True) controlling whether summary ops - should be added to tensorflow graph. - - Raises: - ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` - ValueError: If first_stage_anchor_generator is not of type - grid_anchor_generator.GridAnchorGenerator. - """ - # TODO(rathodv): add_summaries is currently unused. Respect that directive - # in the future. - super(RFCNMetaArch, self).__init__( - is_training, - num_classes, - image_resizer_fn, - feature_extractor, - number_of_stages, - first_stage_anchor_generator, - first_stage_atrous_rate, - first_stage_box_predictor_arg_scope_fn, - first_stage_box_predictor_kernel_size, - first_stage_box_predictor_depth, - first_stage_minibatch_size, - first_stage_positive_balance_fraction, - first_stage_nms_score_threshold, - first_stage_nms_iou_threshold, - first_stage_max_proposals, - first_stage_localization_loss_weight, - first_stage_objectness_loss_weight, - None, # initial_crop_size is not used in R-FCN - None, # maxpool_kernel_size is not used in R-FCN - None, # maxpool_stride is not used in R-FCN - None, # fully_connected_box_predictor is not used in R-FCN. - second_stage_batch_size, - second_stage_balance_fraction, - second_stage_non_max_suppression_fn, - second_stage_score_conversion_fn, - second_stage_localization_loss_weight, - second_stage_classification_loss_weight, - second_stage_classification_loss, - 1.0, # second stage mask prediction loss weight isn't used in R-FCN. - hard_example_miner, - parallel_iterations) - - self._rfcn_box_predictor = second_stage_rfcn_box_predictor - - def _predict_second_stage(self, rpn_box_encodings, - rpn_objectness_predictions_with_background, - rpn_features, - anchors, - image_shape, - true_image_shapes): - """Predicts the output tensors from 2nd stage of R-FCN. - - Args: - rpn_box_encodings: 3-D float tensor of shape - [batch_size, num_valid_anchors, self._box_coder.code_size] containing - predicted boxes. - rpn_objectness_predictions_with_background: 3-D float tensor of shape - [batch_size, num_valid_anchors, 2] containing class - predictions (logits) for each of the anchors. Note that this - tensor *includes* background class predictions (at class index 0). - rpn_features: A 4-D float32 tensor with shape - [batch_size, height, width, depth] representing image features from the - RPN. - anchors: 2-D float tensor of shape - [num_anchors, self._box_coder.code_size].
- image_shape: A 1-D int32 tensor of size [4] containing the image shape. - true_image_shapes: int32 tensor of shape [batch, 3] where each row is - of the form [height, width, channels] indicating the shapes - of true images in the resized images, as resized images can be padded - with zeros. - - Returns: - prediction_dict: a dictionary holding "raw" prediction tensors: - 1) refined_box_encodings: a 3-D tensor with shape - [total_num_proposals, num_classes, 4] representing predicted - (final) refined box encodings, where - total_num_proposals=batch_size*self._max_num_proposals - 2) class_predictions_with_background: a 2-D tensor with shape - [total_num_proposals, num_classes + 1] containing class - predictions (logits) for each of the anchors, where - total_num_proposals=batch_size*self._max_num_proposals. - Note that this tensor *includes* background class predictions - (at class index 0). - 3) num_proposals: An int32 tensor of shape [batch_size] representing the - number of proposals generated by the RPN. `num_proposals` allows us - to keep track of which entries are to be treated as zero paddings and - which are not since we always pad the number of proposals to be - `self.max_num_proposals` for each image. - 4) proposal_boxes: A float32 tensor of shape - [batch_size, self.max_num_proposals, 4] representing - decoded proposal bounding boxes (in absolute coordinates). - 5) proposal_boxes_normalized: A float32 tensor of shape - [batch_size, self.max_num_proposals, 4] representing decoded proposal - bounding boxes (in normalized coordinates). Can be used to override - the boxes proposed by the RPN, thus enabling one to extract box - classification and prediction for externally selected areas of the - image. - 6) box_classifier_features: a 4-D float32 tensor, of shape - [batch_size, feature_map_height, feature_map_width, depth], - representing the box classifier features.
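Example (editor's illustrative sketch, not part of the original file):
masking out the zero padding that `num_proposals` tracks; the shapes and
values here are hypothetical.

  import tensorflow as tf

  max_num_proposals = 6
  num_proposals = tf.constant([3, 2], dtype=tf.int32)  # per-image counts
  # Boolean mask separating real proposals from zero padding, per image.
  valid = tf.sequence_mask(num_proposals, maxlen=max_num_proposals)  # [2, 6]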
- """ - image_shape_2d = tf.tile(tf.expand_dims(image_shape[1:], 0), - [image_shape[0], 1]) - proposal_boxes_normalized, _, num_proposals = self._postprocess_rpn( - rpn_box_encodings, rpn_objectness_predictions_with_background, - anchors, image_shape_2d, true_image_shapes) - - box_classifier_features = ( - self._feature_extractor.extract_box_classifier_features( - rpn_features, - scope=self.second_stage_feature_extractor_scope)) - - box_predictions = self._rfcn_box_predictor.predict( - [box_classifier_features], - num_predictions_per_location=[1], - scope=self.second_stage_box_predictor_scope, - proposal_boxes=proposal_boxes_normalized) - refined_box_encodings = tf.squeeze( - tf.concat(box_predictions[box_predictor.BOX_ENCODINGS], axis=1), axis=1) - class_predictions_with_background = tf.squeeze( - tf.concat( - box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], - axis=1), - axis=1) - - absolute_proposal_boxes = ops.normalized_to_image_coordinates( - proposal_boxes_normalized, image_shape, - parallel_iterations=self._parallel_iterations) - - prediction_dict = { - 'refined_box_encodings': refined_box_encodings, - 'class_predictions_with_background': - class_predictions_with_background, - 'num_proposals': num_proposals, - 'proposal_boxes': absolute_proposal_boxes, - 'box_classifier_features': box_classifier_features, - 'proposal_boxes_normalized': proposal_boxes_normalized, - } - return prediction_dict diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/rfcn_meta_arch_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/rfcn_meta_arch_test.py deleted file mode 100644 index 829140ac968885bf200924bc08edfc170474d1bc..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/rfcn_meta_arch_test.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.meta_architectures.rfcn_meta_arch.""" - -import tensorflow as tf - -from object_detection.meta_architectures import faster_rcnn_meta_arch_test_lib -from object_detection.meta_architectures import rfcn_meta_arch - - -class RFCNMetaArchTest( - faster_rcnn_meta_arch_test_lib.FasterRCNNMetaArchTestBase): - - def _get_second_stage_box_predictor_text_proto(self): - box_predictor_text_proto = """ - rfcn_box_predictor { - conv_hyperparams { - op: CONV - activation: NONE - regularizer { - l2_regularizer { - weight: 0.0005 - } - } - initializer { - variance_scaling_initializer { - factor: 1.0 - uniform: true - mode: FAN_AVG - } - } - } - } - """ - return box_predictor_text_proto - - def _get_model(self, box_predictor, **common_kwargs): - return rfcn_meta_arch.RFCNMetaArch( - second_stage_rfcn_box_predictor=box_predictor, **common_kwargs) - - def _get_box_classifier_features_shape(self, - image_size, - batch_size, - max_num_proposals, - initial_crop_size, - maxpool_stride, - num_features): - return (batch_size, image_size, image_size, num_features) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/ssd_meta_arch.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/ssd_meta_arch.py deleted file mode 100644 index ffbe914837a47d7fa8426fb59bfdcde25ae771e7..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/ssd_meta_arch.py +++ /dev/null @@ -1,870 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""SSD Meta-architecture definition. - -General tensorflow implementation of convolutional Multibox/SSD detection -models. -""" -from abc import abstractmethod - -import re -import tensorflow as tf - -from object_detection.core import box_list -from object_detection.core import box_list_ops -from object_detection.core import model -from object_detection.core import standard_fields as fields -from object_detection.core import target_assigner -from object_detection.utils import ops -from object_detection.utils import shape_utils -from object_detection.utils import visualization_utils - -slim = tf.contrib.slim - - -class SSDFeatureExtractor(object): - """SSD Feature Extractor definition.""" - - def __init__(self, - is_training, - depth_multiplier, - min_depth, - pad_to_multiple, - conv_hyperparams_fn, - reuse_weights=None, - use_explicit_padding=False, - use_depthwise=False, - override_base_feature_extractor_hyperparams=False): - """Constructor. - - Args: - is_training: whether the network is in training mode. - depth_multiplier: float depth multiplier for feature extractor. - min_depth: minimum feature extractor depth. 
- pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d - and separable_conv2d ops in the layers that are added on top of the - base feature extractor. - reuse_weights: whether to reuse variables. Default is None. - use_explicit_padding: Whether to use explicit padding when extracting - features. Default is False. - use_depthwise: Whether to use depthwise convolutions. Default is False. - override_base_feature_extractor_hyperparams: Whether to override - hyperparameters of the base feature extractor with the one from - `conv_hyperparams_fn`. - """ - self._is_training = is_training - self._depth_multiplier = depth_multiplier - self._min_depth = min_depth - self._pad_to_multiple = pad_to_multiple - self._conv_hyperparams_fn = conv_hyperparams_fn - self._reuse_weights = reuse_weights - self._use_explicit_padding = use_explicit_padding - self._use_depthwise = use_depthwise - self._override_base_feature_extractor_hyperparams = ( - override_base_feature_extractor_hyperparams) - - @abstractmethod - def preprocess(self, resized_inputs): - """Preprocesses images for feature extraction (minus image resizing). - - Args: - resized_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - true_image_shapes: int32 tensor of shape [batch, 3] where each row is - of the form [height, width, channels] indicating the shapes - of true images in the resized images, as resized images can be padded - with zeros. - """ - pass - - @abstractmethod - def extract_features(self, preprocessed_inputs): - """Extracts features from preprocessed inputs. - - This function is responsible for extracting feature maps from preprocessed - images. - - Args: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - feature_maps: a list of tensors where the ith tensor has shape - [batch, height_i, width_i, depth_i] - """ - raise NotImplementedError - - -class SSDMetaArch(model.DetectionModel): - """SSD Meta-architecture definition.""" - - def __init__(self, - is_training, - anchor_generator, - box_predictor, - box_coder, - feature_extractor, - matcher, - region_similarity_calculator, - encode_background_as_zeros, - negative_class_weight, - image_resizer_fn, - non_max_suppression_fn, - score_conversion_fn, - classification_loss, - localization_loss, - classification_loss_weight, - localization_loss_weight, - normalize_loss_by_num_matches, - hard_example_miner, - add_summaries=True, - normalize_loc_loss_by_codesize=False, - freeze_batchnorm=False, - inplace_batchnorm_update=False, - add_background_class=True, - random_example_sampler=None): - """SSDMetaArch Constructor. - - TODO(rathodv,jonathanhuang): group NMS parameters + score converter into - a class and loss parameters into a class and write config protos for - postprocessing and losses. - - Args: - is_training: A boolean indicating whether the training version of the - computation graph should be constructed. - anchor_generator: an anchor_generator.AnchorGenerator object. - box_predictor: a box_predictor.BoxPredictor object. - box_coder: a box_coder.BoxCoder object. - feature_extractor: a SSDFeatureExtractor object. - matcher: a matcher.Matcher object. - region_similarity_calculator: a - region_similarity_calculator.RegionSimilarityCalculator object. 
- encode_background_as_zeros: boolean determining whether background - targets are to be encoded as an all zeros vector or a one-hot - vector (where background is the 0th class). - negative_class_weight: Weight for confidence loss of negative anchors. - image_resizer_fn: a callable for image resizing. This callable always - takes a rank-3 image tensor (corresponding to a single image) and - returns a rank-3 image tensor, possibly with new spatial dimensions and - a 1-D tensor of shape [3] indicating shape of true image within - the resized image tensor as the resized image tensor could be padded. - See builders/image_resizer_builder.py. - non_max_suppression_fn: batch_multiclass_non_max_suppression - callable that takes `boxes`, `scores` and optional `clip_window` - inputs (with all other inputs already set) and returns a dictionary - holding tensors with keys: `detection_boxes`, `detection_scores`, - `detection_classes` and `num_detections`. See `post_processing. - batch_multiclass_non_max_suppression` for the type and shape of these - tensors. - score_conversion_fn: callable elementwise nonlinearity (that takes tensors - as inputs and returns tensors). This is usually used to convert logits - to probabilities. - classification_loss: an object_detection.core.losses.Loss object. - localization_loss: an object_detection.core.losses.Loss object. - classification_loss_weight: float - localization_loss_weight: float - normalize_loss_by_num_matches: boolean - hard_example_miner: a losses.HardExampleMiner object (can be None) - add_summaries: boolean (default: True) controlling whether summary ops - should be added to tensorflow graph. - normalize_loc_loss_by_codesize: whether to normalize localization loss - by code size of the box encoder. - freeze_batchnorm: Whether to freeze batch norm parameters during - training or not. When training with a small batch size (e.g. 1), it is - desirable to freeze batch norm update and use pretrained batch norm - params. - inplace_batchnorm_update: Whether to update batch norm moving average - values inplace. When this is false, the train op must add a control - dependency on tf.GraphKeys.UPDATE_OPS collection in order to update - batch norm statistics. - add_background_class: Whether to add an implicit background class to - one-hot encodings of groundtruth labels. Set to false if using - groundtruth labels with an explicit background class or using multiclass - scores instead of truth in the case of distillation. - random_example_sampler: a BalancedPositiveNegativeSampler object that can - perform random example sampling when computing loss. If None, random - sampling process is skipped. Note that random example sampler and hard - example miner can both be applied to the model. In that case, random - sampler will take effect first and hard example miner can only process - the random sampled examples. - """ - super(SSDMetaArch, self).__init__(num_classes=box_predictor.num_classes) - self._is_training = is_training - self._freeze_batchnorm = freeze_batchnorm - self._inplace_batchnorm_update = inplace_batchnorm_update - - # Needed for fine-tuning from classification checkpoints whose - # variables do not have the feature extractor scope.
- self._extract_features_scope = 'FeatureExtractor' - - self._anchor_generator = anchor_generator - self._box_predictor = box_predictor - - self._box_coder = box_coder - self._feature_extractor = feature_extractor - self._matcher = matcher - self._region_similarity_calculator = region_similarity_calculator - self._add_background_class = add_background_class - - # TODO(jonathanhuang): handle agnostic mode - # weights - unmatched_cls_target = None - unmatched_cls_target = tf.constant([1] + self.num_classes * [0], - tf.float32) - if encode_background_as_zeros: - unmatched_cls_target = tf.constant((self.num_classes + 1) * [0], - tf.float32) - - self._target_assigner = target_assigner.TargetAssigner( - self._region_similarity_calculator, - self._matcher, - self._box_coder, - negative_class_weight=negative_class_weight, - unmatched_cls_target=unmatched_cls_target) - - self._classification_loss = classification_loss - self._localization_loss = localization_loss - self._classification_loss_weight = classification_loss_weight - self._localization_loss_weight = localization_loss_weight - self._normalize_loss_by_num_matches = normalize_loss_by_num_matches - self._normalize_loc_loss_by_codesize = normalize_loc_loss_by_codesize - self._hard_example_miner = hard_example_miner - self._random_example_sampler = random_example_sampler - self._parallel_iterations = 16 - - self._image_resizer_fn = image_resizer_fn - self._non_max_suppression_fn = non_max_suppression_fn - self._score_conversion_fn = score_conversion_fn - - self._anchors = None - self._add_summaries = add_summaries - self._batched_prediction_tensor_names = [] - - @property - def anchors(self): - if not self._anchors: - raise RuntimeError('anchors have not been constructed yet!') - if not isinstance(self._anchors, box_list.BoxList): - raise RuntimeError('anchors should be a BoxList object, but is not.') - return self._anchors - - @property - def batched_prediction_tensor_names(self): - if not self._batched_prediction_tensor_names: - raise RuntimeError('Must call predict() method to get batched prediction ' - 'tensor names.') - return self._batched_prediction_tensor_names - - def preprocess(self, inputs): - """Feature-extractor specific preprocessing. - - SSD meta architecture uses a default clip_window of [0, 0, 1, 1] during - post-processing. On calling `preprocess` method, clip_window gets updated - based on `true_image_shapes` returned by `image_resizer_fn`. - - Args: - inputs: a [batch, height_in, width_in, channels] float tensor representing - a batch of images with values between 0 and 255.0. - - Returns: - preprocessed_inputs: a [batch, height_out, width_out, channels] float - tensor representing a batch of images. - true_image_shapes: int32 tensor of shape [batch, 3] where each row is - of the form [height, width, channels] indicating the shapes - of true images in the resized images, as resized images can be padded - with zeros. - - Raises: - ValueError: if inputs tensor does not have type tf.float32 - """ - if inputs.dtype is not tf.float32: - raise ValueError('`preprocess` expects a tf.float32 tensor') - with tf.name_scope('Preprocessor'): - # TODO(jonathanhuang): revisit whether to always use batch size as - # the number of parallel iterations vs allow for dynamic batching. 
- outputs = shape_utils.static_or_dynamic_map_fn( - self._image_resizer_fn, - elems=inputs, - dtype=[tf.float32, tf.int32]) - resized_inputs = outputs[0] - true_image_shapes = outputs[1] - - return (self._feature_extractor.preprocess(resized_inputs), - true_image_shapes) - - def _compute_clip_window(self, preprocessed_images, true_image_shapes): - """Computes clip window to use during post_processing. - - Computes a new clip window to use during post-processing based on - `resized_image_shapes` and `true_image_shapes` only if `preprocess` method - has been called. Otherwise returns a default clip window of [0, 0, 1, 1]. - - Args: - preprocessed_images: the [batch, height, width, channels] image - tensor. - true_image_shapes: int32 tensor of shape [batch, 3] where each row is - of the form [height, width, channels] indicating the shapes - of true images in the resized images, as resized images can be padded - with zeros. Or None if the clip window should cover the full image. - - Returns: - a 2-D float32 tensor of the form [batch_size, 4] containing the clip - window for each image in the batch in normalized coordinates (relative to - the resized dimensions) where each clip window is of the form [ymin, xmin, - ymax, xmax] or a default clip window of [0, 0, 1, 1]. - - """ - if true_image_shapes is None: - return tf.constant([0, 0, 1, 1], dtype=tf.float32) - - resized_inputs_shape = shape_utils.combined_static_and_dynamic_shape( - preprocessed_images) - true_heights, true_widths, _ = tf.unstack( - tf.to_float(true_image_shapes), axis=1) - padded_height = tf.to_float(resized_inputs_shape[1]) - padded_width = tf.to_float(resized_inputs_shape[2]) - return tf.stack( - [ - tf.zeros_like(true_heights), - tf.zeros_like(true_widths), true_heights / padded_height, - true_widths / padded_width - ], - axis=1) - - def predict(self, preprocessed_inputs, true_image_shapes): - """Predicts unpostprocessed tensors from input tensor. - - This function takes an input batch of images and runs it through the forward - pass of the network to yield unpostprocessesed predictions. - - A side effect of calling the predict method is that self._anchors is - populated with a box_list.BoxList of anchors. These anchors must be - constructed before the postprocess or loss functions can be called. - - Args: - preprocessed_inputs: a [batch, height, width, channels] image tensor. - true_image_shapes: int32 tensor of shape [batch, 3] where each row is - of the form [height, width, channels] indicating the shapes - of true images in the resized images, as resized images can be padded - with zeros. - - Returns: - prediction_dict: a dictionary holding "raw" prediction tensors: - 1) preprocessed_inputs: the [batch, height, width, channels] image - tensor. - 2) box_encodings: 4-D float tensor of shape [batch_size, num_anchors, - box_code_dimension] containing predicted boxes. - 3) class_predictions_with_background: 3-D float tensor of shape - [batch_size, num_anchors, num_classes+1] containing class predictions - (logits) for each of the anchors. Note that this tensor *includes* - background class predictions (at class index 0). - 4) feature_maps: a list of tensors where the ith tensor has shape - [batch, height_i, width_i, depth_i]. - 5) anchors: 2-D float tensor of shape [num_anchors, 4] containing - the generated anchors in normalized coordinates. 
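Example (editor's illustrative sketch, not part of the original file):
a minimal preprocess -> predict round trip; `ssd_model` stands for a
hypothetical, fully built SSDMetaArch instance (in the real codebase these
are assembled by builders/model_builder.py).

  import tensorflow as tf

  images = tf.zeros([1, 300, 300, 3], dtype=tf.float32)
  preprocessed, true_shapes = ssd_model.preprocess(images)
  prediction_dict = ssd_model.predict(preprocessed, true_shapes)
  # prediction_dict['box_encodings'] has shape [1, num_anchors, code_size];
  # after this call ssd_model.anchors is populated, so postprocess() and
  # loss() may now be invoked.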
- """ - batchnorm_updates_collections = (None if self._inplace_batchnorm_update - else tf.GraphKeys.UPDATE_OPS) - with slim.arg_scope([slim.batch_norm], - is_training=(self._is_training and - not self._freeze_batchnorm), - updates_collections=batchnorm_updates_collections): - with tf.variable_scope(None, self._extract_features_scope, - [preprocessed_inputs]): - feature_maps = self._feature_extractor.extract_features( - preprocessed_inputs) - feature_map_spatial_dims = self._get_feature_map_spatial_dims( - feature_maps) - image_shape = shape_utils.combined_static_and_dynamic_shape( - preprocessed_inputs) - self._anchors = box_list_ops.concatenate( - self._anchor_generator.generate( - feature_map_spatial_dims, - im_height=image_shape[1], - im_width=image_shape[2])) - prediction_dict = self._box_predictor.predict( - feature_maps, self._anchor_generator.num_anchors_per_location()) - box_encodings = tf.concat(prediction_dict['box_encodings'], axis=1) - if box_encodings.shape.ndims == 4 and box_encodings.shape[2] == 1: - box_encodings = tf.squeeze(box_encodings, axis=2) - class_predictions_with_background = tf.concat( - prediction_dict['class_predictions_with_background'], axis=1) - predictions_dict = { - 'preprocessed_inputs': preprocessed_inputs, - 'box_encodings': box_encodings, - 'class_predictions_with_background': - class_predictions_with_background, - 'feature_maps': feature_maps, - 'anchors': self._anchors.get() - } - self._batched_prediction_tensor_names = [x for x in predictions_dict - if x != 'anchors'] - return predictions_dict - - def _get_feature_map_spatial_dims(self, feature_maps): - """Return list of spatial dimensions for each feature map in a list. - - Args: - feature_maps: a list of tensors where the ith tensor has shape - [batch, height_i, width_i, depth_i]. - - Returns: - a list of pairs (height, width) for each feature map in feature_maps - """ - feature_map_shapes = [ - shape_utils.combined_static_and_dynamic_shape( - feature_map) for feature_map in feature_maps - ] - return [(shape[1], shape[2]) for shape in feature_map_shapes] - - def postprocess(self, prediction_dict, true_image_shapes): - """Converts prediction tensors to final detections. - - This function converts raw predictions tensors to final detection results by - slicing off the background class, decoding box predictions and applying - non max suppression and clipping to the image window. - - See base class for output format conventions. Note also that by default, - scores are to be interpreted as logits, but if a score_conversion_fn is - used, then scores are remapped (and may thus have a different - interpretation). - - Args: - prediction_dict: a dictionary holding prediction tensors with - 1) preprocessed_inputs: a [batch, height, width, channels] image - tensor. - 2) box_encodings: 3-D float tensor of shape [batch_size, num_anchors, - box_code_dimension] containing predicted boxes. - 3) class_predictions_with_background: 3-D float tensor of shape - [batch_size, num_anchors, num_classes+1] containing class predictions - (logits) for each of the anchors. Note that this tensor *includes* - background class predictions. - true_image_shapes: int32 tensor of shape [batch, 3] where each row is - of the form [height, width, channels] indicating the shapes - of true images in the resized images, as resized images can be padded - with zeros. Or None, if the clip window should cover the full image. 
- - Returns: - detections: a dictionary containing the following fields - detection_boxes: [batch, max_detections, 4] - detection_scores: [batch, max_detections] - detection_classes: [batch, max_detections] - detection_keypoints: [batch, max_detections, num_keypoints, 2] (if - encoded in the prediction_dict 'box_encodings') - num_detections: [batch] - Raises: - ValueError: if prediction_dict does not contain `box_encodings` or - `class_predictions_with_background` fields. - """ - if ('box_encodings' not in prediction_dict or - 'class_predictions_with_background' not in prediction_dict): - raise ValueError('prediction_dict does not contain expected entries.') - with tf.name_scope('Postprocessor'): - preprocessed_images = prediction_dict['preprocessed_inputs'] - box_encodings = prediction_dict['box_encodings'] - class_predictions = prediction_dict['class_predictions_with_background'] - detection_boxes, detection_keypoints = self._batch_decode(box_encodings) - detection_boxes = tf.expand_dims(detection_boxes, axis=2) - - detection_scores_with_background = self._score_conversion_fn( - class_predictions) - detection_scores = tf.slice(detection_scores_with_background, [0, 0, 1], - [-1, -1, -1]) - additional_fields = None - - if detection_keypoints is not None: - additional_fields = { - fields.BoxListFields.keypoints: detection_keypoints} - (nmsed_boxes, nmsed_scores, nmsed_classes, _, nmsed_additional_fields, - num_detections) = self._non_max_suppression_fn( - detection_boxes, - detection_scores, - clip_window=self._compute_clip_window( - preprocessed_images, true_image_shapes), - additional_fields=additional_fields) - detection_dict = { - fields.DetectionResultFields.detection_boxes: nmsed_boxes, - fields.DetectionResultFields.detection_scores: nmsed_scores, - fields.DetectionResultFields.detection_classes: nmsed_classes, - fields.DetectionResultFields.num_detections: - tf.to_float(num_detections) - } - if (nmsed_additional_fields is not None and - fields.BoxListFields.keypoints in nmsed_additional_fields): - detection_dict[fields.DetectionResultFields.detection_keypoints] = ( - nmsed_additional_fields[fields.BoxListFields.keypoints]) - return detection_dict - - def loss(self, prediction_dict, true_image_shapes, scope=None): - """Compute scalar loss tensors with respect to provided groundtruth. - - Calling this function requires that groundtruth tensors have been - provided via the provide_groundtruth function. - - Args: - prediction_dict: a dictionary holding prediction tensors with - 1) box_encodings: 3-D float tensor of shape [batch_size, num_anchors, - box_code_dimension] containing predicted boxes. - 2) class_predictions_with_background: 3-D float tensor of shape - [batch_size, num_anchors, num_classes+1] containing class predictions - (logits) for each of the anchors. Note that this tensor *includes* - background class predictions. - true_image_shapes: int32 tensor of shape [batch, 3] where each row is - of the form [height, width, channels] indicating the shapes - of true images in the resized images, as resized images can be padded - with zeros. - scope: Optional scope name. - - Returns: - a dictionary mapping loss keys (`localization_loss` and - `classification_loss`) to scalar tensors representing corresponding loss - values. 
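Example (editor's illustrative sketch, not part of the original file):
typical training-step wiring around loss(); `ssd_model`, `images`, and the
groundtruth lists are assumed to exist.

  preprocessed, true_shapes = ssd_model.preprocess(images)
  ssd_model.provide_groundtruth(groundtruth_boxes_list,
                                groundtruth_classes_list)
  prediction_dict = ssd_model.predict(preprocessed, true_shapes)
  losses_dict = ssd_model.loss(prediction_dict, true_shapes)
  total_loss = tf.add_n(list(losses_dict.values()))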
- """ - with tf.name_scope(scope, 'Loss', prediction_dict.values()): - keypoints = None - if self.groundtruth_has_field(fields.BoxListFields.keypoints): - keypoints = self.groundtruth_lists(fields.BoxListFields.keypoints) - weights = None - if self.groundtruth_has_field(fields.BoxListFields.weights): - weights = self.groundtruth_lists(fields.BoxListFields.weights) - (batch_cls_targets, batch_cls_weights, batch_reg_targets, - batch_reg_weights, match_list) = self._assign_targets( - self.groundtruth_lists(fields.BoxListFields.boxes), - self.groundtruth_lists(fields.BoxListFields.classes), - keypoints, weights) - if self._add_summaries: - self._summarize_target_assignment( - self.groundtruth_lists(fields.BoxListFields.boxes), match_list) - - if self._random_example_sampler: - batch_sampled_indicator = tf.to_float( - shape_utils.static_or_dynamic_map_fn( - self._minibatch_subsample_fn, - [batch_cls_targets, batch_cls_weights], - dtype=tf.bool, - parallel_iterations=self._parallel_iterations, - back_prop=True)) - batch_reg_weights = tf.multiply(batch_sampled_indicator, - batch_reg_weights) - batch_cls_weights = tf.multiply(batch_sampled_indicator, - batch_cls_weights) - - location_losses = self._localization_loss( - prediction_dict['box_encodings'], - batch_reg_targets, - ignore_nan_targets=True, - weights=batch_reg_weights) - cls_losses = ops.reduce_sum_trailing_dimensions( - self._classification_loss( - prediction_dict['class_predictions_with_background'], - batch_cls_targets, - weights=batch_cls_weights), - ndims=2) - - if self._hard_example_miner: - (localization_loss, classification_loss) = self._apply_hard_mining( - location_losses, cls_losses, prediction_dict, match_list) - if self._add_summaries: - self._hard_example_miner.summarize() - else: - if self._add_summaries: - class_ids = tf.argmax(batch_cls_targets, axis=2) - flattened_class_ids = tf.reshape(class_ids, [-1]) - flattened_classification_losses = tf.reshape(cls_losses, [-1]) - self._summarize_anchor_classification_loss( - flattened_class_ids, flattened_classification_losses) - localization_loss = tf.reduce_sum(location_losses) - classification_loss = tf.reduce_sum(cls_losses) - - # Optionally normalize by number of positive matches - normalizer = tf.constant(1.0, dtype=tf.float32) - if self._normalize_loss_by_num_matches: - normalizer = tf.maximum(tf.to_float(tf.reduce_sum(batch_reg_weights)), - 1.0) - - localization_loss_normalizer = normalizer - if self._normalize_loc_loss_by_codesize: - localization_loss_normalizer *= self._box_coder.code_size - localization_loss = tf.multiply((self._localization_loss_weight / - localization_loss_normalizer), - localization_loss, - name='localization_loss') - classification_loss = tf.multiply((self._classification_loss_weight / - normalizer), classification_loss, - name='classification_loss') - - loss_dict = { - str(localization_loss.op.name): localization_loss, - str(classification_loss.op.name): classification_loss - } - return loss_dict - - def _minibatch_subsample_fn(self, inputs): - """Randomly samples anchors for one image. - - Args: - inputs: a list of 2 inputs. First one is a tensor of shape [num_anchors, - num_classes] indicating targets assigned to each anchor. Second one - is a tensor of shape [num_anchors] indicating the class weight of each - anchor. - - Returns: - batch_sampled_indicator: bool tensor of shape [num_anchors] indicating - whether the anchor should be selected for loss computation. 
- """ - cls_targets, cls_weights = inputs - if self._add_background_class: - # Set background_class bits to 0 so that the positives_indicator - # computation would not consider background class. - background_class = tf.zeros_like(tf.slice(cls_targets, [0, 0], [-1, 1])) - regular_class = tf.slice(cls_targets, [0, 1], [-1, -1]) - cls_targets = tf.concat([background_class, regular_class], 1) - positives_indicator = tf.reduce_sum(cls_targets, axis=1) - return self._random_example_sampler.subsample( - tf.cast(cls_weights, tf.bool), - batch_size=None, - labels=tf.cast(positives_indicator, tf.bool)) - - def _summarize_anchor_classification_loss(self, class_ids, cls_losses): - positive_indices = tf.where(tf.greater(class_ids, 0)) - positive_anchor_cls_loss = tf.squeeze( - tf.gather(cls_losses, positive_indices), axis=1) - visualization_utils.add_cdf_image_summary(positive_anchor_cls_loss, - 'PositiveAnchorLossCDF') - negative_indices = tf.where(tf.equal(class_ids, 0)) - negative_anchor_cls_loss = tf.squeeze( - tf.gather(cls_losses, negative_indices), axis=1) - visualization_utils.add_cdf_image_summary(negative_anchor_cls_loss, - 'NegativeAnchorLossCDF') - - def _assign_targets(self, groundtruth_boxes_list, groundtruth_classes_list, - groundtruth_keypoints_list=None, - groundtruth_weights_list=None): - """Assign groundtruth targets. - - Adds a background class to each one-hot encoding of groundtruth classes - and uses target assigner to obtain regression and classification targets. - - Args: - groundtruth_boxes_list: a list of 2-D tensors of shape [num_boxes, 4] - containing coordinates of the groundtruth boxes. - Groundtruth boxes are provided in [y_min, x_min, y_max, x_max] - format and assumed to be normalized and clipped - relative to the image window with y_min <= y_max and x_min <= x_max. - groundtruth_classes_list: a list of 2-D one-hot (or k-hot) tensors of - shape [num_boxes, num_classes] containing the class targets with the 0th - index assumed to map to the first non-background class. - groundtruth_keypoints_list: (optional) a list of 3-D tensors of shape - [num_boxes, num_keypoints, 2] - groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape - [num_boxes] containing weights for groundtruth boxes. - - Returns: - batch_cls_targets: a tensor with shape [batch_size, num_anchors, - num_classes], - batch_cls_weights: a tensor with shape [batch_size, num_anchors], - batch_reg_targets: a tensor with shape [batch_size, num_anchors, - box_code_dimension] - batch_reg_weights: a tensor with shape [batch_size, num_anchors], - match_list: a list of matcher.Match objects encoding the match between - anchors and groundtruth boxes for each image of the batch, - with rows of the Match objects corresponding to groundtruth boxes - and columns corresponding to anchors. 
- """ - groundtruth_boxlists = [ - box_list.BoxList(boxes) for boxes in groundtruth_boxes_list - ] - if self._add_background_class: - groundtruth_classes_with_background_list = [ - tf.pad(one_hot_encoding, [[0, 0], [1, 0]], mode='CONSTANT') - for one_hot_encoding in groundtruth_classes_list - ] - else: - groundtruth_classes_with_background_list = groundtruth_classes_list - - if groundtruth_keypoints_list is not None: - for boxlist, keypoints in zip( - groundtruth_boxlists, groundtruth_keypoints_list): - boxlist.add_field(fields.BoxListFields.keypoints, keypoints) - return target_assigner.batch_assign_targets( - self._target_assigner, self.anchors, groundtruth_boxlists, - groundtruth_classes_with_background_list, groundtruth_weights_list) - - def _summarize_target_assignment(self, groundtruth_boxes_list, match_list): - """Creates tensorflow summaries for the input boxes and anchors. - - This function creates four summaries corresponding to the average - number (over images in a batch) of (1) groundtruth boxes, (2) anchors - marked as positive, (3) anchors marked as negative, and (4) anchors marked - as ignored. - - Args: - groundtruth_boxes_list: a list of 2-D tensors of shape [num_boxes, 4] - containing corners of the groundtruth boxes. - match_list: a list of matcher.Match objects encoding the match between - anchors and groundtruth boxes for each image of the batch, - with rows of the Match objects corresponding to groundtruth boxes - and columns corresponding to anchors. - """ - num_boxes_per_image = tf.stack( - [tf.shape(x)[0] for x in groundtruth_boxes_list]) - pos_anchors_per_image = tf.stack( - [match.num_matched_columns() for match in match_list]) - neg_anchors_per_image = tf.stack( - [match.num_unmatched_columns() for match in match_list]) - ignored_anchors_per_image = tf.stack( - [match.num_ignored_columns() for match in match_list]) - tf.summary.scalar('AvgNumGroundtruthBoxesPerImage', - tf.reduce_mean(tf.to_float(num_boxes_per_image)), - family='TargetAssignment') - tf.summary.scalar('AvgNumPositiveAnchorsPerImage', - tf.reduce_mean(tf.to_float(pos_anchors_per_image)), - family='TargetAssignment') - tf.summary.scalar('AvgNumNegativeAnchorsPerImage', - tf.reduce_mean(tf.to_float(neg_anchors_per_image)), - family='TargetAssignment') - tf.summary.scalar('AvgNumIgnoredAnchorsPerImage', - tf.reduce_mean(tf.to_float(ignored_anchors_per_image)), - family='TargetAssignment') - - def _apply_hard_mining(self, location_losses, cls_losses, prediction_dict, - match_list): - """Applies hard mining to anchorwise losses. - - Args: - location_losses: Float tensor of shape [batch_size, num_anchors] - representing anchorwise location losses. - cls_losses: Float tensor of shape [batch_size, num_anchors] - representing anchorwise classification losses. - prediction_dict: p a dictionary holding prediction tensors with - 1) box_encodings: 3-D float tensor of shape [batch_size, num_anchors, - box_code_dimension] containing predicted boxes. - 2) class_predictions_with_background: 3-D float tensor of shape - [batch_size, num_anchors, num_classes+1] containing class predictions - (logits) for each of the anchors. Note that this tensor *includes* - background class predictions. - match_list: a list of matcher.Match objects encoding the match between - anchors and groundtruth boxes for each image of the batch, - with rows of the Match objects corresponding to groundtruth boxes - and columns corresponding to anchors. 
- - Returns: - mined_location_loss: a float scalar with sum of localization losses from - selected hard examples. - mined_cls_loss: a float scalar with sum of classification losses from - selected hard examples. - """ - class_predictions = tf.slice( - prediction_dict['class_predictions_with_background'], [0, 0, - 1], [-1, -1, -1]) - - decoded_boxes, _ = self._batch_decode(prediction_dict['box_encodings']) - decoded_box_tensors_list = tf.unstack(decoded_boxes) - class_prediction_list = tf.unstack(class_predictions) - decoded_boxlist_list = [] - for box_location, box_score in zip(decoded_box_tensors_list, - class_prediction_list): - decoded_boxlist = box_list.BoxList(box_location) - decoded_boxlist.add_field('scores', box_score) - decoded_boxlist_list.append(decoded_boxlist) - return self._hard_example_miner( - location_losses=location_losses, - cls_losses=cls_losses, - decoded_boxlist_list=decoded_boxlist_list, - match_list=match_list) - - def _batch_decode(self, box_encodings): - """Decodes a batch of box encodings with respect to the anchors. - - Args: - box_encodings: A float32 tensor of shape - [batch_size, num_anchors, box_code_size] containing box encodings. - - Returns: - decoded_boxes: A float32 tensor of shape - [batch_size, num_anchors, 4] containing the decoded boxes. - decoded_keypoints: A float32 tensor of shape - [batch_size, num_anchors, num_keypoints, 2] containing the decoded - keypoints if present in the input `box_encodings`, None otherwise. - """ - combined_shape = shape_utils.combined_static_and_dynamic_shape( - box_encodings) - batch_size = combined_shape[0] - tiled_anchor_boxes = tf.tile( - tf.expand_dims(self.anchors.get(), 0), [batch_size, 1, 1]) - tiled_anchors_boxlist = box_list.BoxList( - tf.reshape(tiled_anchor_boxes, [-1, 4])) - decoded_boxes = self._box_coder.decode( - tf.reshape(box_encodings, [-1, self._box_coder.code_size]), - tiled_anchors_boxlist) - decoded_keypoints = None - if decoded_boxes.has_field(fields.BoxListFields.keypoints): - decoded_keypoints = decoded_boxes.get_field( - fields.BoxListFields.keypoints) - num_keypoints = decoded_keypoints.get_shape()[1] - decoded_keypoints = tf.reshape( - decoded_keypoints, - tf.stack([combined_shape[0], combined_shape[1], num_keypoints, 2])) - decoded_boxes = tf.reshape(decoded_boxes.get(), tf.stack( - [combined_shape[0], combined_shape[1], 4])) - return decoded_boxes, decoded_keypoints - - def restore_map(self, - fine_tune_checkpoint_type='detection', - load_all_detection_checkpoint_vars=False): - """Returns a map of variables to load from a foreign checkpoint. - - See parent class for details. - - Args: - fine_tune_checkpoint_type: whether to restore from a full detection - checkpoint (with compatible variable names) or to restore from a - classification checkpoint for initialization prior to training. - Valid values: `detection`, `classification`. Default 'detection'. - load_all_detection_checkpoint_vars: whether to load all variables (when - `fine_tune_checkpoint_type='detection'`). If False, only variables - within the appropriate scopes are included. Default False. - - Returns: - A dict mapping variable names (to load from a checkpoint) to variables in - the model graph. - Raises: - ValueError: if fine_tune_checkpoint_type is neither `classification` - nor `detection`. 
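restore_map's classification branch strips the feature-extractor scope from each variable name so that backbone weights line up with the names in a classification checkpoint. A sketch of that remapping, assuming the scope is 'FeatureExtractor' (the prefix the unit tests below assert on):

import re

extract_features_scope = 'FeatureExtractor'  # assumed scope name
var_name = 'FeatureExtractor/mock_model/layer1/weights'

# Same stripping as in restore_map() above.
checkpoint_name = re.split('^' + extract_features_scope + '/', var_name)[-1]
assert checkpoint_name == 'mock_model/layer1/weights'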
- """ - if fine_tune_checkpoint_type not in ['detection', 'classification']: - raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format( - fine_tune_checkpoint_type)) - variables_to_restore = {} - for variable in tf.global_variables(): - var_name = variable.op.name - if (fine_tune_checkpoint_type == 'detection' and - load_all_detection_checkpoint_vars): - variables_to_restore[var_name] = variable - else: - if var_name.startswith(self._extract_features_scope): - if fine_tune_checkpoint_type == 'classification': - var_name = ( - re.split('^' + self._extract_features_scope + '/', - var_name)[-1]) - variables_to_restore[var_name] = variable - - return variables_to_restore diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/ssd_meta_arch_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/ssd_meta_arch_test.py deleted file mode 100644 index b1b62a3c00bd602efc42c7359feee1206da19584..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/meta_architectures/ssd_meta_arch_test.py +++ /dev/null @@ -1,554 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.meta_architectures.ssd_meta_arch.""" -import functools -import numpy as np -import tensorflow as tf - -from object_detection.core import anchor_generator -from object_detection.core import balanced_positive_negative_sampler as sampler -from object_detection.core import box_list -from object_detection.core import losses -from object_detection.core import post_processing -from object_detection.core import region_similarity_calculator as sim_calc -from object_detection.meta_architectures import ssd_meta_arch -from object_detection.utils import test_case -from object_detection.utils import test_utils - -slim = tf.contrib.slim - - -class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): - - def __init__(self): - super(FakeSSDFeatureExtractor, self).__init__( - is_training=True, - depth_multiplier=0, - min_depth=0, - pad_to_multiple=1, - conv_hyperparams_fn=None) - - def preprocess(self, resized_inputs): - return tf.identity(resized_inputs) - - def extract_features(self, preprocessed_inputs): - with tf.variable_scope('mock_model'): - features = slim.conv2d(inputs=preprocessed_inputs, num_outputs=32, - kernel_size=1, scope='layer1') - return [features] - - -class MockAnchorGenerator2x2(anchor_generator.AnchorGenerator): - """Sets up a simple 2x2 anchor grid on the unit square.""" - - def name_scope(self): - return 'MockAnchorGenerator' - - def num_anchors_per_location(self): - return [1] - - def _generate(self, feature_map_shape_list, im_height, im_width): - return [box_list.BoxList( - tf.constant([[0, 0, .5, .5], - [0, .5, .5, 1], - [.5, 0, 1, .5], - [1., 1., 1.5, 1.5] # Anchor that is outside clip_window. 
- ], tf.float32))] - - def num_anchors(self): - return 4 - - -def _get_value_for_matching_key(dictionary, suffix): - for key in dictionary.keys(): - if key.endswith(suffix): - return dictionary[key] - raise ValueError('key not found {}'.format(suffix)) - - -class SsdMetaArchTest(test_case.TestCase): - - def _create_model(self, - apply_hard_mining=True, - normalize_loc_loss_by_codesize=False, - add_background_class=True, - random_example_sampling=False): - is_training = False - num_classes = 1 - mock_anchor_generator = MockAnchorGenerator2x2() - mock_box_predictor = test_utils.MockBoxPredictor( - is_training, num_classes) - mock_box_coder = test_utils.MockBoxCoder() - fake_feature_extractor = FakeSSDFeatureExtractor() - mock_matcher = test_utils.MockMatcher() - region_similarity_calculator = sim_calc.IouSimilarity() - encode_background_as_zeros = False - def image_resizer_fn(image): - return [tf.identity(image), tf.shape(image)] - - classification_loss = losses.WeightedSigmoidClassificationLoss() - localization_loss = losses.WeightedSmoothL1LocalizationLoss() - non_max_suppression_fn = functools.partial( - post_processing.batch_multiclass_non_max_suppression, - score_thresh=-20.0, - iou_thresh=1.0, - max_size_per_class=5, - max_total_size=5) - classification_loss_weight = 1.0 - localization_loss_weight = 1.0 - negative_class_weight = 1.0 - normalize_loss_by_num_matches = False - - hard_example_miner = None - if apply_hard_mining: - # This hard example miner is expected to be a no-op. - hard_example_miner = losses.HardExampleMiner( - num_hard_examples=None, - iou_threshold=1.0) - - random_example_sampler = None - if random_example_sampling: - random_example_sampler = sampler.BalancedPositiveNegativeSampler( - positive_fraction=0.5) - - code_size = 4 - model = ssd_meta_arch.SSDMetaArch( - is_training, - mock_anchor_generator, - mock_box_predictor, - mock_box_coder, - fake_feature_extractor, - mock_matcher, - region_similarity_calculator, - encode_background_as_zeros, - negative_class_weight, - image_resizer_fn, - non_max_suppression_fn, - tf.identity, - classification_loss, - localization_loss, - classification_loss_weight, - localization_loss_weight, - normalize_loss_by_num_matches, - hard_example_miner, - add_summaries=False, - normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize, - freeze_batchnorm=False, - inplace_batchnorm_update=False, - add_background_class=add_background_class, - random_example_sampler=random_example_sampler) - return model, num_classes, mock_anchor_generator.num_anchors(), code_size - - def test_preprocess_preserves_shapes_with_dynamic_input_image(self): - image_shapes = [(3, None, None, 3), - (None, 10, 10, 3), - (None, None, None, 3)] - model, _, _, _ = self._create_model() - for image_shape in image_shapes: - image_placeholder = tf.placeholder(tf.float32, shape=image_shape) - preprocessed_inputs, _ = model.preprocess(image_placeholder) - self.assertAllEqual(preprocessed_inputs.shape.as_list(), image_shape) - - def test_preprocess_preserves_shape_with_static_input_image(self): - def graph_fn(input_image): - model, _, _, _ = self._create_model() - return model.preprocess(input_image) - input_image = np.random.rand(2, 3, 3, 3).astype(np.float32) - preprocessed_inputs, _ = self.execute(graph_fn, [input_image]) - self.assertAllEqual(preprocessed_inputs.shape, [2, 3, 3, 3]) - - def test_predict_result_shapes_on_image_with_dynamic_shape(self): - batch_size = 3 - image_size = 2 - input_shapes = [(None, image_size, image_size, 3), - (batch_size, None, None, 3), - 
(None, None, None, 3)] - - for input_shape in input_shapes: - tf_graph = tf.Graph() - with tf_graph.as_default(): - model, num_classes, num_anchors, code_size = self._create_model() - preprocessed_input_placeholder = tf.placeholder(tf.float32, - shape=input_shape) - prediction_dict = model.predict( - preprocessed_input_placeholder, true_image_shapes=None) - - self.assertTrue('box_encodings' in prediction_dict) - self.assertTrue('class_predictions_with_background' in prediction_dict) - self.assertTrue('feature_maps' in prediction_dict) - self.assertTrue('anchors' in prediction_dict) - - init_op = tf.global_variables_initializer() - with self.test_session(graph=tf_graph) as sess: - sess.run(init_op) - prediction_out = sess.run(prediction_dict, - feed_dict={ - preprocessed_input_placeholder: - np.random.uniform( - size=(batch_size, 2, 2, 3))}) - expected_box_encodings_shape_out = (batch_size, num_anchors, code_size) - expected_class_predictions_with_background_shape_out = (batch_size, - num_anchors, - num_classes + 1) - - self.assertAllEqual(prediction_out['box_encodings'].shape, - expected_box_encodings_shape_out) - self.assertAllEqual( - prediction_out['class_predictions_with_background'].shape, - expected_class_predictions_with_background_shape_out) - - def test_predict_result_shapes_on_image_with_static_shape(self): - - with tf.Graph().as_default(): - _, num_classes, num_anchors, code_size = self._create_model() - - def graph_fn(input_image): - model, _, _, _ = self._create_model() - predictions = model.predict(input_image, true_image_shapes=None) - return (predictions['box_encodings'], - predictions['class_predictions_with_background'], - predictions['feature_maps'], - predictions['anchors']) - batch_size = 3 - image_size = 2 - channels = 3 - input_image = np.random.rand(batch_size, image_size, image_size, - channels).astype(np.float32) - expected_box_encodings_shape = (batch_size, num_anchors, code_size) - expected_class_predictions_shape = (batch_size, num_anchors, num_classes+1) - (box_encodings, class_predictions, _, _) = self.execute(graph_fn, - [input_image]) - self.assertAllEqual(box_encodings.shape, expected_box_encodings_shape) - self.assertAllEqual(class_predictions.shape, - expected_class_predictions_shape) - - def test_postprocess_results_are_correct(self): - batch_size = 2 - image_size = 2 - input_shapes = [(batch_size, image_size, image_size, 3), - (None, image_size, image_size, 3), - (batch_size, None, None, 3), - (None, None, None, 3)] - - expected_boxes = [ - [ - [0, 0, .5, .5], - [0, .5, .5, 1], - [.5, 0, 1, .5], - [0, 0, 0, 0], # pruned prediction - [0, 0, 0, 0] - ], # padding - [ - [0, 0, .5, .5], - [0, .5, .5, 1], - [.5, 0, 1, .5], - [0, 0, 0, 0], # pruned prediction - [0, 0, 0, 0] - ] - ] # padding - expected_scores = [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]] - expected_classes = [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]] - expected_num_detections = np.array([3, 3]) - - for input_shape in input_shapes: - tf_graph = tf.Graph() - with tf_graph.as_default(): - model, _, _, _ = self._create_model() - input_placeholder = tf.placeholder(tf.float32, shape=input_shape) - preprocessed_inputs, true_image_shapes = model.preprocess( - input_placeholder) - prediction_dict = model.predict(preprocessed_inputs, - true_image_shapes) - detections = model.postprocess(prediction_dict, true_image_shapes) - self.assertTrue('detection_boxes' in detections) - self.assertTrue('detection_scores' in detections) - self.assertTrue('detection_classes' in detections) - self.assertTrue('num_detections' in 
detections) - init_op = tf.global_variables_initializer() - with self.test_session(graph=tf_graph) as sess: - sess.run(init_op) - detections_out = sess.run(detections, - feed_dict={ - input_placeholder: - np.random.uniform( - size=(batch_size, 2, 2, 3))}) - for image_idx in range(batch_size): - self.assertTrue( - test_utils.first_rows_close_as_set( - detections_out['detection_boxes'][image_idx].tolist(), - expected_boxes[image_idx])) - self.assertAllClose(detections_out['detection_scores'], expected_scores) - self.assertAllClose(detections_out['detection_classes'], expected_classes) - self.assertAllClose(detections_out['num_detections'], - expected_num_detections) - - def test_loss_results_are_correct(self): - - with tf.Graph().as_default(): - _, num_classes, num_anchors, _ = self._create_model() - def graph_fn(preprocessed_tensor, groundtruth_boxes1, groundtruth_boxes2, - groundtruth_classes1, groundtruth_classes2): - groundtruth_boxes_list = [groundtruth_boxes1, groundtruth_boxes2] - groundtruth_classes_list = [groundtruth_classes1, groundtruth_classes2] - model, _, _, _ = self._create_model(apply_hard_mining=False) - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list) - prediction_dict = model.predict(preprocessed_tensor, - true_image_shapes=None) - loss_dict = model.loss(prediction_dict, true_image_shapes=None) - return ( - _get_value_for_matching_key(loss_dict, 'Loss/localization_loss'), - _get_value_for_matching_key(loss_dict, 'Loss/classification_loss')) - - batch_size = 2 - preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32) - groundtruth_boxes1 = np.array([[0, 0, .5, .5]], dtype=np.float32) - groundtruth_boxes2 = np.array([[0, 0, .5, .5]], dtype=np.float32) - groundtruth_classes1 = np.array([[1]], dtype=np.float32) - groundtruth_classes2 = np.array([[1]], dtype=np.float32) - expected_localization_loss = 0.0 - expected_classification_loss = (batch_size * num_anchors - * (num_classes+1) * np.log(2.0)) - (localization_loss, - classification_loss) = self.execute(graph_fn, [preprocessed_input, - groundtruth_boxes1, - groundtruth_boxes2, - groundtruth_classes1, - groundtruth_classes2]) - self.assertAllClose(localization_loss, expected_localization_loss) - self.assertAllClose(classification_loss, expected_classification_loss) - - def test_loss_results_are_correct_with_normalize_by_codesize_true(self): - - with tf.Graph().as_default(): - _, _, _, _ = self._create_model() - def graph_fn(preprocessed_tensor, groundtruth_boxes1, groundtruth_boxes2, - groundtruth_classes1, groundtruth_classes2): - groundtruth_boxes_list = [groundtruth_boxes1, groundtruth_boxes2] - groundtruth_classes_list = [groundtruth_classes1, groundtruth_classes2] - model, _, _, _ = self._create_model(apply_hard_mining=False, - normalize_loc_loss_by_codesize=True) - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list) - prediction_dict = model.predict(preprocessed_tensor, - true_image_shapes=None) - loss_dict = model.loss(prediction_dict, true_image_shapes=None) - return (_get_value_for_matching_key(loss_dict, 'Loss/localization_loss'),) - - batch_size = 2 - preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32) - groundtruth_boxes1 = np.array([[0, 0, 1, 1]], dtype=np.float32) - groundtruth_boxes2 = np.array([[0, 0, 1, 1]], dtype=np.float32) - groundtruth_classes1 = np.array([[1]], dtype=np.float32) - groundtruth_classes2 = np.array([[1]], dtype=np.float32) - expected_localization_loss = 0.5 / 4 - localization_loss = 
self.execute(graph_fn, [preprocessed_input, - groundtruth_boxes1, - groundtruth_boxes2, - groundtruth_classes1, - groundtruth_classes2]) - self.assertAllClose(localization_loss, expected_localization_loss) - - def test_loss_results_are_correct_with_hard_example_mining(self): - - with tf.Graph().as_default(): - _, num_classes, num_anchors, _ = self._create_model() - def graph_fn(preprocessed_tensor, groundtruth_boxes1, groundtruth_boxes2, - groundtruth_classes1, groundtruth_classes2): - groundtruth_boxes_list = [groundtruth_boxes1, groundtruth_boxes2] - groundtruth_classes_list = [groundtruth_classes1, groundtruth_classes2] - model, _, _, _ = self._create_model() - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list) - prediction_dict = model.predict(preprocessed_tensor, - true_image_shapes=None) - loss_dict = model.loss(prediction_dict, true_image_shapes=None) - return ( - _get_value_for_matching_key(loss_dict, 'Loss/localization_loss'), - _get_value_for_matching_key(loss_dict, 'Loss/classification_loss')) - - batch_size = 2 - preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32) - groundtruth_boxes1 = np.array([[0, 0, .5, .5]], dtype=np.float32) - groundtruth_boxes2 = np.array([[0, 0, .5, .5]], dtype=np.float32) - groundtruth_classes1 = np.array([[1]], dtype=np.float32) - groundtruth_classes2 = np.array([[1]], dtype=np.float32) - expected_localization_loss = 0.0 - expected_classification_loss = (batch_size * num_anchors - * (num_classes+1) * np.log(2.0)) - (localization_loss, classification_loss) = self.execute_cpu( - graph_fn, [ - preprocessed_input, groundtruth_boxes1, groundtruth_boxes2, - groundtruth_classes1, groundtruth_classes2 - ]) - self.assertAllClose(localization_loss, expected_localization_loss) - self.assertAllClose(classification_loss, expected_classification_loss) - - def test_loss_results_are_correct_without_add_background_class(self): - - with tf.Graph().as_default(): - _, num_classes, num_anchors, _ = self._create_model( - add_background_class=False) - - def graph_fn(preprocessed_tensor, groundtruth_boxes1, groundtruth_boxes2, - groundtruth_classes1, groundtruth_classes2): - groundtruth_boxes_list = [groundtruth_boxes1, groundtruth_boxes2] - groundtruth_classes_list = [groundtruth_classes1, groundtruth_classes2] - model, _, _, _ = self._create_model( - apply_hard_mining=False, add_background_class=False) - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list) - prediction_dict = model.predict( - preprocessed_tensor, true_image_shapes=None) - loss_dict = model.loss(prediction_dict, true_image_shapes=None) - return (loss_dict['Loss/localization_loss'], - loss_dict['Loss/classification_loss']) - - batch_size = 2 - preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32) - groundtruth_boxes1 = np.array([[0, 0, .5, .5]], dtype=np.float32) - groundtruth_boxes2 = np.array([[0, 0, .5, .5]], dtype=np.float32) - groundtruth_classes1 = np.array([[0, 1]], dtype=np.float32) - groundtruth_classes2 = np.array([[0, 1]], dtype=np.float32) - expected_localization_loss = 0.0 - expected_classification_loss = ( - batch_size * num_anchors * (num_classes + 1) * np.log(2.0)) - (localization_loss, classification_loss) = self.execute( - graph_fn, [ - preprocessed_input, groundtruth_boxes1, groundtruth_boxes2, - groundtruth_classes1, groundtruth_classes2 - ]) - self.assertAllClose(localization_loss, expected_localization_loss) - self.assertAllClose(classification_loss, expected_classification_loss) - - def 
test_restore_map_for_detection_ckpt(self):
-    model, _, _, _ = self._create_model()
-    model.predict(tf.constant(np.array([[[[0, 0], [1, 1]], [[1, 0], [0, 1]]]],
-                                       dtype=np.float32)),
-                  true_image_shapes=None)
-    init_op = tf.global_variables_initializer()
-    saver = tf.train.Saver()
-    save_path = self.get_temp_dir()
-    with self.test_session() as sess:
-      sess.run(init_op)
-      saved_model_path = saver.save(sess, save_path)
-      var_map = model.restore_map(
-          fine_tune_checkpoint_type='detection',
-          load_all_detection_checkpoint_vars=False)
-      self.assertIsInstance(var_map, dict)
-      saver = tf.train.Saver(var_map)
-      saver.restore(sess, saved_model_path)
-      for var in sess.run(tf.report_uninitialized_variables()):
-        self.assertNotIn('FeatureExtractor', var)
-
-  def test_restore_map_for_classification_ckpt(self):
-    # Define mock tensorflow classification graph and save variables.
-    test_graph_classification = tf.Graph()
-    with test_graph_classification.as_default():
-      image = tf.placeholder(dtype=tf.float32, shape=[1, 20, 20, 3])
-      with tf.variable_scope('mock_model'):
-        net = slim.conv2d(image, num_outputs=32, kernel_size=1, scope='layer1')
-        slim.conv2d(net, num_outputs=3, kernel_size=1, scope='layer2')
-
-      init_op = tf.global_variables_initializer()
-      saver = tf.train.Saver()
-      save_path = self.get_temp_dir()
-      with self.test_session(graph=test_graph_classification) as sess:
-        sess.run(init_op)
-        saved_model_path = saver.save(sess, save_path)
-
-    # Create tensorflow detection graph and load variables from
-    # classification checkpoint.
-    test_graph_detection = tf.Graph()
-    with test_graph_detection.as_default():
-      model, _, _, _ = self._create_model()
-      inputs_shape = [2, 2, 2, 3]
-      inputs = tf.to_float(tf.random_uniform(
-          inputs_shape, minval=0, maxval=255, dtype=tf.int32))
-      preprocessed_inputs, true_image_shapes = model.preprocess(inputs)
-      prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
-      model.postprocess(prediction_dict, true_image_shapes)
-      another_variable = tf.Variable([17.0], name='another_variable')  # pylint: disable=unused-variable
-      var_map = model.restore_map(fine_tune_checkpoint_type='classification')
-      self.assertNotIn('another_variable', var_map)
-      self.assertIsInstance(var_map, dict)
-      saver = tf.train.Saver(var_map)
-      with self.test_session(graph=test_graph_detection) as sess:
-        saver.restore(sess, saved_model_path)
-        for var in sess.run(tf.report_uninitialized_variables()):
-          self.assertNotIn('FeatureExtractor', var)
-
-  def test_load_all_det_checkpoint_vars(self):
-    test_graph_detection = tf.Graph()
-    with test_graph_detection.as_default():
-      model, _, _, _ = self._create_model()
-      inputs_shape = [2, 2, 2, 3]
-      inputs = tf.to_float(
-          tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32))
-      preprocessed_inputs, true_image_shapes = model.preprocess(inputs)
-      prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
-      model.postprocess(prediction_dict, true_image_shapes)
-      another_variable = tf.Variable([17.0], name='another_variable')  # pylint: disable=unused-variable
-      var_map = model.restore_map(
-          fine_tune_checkpoint_type='detection',
-          load_all_detection_checkpoint_vars=True)
-      self.assertIsInstance(var_map, dict)
-      self.assertIn('another_variable', var_map)
-
-  def test_loss_results_are_correct_with_random_example_sampling(self):
-
-    with tf.Graph().as_default():
-      _, num_classes, num_anchors, _ = self._create_model(
-          random_example_sampling=True)
-
-    def graph_fn(preprocessed_tensor,
groundtruth_boxes1, groundtruth_boxes2, - groundtruth_classes1, groundtruth_classes2): - groundtruth_boxes_list = [groundtruth_boxes1, groundtruth_boxes2] - groundtruth_classes_list = [groundtruth_classes1, groundtruth_classes2] - model, _, _, _ = self._create_model(random_example_sampling=True) - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list) - prediction_dict = model.predict( - preprocessed_tensor, true_image_shapes=None) - loss_dict = model.loss(prediction_dict, true_image_shapes=None) - return (_get_value_for_matching_key(loss_dict, 'Loss/localization_loss'), - _get_value_for_matching_key(loss_dict, - 'Loss/classification_loss')) - - batch_size = 2 - preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32) - groundtruth_boxes1 = np.array([[0, 0, .5, .5]], dtype=np.float32) - groundtruth_boxes2 = np.array([[0, 0, .5, .5]], dtype=np.float32) - groundtruth_classes1 = np.array([[1]], dtype=np.float32) - groundtruth_classes2 = np.array([[1]], dtype=np.float32) - expected_localization_loss = 0.0 - # Among 4 anchors (1 positive, 3 negative) in this test, only 2 anchors are - # selected (1 positive, 1 negative) since random sampler will adjust number - # of negative examples to make sure positive example fraction in the batch - # is 0.5. - expected_classification_loss = ( - batch_size * 2 * (num_classes + 1) * np.log(2.0)) - (localization_loss, classification_loss) = self.execute_cpu( - graph_fn, [ - preprocessed_input, groundtruth_boxes1, groundtruth_boxes2, - groundtruth_classes1, groundtruth_classes2 - ]) - self.assertAllClose(localization_loss, expected_localization_loss) - self.assertAllClose(classification_loss, expected_classification_loss) - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/coco_evaluation.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/coco_evaluation.py deleted file mode 100644 index 9f9bc7c7acca40dc303cf6b632ff5b43311eb38e..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/coco_evaluation.py +++ /dev/null @@ -1,658 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Class for evaluating object detections with COCO metrics.""" -import numpy as np -import tensorflow as tf - -from object_detection.core import standard_fields -from object_detection.metrics import coco_tools -from object_detection.utils import object_detection_evaluation - - -class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): - """Class to evaluate COCO detection metrics.""" - - def __init__(self, - categories, - include_metrics_per_category=False, - all_metrics_per_category=False): - """Constructor. - - Args: - categories: A list of dicts, each of which has the following keys - - 'id': (required) an integer id uniquely identifying this category. - 'name': (required) string representing category name e.g., 'cat', 'dog'. - include_metrics_per_category: If True, include metrics for each category. - all_metrics_per_category: Whether to include all the summary metrics for - each category in per_category_ap. Be careful with setting it to true if - you have more than handful of categories, because it will pollute - your mldash. - """ - super(CocoDetectionEvaluator, self).__init__(categories) - # _image_ids is a dictionary that maps unique image ids to Booleans which - # indicate whether a corresponding detection has been added. - self._image_ids = {} - self._groundtruth_list = [] - self._detection_boxes_list = [] - self._category_id_set = set([cat['id'] for cat in self._categories]) - self._annotation_id = 1 - self._metrics = None - self._include_metrics_per_category = include_metrics_per_category - self._all_metrics_per_category = all_metrics_per_category - - def clear(self): - """Clears the state to prepare for a fresh evaluation.""" - self._image_ids.clear() - self._groundtruth_list = [] - self._detection_boxes_list = [] - - def add_single_ground_truth_image_info(self, - image_id, - groundtruth_dict): - """Adds groundtruth for a single image to be used for evaluation. - - If the image has already been added, a warning is logged, and groundtruth is - ignored. - - Args: - image_id: A unique string/integer identifier for the image. - groundtruth_dict: A dictionary containing - - InputDataFields.groundtruth_boxes: float32 numpy array of shape - [num_boxes, 4] containing `num_boxes` groundtruth boxes of the format - [ymin, xmin, ymax, xmax] in absolute image coordinates. - InputDataFields.groundtruth_classes: integer numpy array of shape - [num_boxes] containing 1-indexed groundtruth classes for the boxes. - InputDataFields.groundtruth_is_crowd (optional): integer numpy array of - shape [num_boxes] containing iscrowd flag for groundtruth boxes. - """ - if image_id in self._image_ids: - tf.logging.warning('Ignoring ground truth with image id %s since it was ' - 'previously added', image_id) - return - - groundtruth_is_crowd = groundtruth_dict.get( - standard_fields.InputDataFields.groundtruth_is_crowd) - # Drop groundtruth_is_crowd if empty tensor. 
- if groundtruth_is_crowd is not None and not groundtruth_is_crowd.shape[0]: - groundtruth_is_crowd = None - - self._groundtruth_list.extend( - coco_tools.ExportSingleImageGroundtruthToCoco( - image_id=image_id, - next_annotation_id=self._annotation_id, - category_id_set=self._category_id_set, - groundtruth_boxes=groundtruth_dict[ - standard_fields.InputDataFields.groundtruth_boxes], - groundtruth_classes=groundtruth_dict[ - standard_fields.InputDataFields.groundtruth_classes], - groundtruth_is_crowd=groundtruth_is_crowd)) - self._annotation_id += groundtruth_dict[standard_fields.InputDataFields. - groundtruth_boxes].shape[0] - # Boolean to indicate whether a detection has been added for this image. - self._image_ids[image_id] = False - - def add_single_detected_image_info(self, - image_id, - detections_dict): - """Adds detections for a single image to be used for evaluation. - - If a detection has already been added for this image id, a warning is - logged, and the detection is skipped. - - Args: - image_id: A unique string/integer identifier for the image. - detections_dict: A dictionary containing - - DetectionResultFields.detection_boxes: float32 numpy array of shape - [num_boxes, 4] containing `num_boxes` detection boxes of the format - [ymin, xmin, ymax, xmax] in absolute image coordinates. - DetectionResultFields.detection_scores: float32 numpy array of shape - [num_boxes] containing detection scores for the boxes. - DetectionResultFields.detection_classes: integer numpy array of shape - [num_boxes] containing 1-indexed detection classes for the boxes. - - Raises: - ValueError: If groundtruth for the image_id is not available. - """ - if image_id not in self._image_ids: - raise ValueError('Missing groundtruth for image id: {}'.format(image_id)) - - if self._image_ids[image_id]: - tf.logging.warning('Ignoring detection with image id %s since it was ' - 'previously added', image_id) - return - - self._detection_boxes_list.extend( - coco_tools.ExportSingleImageDetectionBoxesToCoco( - image_id=image_id, - category_id_set=self._category_id_set, - detection_boxes=detections_dict[standard_fields. - DetectionResultFields - .detection_boxes], - detection_scores=detections_dict[standard_fields. - DetectionResultFields. - detection_scores], - detection_classes=detections_dict[standard_fields. - DetectionResultFields. - detection_classes])) - self._image_ids[image_id] = True - - def evaluate(self): - """Evaluates the detection boxes and returns a dictionary of coco metrics. - - Returns: - A dictionary holding - - - 1. summary_metrics: - 'DetectionBoxes_Precision/mAP': mean average precision over classes - averaged over IOU thresholds ranging from .5 to .95 with .05 - increments. - 'DetectionBoxes_Precision/mAP@.50IOU': mean average precision at 50% IOU - 'DetectionBoxes_Precision/mAP@.75IOU': mean average precision at 75% IOU - 'DetectionBoxes_Precision/mAP (small)': mean average precision for small - objects (area < 32^2 pixels). - 'DetectionBoxes_Precision/mAP (medium)': mean average precision for - medium sized objects (32^2 pixels < area < 96^2 pixels). - 'DetectionBoxes_Precision/mAP (large)': mean average precision for large - objects (96^2 pixels < area < 10000^2 pixels). - 'DetectionBoxes_Recall/AR@1': average recall with 1 detection. - 'DetectionBoxes_Recall/AR@10': average recall with 10 detections. - 'DetectionBoxes_Recall/AR@100': average recall with 100 detections. - 'DetectionBoxes_Recall/AR@100 (small)': average recall for small objects - with 100. 
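Putting the three entry points together, a minimal (hypothetical) usage sketch of CocoDetectionEvaluator for a single image and a single category; the dictionary keys are the standard_fields string values used above, and all boxes and scores are made-up values:

import numpy as np

categories = [{'id': 1, 'name': 'cat'}]  # made-up category list
evaluator = CocoDetectionEvaluator(categories)

evaluator.add_single_ground_truth_image_info(
    image_id='image1',
    groundtruth_dict={
        'groundtruth_boxes': np.array([[10., 10., 50., 50.]], np.float32),
        'groundtruth_classes': np.array([1], np.int32)})
evaluator.add_single_detected_image_info(
    image_id='image1',
    detections_dict={
        'detection_boxes': np.array([[10., 10., 50., 50.]], np.float32),
        'detection_scores': np.array([0.9], np.float32),
        'detection_classes': np.array([1], np.int32)})

metrics = evaluator.evaluate()
# e.g. metrics['DetectionBoxes_Precision/mAP']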
- 'DetectionBoxes_Recall/AR@100 (medium)': average recall for medium objects - with 100. - 'DetectionBoxes_Recall/AR@100 (large)': average recall for large objects - with 100 detections. - - 2. per_category_ap: if include_metrics_per_category is True, category - specific results with keys of the form: - 'Precision mAP ByCategory/category' (without the supercategory part if - no supercategories exist). For backward compatibility - 'PerformanceByCategory' is included in the output regardless of - all_metrics_per_category. - """ - groundtruth_dict = { - 'annotations': self._groundtruth_list, - 'images': [{'id': image_id} for image_id in self._image_ids], - 'categories': self._categories - } - coco_wrapped_groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( - self._detection_boxes_list) - box_evaluator = coco_tools.COCOEvalWrapper( - coco_wrapped_groundtruth, coco_wrapped_detections, agnostic_mode=False) - box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( - include_metrics_per_category=self._include_metrics_per_category, - all_metrics_per_category=self._all_metrics_per_category) - box_metrics.update(box_per_category_ap) - box_metrics = {'DetectionBoxes_'+ key: value - for key, value in iter(box_metrics.items())} - return box_metrics - - def get_estimator_eval_metric_ops(self, image_id, groundtruth_boxes, - groundtruth_classes, - detection_boxes, - detection_scores, detection_classes, - groundtruth_is_crowd=None, - num_gt_boxes_per_image=None, - num_det_boxes_per_image=None): - """Returns a dictionary of eval metric ops to use with `tf.EstimatorSpec`. - - Note that once value_op is called, the detections and groundtruth added via - update_op are cleared. - - This function can take in groundtruth and detections for a batch of images, - or for a single image. For the latter case, the batch dimension for input - tensors need not be present. - - Args: - image_id: string/integer tensor of shape [batch] with unique identifiers - for the images. - groundtruth_boxes: float32 tensor of shape [batch, num_boxes, 4] - containing `num_boxes` groundtruth boxes of the format - [ymin, xmin, ymax, xmax] in absolute image coordinates. - groundtruth_classes: int32 tensor of shape [batch, num_boxes] containing - 1-indexed groundtruth classes for the boxes. - detection_boxes: float32 tensor of shape [batch, num_boxes, 4] containing - `num_boxes` detection boxes of the format [ymin, xmin, ymax, xmax] - in absolute image coordinates. - detection_scores: float32 tensor of shape [batch, num_boxes] containing - detection scores for the boxes. - detection_classes: int32 tensor of shape [batch, num_boxes] containing - 1-indexed detection classes for the boxes. - groundtruth_is_crowd: bool tensor of shape [batch, num_boxes] containing - is_crowd annotations. This field is optional, and if not passed, then - all boxes are treated as *not* is_crowd. - num_gt_boxes_per_image: int32 tensor of shape [batch] containing the - number of groundtruth boxes per image. If None, will assume no padding - in groundtruth tensors. - num_det_boxes_per_image: int32 tensor of shape [batch] containing the - number of detection boxes per image. If None, will assume no padding in - the detection tensors. - - Returns: - a dictionary of metric names to tuple of value_op and update_op that can - be used as eval metric ops in tf.EstimatorSpec. 
Note that all update ops - must be run together and similarly all value ops must be run together to - guarantee correct behaviour. - """ - def update_op( - image_id_batched, - groundtruth_boxes_batched, - groundtruth_classes_batched, - groundtruth_is_crowd_batched, - num_gt_boxes_per_image, - detection_boxes_batched, - detection_scores_batched, - detection_classes_batched, - num_det_boxes_per_image): - """Update operation for adding batch of images to Coco evaluator.""" - - for (image_id, gt_box, gt_class, gt_is_crowd, num_gt_box, det_box, - det_score, det_class, num_det_box) in zip( - image_id_batched, groundtruth_boxes_batched, - groundtruth_classes_batched, groundtruth_is_crowd_batched, - num_gt_boxes_per_image, - detection_boxes_batched, detection_scores_batched, - detection_classes_batched, num_det_boxes_per_image): - self.add_single_ground_truth_image_info( - image_id, - {'groundtruth_boxes': gt_box[:num_gt_box], - 'groundtruth_classes': gt_class[:num_gt_box], - 'groundtruth_is_crowd': gt_is_crowd[:num_gt_box]}) - self.add_single_detected_image_info( - image_id, - {'detection_boxes': det_box[:num_det_box], - 'detection_scores': det_score[:num_det_box], - 'detection_classes': det_class[:num_det_box]}) - - if groundtruth_is_crowd is None: - groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool) - if not image_id.shape.as_list(): - # Apply a batch dimension to all tensors. - image_id = tf.expand_dims(image_id, 0) - groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0) - groundtruth_classes = tf.expand_dims(groundtruth_classes, 0) - groundtruth_is_crowd = tf.expand_dims(groundtruth_is_crowd, 0) - detection_boxes = tf.expand_dims(detection_boxes, 0) - detection_scores = tf.expand_dims(detection_scores, 0) - detection_classes = tf.expand_dims(detection_classes, 0) - - if num_gt_boxes_per_image is None: - num_gt_boxes_per_image = tf.shape(groundtruth_boxes)[1:2] - else: - num_gt_boxes_per_image = tf.expand_dims(num_gt_boxes_per_image, 0) - - if num_det_boxes_per_image is None: - num_det_boxes_per_image = tf.shape(detection_boxes)[1:2] - else: - num_det_boxes_per_image = tf.expand_dims(num_det_boxes_per_image, 0) - else: - if num_gt_boxes_per_image is None: - num_gt_boxes_per_image = tf.tile( - tf.shape(groundtruth_boxes)[1:2], - multiples=tf.shape(groundtruth_boxes)[0:1]) - if num_det_boxes_per_image is None: - num_det_boxes_per_image = tf.tile( - tf.shape(detection_boxes)[1:2], - multiples=tf.shape(detection_boxes)[0:1]) - - update_op = tf.py_func(update_op, [image_id, - groundtruth_boxes, - groundtruth_classes, - groundtruth_is_crowd, - num_gt_boxes_per_image, - detection_boxes, - detection_scores, - detection_classes, - num_det_boxes_per_image], []) - metric_names = ['DetectionBoxes_Precision/mAP', - 'DetectionBoxes_Precision/mAP@.50IOU', - 'DetectionBoxes_Precision/mAP@.75IOU', - 'DetectionBoxes_Precision/mAP (large)', - 'DetectionBoxes_Precision/mAP (medium)', - 'DetectionBoxes_Precision/mAP (small)', - 'DetectionBoxes_Recall/AR@1', - 'DetectionBoxes_Recall/AR@10', - 'DetectionBoxes_Recall/AR@100', - 'DetectionBoxes_Recall/AR@100 (large)', - 'DetectionBoxes_Recall/AR@100 (medium)', - 'DetectionBoxes_Recall/AR@100 (small)'] - if self._include_metrics_per_category: - for category_dict in self._categories: - metric_names.append('DetectionBoxes_PerformanceByCategory/mAP/' + - category_dict['name']) - - def first_value_func(): - self._metrics = self.evaluate() - self.clear() - return np.float32(self._metrics[metric_names[0]]) - - def value_func_factory(metric_name): - 
def value_func(): - return np.float32(self._metrics[metric_name]) - return value_func - - # Ensure that the metrics are only evaluated once. - first_value_op = tf.py_func(first_value_func, [], tf.float32) - eval_metric_ops = {metric_names[0]: (first_value_op, update_op)} - with tf.control_dependencies([first_value_op]): - for metric_name in metric_names[1:]: - eval_metric_ops[metric_name] = (tf.py_func( - value_func_factory(metric_name), [], np.float32), update_op) - return eval_metric_ops - - -def _check_mask_type_and_value(array_name, masks): - """Checks whether mask dtype is uint8 and the values are either 0 or 1.""" - if masks.dtype != np.uint8: - raise ValueError('{} must be of type np.uint8. Found {}.'.format( - array_name, masks.dtype)) - if np.any(np.logical_and(masks != 0, masks != 1)): - raise ValueError('{} elements can only be either 0 or 1.'.format( - array_name)) - - -class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): - """Class to evaluate COCO detection metrics.""" - - def __init__(self, categories, include_metrics_per_category=False): - """Constructor. - - Args: - categories: A list of dicts, each of which has the following keys - - 'id': (required) an integer id uniquely identifying this category. - 'name': (required) string representing category name e.g., 'cat', 'dog'. - include_metrics_per_category: If True, include metrics for each category. - """ - super(CocoMaskEvaluator, self).__init__(categories) - self._image_id_to_mask_shape_map = {} - self._image_ids_with_detections = set([]) - self._groundtruth_list = [] - self._detection_masks_list = [] - self._category_id_set = set([cat['id'] for cat in self._categories]) - self._annotation_id = 1 - self._include_metrics_per_category = include_metrics_per_category - - def clear(self): - """Clears the state to prepare for a fresh evaluation.""" - self._image_id_to_mask_shape_map.clear() - self._image_ids_with_detections.clear() - self._groundtruth_list = [] - self._detection_masks_list = [] - - def add_single_ground_truth_image_info(self, - image_id, - groundtruth_dict): - """Adds groundtruth for a single image to be used for evaluation. - - If the image has already been added, a warning is logged, and groundtruth is - ignored. - - Args: - image_id: A unique string/integer identifier for the image. - groundtruth_dict: A dictionary containing - - InputDataFields.groundtruth_boxes: float32 numpy array of shape - [num_boxes, 4] containing `num_boxes` groundtruth boxes of the format - [ymin, xmin, ymax, xmax] in absolute image coordinates. - InputDataFields.groundtruth_classes: integer numpy array of shape - [num_boxes] containing 1-indexed groundtruth classes for the boxes. - InputDataFields.groundtruth_instance_masks: uint8 numpy array of shape - [num_boxes, image_height, image_width] containing groundtruth masks - corresponding to the boxes. The elements of the array must be in - {0, 1}. - """ - if image_id in self._image_id_to_mask_shape_map: - tf.logging.warning('Ignoring ground truth with image id %s since it was ' - 'previously added', image_id) - return - - groundtruth_instance_masks = groundtruth_dict[ - standard_fields.InputDataFields.groundtruth_instance_masks] - _check_mask_type_and_value(standard_fields.InputDataFields. - groundtruth_instance_masks, - groundtruth_instance_masks) - self._groundtruth_list.extend( - coco_tools. 
- ExportSingleImageGroundtruthToCoco( - image_id=image_id, - next_annotation_id=self._annotation_id, - category_id_set=self._category_id_set, - groundtruth_boxes=groundtruth_dict[standard_fields.InputDataFields. - groundtruth_boxes], - groundtruth_classes=groundtruth_dict[standard_fields. - InputDataFields. - groundtruth_classes], - groundtruth_masks=groundtruth_instance_masks)) - self._annotation_id += groundtruth_dict[standard_fields.InputDataFields. - groundtruth_boxes].shape[0] - self._image_id_to_mask_shape_map[image_id] = groundtruth_dict[ - standard_fields.InputDataFields.groundtruth_instance_masks].shape - - def add_single_detected_image_info(self, - image_id, - detections_dict): - """Adds detections for a single image to be used for evaluation. - - If a detection has already been added for this image id, a warning is - logged, and the detection is skipped. - - Args: - image_id: A unique string/integer identifier for the image. - detections_dict: A dictionary containing - - DetectionResultFields.detection_scores: float32 numpy array of shape - [num_boxes] containing detection scores for the boxes. - DetectionResultFields.detection_classes: integer numpy array of shape - [num_boxes] containing 1-indexed detection classes for the boxes. - DetectionResultFields.detection_masks: optional uint8 numpy array of - shape [num_boxes, image_height, image_width] containing instance - masks corresponding to the boxes. The elements of the array must be - in {0, 1}. - - Raises: - ValueError: If groundtruth for the image_id is not available or if - spatial shapes of groundtruth_instance_masks and detection_masks are - incompatible. - """ - if image_id not in self._image_id_to_mask_shape_map: - raise ValueError('Missing groundtruth for image id: {}'.format(image_id)) - - if image_id in self._image_ids_with_detections: - tf.logging.warning('Ignoring detection with image id %s since it was ' - 'previously added', image_id) - return - - groundtruth_masks_shape = self._image_id_to_mask_shape_map[image_id] - detection_masks = detections_dict[standard_fields.DetectionResultFields. - detection_masks] - if groundtruth_masks_shape[1:] != detection_masks.shape[1:]: - raise ValueError('Spatial shape of groundtruth masks and detection masks ' - 'are incompatible: {} vs {}'.format( - groundtruth_masks_shape, - detection_masks.shape)) - _check_mask_type_and_value(standard_fields.DetectionResultFields. - detection_masks, - detection_masks) - self._detection_masks_list.extend( - coco_tools.ExportSingleImageDetectionMasksToCoco( - image_id=image_id, - category_id_set=self._category_id_set, - detection_masks=detection_masks, - detection_scores=detections_dict[standard_fields. - DetectionResultFields. - detection_scores], - detection_classes=detections_dict[standard_fields. - DetectionResultFields. - detection_classes])) - self._image_ids_with_detections.update([image_id]) - - def evaluate(self): - """Evaluates the detection masks and returns a dictionary of coco metrics. - - Returns: - A dictionary holding - - - 1. summary_metrics: - 'DetectionMasks_Precision/mAP': mean average precision over classes - averaged over IOU thresholds ranging from .5 to .95 with .05 increments. - 'DetectionMasks_Precision/mAP@.50IOU': mean average precision at 50% IOU. - 'DetectionMasks_Precision/mAP@.75IOU': mean average precision at 75% IOU. - 'DetectionMasks_Precision/mAP (small)': mean average precision for small - objects (area < 32^2 pixels). 
- 'DetectionMasks_Precision/mAP (medium)': mean average precision for medium - sized objects (32^2 pixels < area < 96^2 pixels). - 'DetectionMasks_Precision/mAP (large)': mean average precision for large - objects (96^2 pixels < area < 10000^2 pixels). - 'DetectionMasks_Recall/AR@1': average recall with 1 detection. - 'DetectionMasks_Recall/AR@10': average recall with 10 detections. - 'DetectionMasks_Recall/AR@100': average recall with 100 detections. - 'DetectionMasks_Recall/AR@100 (small)': average recall for small objects - with 100 detections. - 'DetectionMasks_Recall/AR@100 (medium)': average recall for medium objects - with 100 detections. - 'DetectionMasks_Recall/AR@100 (large)': average recall for large objects - with 100 detections. - - 2. per_category_ap: if include_metrics_per_category is True, category - specific results with keys of the form: - 'Precision mAP ByCategory/category' (without the supercategory part if - no supercategories exist). For backward compatibility - 'PerformanceByCategory' is included in the output regardless of - all_metrics_per_category. - """ - groundtruth_dict = { - 'annotations': self._groundtruth_list, - 'images': [{'id': image_id, 'height': shape[1], 'width': shape[2]} - for image_id, shape in self._image_id_to_mask_shape_map. - items()], - 'categories': self._categories - } - coco_wrapped_groundtruth = coco_tools.COCOWrapper( - groundtruth_dict, detection_type='segmentation') - coco_wrapped_detection_masks = coco_wrapped_groundtruth.LoadAnnotations( - self._detection_masks_list) - mask_evaluator = coco_tools.COCOEvalWrapper( - coco_wrapped_groundtruth, coco_wrapped_detection_masks, - agnostic_mode=False, iou_type='segm') - mask_metrics, mask_per_category_ap = mask_evaluator.ComputeMetrics( - include_metrics_per_category=self._include_metrics_per_category) - mask_metrics.update(mask_per_category_ap) - mask_metrics = {'DetectionMasks_'+ key: value - for key, value in mask_metrics.items()} - return mask_metrics - - def get_estimator_eval_metric_ops(self, image_id, groundtruth_boxes, - groundtruth_classes, - groundtruth_instance_masks, - detection_scores, detection_classes, - detection_masks, groundtruth_is_crowd=None): - """Returns a dictionary of eval metric ops to use with `tf.EstimatorSpec`. - - Note that once value_op is called, the detections and groundtruth added via - update_op are cleared. - - Args: - image_id: Unique string/integer identifier for the image. - groundtruth_boxes: float32 tensor of shape [num_boxes, 4] containing - `num_boxes` groundtruth boxes of the format - [ymin, xmin, ymax, xmax] in absolute image coordinates. - groundtruth_classes: int32 tensor of shape [num_boxes] containing - 1-indexed groundtruth classes for the boxes. - groundtruth_instance_masks: uint8 tensor array of shape - [num_boxes, image_height, image_width] containing groundtruth masks - corresponding to the boxes. The elements of the array must be in {0, 1}. - detection_scores: float32 tensor of shape [num_boxes] containing - detection scores for the boxes. - detection_classes: int32 tensor of shape [num_boxes] containing - 1-indexed detection classes for the boxes. - detection_masks: uint8 tensor array of shape - [num_boxes, image_height, image_width] containing instance masks - corresponding to the boxes. The elements of the array must be in {0, 1}. - groundtruth_is_crowd: bool tensor of shape [batch, num_boxes] containing - is_crowd annotations. This field is optional, and if not passed, then - all boxes are treated as *not* is_crowd. 
- - Returns: - a dictionary of metric names to tuple of value_op and update_op that can - be used as eval metric ops in tf.EstimatorSpec. Note that all update ops - must be run together and similarly all value ops must be run together to - guarantee correct behaviour. - """ - def update_op( - image_id, - groundtruth_boxes, - groundtruth_classes, - groundtruth_instance_masks, - groundtruth_is_crowd, - detection_scores, - detection_classes, - detection_masks): - self.add_single_ground_truth_image_info( - image_id, - {'groundtruth_boxes': groundtruth_boxes, - 'groundtruth_classes': groundtruth_classes, - 'groundtruth_instance_masks': groundtruth_instance_masks, - 'groundtruth_is_crowd': groundtruth_is_crowd}) - self.add_single_detected_image_info( - image_id, - {'detection_scores': detection_scores, - 'detection_classes': detection_classes, - 'detection_masks': detection_masks}) - - if groundtruth_is_crowd is None: - groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool) - update_op = tf.py_func(update_op, [image_id, - groundtruth_boxes, - groundtruth_classes, - groundtruth_instance_masks, - groundtruth_is_crowd, - detection_scores, - detection_classes, - detection_masks], []) - metric_names = ['DetectionMasks_Precision/mAP', - 'DetectionMasks_Precision/mAP@.50IOU', - 'DetectionMasks_Precision/mAP@.75IOU', - 'DetectionMasks_Precision/mAP (large)', - 'DetectionMasks_Precision/mAP (medium)', - 'DetectionMasks_Precision/mAP (small)', - 'DetectionMasks_Recall/AR@1', - 'DetectionMasks_Recall/AR@10', - 'DetectionMasks_Recall/AR@100', - 'DetectionMasks_Recall/AR@100 (large)', - 'DetectionMasks_Recall/AR@100 (medium)', - 'DetectionMasks_Recall/AR@100 (small)'] - if self._include_metrics_per_category: - for category_dict in self._categories: - metric_names.append('DetectionMasks_PerformanceByCategory/mAP/' + - category_dict['name']) - - def first_value_func(): - self._metrics = self.evaluate() - self.clear() - return np.float32(self._metrics[metric_names[0]]) - - def value_func_factory(metric_name): - def value_func(): - return np.float32(self._metrics[metric_name]) - return value_func - - # Ensure that the metrics are only evaluated once. - first_value_op = tf.py_func(first_value_func, [], tf.float32) - eval_metric_ops = {metric_names[0]: (first_value_op, update_op)} - with tf.control_dependencies([first_value_op]): - for metric_name in metric_names[1:]: - eval_metric_ops[metric_name] = (tf.py_func( - value_func_factory(metric_name), [], np.float32), update_op) - return eval_metric_ops diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/coco_evaluation_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/coco_evaluation_test.py deleted file mode 100644 index 47547e20389ad822a59dce318b019bf5a9fb5a67..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/coco_evaluation_test.py +++ /dev/null @@ -1,727 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for tensorflow_models.object_detection.metrics.coco_evaluation.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf -from object_detection.core import standard_fields -from object_detection.metrics import coco_evaluation - - -class CocoDetectionEvaluationTest(tf.test.TestCase): - - def testGetOneMAPWithMatchingGroundtruthAndDetections(self): - """Tests that mAP is calculated correctly on GT and Detections.""" - category_list = [{'id': 0, 'name': 'person'}, - {'id': 1, 'name': 'cat'}, - {'id': 2, 'name': 'dog'}] - coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list) - coco_evaluator.add_single_ground_truth_image_info( - image_id='image1', - groundtruth_dict={ - standard_fields.InputDataFields.groundtruth_boxes: - np.array([[100., 100., 200., 200.]]), - standard_fields.InputDataFields.groundtruth_classes: np.array([1]) - }) - coco_evaluator.add_single_detected_image_info( - image_id='image1', - detections_dict={ - standard_fields.DetectionResultFields.detection_boxes: - np.array([[100., 100., 200., 200.]]), - standard_fields.DetectionResultFields.detection_scores: - np.array([.8]), - standard_fields.DetectionResultFields.detection_classes: - np.array([1]) - }) - coco_evaluator.add_single_ground_truth_image_info( - image_id='image2', - groundtruth_dict={ - standard_fields.InputDataFields.groundtruth_boxes: - np.array([[50., 50., 100., 100.]]), - standard_fields.InputDataFields.groundtruth_classes: np.array([1]) - }) - coco_evaluator.add_single_detected_image_info( - image_id='image2', - detections_dict={ - standard_fields.DetectionResultFields.detection_boxes: - np.array([[50., 50., 100., 100.]]), - standard_fields.DetectionResultFields.detection_scores: - np.array([.8]), - standard_fields.DetectionResultFields.detection_classes: - np.array([1]) - }) - coco_evaluator.add_single_ground_truth_image_info( - image_id='image3', - groundtruth_dict={ - standard_fields.InputDataFields.groundtruth_boxes: - np.array([[25., 25., 50., 50.]]), - standard_fields.InputDataFields.groundtruth_classes: np.array([1]) - }) - coco_evaluator.add_single_detected_image_info( - image_id='image3', - detections_dict={ - standard_fields.DetectionResultFields.detection_boxes: - np.array([[25., 25., 50., 50.]]), - standard_fields.DetectionResultFields.detection_scores: - np.array([.8]), - standard_fields.DetectionResultFields.detection_classes: - np.array([1]) - }) - metrics = coco_evaluator.evaluate() - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0) - - def testGetOneMAPWithMatchingGroundtruthAndDetectionsSkipCrowd(self): - """Tests computing mAP with is_crowd GT boxes skipped.""" - category_list = [{ - 'id': 0, - 'name': 'person' - }, { - 'id': 1, - 'name': 'cat' - }, { - 'id': 2, - 'name': 'dog' - }] - coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list) - coco_evaluator.add_single_ground_truth_image_info( - image_id='image1', - groundtruth_dict={ - standard_fields.InputDataFields.groundtruth_boxes: - np.array([[100., 100., 200., 200.], [99., 99., 200., 200.]]), - standard_fields.InputDataFields.groundtruth_classes: - np.array([1, 2]), - standard_fields.InputDataFields.groundtruth_is_crowd: - np.array([0, 1]) - }) - 
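    # Note on the groundtruth above: the second box is marked is_crowd=1.
    # COCO evaluation treats crowd regions specially (detections falling on
    # them are neither matched nor penalized), so the single detection added
    # below only has to match the non-crowd box for mAP to come out as 1.0.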
coco_evaluator.add_single_detected_image_info( - image_id='image1', - detections_dict={ - standard_fields.DetectionResultFields.detection_boxes: - np.array([[100., 100., 200., 200.]]), - standard_fields.DetectionResultFields.detection_scores: - np.array([.8]), - standard_fields.DetectionResultFields.detection_classes: - np.array([1]) - }) - metrics = coco_evaluator.evaluate() - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0) - - def testGetOneMAPWithMatchingGroundtruthAndDetectionsEmptyCrowd(self): - """Tests computing mAP with empty is_crowd array passed in.""" - category_list = [{ - 'id': 0, - 'name': 'person' - }, { - 'id': 1, - 'name': 'cat' - }, { - 'id': 2, - 'name': 'dog' - }] - coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list) - coco_evaluator.add_single_ground_truth_image_info( - image_id='image1', - groundtruth_dict={ - standard_fields.InputDataFields.groundtruth_boxes: - np.array([[100., 100., 200., 200.]]), - standard_fields.InputDataFields.groundtruth_classes: - np.array([1]), - standard_fields.InputDataFields.groundtruth_is_crowd: - np.array([]) - }) - coco_evaluator.add_single_detected_image_info( - image_id='image1', - detections_dict={ - standard_fields.DetectionResultFields.detection_boxes: - np.array([[100., 100., 200., 200.]]), - standard_fields.DetectionResultFields.detection_scores: - np.array([.8]), - standard_fields.DetectionResultFields.detection_classes: - np.array([1]) - }) - metrics = coco_evaluator.evaluate() - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0) - - def testRejectionOnDuplicateGroundtruth(self): - """Tests that groundtruth cannot be added more than once for an image.""" - categories = [{'id': 1, 'name': 'cat'}, - {'id': 2, 'name': 'dog'}, - {'id': 3, 'name': 'elephant'}] - # Add groundtruth - coco_evaluator = coco_evaluation.CocoDetectionEvaluator(categories) - image_key1 = 'img1' - groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], - dtype=float) - groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int) - coco_evaluator.add_single_ground_truth_image_info(image_key1, { - standard_fields.InputDataFields.groundtruth_boxes: - groundtruth_boxes1, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels1 - }) - groundtruth_lists_len = len(coco_evaluator._groundtruth_list) - - # Add groundtruth with the same image id. 
- coco_evaluator.add_single_ground_truth_image_info(image_key1, { - standard_fields.InputDataFields.groundtruth_boxes: - groundtruth_boxes1, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels1 - }) - self.assertEqual(groundtruth_lists_len, - len(coco_evaluator._groundtruth_list)) - - def testRejectionOnDuplicateDetections(self): - """Tests that detections cannot be added more than once for an image.""" - categories = [{'id': 1, 'name': 'cat'}, - {'id': 2, 'name': 'dog'}, - {'id': 3, 'name': 'elephant'}] - # Add groundtruth - coco_evaluator = coco_evaluation.CocoDetectionEvaluator(categories) - coco_evaluator.add_single_ground_truth_image_info( - image_id='image1', - groundtruth_dict={ - standard_fields.InputDataFields.groundtruth_boxes: - np.array([[99., 100., 200., 200.]]), - standard_fields.InputDataFields.groundtruth_classes: np.array([1]) - }) - coco_evaluator.add_single_detected_image_info( - image_id='image1', - detections_dict={ - standard_fields.DetectionResultFields.detection_boxes: - np.array([[100., 100., 200., 200.]]), - standard_fields.DetectionResultFields.detection_scores: - np.array([.8]), - standard_fields.DetectionResultFields.detection_classes: - np.array([1]) - }) - detections_lists_len = len(coco_evaluator._detection_boxes_list) - coco_evaluator.add_single_detected_image_info( - image_id='image1', # Note that this image id was previously added. - detections_dict={ - standard_fields.DetectionResultFields.detection_boxes: - np.array([[100., 100., 200., 200.]]), - standard_fields.DetectionResultFields.detection_scores: - np.array([.8]), - standard_fields.DetectionResultFields.detection_classes: - np.array([1]) - }) - self.assertEqual(detections_lists_len, - len(coco_evaluator._detection_boxes_list)) - - def testExceptionRaisedWithMissingGroundtruth(self): - """Tests that exception is raised for detection with missing groundtruth.""" - categories = [{'id': 1, 'name': 'cat'}, - {'id': 2, 'name': 'dog'}, - {'id': 3, 'name': 'elephant'}] - coco_evaluator = coco_evaluation.CocoDetectionEvaluator(categories) - with self.assertRaises(ValueError): - coco_evaluator.add_single_detected_image_info( - image_id='image1', - detections_dict={ - standard_fields.DetectionResultFields.detection_boxes: - np.array([[100., 100., 200., 200.]]), - standard_fields.DetectionResultFields.detection_scores: - np.array([.8]), - standard_fields.DetectionResultFields.detection_classes: - np.array([1]) - }) - - -class CocoEvaluationPyFuncTest(tf.test.TestCase): - - def testGetOneMAPWithMatchingGroundtruthAndDetections(self): - category_list = [{'id': 0, 'name': 'person'}, - {'id': 1, 'name': 'cat'}, - {'id': 2, 'name': 'dog'}] - coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list) - image_id = tf.placeholder(tf.string, shape=()) - groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4)) - groundtruth_classes = tf.placeholder(tf.float32, shape=(None)) - detection_boxes = tf.placeholder(tf.float32, shape=(None, 4)) - detection_scores = tf.placeholder(tf.float32, shape=(None)) - detection_classes = tf.placeholder(tf.float32, shape=(None)) - - eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops( - image_id, groundtruth_boxes, - groundtruth_classes, - detection_boxes, - detection_scores, - detection_classes) - - _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP'] - - with self.test_session() as sess: - sess.run(update_op, - feed_dict={ - image_id: 'image1', - groundtruth_boxes: np.array([[100., 100., 200., 200.]]), - 
groundtruth_classes: np.array([1]), - detection_boxes: np.array([[100., 100., 200., 200.]]), - detection_scores: np.array([.8]), - detection_classes: np.array([1]) - }) - sess.run(update_op, - feed_dict={ - image_id: 'image2', - groundtruth_boxes: np.array([[50., 50., 100., 100.]]), - groundtruth_classes: np.array([3]), - detection_boxes: np.array([[50., 50., 100., 100.]]), - detection_scores: np.array([.7]), - detection_classes: np.array([3]) - }) - sess.run(update_op, - feed_dict={ - image_id: 'image3', - groundtruth_boxes: np.array([[25., 25., 50., 50.]]), - groundtruth_classes: np.array([2]), - detection_boxes: np.array([[25., 25., 50., 50.]]), - detection_scores: np.array([.9]), - detection_classes: np.array([2]) - }) - metrics = {} - for key, (value_op, _) in eval_metric_ops.iteritems(): - metrics[key] = value_op - metrics = sess.run(metrics) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'], - -1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'], - -1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0) - self.assertFalse(coco_evaluator._groundtruth_list) - self.assertFalse(coco_evaluator._detection_boxes_list) - self.assertFalse(coco_evaluator._image_ids) - - def testGetOneMAPWithMatchingGroundtruthAndDetectionsPadded(self): - category_list = [{ - 'id': 0, - 'name': 'person' - }, { - 'id': 1, - 'name': 'cat' - }, { - 'id': 2, - 'name': 'dog' - }] - coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list) - image_id = tf.placeholder(tf.string, shape=()) - groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4)) - groundtruth_classes = tf.placeholder(tf.float32, shape=(None)) - detection_boxes = tf.placeholder(tf.float32, shape=(None, 4)) - detection_scores = tf.placeholder(tf.float32, shape=(None)) - detection_classes = tf.placeholder(tf.float32, shape=(None)) - - eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops( - image_id, groundtruth_boxes, groundtruth_classes, detection_boxes, - detection_scores, detection_classes) - - _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP'] - - with self.test_session() as sess: - sess.run( - update_op, - feed_dict={ - image_id: - 'image1', - groundtruth_boxes: - np.array([[100., 100., 200., 200.], [-1, -1, -1, -1]]), - groundtruth_classes: - np.array([1, -1]), - detection_boxes: - np.array([[100., 100., 200., 200.], [0., 0., 0., 0.]]), - detection_scores: - np.array([.8, 0.]), - detection_classes: - np.array([1, -1]) - }) - sess.run( - update_op, - feed_dict={ - image_id: - 'image2', - groundtruth_boxes: - np.array([[50., 50., 100., 100.], [-1, -1, -1, -1]]), - groundtruth_classes: - np.array([3, -1]), - detection_boxes: - np.array([[50., 50., 100., 100.], [0., 0., 0., 0.]]), - detection_scores: - np.array([.7, 0.]), - detection_classes: - np.array([3, -1]) - }) - 
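    # Padding convention in the padded feeds above: extra groundtruth rows use
    # boxes of [-1, -1, -1, -1] with class -1, and extra detection rows use
    # all-zero boxes with score 0 and class -1. Since -1 is not in the
    # category set, the COCO export step drops these rows, so the padding
    # never affects the metrics.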
sess.run( - update_op, - feed_dict={ - image_id: - 'image3', - groundtruth_boxes: - np.array([[25., 25., 50., 50.], [10., 10., 15., 15.]]), - groundtruth_classes: - np.array([2, 2]), - detection_boxes: - np.array([[25., 25., 50., 50.], [10., 10., 15., 15.]]), - detection_scores: - np.array([.95, .9]), - detection_classes: - np.array([2, 2]) - }) - metrics = {} - for key, (value_op, _) in eval_metric_ops.iteritems(): - metrics[key] = value_op - metrics = sess.run(metrics) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'], - -1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.75) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'], - -1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0) - self.assertFalse(coco_evaluator._groundtruth_list) - self.assertFalse(coco_evaluator._detection_boxes_list) - self.assertFalse(coco_evaluator._image_ids) - - def testGetOneMAPWithMatchingGroundtruthAndDetectionsBatched(self): - category_list = [{'id': 0, 'name': 'person'}, - {'id': 1, 'name': 'cat'}, - {'id': 2, 'name': 'dog'}] - coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list) - batch_size = 3 - image_id = tf.placeholder(tf.string, shape=(batch_size)) - groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4)) - groundtruth_classes = tf.placeholder(tf.float32, shape=(batch_size, None)) - detection_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4)) - detection_scores = tf.placeholder(tf.float32, shape=(batch_size, None)) - detection_classes = tf.placeholder(tf.float32, shape=(batch_size, None)) - - eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops( - image_id, groundtruth_boxes, - groundtruth_classes, - detection_boxes, - detection_scores, - detection_classes) - - _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP'] - - with self.test_session() as sess: - sess.run(update_op, - feed_dict={ - image_id: ['image1', 'image2', 'image3'], - groundtruth_boxes: np.array([[[100., 100., 200., 200.]], - [[50., 50., 100., 100.]], - [[25., 25., 50., 50.]]]), - groundtruth_classes: np.array([[1], [3], [2]]), - detection_boxes: np.array([[[100., 100., 200., 200.]], - [[50., 50., 100., 100.]], - [[25., 25., 50., 50.]]]), - detection_scores: np.array([[.8], [.7], [.9]]), - detection_classes: np.array([[1], [3], [2]]) - }) - metrics = {} - for key, (value_op, _) in eval_metric_ops.iteritems(): - metrics[key] = value_op - metrics = sess.run(metrics) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'], - -1.0) - 
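    # The -1.0 asserted above is pycocotools' sentinel for an empty size
    # bucket, not a real score: the only medium-sized box (image2's 50x50
    # box) carries class 3, which is absent from category_list, so it is
    # dropped and no medium-area object remains to evaluate.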
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'], - -1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0) - self.assertFalse(coco_evaluator._groundtruth_list) - self.assertFalse(coco_evaluator._detection_boxes_list) - self.assertFalse(coco_evaluator._image_ids) - - def testGetOneMAPWithMatchingGroundtruthAndDetectionsPaddedBatches(self): - category_list = [{'id': 0, 'name': 'person'}, - {'id': 1, 'name': 'cat'}, - {'id': 2, 'name': 'dog'}] - coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list) - batch_size = 3 - image_id = tf.placeholder(tf.string, shape=(batch_size)) - groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4)) - groundtruth_classes = tf.placeholder(tf.float32, shape=(batch_size, None)) - num_gt_boxes_per_image = tf.placeholder(tf.int32, shape=(None)) - detection_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4)) - detection_scores = tf.placeholder(tf.float32, shape=(batch_size, None)) - detection_classes = tf.placeholder(tf.float32, shape=(batch_size, None)) - num_det_boxes_per_image = tf.placeholder(tf.int32, shape=(None)) - - eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops( - image_id, groundtruth_boxes, - groundtruth_classes, - detection_boxes, - detection_scores, - detection_classes, - num_gt_boxes_per_image=num_gt_boxes_per_image, - num_det_boxes_per_image=num_det_boxes_per_image) - - _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP'] - - with self.test_session() as sess: - sess.run(update_op, - feed_dict={ - image_id: ['image1', 'image2', 'image3'], - groundtruth_boxes: np.array([[[100., 100., 200., 200.], - [-1, -1, -1, -1]], - [[50., 50., 100., 100.], - [-1, -1, -1, -1]], - [[25., 25., 50., 50.], - [10., 10., 15., 15.]]]), - groundtruth_classes: np.array([[1, -1], [3, -1], [2, 2]]), - num_gt_boxes_per_image: np.array([1, 1, 2]), - detection_boxes: np.array([[[100., 100., 200., 200.], - [0., 0., 0., 0.]], - [[50., 50., 100., 100.], - [0., 0., 0., 0.]], - [[25., 25., 50., 50.], - [10., 10., 15., 15.]]]), - detection_scores: np.array([[.8, 0.], [.7, 0.], [.95, .9]]), - detection_classes: np.array([[1, -1], [3, -1], [2, 2]]), - num_det_boxes_per_image: np.array([1, 1, 2]), - }) - metrics = {} - for key, (value_op, _) in eval_metric_ops.iteritems(): - metrics[key] = value_op - metrics = sess.run(metrics) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'], - -1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.75) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0) - 
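    # With num_gt_boxes_per_image and num_det_boxes_per_image supplied, the
    # update op is expected to trim each batch element down to its true
    # number of boxes before exporting, so the padded rows above should never
    # reach the evaluator at all.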
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'], - -1.0) - self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0) - self.assertFalse(coco_evaluator._groundtruth_list) - self.assertFalse(coco_evaluator._detection_boxes_list) - self.assertFalse(coco_evaluator._image_ids) - - -class CocoMaskEvaluationTest(tf.test.TestCase): - - def testGetOneMAPWithMatchingGroundtruthAndDetections(self): - category_list = [{'id': 0, 'name': 'person'}, - {'id': 1, 'name': 'cat'}, - {'id': 2, 'name': 'dog'}] - coco_evaluator = coco_evaluation.CocoMaskEvaluator(category_list) - coco_evaluator.add_single_ground_truth_image_info( - image_id='image1', - groundtruth_dict={ - standard_fields.InputDataFields.groundtruth_boxes: - np.array([[100., 100., 200., 200.]]), - standard_fields.InputDataFields.groundtruth_classes: np.array([1]), - standard_fields.InputDataFields.groundtruth_instance_masks: - np.pad(np.ones([1, 100, 100], dtype=np.uint8), - ((0, 0), (10, 10), (10, 10)), mode='constant') - }) - coco_evaluator.add_single_detected_image_info( - image_id='image1', - detections_dict={ - standard_fields.DetectionResultFields.detection_boxes: - np.array([[100., 100., 200., 200.]]), - standard_fields.DetectionResultFields.detection_scores: - np.array([.8]), - standard_fields.DetectionResultFields.detection_classes: - np.array([1]), - standard_fields.DetectionResultFields.detection_masks: - np.pad(np.ones([1, 100, 100], dtype=np.uint8), - ((0, 0), (10, 10), (10, 10)), mode='constant') - }) - coco_evaluator.add_single_ground_truth_image_info( - image_id='image2', - groundtruth_dict={ - standard_fields.InputDataFields.groundtruth_boxes: - np.array([[50., 50., 100., 100.]]), - standard_fields.InputDataFields.groundtruth_classes: np.array([1]), - standard_fields.InputDataFields.groundtruth_instance_masks: - np.pad(np.ones([1, 50, 50], dtype=np.uint8), - ((0, 0), (10, 10), (10, 10)), mode='constant') - }) - coco_evaluator.add_single_detected_image_info( - image_id='image2', - detections_dict={ - standard_fields.DetectionResultFields.detection_boxes: - np.array([[50., 50., 100., 100.]]), - standard_fields.DetectionResultFields.detection_scores: - np.array([.8]), - standard_fields.DetectionResultFields.detection_classes: - np.array([1]), - standard_fields.DetectionResultFields.detection_masks: - np.pad(np.ones([1, 50, 50], dtype=np.uint8), - ((0, 0), (10, 10), (10, 10)), mode='constant') - }) - coco_evaluator.add_single_ground_truth_image_info( - image_id='image3', - groundtruth_dict={ - standard_fields.InputDataFields.groundtruth_boxes: - np.array([[25., 25., 50., 50.]]), - standard_fields.InputDataFields.groundtruth_classes: np.array([1]), - standard_fields.InputDataFields.groundtruth_instance_masks: - np.pad(np.ones([1, 25, 25], dtype=np.uint8), - ((0, 0), (10, 10), (10, 10)), mode='constant') - }) - coco_evaluator.add_single_detected_image_info( - image_id='image3', - detections_dict={ - standard_fields.DetectionResultFields.detection_boxes: - np.array([[25., 25., 50., 50.]]), - standard_fields.DetectionResultFields.detection_scores: - np.array([.8]), - standard_fields.DetectionResultFields.detection_classes: - np.array([1]), - standard_fields.DetectionResultFields.detection_masks: - np.pad(np.ones([1, 25, 25], dtype=np.uint8), - ((0, 0), (10, 10), (10, 10)), mode='constant') - }) - metrics = coco_evaluator.evaluate() - self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0) - coco_evaluator.clear() - self.assertFalse(coco_evaluator._image_id_to_mask_shape_map) - 
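    # clear() resets all accumulated state (groundtruth list, detection masks
    # and the per-image bookkeeping), so the assertions below verify that the
    # evaluator is ready for a fresh evaluation run.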
self.assertFalse(coco_evaluator._image_ids_with_detections) - self.assertFalse(coco_evaluator._groundtruth_list) - self.assertFalse(coco_evaluator._detection_masks_list) - - -class CocoMaskEvaluationPyFuncTest(tf.test.TestCase): - - def testGetOneMAPWithMatchingGroundtruthAndDetections(self): - category_list = [{'id': 0, 'name': 'person'}, - {'id': 1, 'name': 'cat'}, - {'id': 2, 'name': 'dog'}] - coco_evaluator = coco_evaluation.CocoMaskEvaluator(category_list) - image_id = tf.placeholder(tf.string, shape=()) - groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4)) - groundtruth_classes = tf.placeholder(tf.float32, shape=(None)) - groundtruth_masks = tf.placeholder(tf.uint8, shape=(None, None, None)) - detection_scores = tf.placeholder(tf.float32, shape=(None)) - detection_classes = tf.placeholder(tf.float32, shape=(None)) - detection_masks = tf.placeholder(tf.uint8, shape=(None, None, None)) - - eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops( - image_id, groundtruth_boxes, - groundtruth_classes, - groundtruth_masks, - detection_scores, - detection_classes, - detection_masks) - - _, update_op = eval_metric_ops['DetectionMasks_Precision/mAP'] - - with self.test_session() as sess: - sess.run(update_op, - feed_dict={ - image_id: 'image1', - groundtruth_boxes: np.array([[100., 100., 200., 200.]]), - groundtruth_classes: np.array([1]), - groundtruth_masks: np.pad(np.ones([1, 100, 100], - dtype=np.uint8), - ((0, 0), (10, 10), (10, 10)), - mode='constant'), - detection_scores: np.array([.8]), - detection_classes: np.array([1]), - detection_masks: np.pad(np.ones([1, 100, 100], - dtype=np.uint8), - ((0, 0), (10, 10), (10, 10)), - mode='constant') - }) - sess.run(update_op, - feed_dict={ - image_id: 'image2', - groundtruth_boxes: np.array([[50., 50., 100., 100.]]), - groundtruth_classes: np.array([1]), - groundtruth_masks: np.pad(np.ones([1, 50, 50], - dtype=np.uint8), - ((0, 0), (10, 10), (10, 10)), - mode='constant'), - detection_scores: np.array([.8]), - detection_classes: np.array([1]), - detection_masks: np.pad(np.ones([1, 50, 50], dtype=np.uint8), - ((0, 0), (10, 10), (10, 10)), - mode='constant') - }) - sess.run(update_op, - feed_dict={ - image_id: 'image3', - groundtruth_boxes: np.array([[25., 25., 50., 50.]]), - groundtruth_classes: np.array([1]), - groundtruth_masks: np.pad(np.ones([1, 25, 25], - dtype=np.uint8), - ((0, 0), (10, 10), (10, 10)), - mode='constant'), - detection_scores: np.array([.8]), - detection_classes: np.array([1]), - detection_masks: np.pad(np.ones([1, 25, 25], - dtype=np.uint8), - ((0, 0), (10, 10), (10, 10)), - mode='constant') - }) - metrics = {} - for key, (value_op, _) in eval_metric_ops.iteritems(): - metrics[key] = value_op - metrics = sess.run(metrics) - self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0) - self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP@.50IOU'], 1.0) - self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP@.75IOU'], 1.0) - self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (large)'], 1.0) - self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (medium)'], - 1.0) - self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (small)'], 1.0) - self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@1'], 1.0) - self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@10'], 1.0) - self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100'], 1.0) - self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (large)'], 1.0) - 
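    # Unlike the box tests above, every size bucket is populated here: the
    # three masks cover roughly 100x100, 50x50 and 25x25 pixels (large,
    # medium and small under the COCO 32^2 and 96^2 area thresholds) and all
    # use class 1, so the medium metrics are real 1.0 scores rather than the
    # -1.0 empty-bucket sentinel.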
self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (medium)'], - 1.0) - self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (small)'], 1.0) - self.assertFalse(coco_evaluator._groundtruth_list) - self.assertFalse(coco_evaluator._image_ids_with_detections) - self.assertFalse(coco_evaluator._image_id_to_mask_shape_map) - self.assertFalse(coco_evaluator._detection_masks_list) - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/coco_tools.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/coco_tools.py deleted file mode 100644 index 71b747bcbde1ae6f39e92d2401079172b3b9a69b..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/coco_tools.py +++ /dev/null @@ -1,850 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Wrappers for third party pycocotools to be used within object_detection. - -Note that nothing in this file is tensorflow related and thus cannot -be called directly as a slim metric, for example. - -TODO(jonathanhuang): wrap as a slim metric in metrics.py - - -Usage example: given a set of images with ids in the list image_ids -and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) -and detections (boxes, scores and classes), where elements of each list -correspond to detections/annotations of a single image, -then evaluation (in multi-class mode) can be invoked as follows: - - groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( - image_ids, groundtruth_boxes_list, groundtruth_classes_list, - max_num_classes, output_path=None) - detections_list = coco_tools.ExportDetectionsToCOCO( - image_ids, detection_boxes_list, detection_scores_list, - detection_classes_list, output_path=None) - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() - -""" -from collections import OrderedDict -import copy -import time -import numpy as np - -from pycocotools import coco -from pycocotools import cocoeval -from pycocotools import mask - -import tensorflow as tf - -from object_detection.utils import json_utils - - -class COCOWrapper(coco.COCO): - """Wrapper for the pycocotools COCO class.""" - - def __init__(self, dataset, detection_type='bbox'): - """COCOWrapper constructor. - - See http://mscoco.org/dataset/#format for a description of the format. - By default, the coco.COCO class constructor reads from a JSON file. - This function duplicates the same behavior but loads from a dictionary, - allowing us to perform evaluation without writing to external storage. - - Args: - dataset: a dictionary holding bounding box annotations in the COCO format. 
- detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - - Raises: - ValueError: if detection_type is unsupported. - """ - supported_detection_types = ['bbox', 'segmentation'] - if detection_type not in supported_detection_types: - raise ValueError('Unsupported detection type: {}. ' - 'Supported values are: {}'.format( - detection_type, supported_detection_types)) - self._detection_type = detection_type - coco.COCO.__init__(self) - self.dataset = dataset - self.createIndex() - - def LoadAnnotations(self, annotations): - """Load annotations dictionary into COCO datastructure. - - See http://mscoco.org/dataset/#format for a description of the annotations - format. As above, this function replicates the default behavior of the API - but does not require writing to external storage. - - Args: - annotations: python list holding object detection results where each - detection is encoded as a dict with required keys ['image_id', - 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on - `detection_type`. - - Returns: - a coco.COCO datastructure holding object detection annotations results - - Raises: - ValueError: if annotations is not a list - ValueError: if annotations do not correspond to the images contained - in self. - """ - results = coco.COCO() - results.dataset['images'] = [img for img in self.dataset['images']] - - tf.logging.info('Loading and preparing annotation results...') - tic = time.time() - - if not isinstance(annotations, list): - raise ValueError('annotations is not a list of objects') - annotation_img_ids = [ann['image_id'] for ann in annotations] - if (set(annotation_img_ids) != (set(annotation_img_ids) - & set(self.getImgIds()))): - raise ValueError('Results do not correspond to current coco set') - results.dataset['categories'] = copy.deepcopy(self.dataset['categories']) - if self._detection_type == 'bbox': - for idx, ann in enumerate(annotations): - bb = ann['bbox'] - ann['area'] = bb[2] * bb[3] - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - elif self._detection_type == 'segmentation': - for idx, ann in enumerate(annotations): - ann['area'] = mask.area(ann['segmentation']) - ann['bbox'] = mask.toBbox(ann['segmentation']) - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - tf.logging.info('DONE (t=%0.2fs)', (time.time() - tic)) - - results.dataset['annotations'] = annotations - results.createIndex() - return results - - -class COCOEvalWrapper(cocoeval.COCOeval): - """Wrapper for the pycocotools COCOeval class. - - To evaluate, create two objects (groundtruth_dict and detections_list) - using the conventions listed at http://mscoco.org/dataset/#format. - Then call evaluation as follows: - - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - - metrics = evaluator.ComputeMetrics() - """ - - def __init__(self, groundtruth=None, detections=None, agnostic_mode=False, - iou_type='bbox'): - """COCOEvalWrapper constructor. - - Note that for the area-based metrics to be meaningful, detection and - groundtruth boxes must be in image coordinates measured in pixels. - - Args: - groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding - groundtruth annotations - detections: a coco.COCO (or coco_tools.COCOWrapper) object holding - detections - agnostic_mode: boolean (default: False). If True, evaluation ignores - class labels, treating all detections as proposals. 
- iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. - """ - cocoeval.COCOeval.__init__(self, groundtruth, detections, - iouType=iou_type) - if agnostic_mode: - self.params.useCats = 0 - - def GetCategory(self, category_id): - """Fetches dictionary holding category information given category id. - - Args: - category_id: integer id - Returns: - dictionary holding 'id', 'name'. - """ - return self.cocoGt.cats[category_id] - - def GetAgnosticMode(self): - """Returns true if COCO Eval is configured to evaluate in agnostic mode.""" - return self.params.useCats == 0 - - def GetCategoryIdList(self): - """Returns list of valid category ids.""" - return self.params.catIds - - def ComputeMetrics(self, - include_metrics_per_category=False, - all_metrics_per_category=False): - """Computes detection metrics. - - Args: - include_metrics_per_category: If True, will include metrics per category. - all_metrics_per_category: If True, include all the summary metrics for - each category in per_category_ap. Be careful with setting it to True if - you have more than a handful of categories, because it will pollute - your mldash. - - Returns: - 1. summary_metrics: a dictionary holding: - 'Precision/mAP': mean average precision over classes averaged over IOU - thresholds ranging from .5 to .95 with .05 increments - 'Precision/mAP@.50IOU': mean average precision at 50% IOU - 'Precision/mAP@.75IOU': mean average precision at 75% IOU - 'Precision/mAP (small)': mean average precision for small objects - (area < 32^2 pixels) - 'Precision/mAP (medium)': mean average precision for medium sized - objects (32^2 pixels < area < 96^2 pixels) - 'Precision/mAP (large)': mean average precision for large objects - (96^2 pixels < area < 10000^2 pixels) - 'Recall/AR@1': average recall with 1 detection - 'Recall/AR@10': average recall with 10 detections - 'Recall/AR@100': average recall with 100 detections - 'Recall/AR@100 (small)': average recall for small objects with 100 - detections - 'Recall/AR@100 (medium)': average recall for medium objects with 100 - detections - 'Recall/AR@100 (large)': average recall for large objects with 100 - detections - 2. per_category_ap: a dictionary holding category specific results with - keys of the form: 'Precision mAP ByCategory/category' - (without the supercategory part if no supercategories exist). - For backward compatibility 'PerformanceByCategory' is included in the - output regardless of all_metrics_per_category. - If evaluating in class-agnostic mode, per_category_ap is an empty - dictionary. - - Raises: - ValueError: If category_stats does not exist.
- """ - self.evaluate() - self.accumulate() - self.summarize() - - summary_metrics = OrderedDict([ - ('Precision/mAP', self.stats[0]), - ('Precision/mAP@.50IOU', self.stats[1]), - ('Precision/mAP@.75IOU', self.stats[2]), - ('Precision/mAP (small)', self.stats[3]), - ('Precision/mAP (medium)', self.stats[4]), - ('Precision/mAP (large)', self.stats[5]), - ('Recall/AR@1', self.stats[6]), - ('Recall/AR@10', self.stats[7]), - ('Recall/AR@100', self.stats[8]), - ('Recall/AR@100 (small)', self.stats[9]), - ('Recall/AR@100 (medium)', self.stats[10]), - ('Recall/AR@100 (large)', self.stats[11]) - ]) - if not include_metrics_per_category: - return summary_metrics, {} - if not hasattr(self, 'category_stats'): - raise ValueError('Category stats do not exist') - per_category_ap = OrderedDict([]) - if self.GetAgnosticMode(): - return summary_metrics, per_category_ap - for category_index, category_id in enumerate(self.GetCategoryIdList()): - category = self.GetCategory(category_id)['name'] - # Kept for backward compatilbility - per_category_ap['PerformanceByCategory/mAP/{}'.format( - category)] = self.category_stats[0][category_index] - if all_metrics_per_category: - per_category_ap['Precision mAP ByCategory/{}'.format( - category)] = self.category_stats[0][category_index] - per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( - category)] = self.category_stats[1][category_index] - per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( - category)] = self.category_stats[2][category_index] - per_category_ap['Precision mAP (small) ByCategory/{}'.format( - category)] = self.category_stats[3][category_index] - per_category_ap['Precision mAP (medium) ByCategory/{}'.format( - category)] = self.category_stats[4][category_index] - per_category_ap['Precision mAP (large) ByCategory/{}'.format( - category)] = self.category_stats[5][category_index] - per_category_ap['Recall AR@1 ByCategory/{}'.format( - category)] = self.category_stats[6][category_index] - per_category_ap['Recall AR@10 ByCategory/{}'.format( - category)] = self.category_stats[7][category_index] - per_category_ap['Recall AR@100 ByCategory/{}'.format( - category)] = self.category_stats[8][category_index] - per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( - category)] = self.category_stats[9][category_index] - per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( - category)] = self.category_stats[10][category_index] - per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( - category)] = self.category_stats[11][category_index] - - return summary_metrics, per_category_ap - - -def _ConvertBoxToCOCOFormat(box): - """Converts a box in [ymin, xmin, ymax, xmax] format to COCO format. - - This is a utility function for converting from our internal - [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API - i.e., [xmin, ymin, width, height]. - - Args: - box: a [ymin, xmin, ymax, xmax] numpy array - - Returns: - a list of floats representing [xmin, ymin, width, height] - """ - return [float(box[1]), float(box[0]), float(box[3] - box[1]), - float(box[2] - box[0])] - - -def _RleCompress(masks): - """Compresses mask using Run-length encoding provided by pycocotools. - - Args: - masks: uint8 numpy array of shape [mask_height, mask_width] with values in - {0, 1}. - - Returns: - A pycocotools Run-length encoding of the mask. 
- """ - return mask.encode(np.asfortranarray(masks)) - - -def ExportSingleImageGroundtruthToCoco(image_id, - next_annotation_id, - category_id_set, - groundtruth_boxes, - groundtruth_classes, - groundtruth_masks=None, - groundtruth_is_crowd=None): - """Export groundtruth of a single image to COCO format. - - This function converts groundtruth detection annotations represented as numpy - arrays to dictionaries that can be ingested by the COCO evaluation API. Note - that the image_ids provided here must match the ones given to - ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in - correspondence - that is: groundtruth_boxes[i, :], and - groundtruth_classes[i] are associated with the same groundtruth annotation. - - In the exported result, "area" fields are always set to the area of the - groundtruth bounding box. - - Args: - image_id: a unique image identifier either of type integer or string. - next_annotation_id: integer specifying the first id to use for the - groundtruth annotations. All annotations are assigned a continuous integer - id starting from this value. - category_id_set: A set of valid class ids. Groundtruth with classes not in - category_id_set are dropped. - groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] - groundtruth_classes: numpy array (int) with shape [num_gt_boxes] - groundtruth_masks: optional uint8 numpy array of shape [num_detections, - image_height, image_width] containing detection_masks. - groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] - indicating whether groundtruth boxes are crowd. - - Returns: - a list of groundtruth annotations for a single image in the COCO format. - - Raises: - ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the - right lengths or (2) if each of the elements inside these lists do not - have the correct shapes or (3) if image_ids are not integers - """ - - if len(groundtruth_classes.shape) != 1: - raise ValueError('groundtruth_classes is ' - 'expected to be of rank 1.') - if len(groundtruth_boxes.shape) != 2: - raise ValueError('groundtruth_boxes is expected to be of ' - 'rank 2.') - if groundtruth_boxes.shape[1] != 4: - raise ValueError('groundtruth_boxes should have ' - 'shape[1] == 4.') - num_boxes = groundtruth_classes.shape[0] - if num_boxes != groundtruth_boxes.shape[0]: - raise ValueError('Corresponding entries in groundtruth_classes, ' - 'and groundtruth_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension).' - 'Classes shape: %d. Boxes shape: %d. 
Image ID: %s' % ( - groundtruth_classes.shape[0], - groundtruth_boxes.shape[0], image_id)) - has_is_crowd = groundtruth_is_crowd is not None - if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: - raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') - groundtruth_list = [] - for i in range(num_boxes): - if groundtruth_classes[i] in category_id_set: - iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 - export_dict = { - 'id': - next_annotation_id + i, - 'image_id': - image_id, - 'category_id': - int(groundtruth_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), - 'area': - float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * - (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), - 'iscrowd': - iscrowd - } - if groundtruth_masks is not None: - export_dict['segmentation'] = _RleCompress(groundtruth_masks[i]) - groundtruth_list.append(export_dict) - return groundtruth_list - - -def ExportGroundtruthToCOCO(image_ids, - groundtruth_boxes, - groundtruth_classes, - categories, - output_path=None): - """Export groundtruth detection annotations in numpy arrays to COCO API. - - This function converts a set of groundtruth detection annotations represented - as numpy arrays to dictionaries that can be ingested by the COCO API. - Inputs to this function are three lists: image ids for each groundtruth image, - groundtruth boxes for each image and groundtruth classes respectively. - Note that the image_ids provided here must match the ones given to the - ExportDetectionsToCOCO function in order for evaluation to work properly. - We assume that for each image, boxes, scores and classes are in - correspondence --- that is: image_id[i], groundtruth_boxes[i, :] and - groundtruth_classes[i] are associated with the same groundtruth annotation. - - In the exported result, "area" fields are always set to the area of the - groundtruth bounding box and "iscrowd" fields are always set to 0. - TODO(jonathanhuang): pass in "iscrowd" array for evaluating on COCO dataset. - - Args: - image_ids: a list of unique image identifier either of type integer or - string. - groundtruth_boxes: list of numpy arrays with shape [num_gt_boxes, 4] - (note that num_gt_boxes can be different for each entry in the list) - groundtruth_classes: list of numpy arrays (int) with shape [num_gt_boxes] - (note that num_gt_boxes can be different for each entry in the list) - categories: a list of dictionaries representing all possible categories. - Each dict in this list has the following keys: - 'id': (required) an integer id uniquely identifying this category - 'name': (required) string representing category name - e.g., 'cat', 'dog', 'pizza' - 'supercategory': (optional) string representing the supercategory - e.g., 'animal', 'vehicle', 'food', etc - output_path: (optional) path for exporting result to JSON - Returns: - dictionary that can be read by COCO API - Raises: - ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the - right lengths or (2) if each of the elements inside these lists do not - have the correct shapes or (3) if image_ids are not integers - """ - category_id_set = set([cat['id'] for cat in categories]) - groundtruth_export_list = [] - image_export_list = [] - if not len(image_ids) == len(groundtruth_boxes) == len(groundtruth_classes): - raise ValueError('Input lists must have the same length') - - # For reasons internal to the COCO API, it is important that annotation ids - # are not equal to zero; we thus start counting from 1. 
- annotation_id = 1 - for image_id, boxes, classes in zip(image_ids, groundtruth_boxes, - groundtruth_classes): - image_export_list.append({'id': image_id}) - groundtruth_export_list.extend(ExportSingleImageGroundtruthToCoco( - image_id, - annotation_id, - category_id_set, - boxes, - classes)) - num_boxes = classes.shape[0] - annotation_id += num_boxes - - groundtruth_dict = { - 'annotations': groundtruth_export_list, - 'images': image_export_list, - 'categories': categories - } - if output_path: - with tf.gfile.GFile(output_path, 'w') as fid: - json_utils.Dump(groundtruth_dict, fid, float_digits=4, indent=2) - return groundtruth_dict - - def ExportSingleImageDetectionBoxesToCoco(image_id, - category_id_set, - detection_boxes, - detection_scores, - detection_classes): - """Export detections of a single image to COCO format. - - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. Note that the image_ids - provided here must match the ones given to - ExportSingleImageGroundtruthToCoco. We assume that boxes and classes are in - correspondence - that is: boxes[i, :] and classes[i] - are associated with the same detection. - - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_boxes: float numpy array of shape [num_detections, 4] containing - detection boxes. - detection_scores: float numpy array of shape [num_detections] containing - scores for the detection boxes. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection boxes. - - Returns: - a list of detection annotations for a single image in the COCO format. - - Raises: - ValueError: if (1) detection_boxes, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. - """ - - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError('All entries in detection_classes and detection_scores ' - 'expected to be of rank 1.') - if len(detection_boxes.shape) != 2: - raise ValueError('All entries in detection_boxes expected to be of ' - 'rank 2.') - if detection_boxes.shape[1] != 4: - raise ValueError('All entries in detection_boxes should have ' - 'shape[1] == 4.') - num_boxes = detection_classes.shape[0] - if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: - raise ValueError('Corresponding entries in detection_classes, ' - 'detection_scores and detection_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension). ' - 'Classes shape: %d. Boxes shape: %d. ' - 'Scores shape: %d' % ( - detection_classes.shape[0], detection_boxes.shape[0], - detection_scores.shape[0] - )) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': image_id, - 'category_id': int(detection_classes[i]), - 'bbox': list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), - 'score': float(detection_scores[i]) - }) - return detections_list - - def ExportSingleImageDetectionMasksToCoco(image_id, - category_id_set, - detection_masks, - detection_scores, - detection_classes): - """Export detection masks of a single image to COCO format.
- - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. We assume that - detection_masks, detection_scores, and detection_classes are in correspondence - - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] - are associated with the same annotation. - - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_masks: uint8 numpy array of shape [num_detections, image_height, - image_width] containing detection_masks. - detection_scores: float numpy array of shape [num_detections] containing - scores for detection masks. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection masks. - - Returns: - a list of detection mask annotations for a single image in the COCO format. - - Raises: - ValueError: if (1) detection_masks, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. - """ - - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError('All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - num_boxes = detection_classes.shape[0] - if not num_boxes == len(detection_masks) == detection_scores.shape[0]: - raise ValueError('Corresponding entries in detection_classes, ' - 'detection_scores and detection_masks should have ' - 'compatible lengths and shapes ' - 'Classes length: %d. Masks length: %d. ' - 'Scores length: %d' % ( - detection_classes.shape[0], len(detection_masks), - detection_scores.shape[0] - )) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': image_id, - 'category_id': int(detection_classes[i]), - 'segmentation': _RleCompress(detection_masks[i]), - 'score': float(detection_scores[i]) - }) - return detections_list - - -def ExportDetectionsToCOCO(image_ids, - detection_boxes, - detection_scores, - detection_classes, - categories, - output_path=None): - """Export detection annotations in numpy arrays to COCO API. - - This function converts a set of predicted detections represented - as numpy arrays to dictionaries that can be ingested by the COCO API. - Inputs to this function are lists, consisting of boxes, scores and - classes, respectively, corresponding to each image for which detections - have been produced. Note that the image_ids provided here must - match the ones given to the ExportGroundtruthToCOCO function in order - for evaluation to work properly. - - We assume that for each image, boxes, scores and classes are in - correspondence --- that is: detection_boxes[i, :], detection_scores[i] and - detection_classes[i] are associated with the same detection. - - Args: - image_ids: a list of unique image identifier either of type integer or - string. - detection_boxes: list of numpy arrays with shape [num_detection_boxes, 4] - detection_scores: list of numpy arrays (float) with shape - [num_detection_boxes]. Note that num_detection_boxes can be different - for each entry in the list. - detection_classes: list of numpy arrays (int) with shape - [num_detection_boxes]. Note that num_detection_boxes can be different - for each entry in the list. 
-    categories: a list of dictionaries representing all possible categories.
-      Each dict in this list must have an integer 'id' key uniquely identifying
-      this category.
-    output_path: (optional) path for exporting result to JSON
-
-  Returns:
-    list of dictionaries that can be read by COCO API, where each entry
-    corresponds to a single detection and has keys from:
-    ['image_id', 'category_id', 'bbox', 'score'].
-
-  Raises:
-    ValueError: if (1) detection_boxes and detection_classes do not have the
-      right lengths or (2) if each of the elements inside these lists do not
-      have the correct shapes or (3) if image_ids are not integers.
-  """
-  category_id_set = set([cat['id'] for cat in categories])
-  detections_export_list = []
-  if not (len(image_ids) == len(detection_boxes) == len(detection_scores) ==
-          len(detection_classes)):
-    raise ValueError('Input lists must have the same length')
-  for image_id, boxes, scores, classes in zip(image_ids, detection_boxes,
-                                              detection_scores,
-                                              detection_classes):
-    detections_export_list.extend(ExportSingleImageDetectionBoxesToCoco(
-        image_id,
-        category_id_set,
-        boxes,
-        scores,
-        classes))
-  if output_path:
-    with tf.gfile.GFile(output_path, 'w') as fid:
-      json_utils.Dump(detections_export_list, fid, float_digits=4, indent=2)
-  return detections_export_list
-
-
-def ExportSegmentsToCOCO(image_ids,
-                         detection_masks,
-                         detection_scores,
-                         detection_classes,
-                         categories,
-                         output_path=None):
-  """Export segmentation masks in numpy arrays to COCO API.
-
-  This function converts a set of predicted instance masks represented
-  as numpy arrays to dictionaries that can be ingested by the COCO API.
-  Inputs to this function are lists, consisting of segments, scores and
-  classes, respectively, corresponding to each image for which detections
-  have been produced.
-
-  Note that this function is recommended for small datasets. For large
-  datasets it should be combined with a merge step (e.g. in MapReduce);
-  otherwise the memory consumption is large.
-
-  We assume that for each image, masks, scores and classes are in
-  correspondence --- that is: detection_masks[i, :, :, :], detection_scores[i]
-  and detection_classes[i] are associated with the same detection.
-
-  Args:
-    image_ids: list of image ids (typically ints or strings)
-    detection_masks: list of numpy arrays with shape [num_detection, h, w, 1]
-      and type uint8. The height and width should match the shape of the
-      corresponding image.
-    detection_scores: list of numpy arrays (float) with shape
-      [num_detection]. Note that num_detection can be different
-      for each entry in the list.
-    detection_classes: list of numpy arrays (int) with shape
-      [num_detection]. Note that num_detection can be different
-      for each entry in the list.
-    categories: a list of dictionaries representing all possible categories.
-      Each dict in this list must have an integer 'id' key uniquely identifying
-      this category.
-    output_path: (optional) path for exporting result to JSON
-
-  Returns:
-    list of dictionaries that can be read by COCO API, where each entry
-    corresponds to a single detection and has keys from:
-    ['image_id', 'category_id', 'segmentation', 'score'].
-
-  Raises:
-    ValueError: if detection_masks and detection_classes do not have the
-      right lengths or if each of the elements inside these lists do not
-      have the correct shapes.
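For reference, a minimal sketch of the batch exporter ExportDetectionsToCOCO (completed above), modeled on the unit test later in this diff. The image ids, categories, and output path are illustrative:

```python
import numpy as np

from object_detection.metrics import coco_tools

image_ids = ['first', 'second']
detection_boxes = [np.array([[100., 100., 200., 200.]]),
                   np.array([[50., 50., 100., 100.]])]
detection_scores = [np.array([.8]), np.array([.7])]
detection_classes = [np.array([1], np.int32), np.array([1], np.int32)]
categories = [{'id': 1, 'name': 'cat'}, {'id': 2, 'name': 'dog'}]

# image_ids must match the ones passed to ExportGroundtruthToCOCO.
detections = coco_tools.ExportDetectionsToCOCO(
    image_ids, detection_boxes, detection_scores, detection_classes,
    categories, output_path='/tmp/detections.json')  # path is illustrative
```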
- """ - if not (len(image_ids) == len(detection_masks) == len(detection_scores) == - len(detection_classes)): - raise ValueError('Input lists must have the same length') - - segment_export_list = [] - for image_id, masks, scores, classes in zip(image_ids, detection_masks, - detection_scores, - detection_classes): - - if len(classes.shape) != 1 or len(scores.shape) != 1: - raise ValueError('All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - if len(masks.shape) != 4: - raise ValueError('All entries in masks expected to be of ' - 'rank 4. Given {}'.format(masks.shape)) - - num_boxes = classes.shape[0] - if not num_boxes == masks.shape[0] == scores.shape[0]: - raise ValueError('Corresponding entries in segment_classes, ' - 'detection_scores and detection_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension).') - - category_id_set = set([cat['id'] for cat in categories]) - segment_export_list.extend(ExportSingleImageDetectionMasksToCoco( - image_id, category_id_set, np.squeeze(masks, axis=3), scores, classes)) - - if output_path: - with tf.gfile.GFile(output_path, 'w') as fid: - json_utils.Dump(segment_export_list, fid, float_digits=4, indent=2) - return segment_export_list - - -def ExportKeypointsToCOCO(image_ids, - detection_keypoints, - detection_scores, - detection_classes, - categories, - output_path=None): - """Exports keypoints in numpy arrays to COCO API. - - This function converts a set of predicted keypoints represented - as numpy arrays to dictionaries that can be ingested by the COCO API. - Inputs to this function are lists, consisting of keypoints, scores and - classes, respectively, corresponding to each image for which detections - have been produced. - - We assume that for each image, keypoints, scores and classes are in - correspondence --- that is: detection_keypoints[i, :, :, :], - detection_scores[i] and detection_classes[i] are associated with the same - detection. - - Args: - image_ids: list of image ids (typically ints or strings) - detection_keypoints: list of numpy arrays with shape - [num_detection, num_keypoints, 2] and type float32 in absolute - x-y coordinates. - detection_scores: list of numpy arrays (float) with shape - [num_detection]. Note that num_detection can be different - for each entry in the list. - detection_classes: list of numpy arrays (int) with shape - [num_detection]. Note that num_detection can be different - for each entry in the list. - categories: a list of dictionaries representing all possible categories. - Each dict in this list must have an integer 'id' key uniquely identifying - this category and an integer 'num_keypoints' key specifying the number of - keypoints the category has. - output_path: (optional) path for exporting result to JSON - - Returns: - list of dictionaries that can be read by COCO API, where each entry - corresponds to a single detection and has keys from: - ['image_id', 'category_id', 'keypoints', 'score']. - - Raises: - ValueError: if detection_keypoints and detection_classes do not have the - right lengths or if each of the elements inside these lists do not - have the correct shapes. 
- """ - if not (len(image_ids) == len(detection_keypoints) == - len(detection_scores) == len(detection_classes)): - raise ValueError('Input lists must have the same length') - - keypoints_export_list = [] - for image_id, keypoints, scores, classes in zip( - image_ids, detection_keypoints, detection_scores, detection_classes): - - if len(classes.shape) != 1 or len(scores.shape) != 1: - raise ValueError('All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - if len(keypoints.shape) != 3: - raise ValueError('All entries in keypoints expected to be of ' - 'rank 3. Given {}'.format(keypoints.shape)) - - num_boxes = classes.shape[0] - if not num_boxes == keypoints.shape[0] == scores.shape[0]: - raise ValueError('Corresponding entries in detection_classes, ' - 'detection_keypoints, and detection_scores should have ' - 'compatible shapes (i.e., agree on the 0th dimension).') - - category_id_set = set([cat['id'] for cat in categories]) - category_id_to_num_keypoints_map = { - cat['id']: cat['num_keypoints'] for cat in categories - if 'num_keypoints' in cat} - - for i in range(num_boxes): - if classes[i] not in category_id_set: - raise ValueError('class id should be in category_id_set\n') - - if classes[i] in category_id_to_num_keypoints_map: - num_keypoints = category_id_to_num_keypoints_map[classes[i]] - # Adds extra ones to indicate the visibility for each keypoint as is - # recommended by MSCOCO. - instance_keypoints = np.concatenate( - [keypoints[i, 0:num_keypoints, :], - np.expand_dims(np.ones(num_keypoints), axis=1)], - axis=1).astype(int) - - instance_keypoints = instance_keypoints.flatten().tolist() - keypoints_export_list.append({ - 'image_id': image_id, - 'category_id': int(classes[i]), - 'keypoints': instance_keypoints, - 'score': float(scores[i]) - }) - - if output_path: - with tf.gfile.GFile(output_path, 'w') as fid: - json_utils.Dump(keypoints_export_list, fid, float_digits=4, indent=2) - return keypoints_export_list diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/coco_tools_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/coco_tools_test.py deleted file mode 100644 index cfb73d8c332420d93e19029f53e4068c9fc7b23b..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/coco_tools_test.py +++ /dev/null @@ -1,295 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for tensorflow_model.object_detection.metrics.coco_tools.""" -import json -import os -import re -import numpy as np - -from pycocotools import mask - -import tensorflow as tf - -from object_detection.metrics import coco_tools - - -class CocoToolsTest(tf.test.TestCase): - - def setUp(self): - groundtruth_annotations_list = [ - { - 'id': 1, - 'image_id': 'first', - 'category_id': 1, - 'bbox': [100., 100., 100., 100.], - 'area': 100.**2, - 'iscrowd': 0 - }, - { - 'id': 2, - 'image_id': 'second', - 'category_id': 1, - 'bbox': [50., 50., 50., 50.], - 'area': 50.**2, - 'iscrowd': 0 - }, - ] - image_list = [{'id': 'first'}, {'id': 'second'}] - category_list = [{'id': 0, 'name': 'person'}, - {'id': 1, 'name': 'cat'}, - {'id': 2, 'name': 'dog'}] - self._groundtruth_dict = { - 'annotations': groundtruth_annotations_list, - 'images': image_list, - 'categories': category_list - } - - self._detections_list = [ - { - 'image_id': 'first', - 'category_id': 1, - 'bbox': [100., 100., 100., 100.], - 'score': .8 - }, - { - 'image_id': 'second', - 'category_id': 1, - 'bbox': [50., 50., 50., 50.], - 'score': .7 - }, - ] - - def testCocoWrappers(self): - groundtruth = coco_tools.COCOWrapper(self._groundtruth_dict) - detections = groundtruth.LoadAnnotations(self._detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections) - summary_metrics, _ = evaluator.ComputeMetrics() - self.assertAlmostEqual(1.0, summary_metrics['Precision/mAP']) - - def testExportGroundtruthToCOCO(self): - image_ids = ['first', 'second'] - groundtruth_boxes = [np.array([[100, 100, 200, 200]], np.float), - np.array([[50, 50, 100, 100]], np.float)] - groundtruth_classes = [np.array([1], np.int32), np.array([1], np.int32)] - categories = [{'id': 0, 'name': 'person'}, - {'id': 1, 'name': 'cat'}, - {'id': 2, 'name': 'dog'}] - output_path = os.path.join(tf.test.get_temp_dir(), 'groundtruth.json') - result = coco_tools.ExportGroundtruthToCOCO( - image_ids, - groundtruth_boxes, - groundtruth_classes, - categories, - output_path=output_path) - self.assertDictEqual(result, self._groundtruth_dict) - with tf.gfile.GFile(output_path, 'r') as f: - written_result = f.read() - # The json output should have floats written to 4 digits of precision. - matcher = re.compile(r'"bbox":\s+\[\n\s+\d+.\d\d\d\d,', re.MULTILINE) - self.assertTrue(matcher.findall(written_result)) - written_result = json.loads(written_result) - self.assertAlmostEqual(result, written_result) - - def testExportDetectionsToCOCO(self): - image_ids = ['first', 'second'] - detections_boxes = [np.array([[100, 100, 200, 200]], np.float), - np.array([[50, 50, 100, 100]], np.float)] - detections_scores = [np.array([.8], np.float), np.array([.7], np.float)] - detections_classes = [np.array([1], np.int32), np.array([1], np.int32)] - categories = [{'id': 0, 'name': 'person'}, - {'id': 1, 'name': 'cat'}, - {'id': 2, 'name': 'dog'}] - output_path = os.path.join(tf.test.get_temp_dir(), 'detections.json') - result = coco_tools.ExportDetectionsToCOCO( - image_ids, - detections_boxes, - detections_scores, - detections_classes, - categories, - output_path=output_path) - self.assertListEqual(result, self._detections_list) - with tf.gfile.GFile(output_path, 'r') as f: - written_result = f.read() - # The json output should have floats written to 4 digits of precision. 
- matcher = re.compile(r'"bbox":\s+\[\n\s+\d+.\d\d\d\d,', re.MULTILINE) - self.assertTrue(matcher.findall(written_result)) - written_result = json.loads(written_result) - self.assertAlmostEqual(result, written_result) - - def testExportSegmentsToCOCO(self): - image_ids = ['first', 'second'] - detection_masks = [np.array( - [[[0, 1, 0, 1], [0, 1, 1, 0], [0, 0, 0, 1], [0, 1, 0, 1]]], - dtype=np.uint8), np.array( - [[[0, 1, 0, 1], [0, 1, 1, 0], [0, 0, 0, 1], [0, 1, 0, 1]]], - dtype=np.uint8)] - - for i, detection_mask in enumerate(detection_masks): - detection_masks[i] = detection_mask[:, :, :, None] - - detection_scores = [np.array([.8], np.float), np.array([.7], np.float)] - detection_classes = [np.array([1], np.int32), np.array([1], np.int32)] - - categories = [{'id': 0, 'name': 'person'}, - {'id': 1, 'name': 'cat'}, - {'id': 2, 'name': 'dog'}] - output_path = os.path.join(tf.test.get_temp_dir(), 'segments.json') - result = coco_tools.ExportSegmentsToCOCO( - image_ids, - detection_masks, - detection_scores, - detection_classes, - categories, - output_path=output_path) - with tf.gfile.GFile(output_path, 'r') as f: - written_result = f.read() - written_result = json.loads(written_result) - mask_load = mask.decode([written_result[0]['segmentation']]) - self.assertTrue(np.allclose(mask_load, detection_masks[0])) - self.assertAlmostEqual(result, written_result) - - def testExportKeypointsToCOCO(self): - image_ids = ['first', 'second'] - detection_keypoints = [ - np.array( - [[[100, 200], [300, 400], [500, 600]], - [[50, 150], [250, 350], [450, 550]]], dtype=np.int32), - np.array( - [[[110, 210], [310, 410], [510, 610]], - [[60, 160], [260, 360], [460, 560]]], dtype=np.int32)] - - detection_scores = [np.array([.8, 0.2], np.float), - np.array([.7, 0.3], np.float)] - detection_classes = [np.array([1, 1], np.int32), np.array([1, 1], np.int32)] - - categories = [{'id': 1, 'name': 'person', 'num_keypoints': 3}, - {'id': 2, 'name': 'cat'}, - {'id': 3, 'name': 'dog'}] - - output_path = os.path.join(tf.test.get_temp_dir(), 'keypoints.json') - result = coco_tools.ExportKeypointsToCOCO( - image_ids, - detection_keypoints, - detection_scores, - detection_classes, - categories, - output_path=output_path) - - with tf.gfile.GFile(output_path, 'r') as f: - written_result = f.read() - written_result = json.loads(written_result) - self.assertAlmostEqual(result, written_result) - - def testSingleImageDetectionBoxesExport(self): - boxes = np.array([[0, 0, 1, 1], - [0, 0, .5, .5], - [.5, .5, 1, 1]], dtype=np.float32) - classes = np.array([1, 2, 3], dtype=np.int32) - scores = np.array([0.8, 0.2, 0.7], dtype=np.float32) - coco_boxes = np.array([[0, 0, 1, 1], - [0, 0, .5, .5], - [.5, .5, .5, .5]], dtype=np.float32) - coco_annotations = coco_tools.ExportSingleImageDetectionBoxesToCoco( - image_id='first_image', - category_id_set=set([1, 2, 3]), - detection_boxes=boxes, - detection_classes=classes, - detection_scores=scores) - for i, annotation in enumerate(coco_annotations): - self.assertEqual(annotation['image_id'], 'first_image') - self.assertEqual(annotation['category_id'], classes[i]) - self.assertAlmostEqual(annotation['score'], scores[i]) - self.assertTrue(np.all(np.isclose(annotation['bbox'], coco_boxes[i]))) - - def testSingleImageDetectionMaskExport(self): - masks = np.array( - [[[1, 1,], [1, 1]], - [[0, 0], [0, 1]], - [[0, 0], [0, 0]]], dtype=np.uint8) - classes = np.array([1, 2, 3], dtype=np.int32) - scores = np.array([0.8, 0.2, 0.7], dtype=np.float32) - coco_annotations = 
coco_tools.ExportSingleImageDetectionMasksToCoco( - image_id='first_image', - category_id_set=set([1, 2, 3]), - detection_classes=classes, - detection_scores=scores, - detection_masks=masks) - expected_counts = ['04', '31', '4'] - for i, mask_annotation in enumerate(coco_annotations): - self.assertEqual(mask_annotation['segmentation']['counts'], - expected_counts[i]) - self.assertTrue(np.all(np.equal(mask.decode( - mask_annotation['segmentation']), masks[i]))) - self.assertEqual(mask_annotation['image_id'], 'first_image') - self.assertEqual(mask_annotation['category_id'], classes[i]) - self.assertAlmostEqual(mask_annotation['score'], scores[i]) - - def testSingleImageGroundtruthExport(self): - masks = np.array( - [[[1, 1,], [1, 1]], - [[0, 0], [0, 1]], - [[0, 0], [0, 0]]], dtype=np.uint8) - boxes = np.array([[0, 0, 1, 1], - [0, 0, .5, .5], - [.5, .5, 1, 1]], dtype=np.float32) - coco_boxes = np.array([[0, 0, 1, 1], - [0, 0, .5, .5], - [.5, .5, .5, .5]], dtype=np.float32) - classes = np.array([1, 2, 3], dtype=np.int32) - is_crowd = np.array([0, 1, 0], dtype=np.int32) - next_annotation_id = 1 - expected_counts = ['04', '31', '4'] - - # Tests exporting without passing in is_crowd (for backward compatibility). - coco_annotations = coco_tools.ExportSingleImageGroundtruthToCoco( - image_id='first_image', - category_id_set=set([1, 2, 3]), - next_annotation_id=next_annotation_id, - groundtruth_boxes=boxes, - groundtruth_classes=classes, - groundtruth_masks=masks) - for i, annotation in enumerate(coco_annotations): - self.assertEqual(annotation['segmentation']['counts'], - expected_counts[i]) - self.assertTrue(np.all(np.equal(mask.decode( - annotation['segmentation']), masks[i]))) - self.assertTrue(np.all(np.isclose(annotation['bbox'], coco_boxes[i]))) - self.assertEqual(annotation['image_id'], 'first_image') - self.assertEqual(annotation['category_id'], classes[i]) - self.assertEqual(annotation['id'], i + next_annotation_id) - - # Tests exporting with is_crowd. - coco_annotations = coco_tools.ExportSingleImageGroundtruthToCoco( - image_id='first_image', - category_id_set=set([1, 2, 3]), - next_annotation_id=next_annotation_id, - groundtruth_boxes=boxes, - groundtruth_classes=classes, - groundtruth_masks=masks, - groundtruth_is_crowd=is_crowd) - for i, annotation in enumerate(coco_annotations): - self.assertEqual(annotation['segmentation']['counts'], - expected_counts[i]) - self.assertTrue(np.all(np.equal(mask.decode( - annotation['segmentation']), masks[i]))) - self.assertTrue(np.all(np.isclose(annotation['bbox'], coco_boxes[i]))) - self.assertEqual(annotation['image_id'], 'first_image') - self.assertEqual(annotation['category_id'], classes[i]) - self.assertEqual(annotation['iscrowd'], is_crowd[i]) - self.assertEqual(annotation['id'], i + next_annotation_id) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/offline_eval_map_corloc.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/offline_eval_map_corloc.py deleted file mode 100644 index b7b1eb696ee277858430533e56c43290171b9fdd..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/offline_eval_map_corloc.py +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Evaluation executable for detection data. - -This executable evaluates precomputed detections produced by a detection -model and writes the evaluation results into csv file metrics.csv, stored -in the directory, specified by --eval_dir. - -The evaluation metrics set is supplied in object_detection.protos.EvalConfig -in metrics_set field. -Currently two set of metrics are supported: -- pascal_voc_metrics: standard PASCAL VOC 2007 metric -- open_images_detection_metrics: Open Image V2 metric -All other field of object_detection.protos.EvalConfig are ignored. - -Example usage: - ./compute_metrics \ - --eval_dir=path/to/eval_dir \ - --eval_config_path=path/to/evaluation/configuration/file \ - --input_config_path=path/to/input/configuration/file -""" -import csv -import os -import re -import tensorflow as tf - -from object_detection import evaluator -from object_detection.core import standard_fields -from object_detection.metrics import tf_example_parser -from object_detection.utils import config_util -from object_detection.utils import label_map_util - -flags = tf.app.flags -tf.logging.set_verbosity(tf.logging.INFO) - -flags.DEFINE_string('eval_dir', None, 'Directory to write eval summaries to.') -flags.DEFINE_string('eval_config_path', None, - 'Path to an eval_pb2.EvalConfig config file.') -flags.DEFINE_string('input_config_path', None, - 'Path to an eval_pb2.InputConfig config file.') - -FLAGS = flags.FLAGS - - -def _generate_sharded_filenames(filename): - m = re.search(r'@(\d{1,})', filename) - if m: - num_shards = int(m.group(1)) - return [ - re.sub(r'@(\d{1,})', '-%.5d-of-%.5d' % (i, num_shards), filename) - for i in range(num_shards) - ] - else: - return [filename] - - -def _generate_filenames(filenames): - result = [] - for filename in filenames: - result += _generate_sharded_filenames(filename) - return result - - -def read_data_and_evaluate(input_config, eval_config): - """Reads pre-computed object detections and groundtruth from tf_record. - - Args: - input_config: input config proto of type - object_detection.protos.InputReader. - eval_config: evaluation config proto of type - object_detection.protos.EvalConfig. - - Returns: - Evaluated detections metrics. - - Raises: - ValueError: if input_reader type is not supported or metric type is unknown. 
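For illustration, the @N shard syntax handled by _generate_sharded_filenames above expands into sequentially numbered shard names (this mirrors the unit test later in this diff):

```python
_generate_sharded_filenames('/path/to/abc@3')
# -> ['/path/to/abc-00000-of-00003',
#     '/path/to/abc-00001-of-00003',
#     '/path/to/abc-00002-of-00003']
```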
- """ - if input_config.WhichOneof('input_reader') == 'tf_record_input_reader': - input_paths = input_config.tf_record_input_reader.input_path - - label_map = label_map_util.load_labelmap(input_config.label_map_path) - max_num_classes = max([item.id for item in label_map.item]) - categories = label_map_util.convert_label_map_to_categories( - label_map, max_num_classes) - - object_detection_evaluators = evaluator.get_evaluators( - eval_config, categories) - # Support a single evaluator - object_detection_evaluator = object_detection_evaluators[0] - - skipped_images = 0 - processed_images = 0 - for input_path in _generate_filenames(input_paths): - tf.logging.info('Processing file: {0}'.format(input_path)) - - record_iterator = tf.python_io.tf_record_iterator(path=input_path) - data_parser = tf_example_parser.TfExampleDetectionAndGTParser() - - for string_record in record_iterator: - tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 1000, - processed_images) - processed_images += 1 - - example = tf.train.Example() - example.ParseFromString(string_record) - decoded_dict = data_parser.parse(example) - - if decoded_dict: - object_detection_evaluator.add_single_ground_truth_image_info( - decoded_dict[standard_fields.DetectionResultFields.key], - decoded_dict) - object_detection_evaluator.add_single_detected_image_info( - decoded_dict[standard_fields.DetectionResultFields.key], - decoded_dict) - else: - skipped_images += 1 - tf.logging.info('Skipped images: {0}'.format(skipped_images)) - - return object_detection_evaluator.evaluate() - - raise ValueError('Unsupported input_reader_config.') - - -def write_metrics(metrics, output_dir): - """Write metrics to the output directory. - - Args: - metrics: A dictionary containing metric names and values. - output_dir: Directory to write metrics to. - """ - tf.logging.info('Writing metrics.') - - with open(os.path.join(output_dir, 'metrics.csv'), 'w') as csvfile: - metrics_writer = csv.writer(csvfile, delimiter=',') - for metric_name, metric_value in metrics.items(): - metrics_writer.writerow([metric_name, str(metric_value)]) - - -def main(argv): - del argv - required_flags = ['input_config_path', 'eval_config_path', 'eval_dir'] - for flag_name in required_flags: - if not getattr(FLAGS, flag_name): - raise ValueError('Flag --{} is required'.format(flag_name)) - - configs = config_util.get_configs_from_multiple_files( - eval_input_config_path=FLAGS.input_config_path, - eval_config_path=FLAGS.eval_config_path) - - eval_config = configs['eval_config'] - input_config = configs['eval_input_config'] - - metrics = read_data_and_evaluate(input_config, eval_config) - - # Save metrics - write_metrics(metrics, FLAGS.eval_dir) - - -if __name__ == '__main__': - tf.app.run(main) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/offline_eval_map_corloc_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/offline_eval_map_corloc_test.py deleted file mode 100644 index 68ac3893530afecb6098a00c58811afe89e04554..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/offline_eval_map_corloc_test.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for utilities in offline_eval_map_corloc binary.""" - -import tensorflow as tf - -from object_detection.metrics import offline_eval_map_corloc as offline_eval - - -class OfflineEvalMapCorlocTest(tf.test.TestCase): - - def test_generateShardedFilenames(self): - test_filename = '/path/to/file' - result = offline_eval._generate_sharded_filenames(test_filename) - self.assertEqual(result, [test_filename]) - - test_filename = '/path/to/file-00000-of-00050' - result = offline_eval._generate_sharded_filenames(test_filename) - self.assertEqual(result, [test_filename]) - - result = offline_eval._generate_sharded_filenames('/path/to/@3.record') - self.assertEqual(result, [ - '/path/to/-00000-of-00003.record', '/path/to/-00001-of-00003.record', - '/path/to/-00002-of-00003.record' - ]) - - result = offline_eval._generate_sharded_filenames('/path/to/abc@3') - self.assertEqual(result, [ - '/path/to/abc-00000-of-00003', '/path/to/abc-00001-of-00003', - '/path/to/abc-00002-of-00003' - ]) - - result = offline_eval._generate_sharded_filenames('/path/to/@1') - self.assertEqual(result, ['/path/to/-00000-of-00001']) - - def test_generateFilenames(self): - test_filenames = ['/path/to/file', '/path/to/@3.record'] - result = offline_eval._generate_filenames(test_filenames) - self.assertEqual(result, [ - '/path/to/file', '/path/to/-00000-of-00003.record', - '/path/to/-00001-of-00003.record', '/path/to/-00002-of-00003.record' - ]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/oid_vrd_challenge_evaluation.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/oid_vrd_challenge_evaluation.py deleted file mode 100644 index 3c8bb54a2d286d641a3629bd1dce269cf7de00a6..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/oid_vrd_challenge_evaluation.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Runs evaluation using OpenImages groundtruth and predictions. 
-
-Example usage:
-  python third_party/tensorflow_models/object_detection/\
-  metrics/oid_vrd_challenge_evaluation.py \
-    --input_annotations_boxes=/path/to/input/annotations-human-bbox.csv \
-    --input_annotations_labels=/path/to/input/annotations-label.csv \
-    --input_class_labelmap=/path/to/input/class_labelmap.pbtxt \
-    --input_relationship_labelmap=/path/to/input/relationship_labelmap.pbtxt \
-    --input_predictions=/path/to/input/predictions.csv \
-    --output_metrics=/path/to/output/metric.csv \
-
-CSVs with bounding box annotations and image labels (including the image URLs)
-can be downloaded from the Open Images Challenge website:
-https://storage.googleapis.com/openimages/web/challenge.html
-The format of the input CSVs and the metrics themselves are described on the
-challenge website.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import argparse
-import pandas as pd
-from google.protobuf import text_format
-
-from object_detection.metrics import oid_vrd_challenge_evaluation_utils as utils
-from object_detection.protos import string_int_label_map_pb2
-from object_detection.utils import vrd_evaluation
-
-
-def _load_labelmap(labelmap_path):
-  """Loads labelmap from the labelmap path.
-
-  Args:
-    labelmap_path: Path to the labelmap.
-
-  Returns:
-    A dictionary mapping class name to class numerical id.
-  """
-
-  label_map = string_int_label_map_pb2.StringIntLabelMap()
-  with open(labelmap_path, 'r') as fid:
-    label_map_string = fid.read()
-    text_format.Merge(label_map_string, label_map)
-  labelmap_dict = {}
-  for item in label_map.item:
-    labelmap_dict[item.name] = item.id
-  return labelmap_dict
-
-
-def _swap_labelmap_dict(labelmap_dict):
-  """Swaps keys and values in a labelmap dictionary.
-
-  Args:
-    labelmap_dict: Input dictionary mapping class names to numerical ids.
-
-  Returns:
-    A dictionary mapping class numerical id to class name.
-  """
-  return dict((v, k) for k, v in labelmap_dict.items())
-
-
-def main(parsed_args):
-  all_box_annotations = pd.read_csv(parsed_args.input_annotations_boxes)
-  all_label_annotations = pd.read_csv(parsed_args.input_annotations_labels)
-  all_annotations = pd.concat([all_box_annotations, all_label_annotations])
-
-  class_label_map = _load_labelmap(parsed_args.input_class_labelmap)
-  relationship_label_map = _load_labelmap(
-      parsed_args.input_relationship_labelmap)
-
-  relation_evaluator = vrd_evaluation.VRDRelationDetectionEvaluator()
-  phrase_evaluator = vrd_evaluation.VRDPhraseDetectionEvaluator()
-
-  for _, groundtruth in enumerate(all_annotations.groupby('ImageID')):
-    image_id, image_groundtruth = groundtruth
-    groundtruth_dictionary = utils.build_groundtruth_vrd_dictionary(
-        image_groundtruth, class_label_map, relationship_label_map)
-
-    relation_evaluator.add_single_ground_truth_image_info(
-        image_id, groundtruth_dictionary)
-    phrase_evaluator.add_single_ground_truth_image_info(image_id,
-                                                        groundtruth_dictionary)
-
-  all_predictions = pd.read_csv(parsed_args.input_predictions)
-  for _, prediction_data in enumerate(all_predictions.groupby('ImageID')):
-    image_id, image_predictions = prediction_data
-    prediction_dictionary = utils.build_predictions_vrd_dictionary(
-        image_predictions, class_label_map, relationship_label_map)
-
-    relation_evaluator.add_single_detected_image_info(image_id,
-                                                      prediction_dictionary)
-    phrase_evaluator.add_single_detected_image_info(image_id,
-                                                    prediction_dictionary)
-
-  relation_metrics = relation_evaluator.evaluate()
-  phrase_metrics = phrase_evaluator.evaluate()
-
-  with open(parsed_args.output_metrics, 'w') as fid:
-    utils.write_csv(fid, relation_metrics)
-    utils.write_csv(fid, phrase_metrics)
-
-
-if __name__ == '__main__':
-
-  parser = argparse.ArgumentParser(
-      description=
-      'Evaluate Open Images Visual Relationship Detection predictions.')
-  parser.add_argument(
-      '--input_annotations_boxes',
-      required=True,
-      help='File with groundtruth vrd annotations.')
-  parser.add_argument(
-      '--input_annotations_labels',
-      required=True,
-      help='File with groundtruth label annotations.')
-  parser.add_argument(
-      '--input_predictions',
-      required=True,
-      help="""File with detection predictions; NOTE: no postprocessing is
-      applied in the evaluation script.""")
-  parser.add_argument(
-      '--input_class_labelmap',
-      required=True,
-      help="""OpenImages Challenge labelmap; note: it is expected to include
-      attributes.""")
-  parser.add_argument(
-      '--input_relationship_labelmap',
-      required=True,
-      help="""OpenImages Challenge relationship labelmap.""")
-  parser.add_argument(
-      '--output_metrics', required=True, help='Output file with csv metrics')
-
-  args = parser.parse_args()
-  main(args)
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/oid_vrd_challenge_evaluation_utils.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/oid_vrd_challenge_evaluation_utils.py
deleted file mode 100644
index 8c834775890df314cd09120d2003b34c66cdac94..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/oid_vrd_challenge_evaluation_utils.py
+++ /dev/null
@@ -1,133 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Converts data from CSV format to the VRDDetectionEvaluator format.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import csv -import numpy as np -from object_detection.core import standard_fields -from object_detection.utils import vrd_evaluation - - -def build_groundtruth_vrd_dictionary(data, class_label_map, - relationship_label_map): - """Builds a groundtruth dictionary from groundtruth data in CSV file. - - Args: - data: Pandas DataFrame with the groundtruth data for a single image. - class_label_map: Class labelmap from string label name to an integer. - relationship_label_map: Relationship type labelmap from string name to an - integer. - - Returns: - A dictionary with keys suitable for passing to - VRDDetectionEvaluator.add_single_ground_truth_image_info: - standard_fields.InputDataFields.groundtruth_boxes: A numpy array - of structures with the shape [M, 1], representing M tuples, each tuple - containing the same number of named bounding boxes. - Each box is of the format [y_min, x_min, y_max, x_max] (see - datatype vrd_box_data_type, single_box_data_type above). - standard_fields.InputDataFields.groundtruth_classes: A numpy array of - structures shape [M, 1], representing the class labels of the - corresponding bounding boxes and possibly additional classes (see - datatype label_data_type above). - standard_fields.InputDataFields.verified_labels: numpy array - of shape [K] containing verified labels. - """ - data_boxes = data[data.LabelName.isnull()] - data_labels = data[data.LabelName1.isnull()] - - boxes = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.vrd_box_data_type) - boxes['subject'] = data_boxes[['YMin1', 'XMin1', 'YMax1', - 'XMax1']].as_matrix() - boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].as_matrix() - - labels = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.label_data_type) - labels['subject'] = data_boxes['LabelName1'].map(lambda x: class_label_map[x]) - labels['object'] = data_boxes['LabelName2'].map(lambda x: class_label_map[x]) - labels['relation'] = data_boxes['RelationshipLabel'].map( - lambda x: relationship_label_map[x]) - - return { - standard_fields.InputDataFields.groundtruth_boxes: - boxes, - standard_fields.InputDataFields.groundtruth_classes: - labels, - standard_fields.InputDataFields.verified_labels: - data_labels['LabelName'].map(lambda x: class_label_map[x]), - } - - -def build_predictions_vrd_dictionary(data, class_label_map, - relationship_label_map): - """Builds a predictions dictionary from predictions data in CSV file. - - Args: - data: Pandas DataFrame with the predictions data for a single image. - class_label_map: Class labelmap from string label name to an integer. - relationship_label_map: Relationship type labelmap from string name to an - integer. 
- - Returns: - Dictionary with keys suitable for passing to - VRDDetectionEvaluator.add_single_detected_image_info: - standard_fields.DetectionResultFields.detection_boxes: A numpy array of - structures with shape [N, 1], representing N tuples, each tuple - containing the same number of named bounding boxes. - Each box is of the format [y_min, x_min, y_max, x_max] (as an example - see datatype vrd_box_data_type, single_box_data_type above). - standard_fields.DetectionResultFields.detection_scores: float32 numpy - array of shape [N] containing detection scores for the boxes. - standard_fields.DetectionResultFields.detection_classes: A numpy array - of structures shape [N, 1], representing the class labels of the - corresponding bounding boxes and possibly additional classes (see - datatype label_data_type above). - """ - data_boxes = data - - boxes = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.vrd_box_data_type) - boxes['subject'] = data_boxes[['YMin1', 'XMin1', 'YMax1', - 'XMax1']].as_matrix() - boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].as_matrix() - - labels = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.label_data_type) - labels['subject'] = data_boxes['LabelName1'].map(lambda x: class_label_map[x]) - labels['object'] = data_boxes['LabelName2'].map(lambda x: class_label_map[x]) - labels['relation'] = data_boxes['RelationshipLabel'].map( - lambda x: relationship_label_map[x]) - - return { - standard_fields.DetectionResultFields.detection_boxes: - boxes, - standard_fields.DetectionResultFields.detection_classes: - labels, - standard_fields.DetectionResultFields.detection_scores: - data_boxes['Score'].as_matrix() - } - - -def write_csv(fid, metrics): - """Writes metrics key-value pairs to CSV file. - - Args: - fid: File identifier of an opened file. - metrics: A dictionary with metrics to be written. - """ - metrics_writer = csv.writer(fid, delimiter=',') - for metric_name, metric_value in metrics.items(): - metrics_writer.writerow([metric_name, str(metric_value)]) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/oid_vrd_challenge_evaluation_utils_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/oid_vrd_challenge_evaluation_utils_test.py deleted file mode 100644 index 49ce0898ea5a1afc0ab1c9ebd666c1373cbe54f9..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/oid_vrd_challenge_evaluation_utils_test.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
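For reference, the dictionaries built by build_groundtruth_vrd_dictionary and build_predictions_vrd_dictionary above hold structured numpy arrays, so subject and object boxes (and labels) are addressed by field name. A small sketch, with values taken from the unit test later in this diff:

```python
import numpy as np

from object_detection.utils import vrd_evaluation

# Two relationship triplets; box fields are [ymin, xmin, ymax, xmax].
boxes = np.array(
    [([0.5, 0.0, 0.6, 0.3], [0.5, 0.0, 0.6, 0.3]),
     ([0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2])],
    dtype=vrd_evaluation.vrd_box_data_type)
labels = np.array([(1, 2, 1), (1, 3, 2)],
                  dtype=vrd_evaluation.label_data_type)

print(boxes['subject'])    # subject boxes, shape (2, 4)
print(labels['relation'])  # relation ids, shape (2,)
```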
-# ============================================================================== -"""Tests for oid_vrd_challenge_evaluation_utils.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import pandas as pd -import tensorflow as tf -from object_detection.core import standard_fields -from object_detection.metrics import oid_vrd_challenge_evaluation_utils as utils -from object_detection.utils import vrd_evaluation - - -class OidVrdChallengeEvaluationUtilsTest(tf.test.TestCase): - - def testBuildGroundtruthDictionary(self): - np_data = pd.DataFrame( - [[ - 'fe58ec1b06db2bb7', '/m/04bcr3', '/m/083vt', 0.0, 0.3, 0.5, 0.6, - 0.0, 0.3, 0.5, 0.6, 'is', None, None - ], [ - 'fe58ec1b06db2bb7', '/m/04bcr3', '/m/02gy9n', 0.0, 0.3, 0.5, 0.6, - 0.1, 0.2, 0.3, 0.4, 'under', None, None - ], [ - 'fe58ec1b06db2bb7', '/m/04bcr3', '/m/083vt', 0.0, 0.1, 0.2, 0.3, - 0.0, 0.1, 0.2, 0.3, 'is', None, None - ], [ - 'fe58ec1b06db2bb7', '/m/083vt', '/m/04bcr3', 0.1, 0.2, 0.3, 0.4, - 0.5, 0.6, 0.7, 0.8, 'at', None, None - ], [ - 'fe58ec1b06db2bb7', None, None, None, None, None, None, None, None, - None, None, None, '/m/04bcr3', 1.0 - ], [ - 'fe58ec1b06db2bb7', None, None, None, None, None, None, None, None, - None, None, None, '/m/083vt', 0.0 - ], [ - 'fe58ec1b06db2bb7', None, None, None, None, None, None, None, None, - None, None, None, '/m/02gy9n', 0.0 - ]], - columns=[ - 'ImageID', 'LabelName1', 'LabelName2', 'XMin1', 'XMax1', 'YMin1', - 'YMax1', 'XMin2', 'XMax2', 'YMin2', 'YMax2', 'RelationshipLabel', - 'LabelName', 'Confidence' - ]) - class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3} - relationship_label_map = {'is': 1, 'under': 2, 'at': 3} - groundtruth_dictionary = utils.build_groundtruth_vrd_dictionary( - np_data, class_label_map, relationship_label_map) - - self.assertTrue(standard_fields.InputDataFields.groundtruth_boxes in - groundtruth_dictionary) - self.assertTrue(standard_fields.InputDataFields.groundtruth_classes in - groundtruth_dictionary) - self.assertTrue(standard_fields.InputDataFields.verified_labels in - groundtruth_dictionary) - - self.assertAllEqual( - np.array( - [(1, 2, 1), (1, 3, 2), (1, 2, 1), (2, 1, 3)], - dtype=vrd_evaluation.label_data_type), groundtruth_dictionary[ - standard_fields.InputDataFields.groundtruth_classes]) - expected_vrd_data = np.array( - [ - ([0.5, 0.0, 0.6, 0.3], [0.5, 0.0, 0.6, 0.3]), - ([0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2]), - ([0.2, 0.0, 0.3, 0.1], [0.2, 0.0, 0.3, 0.1]), - ([0.3, 0.1, 0.4, 0.2], [0.7, 0.5, 0.8, 0.6]), - ], - dtype=vrd_evaluation.vrd_box_data_type) - for field in expected_vrd_data.dtype.fields: - self.assertNDArrayNear( - expected_vrd_data[field], groundtruth_dictionary[ - standard_fields.InputDataFields.groundtruth_boxes][field], 1e-5) - self.assertAllEqual( - np.array([1, 2, 3]), - groundtruth_dictionary[standard_fields.InputDataFields.verified_labels]) - - def testBuildPredictionDictionary(self): - np_data = pd.DataFrame( - [[ - 'fe58ec1b06db2bb7', '/m/04bcr3', '/m/083vt', 0.0, 0.3, 0.5, 0.6, - 0.0, 0.3, 0.5, 0.6, 'is', 0.1 - ], [ - 'fe58ec1b06db2bb7', '/m/04bcr3', '/m/02gy9n', 0.0, 0.3, 0.5, 0.6, - 0.1, 0.2, 0.3, 0.4, 'under', 0.2 - ], [ - 'fe58ec1b06db2bb7', '/m/04bcr3', '/m/083vt', 0.0, 0.1, 0.2, 0.3, - 0.0, 0.1, 0.2, 0.3, 'is', 0.3 - ], [ - 'fe58ec1b06db2bb7', '/m/083vt', '/m/04bcr3', 0.1, 0.2, 0.3, 0.4, - 0.5, 0.6, 0.7, 0.8, 'at', 0.4 - ]], - columns=[ - 'ImageID', 'LabelName1', 'LabelName2', 'XMin1', 'XMax1', 'YMin1', - 'YMax1', 'XMin2', 
'XMax2', 'YMin2', 'YMax2', 'RelationshipLabel', - 'Score' - ]) - class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3} - relationship_label_map = {'is': 1, 'under': 2, 'at': 3} - prediction_dictionary = utils.build_predictions_vrd_dictionary( - np_data, class_label_map, relationship_label_map) - - self.assertTrue(standard_fields.DetectionResultFields.detection_boxes in - prediction_dictionary) - self.assertTrue(standard_fields.DetectionResultFields.detection_classes in - prediction_dictionary) - self.assertTrue(standard_fields.DetectionResultFields.detection_scores in - prediction_dictionary) - - self.assertAllEqual( - np.array( - [(1, 2, 1), (1, 3, 2), (1, 2, 1), (2, 1, 3)], - dtype=vrd_evaluation.label_data_type), prediction_dictionary[ - standard_fields.DetectionResultFields.detection_classes]) - expected_vrd_data = np.array( - [ - ([0.5, 0.0, 0.6, 0.3], [0.5, 0.0, 0.6, 0.3]), - ([0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2]), - ([0.2, 0.0, 0.3, 0.1], [0.2, 0.0, 0.3, 0.1]), - ([0.3, 0.1, 0.4, 0.2], [0.7, 0.5, 0.8, 0.6]), - ], - dtype=vrd_evaluation.vrd_box_data_type) - for field in expected_vrd_data.dtype.fields: - self.assertNDArrayNear( - expected_vrd_data[field], prediction_dictionary[ - standard_fields.DetectionResultFields.detection_boxes][field], - 1e-5) - self.assertNDArrayNear( - np.array([0.1, 0.2, 0.3, 0.4]), prediction_dictionary[ - standard_fields.DetectionResultFields.detection_scores], 1e-5) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/tf_example_parser.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/tf_example_parser.py deleted file mode 100644 index 22a28e8ad933eefc773e750efc60074251a48cce..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/tf_example_parser.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tensorflow Example proto parser for data loading. - -A parser to decode data containing serialized tensorflow.Example -protos into materialized tensors (numpy arrays). 
-""" - -import numpy as np - -from object_detection.core import data_parser -from object_detection.core import standard_fields as fields - - -class FloatParser(data_parser.DataToNumpyParser): - """Tensorflow Example float parser.""" - - def __init__(self, field_name): - self.field_name = field_name - - def parse(self, tf_example): - return np.array( - tf_example.features.feature[self.field_name].float_list.value, - dtype=np.float).transpose() if tf_example.features.feature[ - self.field_name].HasField("float_list") else None - - -class StringParser(data_parser.DataToNumpyParser): - """Tensorflow Example string parser.""" - - def __init__(self, field_name): - self.field_name = field_name - - def parse(self, tf_example): - return "".join(tf_example.features.feature[self.field_name] - .bytes_list.value) if tf_example.features.feature[ - self.field_name].HasField("bytes_list") else None - - -class Int64Parser(data_parser.DataToNumpyParser): - """Tensorflow Example int64 parser.""" - - def __init__(self, field_name): - self.field_name = field_name - - def parse(self, tf_example): - return np.array( - tf_example.features.feature[self.field_name].int64_list.value, - dtype=np.int64).transpose() if tf_example.features.feature[ - self.field_name].HasField("int64_list") else None - - -class BoundingBoxParser(data_parser.DataToNumpyParser): - """Tensorflow Example bounding box parser.""" - - def __init__(self, xmin_field_name, ymin_field_name, xmax_field_name, - ymax_field_name): - self.field_names = [ - ymin_field_name, xmin_field_name, ymax_field_name, xmax_field_name - ] - - def parse(self, tf_example): - result = [] - parsed = True - for field_name in self.field_names: - result.append(tf_example.features.feature[field_name].float_list.value) - parsed &= ( - tf_example.features.feature[field_name].HasField("float_list")) - - return np.array(result).transpose() if parsed else None - - -class TfExampleDetectionAndGTParser(data_parser.DataToNumpyParser): - """Tensorflow Example proto parser.""" - - def __init__(self): - self.items_to_handlers = { - fields.DetectionResultFields.key: - StringParser(fields.TfExampleFields.source_id), - # Object ground truth boxes and classes. - fields.InputDataFields.groundtruth_boxes: (BoundingBoxParser( - fields.TfExampleFields.object_bbox_xmin, - fields.TfExampleFields.object_bbox_ymin, - fields.TfExampleFields.object_bbox_xmax, - fields.TfExampleFields.object_bbox_ymax)), - fields.InputDataFields.groundtruth_classes: ( - Int64Parser(fields.TfExampleFields.object_class_label)), - # Object detections. - fields.DetectionResultFields.detection_boxes: (BoundingBoxParser( - fields.TfExampleFields.detection_bbox_xmin, - fields.TfExampleFields.detection_bbox_ymin, - fields.TfExampleFields.detection_bbox_xmax, - fields.TfExampleFields.detection_bbox_ymax)), - fields.DetectionResultFields.detection_classes: ( - Int64Parser(fields.TfExampleFields.detection_class_label)), - fields.DetectionResultFields.detection_scores: ( - FloatParser(fields.TfExampleFields.detection_score)), - } - - self.optional_items_to_handlers = { - fields.InputDataFields.groundtruth_difficult: - Int64Parser(fields.TfExampleFields.object_difficult), - fields.InputDataFields.groundtruth_group_of: - Int64Parser(fields.TfExampleFields.object_group_of), - fields.InputDataFields.verified_labels: - Int64Parser(fields.TfExampleFields.image_class_label), - } - - def parse(self, tf_example): - """Parses tensorflow example and returns a tensor dictionary. - - Args: - tf_example: a tf.Example object. 
- - Returns: - A dictionary of the following numpy arrays: - fields.DetectionResultFields.source_id - string containing original image - id. - fields.InputDataFields.groundtruth_boxes - a numpy array containing - groundtruth boxes. - fields.InputDataFields.groundtruth_classes - a numpy array containing - groundtruth classes. - fields.InputDataFields.groundtruth_group_of - a numpy array containing - groundtruth group of flag (optional, None if not specified). - fields.InputDataFields.groundtruth_difficult - a numpy array containing - groundtruth difficult flag (optional, None if not specified). - fields.DetectionResultFields.detection_boxes - a numpy array containing - detection boxes. - fields.DetectionResultFields.detection_classes - a numpy array containing - detection class labels. - fields.DetectionResultFields.detection_scores - a numpy array containing - detection scores. - Returns None if tf.Example was not parsed or non-optional fields were not - found. - """ - results_dict = {} - parsed = True - for key, parser in self.items_to_handlers.items(): - results_dict[key] = parser.parse(tf_example) - parsed &= (results_dict[key] is not None) - - for key, parser in self.optional_items_to_handlers.items(): - results_dict[key] = parser.parse(tf_example) - - return results_dict if parsed else None diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/tf_example_parser_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/tf_example_parser_test.py deleted file mode 100644 index 99a64d5fae33e2db80780122e03c8540e3c453c0..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/metrics/tf_example_parser_test.py +++ /dev/null @@ -1,196 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
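A minimal sketch of the field parsers defined in the file above: each returns a numpy value when the expected feature kind is present and None otherwise (the feature names here are arbitrary, mirroring the tests that follow):

```python
import tensorflow as tf

from object_detection.metrics import tf_example_parser

example = tf.train.Example(features=tf.train.Features(feature={
    'floats': tf.train.Feature(
        float_list=tf.train.FloatList(value=[1.5, 1.4, 2.0]))
}))

parser = tf_example_parser.FloatParser('floats')
print(parser.parse(example))  # [1.5 1.4 2. ]
# A missing field yields None rather than raising.
print(tf_example_parser.FloatParser('missing').parse(example))  # None
```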
-# ============================================================================== -"""Tests for object_detection.data_decoders.tf_example_parser.""" - -import numpy as np -import numpy.testing as np_testing -import tensorflow as tf - -from object_detection.core import standard_fields as fields -from object_detection.metrics import tf_example_parser - - -class TfExampleDecoderTest(tf.test.TestCase): - - def _Int64Feature(self, value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) - - def _FloatFeature(self, value): - return tf.train.Feature(float_list=tf.train.FloatList(value=value)) - - def _BytesFeature(self, value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - - def testParseDetectionsAndGT(self): - source_id = 'abc.jpg' - # y_min, x_min, y_max, x_max - object_bb = np.array([[0.0, 0.5, 0.3], [0.0, 0.1, 0.6], [1.0, 0.6, 0.8], - [1.0, 0.6, 0.7]]).transpose() - detection_bb = np.array([[0.1, 0.2], [0.0, 0.8], [1.0, 0.6], - [1.0, 0.85]]).transpose() - - object_class_label = [1, 1, 2] - object_difficult = [1, 0, 0] - object_group_of = [0, 0, 1] - verified_labels = [1, 2, 3, 4] - detection_class_label = [2, 1] - detection_score = [0.5, 0.3] - features = { - fields.TfExampleFields.source_id: - self._BytesFeature(source_id), - fields.TfExampleFields.object_bbox_ymin: - self._FloatFeature(object_bb[:, 0].tolist()), - fields.TfExampleFields.object_bbox_xmin: - self._FloatFeature(object_bb[:, 1].tolist()), - fields.TfExampleFields.object_bbox_ymax: - self._FloatFeature(object_bb[:, 2].tolist()), - fields.TfExampleFields.object_bbox_xmax: - self._FloatFeature(object_bb[:, 3].tolist()), - fields.TfExampleFields.detection_bbox_ymin: - self._FloatFeature(detection_bb[:, 0].tolist()), - fields.TfExampleFields.detection_bbox_xmin: - self._FloatFeature(detection_bb[:, 1].tolist()), - fields.TfExampleFields.detection_bbox_ymax: - self._FloatFeature(detection_bb[:, 2].tolist()), - fields.TfExampleFields.detection_bbox_xmax: - self._FloatFeature(detection_bb[:, 3].tolist()), - fields.TfExampleFields.detection_class_label: - self._Int64Feature(detection_class_label), - fields.TfExampleFields.detection_score: - self._FloatFeature(detection_score), - } - - example = tf.train.Example(features=tf.train.Features(feature=features)) - parser = tf_example_parser.TfExampleDetectionAndGTParser() - - results_dict = parser.parse(example) - self.assertIsNone(results_dict) - - features[fields.TfExampleFields.object_class_label] = ( - self._Int64Feature(object_class_label)) - features[fields.TfExampleFields.object_difficult] = ( - self._Int64Feature(object_difficult)) - - example = tf.train.Example(features=tf.train.Features(feature=features)) - results_dict = parser.parse(example) - - self.assertIsNotNone(results_dict) - self.assertEqual(source_id, results_dict[fields.DetectionResultFields.key]) - np_testing.assert_almost_equal( - object_bb, results_dict[fields.InputDataFields.groundtruth_boxes]) - np_testing.assert_almost_equal( - detection_bb, - results_dict[fields.DetectionResultFields.detection_boxes]) - np_testing.assert_almost_equal( - detection_score, - results_dict[fields.DetectionResultFields.detection_scores]) - np_testing.assert_almost_equal( - detection_class_label, - results_dict[fields.DetectionResultFields.detection_classes]) - np_testing.assert_almost_equal( - object_difficult, - results_dict[fields.InputDataFields.groundtruth_difficult]) - np_testing.assert_almost_equal( - object_class_label, - results_dict[fields.InputDataFields.groundtruth_classes]) - 
- parser = tf_example_parser.TfExampleDetectionAndGTParser() - - features[fields.TfExampleFields.object_group_of] = ( - self._Int64Feature(object_group_of)) - - example = tf.train.Example(features=tf.train.Features(feature=features)) - results_dict = parser.parse(example) - self.assertIsNotNone(results_dict) - np_testing.assert_equal( - object_group_of, - results_dict[fields.InputDataFields.groundtruth_group_of]) - - features[fields.TfExampleFields.image_class_label] = ( - self._Int64Feature(verified_labels)) - - example = tf.train.Example(features=tf.train.Features(feature=features)) - results_dict = parser.parse(example) - self.assertIsNotNone(results_dict) - np_testing.assert_equal( - verified_labels, results_dict[fields.InputDataFields.verified_labels]) - - def testParseString(self): - string_val = 'abc' - features = {'string': self._BytesFeature(string_val)} - example = tf.train.Example(features=tf.train.Features(feature=features)) - - parser = tf_example_parser.StringParser('string') - result = parser.parse(example) - self.assertIsNotNone(result) - self.assertEqual(result, string_val) - - parser = tf_example_parser.StringParser('another_string') - result = parser.parse(example) - self.assertIsNone(result) - - def testParseFloat(self): - float_array_val = [1.5, 1.4, 2.0] - features = {'floats': self._FloatFeature(float_array_val)} - example = tf.train.Example(features=tf.train.Features(feature=features)) - - parser = tf_example_parser.FloatParser('floats') - result = parser.parse(example) - self.assertIsNotNone(result) - np_testing.assert_almost_equal(result, float_array_val) - - parser = tf_example_parser.StringParser('another_floats') - result = parser.parse(example) - self.assertIsNone(result) - - def testInt64Parser(self): - int_val = [1, 2, 3] - features = {'ints': self._Int64Feature(int_val)} - example = tf.train.Example(features=tf.train.Features(feature=features)) - - parser = tf_example_parser.Int64Parser('ints') - result = parser.parse(example) - self.assertIsNotNone(result) - np_testing.assert_almost_equal(result, int_val) - - parser = tf_example_parser.Int64Parser('another_ints') - result = parser.parse(example) - self.assertIsNone(result) - - def testBoundingBoxParser(self): - bounding_boxes = np.array([[0.0, 0.5, 0.3], [0.0, 0.1, 0.6], - [1.0, 0.6, 0.8], [1.0, 0.6, 0.7]]).transpose() - features = { - 'ymin': self._FloatFeature(bounding_boxes[:, 0]), - 'xmin': self._FloatFeature(bounding_boxes[:, 1]), - 'ymax': self._FloatFeature(bounding_boxes[:, 2]), - 'xmax': self._FloatFeature(bounding_boxes[:, 3]) - } - - example = tf.train.Example(features=tf.train.Features(feature=features)) - - parser = tf_example_parser.BoundingBoxParser('xmin', 'ymin', 'xmax', 'ymax') - result = parser.parse(example) - self.assertIsNotNone(result) - np_testing.assert_almost_equal(result, bounding_boxes) - - parser = tf_example_parser.BoundingBoxParser('xmin', 'ymin', 'xmax', - 'another_ymax') - result = parser.parse(example) - self.assertIsNone(result) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/model_hparams.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/model_hparams.py deleted file mode 100644 index b0d12fceadfb1f79723b8f35377b1a3240ff8c63..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/model_hparams.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Hyperparameters for the object detection model in TF.learn. - -This file consolidates and documents the hyperparameters used by the model. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - - -def create_hparams(hparams_overrides=None): - """Returns hyperparameters, including any flag value overrides. - - Args: - hparams_overrides: Optional hparams overrides, represented as a - string containing comma-separated hparam_name=value pairs. - - Returns: - The hyperparameters as a tf.HParams object. - """ - hparams = tf.contrib.training.HParams( - # Whether a fine tuning checkpoint (provided in the pipeline config) - # should be loaded for training. - load_pretrained=True) - # Override any of the preceding hyperparameter values. - if hparams_overrides: - hparams = hparams.parse(hparams_overrides) - return hparams diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/model_lib.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/model_lib.py deleted file mode 100644 index 973ca259d658ff052c253d57b2dd35b6d1da434e..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/model_lib.py +++ /dev/null @@ -1,717 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Constructs model, inputs, and training environment.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools -import os - -import tensorflow as tf - -from object_detection import eval_util -from object_detection import inputs -from object_detection.builders import graph_rewriter_builder -from object_detection.builders import model_builder -from object_detection.builders import optimizer_builder -from object_detection.core import standard_fields as fields -from object_detection.utils import config_util -from object_detection.utils import label_map_util -from object_detection.utils import shape_utils -from object_detection.utils import variables_helper -from object_detection.utils import visualization_utils as vis_utils - -# A map of names to methods that help build the model. 
-MODEL_BUILD_UTIL_MAP = { - 'get_configs_from_pipeline_file': - config_util.get_configs_from_pipeline_file, - 'create_pipeline_proto_from_configs': - config_util.create_pipeline_proto_from_configs, - 'merge_external_params_with_configs': - config_util.merge_external_params_with_configs, - 'create_train_input_fn': inputs.create_train_input_fn, - 'create_eval_input_fn': inputs.create_eval_input_fn, - 'create_predict_input_fn': inputs.create_predict_input_fn, -} - - -def _prepare_groundtruth_for_eval(detection_model, class_agnostic): - """Extracts groundtruth data from detection_model and prepares it for eval. - - Args: - detection_model: A `DetectionModel` object. - class_agnostic: Whether the detections are class_agnostic. - - Returns: - A tuple of: - groundtruth: Dictionary with the following fields: - 'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in - normalized coordinates. - 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes. - 'groundtruth_masks': 3D float32 tensor of instance masks (if provided in - groundtruth) - 'groundtruth_is_crowd': [num_boxes] bool tensor indicating is_crowd - annotations (if provided in groundtruth). - class_agnostic: Boolean indicating whether detections are class agnostic. - """ - input_data_fields = fields.InputDataFields() - groundtruth_boxes = detection_model.groundtruth_lists( - fields.BoxListFields.boxes)[0] - # For class-agnostic models, groundtruth one-hot encodings collapse to all - # ones. - if class_agnostic: - groundtruth_boxes_shape = tf.shape(groundtruth_boxes) - groundtruth_classes_one_hot = tf.ones([groundtruth_boxes_shape[0], 1]) - else: - groundtruth_classes_one_hot = detection_model.groundtruth_lists( - fields.BoxListFields.classes)[0] - label_id_offset = 1 # Applying label id offset (b/63711816) - groundtruth_classes = ( - tf.argmax(groundtruth_classes_one_hot, axis=1) + label_id_offset) - groundtruth = { - input_data_fields.groundtruth_boxes: groundtruth_boxes, - input_data_fields.groundtruth_classes: groundtruth_classes - } - if detection_model.groundtruth_has_field(fields.BoxListFields.masks): - groundtruth[input_data_fields.groundtruth_instance_masks] = ( - detection_model.groundtruth_lists(fields.BoxListFields.masks)[0]) - if detection_model.groundtruth_has_field(fields.BoxListFields.is_crowd): - groundtruth[input_data_fields.groundtruth_is_crowd] = ( - detection_model.groundtruth_lists(fields.BoxListFields.is_crowd)[0]) - return groundtruth - - -def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True): - """Unstacks all tensors in `tensor_dict` along 0th dimension. - - Unstacks tensor from the tensor dict along 0th dimension and returns a - tensor_dict containing values that are lists of unstacked tensors. - - Tensors in the `tensor_dict` are expected to be of one of the three shapes: - 1. [batch_size] - 2. [batch_size, height, width, channels] - 3. [batch_size, num_boxes, d1, d2, ... dn] - - When unpad_groundtruth_tensors is set to true, unstacked tensors of form 3 - above are sliced along the `num_boxes` dimension using the value in tensor - field.InputDataFields.num_groundtruth_boxes. - - Note that this function has a static list of input data fields and has to be - kept in sync with the InputDataFields defined in core/standard_fields.py - - Args: - tensor_dict: A dictionary of batched groundtruth tensors. - unpad_groundtruth_tensors: Whether to remove padding along `num_boxes` - dimension of the groundtruth tensors. 
- - Returns: - A dictionary where the keys are from fields.InputDataFields and values are - a list of unstacked (optionally unpadded) tensors. - - Raises: - ValueError: If unpad_tensors is True and `tensor_dict` does not contain - `num_groundtruth_boxes` tensor. - """ - unbatched_tensor_dict = {key: tf.unstack(tensor) - for key, tensor in tensor_dict.items()} - if unpad_groundtruth_tensors: - if (fields.InputDataFields.num_groundtruth_boxes not in - unbatched_tensor_dict): - raise ValueError('`num_groundtruth_boxes` not found in tensor_dict. ' - 'Keys available: {}'.format( - unbatched_tensor_dict.keys())) - unbatched_unpadded_tensor_dict = {} - unpad_keys = set([ - # List of input data fields that are padded along the num_boxes - # dimension. This list has to be kept in sync with InputDataFields in - # standard_fields.py. - fields.InputDataFields.groundtruth_instance_masks, - fields.InputDataFields.groundtruth_classes, - fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_keypoints, - fields.InputDataFields.groundtruth_group_of, - fields.InputDataFields.groundtruth_difficult, - fields.InputDataFields.groundtruth_is_crowd, - fields.InputDataFields.groundtruth_area, - fields.InputDataFields.groundtruth_weights - ]).intersection(set(unbatched_tensor_dict.keys())) - - for key in unpad_keys: - unpadded_tensor_list = [] - for num_gt, padded_tensor in zip( - unbatched_tensor_dict[fields.InputDataFields.num_groundtruth_boxes], - unbatched_tensor_dict[key]): - tensor_shape = shape_utils.combined_static_and_dynamic_shape( - padded_tensor) - slice_begin = tf.zeros([len(tensor_shape)], dtype=tf.int32) - slice_size = tf.stack( - [num_gt] + [-1 if dim is None else dim for dim in tensor_shape[1:]]) - unpadded_tensor = tf.slice(padded_tensor, slice_begin, slice_size) - unpadded_tensor_list.append(unpadded_tensor) - unbatched_unpadded_tensor_dict[key] = unpadded_tensor_list - unbatched_tensor_dict.update(unbatched_unpadded_tensor_dict) - - return unbatched_tensor_dict - - -def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): - """Creates a model function for `Estimator`. - - Args: - detection_model_fn: Function that returns a `DetectionModel` instance. - configs: Dictionary of pipeline config objects. - hparams: `HParams` object. - use_tpu: Boolean indicating whether model should be constructed for - use on TPU. - - Returns: - `model_fn` for `Estimator`. - """ - train_config = configs['train_config'] - eval_input_config = configs['eval_input_config'] - eval_config = configs['eval_config'] - - def model_fn(features, labels, mode, params=None): - """Constructs the object detection model. - - Args: - features: Dictionary of feature tensors, returned from `input_fn`. - labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL, - otherwise None. - mode: Mode key from tf.estimator.ModeKeys. - params: Parameter dictionary passed from the estimator. - - Returns: - An `EstimatorSpec` that encapsulates the model and its serving - configurations. 
- """ - params = params or {} - total_loss, train_op, detections, export_outputs = None, None, None, None - is_training = mode == tf.estimator.ModeKeys.TRAIN - detection_model = detection_model_fn(is_training=is_training, - add_summaries=(not use_tpu)) - scaffold_fn = None - - if mode == tf.estimator.ModeKeys.TRAIN: - labels = unstack_batch( - labels, - unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors) - elif mode == tf.estimator.ModeKeys.EVAL: - # For evaling on train data, it is necessary to check whether groundtruth - # must be unpadded. - boxes_shape = ( - labels[fields.InputDataFields.groundtruth_boxes].get_shape() - .as_list()) - unpad_groundtruth_tensors = True if boxes_shape[1] is not None else False - labels = unstack_batch( - labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors) - - if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL): - gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes] - gt_classes_list = labels[fields.InputDataFields.groundtruth_classes] - gt_masks_list = None - if fields.InputDataFields.groundtruth_instance_masks in labels: - gt_masks_list = labels[ - fields.InputDataFields.groundtruth_instance_masks] - gt_keypoints_list = None - if fields.InputDataFields.groundtruth_keypoints in labels: - gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints] - if fields.InputDataFields.groundtruth_is_crowd in labels: - gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd] - detection_model.provide_groundtruth( - groundtruth_boxes_list=gt_boxes_list, - groundtruth_classes_list=gt_classes_list, - groundtruth_masks_list=gt_masks_list, - groundtruth_keypoints_list=gt_keypoints_list, - groundtruth_weights_list=labels[ - fields.InputDataFields.groundtruth_weights], - groundtruth_is_crowd_list=gt_is_crowd_list) - - preprocessed_images = features[fields.InputDataFields.image] - prediction_dict = detection_model.predict( - preprocessed_images, features[fields.InputDataFields.true_image_shape]) - detections = detection_model.postprocess( - prediction_dict, features[fields.InputDataFields.true_image_shape]) - - if mode == tf.estimator.ModeKeys.TRAIN: - if train_config.fine_tune_checkpoint and hparams.load_pretrained: - if not train_config.fine_tune_checkpoint_type: - # train_config.from_detection_checkpoint field is deprecated. For - # backward compatibility, set train_config.fine_tune_checkpoint_type - # based on train_config.from_detection_checkpoint. 
- if train_config.from_detection_checkpoint: - train_config.fine_tune_checkpoint_type = 'detection' - else: - train_config.fine_tune_checkpoint_type = 'classification' - asg_map = detection_model.restore_map( - fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type, - load_all_detection_checkpoint_vars=( - train_config.load_all_detection_checkpoint_vars)) - available_var_map = ( - variables_helper.get_variables_available_in_checkpoint( - asg_map, train_config.fine_tune_checkpoint, - include_global_step=False)) - if use_tpu: - def tpu_scaffold(): - tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint, - available_var_map) - return tf.train.Scaffold() - scaffold_fn = tpu_scaffold - else: - tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint, - available_var_map) - - if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL): - losses_dict = detection_model.loss( - prediction_dict, features[fields.InputDataFields.true_image_shape]) - losses = [loss_tensor for loss_tensor in losses_dict.itervalues()] - if train_config.add_regularization_loss: - regularization_losses = tf.get_collection( - tf.GraphKeys.REGULARIZATION_LOSSES) - if regularization_losses: - regularization_loss = tf.add_n(regularization_losses, - name='regularization_loss') - losses.append(regularization_loss) - losses_dict['Loss/regularization_loss'] = regularization_loss - total_loss = tf.add_n(losses, name='total_loss') - losses_dict['Loss/total_loss'] = total_loss - - if 'graph_rewriter_config' in configs: - graph_rewriter_fn = graph_rewriter_builder.build( - configs['graph_rewriter_config'], is_training=is_training) - graph_rewriter_fn() - - # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we - # can write learning rate summaries on TPU without host calls. - global_step = tf.train.get_or_create_global_step() - training_optimizer, optimizer_summary_vars = optimizer_builder.build( - train_config.optimizer) - - if mode == tf.estimator.ModeKeys.TRAIN: - if use_tpu: - training_optimizer = tf.contrib.tpu.CrossShardOptimizer( - training_optimizer) - - # Optionally freeze some layers by setting their gradients to be zero. - trainable_variables = None - if train_config.freeze_variables: - trainable_variables = tf.contrib.framework.filter_variables( - tf.trainable_variables(), - exclude_patterns=train_config.freeze_variables) - - clip_gradients_value = None - if train_config.gradient_clipping_by_norm > 0: - clip_gradients_value = train_config.gradient_clipping_by_norm - - if not use_tpu: - for var in optimizer_summary_vars: - tf.summary.scalar(var.op.name, var) - summaries = [] if use_tpu else None - train_op = tf.contrib.layers.optimize_loss( - loss=total_loss, - global_step=global_step, - learning_rate=None, - clip_gradients=clip_gradients_value, - optimizer=training_optimizer, - variables=trainable_variables, - summaries=summaries, - name='') # Preventing scope prefix on all variables. 
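# ---------------------------------------------------------------------------
# [Editorial aside; illustrative sketch, not part of the deleted file.]
# tf.contrib.layers.optimize_loss above bundles gradient computation,
# optional global-norm clipping, and the apply step. A rough hand-written
# TF1-style equivalent, assuming `total_loss`, `training_optimizer`,
# `global_step`, and `clip_gradients_value` as defined in the surrounding
# code (summaries and the freeze-variables filter are omitted):

import tensorflow as tf


def _sketch_manual_train_op(total_loss, training_optimizer, global_step,
                            clip_gradients_value):
  grads_and_vars = training_optimizer.compute_gradients(total_loss)
  # Drop variables that received no gradient before clipping.
  grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]
  if clip_gradients_value is not None:
    grads, variables = zip(*grads_and_vars)
    clipped_grads, _ = tf.clip_by_global_norm(grads, clip_gradients_value)
    grads_and_vars = list(zip(clipped_grads, variables))
  return training_optimizer.apply_gradients(grads_and_vars,
                                            global_step=global_step)
# ---------------------------------------------------------------------------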
- - if mode == tf.estimator.ModeKeys.PREDICT: - export_outputs = { - tf.saved_model.signature_constants.PREDICT_METHOD_NAME: - tf.estimator.export.PredictOutput(detections) - } - - eval_metric_ops = None - scaffold = None - if mode == tf.estimator.ModeKeys.EVAL: - class_agnostic = (fields.DetectionResultFields.detection_classes - not in detections) - groundtruth = _prepare_groundtruth_for_eval( - detection_model, class_agnostic) - use_original_images = fields.InputDataFields.original_image in features - eval_images = ( - features[fields.InputDataFields.original_image] if use_original_images - else features[fields.InputDataFields.image]) - eval_dict = eval_util.result_dict_for_single_example( - eval_images[0:1], - features[inputs.HASH_KEY][0], - detections, - groundtruth, - class_agnostic=class_agnostic, - scale_to_absolute=True) - - if class_agnostic: - category_index = label_map_util.create_class_agnostic_category_index() - else: - category_index = label_map_util.create_category_index_from_labelmap( - eval_input_config.label_map_path) - img_summary = None - if not use_tpu and use_original_images: - detection_and_groundtruth = ( - vis_utils.draw_side_by_side_evaluation_image( - eval_dict, category_index, max_boxes_to_draw=20, - min_score_thresh=0.2, - use_normalized_coordinates=False)) - img_summary = tf.summary.image('Detections_Left_Groundtruth_Right', - detection_and_groundtruth) - - # Eval metrics on a single example. - eval_metrics = eval_config.metrics_set - if not eval_metrics: - eval_metrics = ['coco_detection_metrics'] - eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators( - eval_metrics, - category_index.values(), - eval_dict, - include_metrics_per_category=eval_config.include_metrics_per_category) - for loss_key, loss_tensor in iter(losses_dict.items()): - eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor) - for var in optimizer_summary_vars: - eval_metric_ops[var.op.name] = (var, tf.no_op()) - if img_summary is not None: - eval_metric_ops['Detections_Left_Groundtruth_Right'] = ( - img_summary, tf.no_op()) - eval_metric_ops = {str(k): v for k, v in eval_metric_ops.iteritems()} - - if eval_config.use_moving_averages: - variable_averages = tf.train.ExponentialMovingAverage(0.0) - variables_to_restore = variable_averages.variables_to_restore() - keep_checkpoint_every_n_hours = ( - train_config.keep_checkpoint_every_n_hours) - saver = tf.train.Saver( - variables_to_restore, - keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours) - scaffold = tf.train.Scaffold(saver=saver) - - if use_tpu: - return tf.contrib.tpu.TPUEstimatorSpec( - mode=mode, - scaffold_fn=scaffold_fn, - predictions=detections, - loss=total_loss, - train_op=train_op, - eval_metrics=eval_metric_ops, - export_outputs=export_outputs) - else: - return tf.estimator.EstimatorSpec( - mode=mode, - predictions=detections, - loss=total_loss, - train_op=train_op, - eval_metric_ops=eval_metric_ops, - export_outputs=export_outputs, - scaffold=scaffold) - - return model_fn - - -def create_estimator_and_inputs(run_config, - hparams, - pipeline_config_path, - train_steps=None, - eval_steps=None, - model_fn_creator=create_model_fn, - use_tpu_estimator=False, - use_tpu=False, - num_shards=1, - params=None, - **kwargs): - """Creates `Estimator`, input functions, and steps. - - Args: - run_config: A `RunConfig`. - hparams: A `HParams`. - pipeline_config_path: A path to a pipeline config file. - train_steps: Number of training steps. If None, the number of training steps - is set from the `TrainConfig` proto. 
- eval_steps: Number of evaluation steps per evaluation cycle. If None, the - number of evaluation steps is set from the `EvalConfig` proto. - model_fn_creator: A function that creates a `model_fn` for `Estimator`. - Follows the signature: - - * Args: - * `detection_model_fn`: Function that returns `DetectionModel` instance. - * `configs`: Dictionary of pipeline config objects. - * `hparams`: `HParams` object. - * Returns: - `model_fn` for `Estimator`. - - use_tpu_estimator: Whether a `TPUEstimator` should be returned. If False, - an `Estimator` will be returned. - use_tpu: Boolean, whether training and evaluation should run on TPU. Only - used if `use_tpu_estimator` is True. - num_shards: Number of shards (TPU cores). Only used if `use_tpu_estimator` - is True. - params: Parameter dictionary passed from the estimator. Only used if - `use_tpu_estimator` is True. - **kwargs: Additional keyword arguments for configuration override. - - Returns: - A dictionary with the following fields: - 'estimator': An `Estimator` or `TPUEstimator`. - 'train_input_fn': A training input function. - 'eval_input_fn': An evaluation input function. - 'eval_on_train_input_fn': An evaluation-on-train input function. - 'predict_input_fn': A prediction input function. - 'train_steps': Number of training steps. Either directly from input or from - configuration. - 'eval_steps': Number of evaluation steps. Either directly from input or from - configuration. - """ - get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[ - 'get_configs_from_pipeline_file'] - merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[ - 'merge_external_params_with_configs'] - create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP[ - 'create_pipeline_proto_from_configs'] - create_train_input_fn = MODEL_BUILD_UTIL_MAP['create_train_input_fn'] - create_eval_input_fn = MODEL_BUILD_UTIL_MAP['create_eval_input_fn'] - create_predict_input_fn = MODEL_BUILD_UTIL_MAP['create_predict_input_fn'] - - configs = get_configs_from_pipeline_file(pipeline_config_path) - configs = merge_external_params_with_configs( - configs, - hparams, - train_steps=train_steps, - eval_steps=eval_steps, - **kwargs) - model_config = configs['model'] - train_config = configs['train_config'] - train_input_config = configs['train_input_config'] - eval_config = configs['eval_config'] - eval_input_config = configs['eval_input_config'] - - if train_steps is None: - train_steps = configs['train_config'].num_steps - - if eval_steps is None: - eval_steps = configs['eval_config'].num_examples - - detection_model_fn = functools.partial( - model_builder.build, model_config=model_config) - - # Create the input functions for TRAIN/EVAL/PREDICT. - train_input_fn = create_train_input_fn( - train_config=train_config, - train_input_config=train_input_config, - model_config=model_config) - eval_input_fn = create_eval_input_fn( - eval_config=eval_config, - eval_input_config=eval_input_config, - model_config=model_config) - eval_on_train_input_fn = create_eval_input_fn( - eval_config=eval_config, - eval_input_config=train_input_config, - model_config=model_config) - predict_input_fn = create_predict_input_fn(model_config=model_config) - - model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu) - if use_tpu_estimator: - estimator = tf.contrib.tpu.TPUEstimator( - model_fn=model_fn, - train_batch_size=train_config.batch_size, - # For each core, only batch size 1 is supported for eval. 
- eval_batch_size=num_shards * 1 if use_tpu else 1, - use_tpu=use_tpu, - config=run_config, - params=params if params else {}) - else: - estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config) - - # Write the as-run pipeline config to disk. - if run_config.is_chief: - pipeline_config_final = create_pipeline_proto_from_configs( - configs) - config_util.save_pipeline_config(pipeline_config_final, estimator.model_dir) - - return dict( - estimator=estimator, - train_input_fn=train_input_fn, - eval_input_fn=eval_input_fn, - eval_on_train_input_fn=eval_on_train_input_fn, - predict_input_fn=predict_input_fn, - train_steps=train_steps, - eval_steps=eval_steps) - - -def create_train_and_eval_specs(train_input_fn, - eval_input_fn, - eval_on_train_input_fn, - predict_input_fn, - train_steps, - eval_steps, - eval_on_train_data=False, - eval_on_train_steps=None, - final_exporter_name='Servo', - eval_spec_name='eval'): - """Creates a `TrainSpec` and `EvalSpec`s. - - Args: - train_input_fn: Function that produces features and labels on train data. - eval_input_fn: Function that produces features and labels on eval data. - eval_on_train_input_fn: Function that produces features and labels for - evaluation on train data. - predict_input_fn: Function that produces features for inference. - train_steps: Number of training steps. - eval_steps: Number of eval steps. - eval_on_train_data: Whether to evaluate model on training data. Default is - False. - eval_on_train_steps: Number of eval steps for training data. If not given, - uses eval_steps. - final_exporter_name: String name given to `FinalExporter`. - eval_spec_name: String name given to main `EvalSpec`. - - Returns: - Tuple of `TrainSpec` and list of `EvalSpecs`. The first `EvalSpec` is for - evaluation data. If `eval_on_train_data` is True, the second `EvalSpec` in - the list will correspond to training data. - """ - - exporter = tf.estimator.FinalExporter( - name=final_exporter_name, serving_input_receiver_fn=predict_input_fn) - - train_spec = tf.estimator.TrainSpec( - input_fn=train_input_fn, max_steps=train_steps) - - eval_specs = [ - tf.estimator.EvalSpec( - name=eval_spec_name, - input_fn=eval_input_fn, - steps=eval_steps, - exporters=exporter) - ] - - if eval_on_train_data: - eval_specs.append( - tf.estimator.EvalSpec( - name='eval_on_train', input_fn=eval_on_train_input_fn, - steps=eval_on_train_steps or eval_steps)) - - return train_spec, eval_specs - - -def continuous_eval(estimator, model_dir, input_fn, eval_steps, train_steps, - name): - """Perform continuous evaluation on checkpoints written to a model directory. - - Args: - estimator: Estimator object to use for evaluation. - model_dir: Model directory to read checkpoints for continuous evaluation. - input_fn: Input function to use for evaluation. - eval_steps: Number of steps to run during each evaluation. - train_steps: Number of training steps. This is used to infer the last - checkpoint and stop evaluation loop. - name: Namescope for eval summary. 
- """ - def terminate_eval(): - tf.logging.info('Terminating eval after 180 seconds of no checkpoints') - return True - - for ckpt in tf.contrib.training.checkpoints_iterator( - model_dir, min_interval_secs=180, timeout=None, - timeout_fn=terminate_eval): - - tf.logging.info('Starting Evaluation.') - try: - eval_results = estimator.evaluate( - input_fn=input_fn, - steps=eval_steps, - checkpoint_path=ckpt, - name=name) - tf.logging.info('Eval results: %s' % eval_results) - - # Terminate eval job when final checkpoint is reached - current_step = int(os.path.basename(ckpt).split('-')[1]) - if current_step >= train_steps: - tf.logging.info( - 'Evaluation finished after training step %d' % current_step) - break - - except tf.errors.NotFoundError: - tf.logging.info( - 'Checkpoint %s no longer exists, skipping checkpoint' % ckpt) - - -def populate_experiment(run_config, - hparams, - pipeline_config_path, - train_steps=None, - eval_steps=None, - model_fn_creator=create_model_fn, - **kwargs): - """Populates an `Experiment` object. - - EXPERIMENT CLASS IS DEPRECATED. Please switch to - tf.estimator.train_and_evaluate. As an example, see model_main.py. - - Args: - run_config: A `RunConfig`. - hparams: A `HParams`. - pipeline_config_path: A path to a pipeline config file. - train_steps: Number of training steps. If None, the number of training steps - is set from the `TrainConfig` proto. - eval_steps: Number of evaluation steps per evaluation cycle. If None, the - number of evaluation steps is set from the `EvalConfig` proto. - model_fn_creator: A function that creates a `model_fn` for `Estimator`. - Follows the signature: - - * Args: - * `detection_model_fn`: Function that returns `DetectionModel` instance. - * `configs`: Dictionary of pipeline config objects. - * `hparams`: `HParams` object. - * Returns: - `model_fn` for `Estimator`. - - **kwargs: Additional keyword arguments for configuration override. - - Returns: - An `Experiment` that defines all aspects of training, evaluation, and - export. - """ - tf.logging.warning('Experiment is being deprecated. Please use ' - 'tf.estimator.train_and_evaluate(). See model_main.py for ' - 'an example.') - train_and_eval_dict = create_estimator_and_inputs( - run_config, - hparams, - pipeline_config_path, - train_steps=train_steps, - eval_steps=eval_steps, - model_fn_creator=model_fn_creator, - **kwargs) - estimator = train_and_eval_dict['estimator'] - train_input_fn = train_and_eval_dict['train_input_fn'] - eval_input_fn = train_and_eval_dict['eval_input_fn'] - predict_input_fn = train_and_eval_dict['predict_input_fn'] - train_steps = train_and_eval_dict['train_steps'] - eval_steps = train_and_eval_dict['eval_steps'] - - export_strategies = [ - tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy( - serving_input_fn=predict_input_fn) - ] - - return tf.contrib.learn.Experiment( - estimator=estimator, - train_input_fn=train_input_fn, - eval_input_fn=eval_input_fn, - train_steps=train_steps, - eval_steps=eval_steps, - export_strategies=export_strategies, - eval_delay_secs=120,) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/model_lib_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/model_lib_test.py deleted file mode 100644 index ec571e051d3b96e4f9aa8ae069d47fc109416c89..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/model_lib_test.py +++ /dev/null @@ -1,402 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for object detection model library.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools -import os - -import numpy as np -import tensorflow as tf - -from tensorflow.contrib.tpu.python.tpu import tpu_config -from tensorflow.contrib.tpu.python.tpu import tpu_estimator - -from object_detection import inputs -from object_detection import model_hparams -from object_detection import model_lib -from object_detection.builders import model_builder -from object_detection.core import standard_fields as fields -from object_detection.utils import config_util - - -# Model for test. Options are: -# 'ssd_inception_v2_pets', 'faster_rcnn_resnet50_pets' -MODEL_NAME_FOR_TEST = 'ssd_inception_v2_pets' - - -def _get_data_path(): - """Returns an absolute path to TFRecord file.""" - return os.path.join(tf.resource_loader.get_data_files_path(), 'test_data', - 'pets_examples.record') - - -def get_pipeline_config_path(model_name): - """Returns path to the local pipeline config file.""" - return os.path.join(tf.resource_loader.get_data_files_path(), 'samples', - 'configs', model_name + '.config') - - -def _get_labelmap_path(): - """Returns an absolute path to label map file.""" - return os.path.join(tf.resource_loader.get_data_files_path(), 'data', - 'pet_label_map.pbtxt') - - -def _get_configs_for_model(model_name): - """Returns configurations for model.""" - filename = get_pipeline_config_path(model_name) - data_path = _get_data_path() - label_map_path = _get_labelmap_path() - configs = config_util.get_configs_from_pipeline_file(filename) - configs = config_util.merge_external_params_with_configs( - configs, - train_input_path=data_path, - eval_input_path=data_path, - label_map_path=label_map_path) - return configs - - -class ModelLibTest(tf.test.TestCase): - - @classmethod - def setUpClass(cls): - tf.reset_default_graph() - - def _assert_model_fn_for_train_eval(self, configs, mode, - class_agnostic=False): - model_config = configs['model'] - train_config = configs['train_config'] - with tf.Graph().as_default(): - if mode == 'train': - features, labels = inputs.create_train_input_fn( - configs['train_config'], - configs['train_input_config'], - configs['model'])() - model_mode = tf.estimator.ModeKeys.TRAIN - batch_size = train_config.batch_size - elif mode == 'eval': - features, labels = inputs.create_eval_input_fn( - configs['eval_config'], - configs['eval_input_config'], - configs['model'])() - model_mode = tf.estimator.ModeKeys.EVAL - batch_size = 1 - elif mode == 'eval_on_train': - features, labels = inputs.create_eval_input_fn( - configs['eval_config'], - configs['train_input_config'], - configs['model'])() - model_mode = tf.estimator.ModeKeys.EVAL - batch_size = 1 - - detection_model_fn = functools.partial( - model_builder.build, model_config=model_config, is_training=True) - - hparams = 
model_hparams.create_hparams( - hparams_overrides='load_pretrained=false') - - model_fn = model_lib.create_model_fn(detection_model_fn, configs, hparams) - estimator_spec = model_fn(features, labels, model_mode) - - self.assertIsNotNone(estimator_spec.loss) - self.assertIsNotNone(estimator_spec.predictions) - if class_agnostic: - self.assertNotIn('detection_classes', estimator_spec.predictions) - else: - detection_classes = estimator_spec.predictions['detection_classes'] - self.assertEqual(batch_size, detection_classes.shape.as_list()[0]) - self.assertEqual(tf.float32, detection_classes.dtype) - detection_boxes = estimator_spec.predictions['detection_boxes'] - detection_scores = estimator_spec.predictions['detection_scores'] - num_detections = estimator_spec.predictions['num_detections'] - self.assertEqual(batch_size, detection_boxes.shape.as_list()[0]) - self.assertEqual(tf.float32, detection_boxes.dtype) - self.assertEqual(batch_size, detection_scores.shape.as_list()[0]) - self.assertEqual(tf.float32, detection_scores.dtype) - self.assertEqual(tf.float32, num_detections.dtype) - if model_mode == tf.estimator.ModeKeys.TRAIN: - self.assertIsNotNone(estimator_spec.train_op) - return estimator_spec - - def _assert_model_fn_for_predict(self, configs): - model_config = configs['model'] - - with tf.Graph().as_default(): - features, _ = inputs.create_eval_input_fn( - configs['eval_config'], - configs['eval_input_config'], - configs['model'])() - detection_model_fn = functools.partial( - model_builder.build, model_config=model_config, is_training=False) - - hparams = model_hparams.create_hparams( - hparams_overrides='load_pretrained=false') - - model_fn = model_lib.create_model_fn(detection_model_fn, configs, hparams) - estimator_spec = model_fn(features, None, tf.estimator.ModeKeys.PREDICT) - - self.assertIsNone(estimator_spec.loss) - self.assertIsNone(estimator_spec.train_op) - self.assertIsNotNone(estimator_spec.predictions) - self.assertIsNotNone(estimator_spec.export_outputs) - self.assertIn(tf.saved_model.signature_constants.PREDICT_METHOD_NAME, - estimator_spec.export_outputs) - - def test_model_fn_in_train_mode(self): - """Tests the model function in TRAIN mode.""" - configs = _get_configs_for_model(MODEL_NAME_FOR_TEST) - self._assert_model_fn_for_train_eval(configs, 'train') - - def test_model_fn_in_eval_mode(self): - """Tests the model function in EVAL mode.""" - configs = _get_configs_for_model(MODEL_NAME_FOR_TEST) - self._assert_model_fn_for_train_eval(configs, 'eval') - - def test_model_fn_in_eval_on_train_mode(self): - """Tests the model function in EVAL mode with train data.""" - configs = _get_configs_for_model(MODEL_NAME_FOR_TEST) - self._assert_model_fn_for_train_eval(configs, 'eval_on_train') - - def test_model_fn_in_predict_mode(self): - """Tests the model function in PREDICT mode.""" - configs = _get_configs_for_model(MODEL_NAME_FOR_TEST) - self._assert_model_fn_for_predict(configs) - - def test_create_estimator_and_inputs(self): - """Tests that Estimator and input function are constructed correctly.""" - run_config = tf.estimator.RunConfig() - hparams = model_hparams.create_hparams( - hparams_overrides='load_pretrained=false') - pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST) - train_steps = 20 - eval_steps = 10 - train_and_eval_dict = model_lib.create_estimator_and_inputs( - run_config, - hparams, - pipeline_config_path, - train_steps=train_steps, - eval_steps=eval_steps) - estimator = train_and_eval_dict['estimator'] - train_steps = 
train_and_eval_dict['train_steps'] - eval_steps = train_and_eval_dict['eval_steps'] - self.assertIsInstance(estimator, tf.estimator.Estimator) - self.assertEqual(20, train_steps) - self.assertEqual(10, eval_steps) - self.assertIn('train_input_fn', train_and_eval_dict) - self.assertIn('eval_input_fn', train_and_eval_dict) - self.assertIn('eval_on_train_input_fn', train_and_eval_dict) - - def test_create_estimator_with_default_train_eval_steps(self): - """Tests that number of train/eval defaults to config values.""" - run_config = tf.estimator.RunConfig() - hparams = model_hparams.create_hparams( - hparams_overrides='load_pretrained=false') - pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST) - configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) - config_train_steps = configs['train_config'].num_steps - config_eval_steps = configs['eval_config'].num_examples - train_and_eval_dict = model_lib.create_estimator_and_inputs( - run_config, hparams, pipeline_config_path) - estimator = train_and_eval_dict['estimator'] - train_steps = train_and_eval_dict['train_steps'] - eval_steps = train_and_eval_dict['eval_steps'] - - self.assertIsInstance(estimator, tf.estimator.Estimator) - self.assertEqual(config_train_steps, train_steps) - self.assertEqual(config_eval_steps, eval_steps) - - def test_create_tpu_estimator_and_inputs(self): - """Tests that number of train/eval defaults to config values.""" - - run_config = tpu_config.RunConfig() - hparams = model_hparams.create_hparams( - hparams_overrides='load_pretrained=false') - pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST) - train_steps = 20 - eval_steps = 10 - train_and_eval_dict = model_lib.create_estimator_and_inputs( - run_config, - hparams, - pipeline_config_path, - train_steps=train_steps, - eval_steps=eval_steps, - use_tpu_estimator=True) - estimator = train_and_eval_dict['estimator'] - train_steps = train_and_eval_dict['train_steps'] - eval_steps = train_and_eval_dict['eval_steps'] - - self.assertIsInstance(estimator, tpu_estimator.TPUEstimator) - self.assertEqual(20, train_steps) - self.assertEqual(10, eval_steps) - - def test_create_train_and_eval_specs(self): - """Tests that `TrainSpec` and `EvalSpec` is created correctly.""" - run_config = tf.estimator.RunConfig() - hparams = model_hparams.create_hparams( - hparams_overrides='load_pretrained=false') - pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST) - train_steps = 20 - eval_steps = 10 - eval_on_train_steps = 15 - train_and_eval_dict = model_lib.create_estimator_and_inputs( - run_config, - hparams, - pipeline_config_path, - train_steps=train_steps, - eval_steps=eval_steps) - train_input_fn = train_and_eval_dict['train_input_fn'] - eval_input_fn = train_and_eval_dict['eval_input_fn'] - eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn'] - predict_input_fn = train_and_eval_dict['predict_input_fn'] - train_steps = train_and_eval_dict['train_steps'] - eval_steps = train_and_eval_dict['eval_steps'] - - train_spec, eval_specs = model_lib.create_train_and_eval_specs( - train_input_fn, - eval_input_fn, - eval_on_train_input_fn, - predict_input_fn, - train_steps, - eval_steps, - eval_on_train_data=True, - eval_on_train_steps=eval_on_train_steps, - final_exporter_name='exporter', - eval_spec_name='holdout') - self.assertEqual(train_steps, train_spec.max_steps) - self.assertEqual(2, len(eval_specs)) - self.assertEqual(eval_steps, eval_specs[0].steps) - self.assertEqual('holdout', eval_specs[0].name) - 
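# ---------------------------------------------------------------------------
# [Editorial aside; illustrative sketch, not part of the deleted file.]
# The specs being asserted on here are ultimately consumed by
# tf.estimator.train_and_evaluate, which accepts only a single EvalSpec (see
# the comment in model_main.py below); the eval-on-train spec has to be run
# as a separate estimator.evaluate() call. A usage sketch, assuming
# `estimator`, `train_spec`, and `eval_specs` as built in this test:

import tensorflow as tf


def _sketch_run_specs(estimator, train_spec, eval_specs):
  # Holdout evaluation interleaved with training.
  tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
  # Optional one-off evaluation on training data.
  if len(eval_specs) > 1:
    estimator.evaluate(input_fn=eval_specs[1].input_fn,
                       steps=eval_specs[1].steps,
                       name=eval_specs[1].name)
# ---------------------------------------------------------------------------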
self.assertEqual('exporter', eval_specs[0].exporters[0].name) - self.assertEqual(eval_on_train_steps, eval_specs[1].steps) - self.assertEqual('eval_on_train', eval_specs[1].name) - - def test_experiment(self): - """Tests that the `Experiment` object is constructed correctly.""" - run_config = tf.estimator.RunConfig() - hparams = model_hparams.create_hparams( - hparams_overrides='load_pretrained=false') - pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST) - experiment = model_lib.populate_experiment( - run_config, - hparams, - pipeline_config_path, - train_steps=10, - eval_steps=20) - self.assertEqual(10, experiment.train_steps) - self.assertEqual(20, experiment.eval_steps) - - -class UnbatchTensorsTest(tf.test.TestCase): - - def test_unbatch_without_unpadding(self): - image_placeholder = tf.placeholder(tf.float32, [2, None, None, None]) - groundtruth_boxes_placeholder = tf.placeholder(tf.float32, [2, None, None]) - groundtruth_classes_placeholder = tf.placeholder(tf.float32, - [2, None, None]) - groundtruth_weights_placeholder = tf.placeholder(tf.float32, [2, None]) - - tensor_dict = { - fields.InputDataFields.image: - image_placeholder, - fields.InputDataFields.groundtruth_boxes: - groundtruth_boxes_placeholder, - fields.InputDataFields.groundtruth_classes: - groundtruth_classes_placeholder, - fields.InputDataFields.groundtruth_weights: - groundtruth_weights_placeholder - } - unbatched_tensor_dict = model_lib.unstack_batch( - tensor_dict, unpad_groundtruth_tensors=False) - - with self.test_session() as sess: - unbatched_tensor_dict_out = sess.run( - unbatched_tensor_dict, - feed_dict={ - image_placeholder: - np.random.rand(2, 4, 4, 3).astype(np.float32), - groundtruth_boxes_placeholder: - np.random.rand(2, 5, 4).astype(np.float32), - groundtruth_classes_placeholder: - np.random.rand(2, 5, 6).astype(np.float32), - groundtruth_weights_placeholder: - np.random.rand(2, 5).astype(np.float32) - }) - for image_out in unbatched_tensor_dict_out[fields.InputDataFields.image]: - self.assertAllEqual(image_out.shape, [4, 4, 3]) - for groundtruth_boxes_out in unbatched_tensor_dict_out[ - fields.InputDataFields.groundtruth_boxes]: - self.assertAllEqual(groundtruth_boxes_out.shape, [5, 4]) - for groundtruth_classes_out in unbatched_tensor_dict_out[ - fields.InputDataFields.groundtruth_classes]: - self.assertAllEqual(groundtruth_classes_out.shape, [5, 6]) - for groundtruth_weights_out in unbatched_tensor_dict_out[ - fields.InputDataFields.groundtruth_weights]: - self.assertAllEqual(groundtruth_weights_out.shape, [5]) - - def test_unbatch_and_unpad_groundtruth_tensors(self): - image_placeholder = tf.placeholder(tf.float32, [2, None, None, None]) - groundtruth_boxes_placeholder = tf.placeholder(tf.float32, [2, 5, None]) - groundtruth_classes_placeholder = tf.placeholder(tf.float32, [2, 5, None]) - groundtruth_weights_placeholder = tf.placeholder(tf.float32, [2, 5]) - num_groundtruth_placeholder = tf.placeholder(tf.int32, [2]) - - tensor_dict = { - fields.InputDataFields.image: - image_placeholder, - fields.InputDataFields.groundtruth_boxes: - groundtruth_boxes_placeholder, - fields.InputDataFields.groundtruth_classes: - groundtruth_classes_placeholder, - fields.InputDataFields.groundtruth_weights: - groundtruth_weights_placeholder, - fields.InputDataFields.num_groundtruth_boxes: - num_groundtruth_placeholder - } - unbatched_tensor_dict = model_lib.unstack_batch( - tensor_dict, unpad_groundtruth_tensors=True) - with self.test_session() as sess: - unbatched_tensor_dict_out = sess.run( - 
unbatched_tensor_dict, - feed_dict={ - image_placeholder: - np.random.rand(2, 4, 4, 3).astype(np.float32), - groundtruth_boxes_placeholder: - np.random.rand(2, 5, 4).astype(np.float32), - groundtruth_classes_placeholder: - np.random.rand(2, 5, 6).astype(np.float32), - groundtruth_weights_placeholder: - np.random.rand(2, 5).astype(np.float32), - num_groundtruth_placeholder: - np.array([3, 3], np.int32) - }) - for image_out in unbatched_tensor_dict_out[fields.InputDataFields.image]: - self.assertAllEqual(image_out.shape, [4, 4, 3]) - for groundtruth_boxes_out in unbatched_tensor_dict_out[ - fields.InputDataFields.groundtruth_boxes]: - self.assertAllEqual(groundtruth_boxes_out.shape, [3, 4]) - for groundtruth_classes_out in unbatched_tensor_dict_out[ - fields.InputDataFields.groundtruth_classes]: - self.assertAllEqual(groundtruth_classes_out.shape, [3, 6]) - for groundtruth_weights_out in unbatched_tensor_dict_out[ - fields.InputDataFields.groundtruth_weights]: - self.assertAllEqual(groundtruth_weights_out.shape, [3]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/model_main.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/model_main.py deleted file mode 100644 index b4bfcf325eb59086cb0bf7965285c2507a4a089e..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/model_main.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Binary to run train and evaluation on object detection model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl import flags - -import tensorflow as tf - -from object_detection import model_hparams -from object_detection import model_lib - -flags.DEFINE_string( - 'model_dir', None, 'Path to output model directory ' - 'where event and checkpoint files will be written.') -flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config ' - 'file.') -flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.') -flags.DEFINE_integer('num_eval_steps', None, 'Number of train steps.') -flags.DEFINE_string( - 'hparams_overrides', None, 'Hyperparameter overrides, ' - 'represented as a string containing comma-separated ' - 'hparam_name=value pairs.') -flags.DEFINE_string( - 'checkpoint_dir', None, 'Path to directory holding a checkpoint. 
If ' - '`checkpoint_dir` is provided, this binary operates in eval-only mode, ' - 'writing resulting metrics to `model_dir`.') - -FLAGS = flags.FLAGS - - -def main(unused_argv): - flags.mark_flag_as_required('model_dir') - flags.mark_flag_as_required('pipeline_config_path') - config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir) - - train_and_eval_dict = model_lib.create_estimator_and_inputs( - run_config=config, - hparams=model_hparams.create_hparams(FLAGS.hparams_overrides), - pipeline_config_path=FLAGS.pipeline_config_path, - train_steps=FLAGS.num_train_steps, - eval_steps=FLAGS.num_eval_steps) - estimator = train_and_eval_dict['estimator'] - train_input_fn = train_and_eval_dict['train_input_fn'] - eval_input_fn = train_and_eval_dict['eval_input_fn'] - eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn'] - predict_input_fn = train_and_eval_dict['predict_input_fn'] - train_steps = train_and_eval_dict['train_steps'] - eval_steps = train_and_eval_dict['eval_steps'] - - if FLAGS.checkpoint_dir: - estimator.evaluate(eval_input_fn, - eval_steps, - checkpoint_path=tf.train.latest_checkpoint( - FLAGS.checkpoint_dir)) - else: - train_spec, eval_specs = model_lib.create_train_and_eval_specs( - train_input_fn, - eval_input_fn, - eval_on_train_input_fn, - predict_input_fn, - train_steps, - eval_steps, - eval_on_train_data=False) - - # Currently only a single Eval Spec is allowed. - tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0]) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/model_tpu_main.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/model_tpu_main.py deleted file mode 100644 index 50f8fb9a29ad0147558ecf998e009323d7fe6458..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/model_tpu_main.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Creates and runs `Estimator` for object detection model on TPUs. - -This uses the TPUEstimator API to define and run a model in TRAIN/EVAL modes. -""" -# pylint: enable=line-too-long - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl import flags -import tensorflow as tf - -from tensorflow.contrib.tpu.python.tpu import tpu_config - -from object_detection import model_hparams -from object_detection import model_lib - -tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs') - -# Cloud TPU Cluster Resolvers -flags.DEFINE_string( - 'gcp_project', - default=None, - help='Project name for the Cloud TPU-enabled project. If not specified, we ' - 'will attempt to automatically detect the GCE project from metadata.') -flags.DEFINE_string( - 'tpu_zone', - default=None, - help='GCE zone where the Cloud TPU is located in. 
If not specified, we ' - 'will attempt to automatically detect the GCE project from metadata.') -flags.DEFINE_string( - 'tpu_name', - default=None, - help='Name of the Cloud TPU for Cluster Resolvers.') - -flags.DEFINE_integer('num_shards', 8, 'Number of shards (TPU cores).') -flags.DEFINE_integer('iterations_per_loop', 100, - 'Number of iterations per TPU training loop.') -# For mode=train_and_eval, evaluation occurs after training is finished. -# Note: independently of steps_per_checkpoint, estimator will save the most -# recent checkpoint every 10 minutes by default for train_and_eval -flags.DEFINE_string('mode', 'train', - 'Mode to run: train, eval') -flags.DEFINE_integer('train_batch_size', None, 'Batch size for training. If ' - 'this is not provided, batch size is read from training ' - 'config.') - -flags.DEFINE_string( - 'hparams_overrides', None, 'Comma-separated list of ' - 'hyperparameters to override defaults.') -flags.DEFINE_boolean('eval_training_data', False, - 'If training data should be evaluated for this job.') -flags.DEFINE_string( - 'model_dir', None, 'Path to output model directory ' - 'where event and checkpoint files will be written.') -flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config ' - 'file.') -flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.') -flags.DEFINE_integer('num_eval_steps', None, 'Number of train steps.') - -FLAGS = tf.flags.FLAGS - - -def main(unused_argv): - flags.mark_flag_as_required('model_dir') - flags.mark_flag_as_required('pipeline_config_path') - - tpu_cluster_resolver = ( - tf.contrib.cluster_resolver.python.training.TPUClusterResolver( - tpu_names=[FLAGS.tpu_name], - zone=FLAGS.tpu_zone, - project=FLAGS.gcp_project)) - tpu_grpc_url = tpu_cluster_resolver.get_master() - - config = tpu_config.RunConfig( - master=tpu_grpc_url, - evaluation_master=tpu_grpc_url, - model_dir=FLAGS.model_dir, - tpu_config=tpu_config.TPUConfig( - iterations_per_loop=FLAGS.iterations_per_loop, - num_shards=FLAGS.num_shards)) - - kwargs = {} - if FLAGS.train_batch_size: - kwargs['batch_size'] = FLAGS.train_batch_size - - train_and_eval_dict = model_lib.create_estimator_and_inputs( - run_config=config, - hparams=model_hparams.create_hparams(FLAGS.hparams_overrides), - pipeline_config_path=FLAGS.pipeline_config_path, - train_steps=FLAGS.num_train_steps, - eval_steps=FLAGS.num_eval_steps, - use_tpu_estimator=True, - use_tpu=FLAGS.use_tpu, - num_shards=FLAGS.num_shards, - **kwargs) - estimator = train_and_eval_dict['estimator'] - train_input_fn = train_and_eval_dict['train_input_fn'] - eval_input_fn = train_and_eval_dict['eval_input_fn'] - eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn'] - train_steps = train_and_eval_dict['train_steps'] - eval_steps = train_and_eval_dict['eval_steps'] - - if FLAGS.mode == 'train': - estimator.train(input_fn=train_input_fn, max_steps=train_steps) - - # Continuously evaluating. 
- if FLAGS.mode == 'eval': - if FLAGS.eval_training_data: - name = 'training_data' - input_fn = eval_on_train_input_fn - else: - name = 'validation_data' - input_fn = eval_input_fn - model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn, eval_steps, - train_steps, name) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor.py deleted file mode 100644 index f0cad235408666d386454cccc64a264f25f58c29..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor.py +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Embedded-friendly SSDFeatureExtractor for MobilenetV1 features.""" - -import tensorflow as tf - -from object_detection.meta_architectures import ssd_meta_arch -from object_detection.models import feature_map_generators -from object_detection.utils import context_manager -from object_detection.utils import ops -from nets import mobilenet_v1 - -slim = tf.contrib.slim - - -class EmbeddedSSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): - """Embedded-friendly SSD Feature Extractor using MobilenetV1 features. - - This feature extractor is similar to SSD MobileNetV1 feature extractor, and - it fixes input resolution to be 256x256, reduces the number of feature maps - used for box prediction and ensures convolution kernel to be no larger - than input tensor in spatial dimensions. - - This feature extractor requires support of the following ops if used in - embedded devices: - - Conv - - DepthwiseConv - - Relu6 - - All conv/depthwiseconv use SAME padding, and no additional spatial padding is - needed. - """ - - def __init__(self, - is_training, - depth_multiplier, - min_depth, - pad_to_multiple, - conv_hyperparams_fn, - reuse_weights=None, - use_explicit_padding=False, - use_depthwise=False, - override_base_feature_extractor_hyperparams=False): - """MobileNetV1 Feature Extractor for Embedded-friendly SSD Models. - - Args: - is_training: whether the network is in training mode. - depth_multiplier: float depth multiplier for feature extractor. - min_depth: minimum feature extractor depth. - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. For EmbeddedSSD it must be set to 1. 
- conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d - and separable_conv2d ops in the layers that are added on top of the - base feature extractor. - reuse_weights: Whether to reuse variables. Default is None. - use_explicit_padding: Whether to use explicit padding when extracting - features. Default is False. - use_depthwise: Whether to use depthwise convolutions. Default is False. - override_base_feature_extractor_hyperparams: Whether to override - hyperparameters of the base feature extractor with the one from - `conv_hyperparams_fn`. - - Raises: - ValueError: upon invalid `pad_to_multiple` values. - """ - if pad_to_multiple != 1: - raise ValueError('Embedded-specific SSD only supports `pad_to_multiple` ' - 'of 1.') - - super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__( - is_training, depth_multiplier, min_depth, pad_to_multiple, - conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise, - override_base_feature_extractor_hyperparams) - - def preprocess(self, resized_inputs): - """SSD preprocessing. - - Maps pixel values to the range [-1, 1]. - - Args: - resized_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - """ - return (2.0 / 255.0) * resized_inputs - 1.0 - - def extract_features(self, preprocessed_inputs): - """Extract features from preprocessed inputs. - - Args: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - feature_maps: a list of tensors where the ith tensor has shape - [batch, height_i, width_i, depth_i] - - Raises: - ValueError: if image height or width are not 256 pixels. 
- """ - image_shape = preprocessed_inputs.get_shape() - image_shape.assert_has_rank(4) - image_height = image_shape[1].value - image_width = image_shape[2].value - - if image_height is None or image_width is None: - shape_assert = tf.Assert( - tf.logical_and(tf.equal(tf.shape(preprocessed_inputs)[1], 256), - tf.equal(tf.shape(preprocessed_inputs)[2], 256)), - ['image size must be 256 in both height and width.']) - with tf.control_dependencies([shape_assert]): - preprocessed_inputs = tf.identity(preprocessed_inputs) - elif image_height != 256 or image_width != 256: - raise ValueError('image size must be = 256 in both height and width;' - ' image dim = %d,%d' % (image_height, image_width)) - - feature_map_layout = { - 'from_layer': [ - 'Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', '' - ], - 'layer_depth': [-1, -1, 512, 256, 256], - 'conv_kernel_size': [-1, -1, 3, 3, 2], - 'use_explicit_padding': self._use_explicit_padding, - 'use_depthwise': self._use_depthwise, - } - - with tf.variable_scope('MobilenetV1', - reuse=self._reuse_weights) as scope: - with slim.arg_scope( - mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)): - with (slim.arg_scope(self._conv_hyperparams_fn()) - if self._override_base_feature_extractor_hyperparams - else context_manager.IdentityContextManager()): - _, image_features = mobilenet_v1.mobilenet_v1_base( - ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), - final_endpoint='Conv2d_13_pointwise', - min_depth=self._min_depth, - depth_multiplier=self._depth_multiplier, - use_explicit_padding=self._use_explicit_padding, - scope=scope) - with slim.arg_scope(self._conv_hyperparams_fn()): - feature_maps = feature_map_generators.multi_resolution_feature_maps( - feature_map_layout=feature_map_layout, - depth_multiplier=self._depth_multiplier, - min_depth=self._min_depth, - insert_1x1_conv=True, - image_features=image_features) - - return feature_maps.values() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py deleted file mode 100644 index 1fee66c866b356753c8b9d712a705763f7a76638..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
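The feature_map_layout above taps the MobilenetV1 base at Conv2d_11_pointwise (stride 16) and Conv2d_13_pointwise (stride 32) and appends three stride-2 box-predictor layers. For the mandatory 256x256 input, the spatial sizes therefore halve level by level, which is exactly what the tests below assert. A minimal standalone check of that geometry:

    # Spatial sizes of the five feature maps for a 256x256 input.
    sizes = [256 // 16, 256 // 32]   # Conv2d_11 and Conv2d_13 endpoints.
    for _ in range(3):               # Three extra layers, each stride 2.
        sizes.append(max(1, sizes[-1] // 2))
    assert sizes == [16, 8, 4, 2, 1]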
-# ============================================================================== - -"""Tests for embedded_ssd_mobilenet_v1_feature_extractor.""" -import numpy as np -import tensorflow as tf - -from object_detection.models import embedded_ssd_mobilenet_v1_feature_extractor -from object_detection.models import ssd_feature_extractor_test - - -class EmbeddedSSDMobileNetV1FeatureExtractorTest( - ssd_feature_extractor_test.SsdFeatureExtractorTestBase): - - def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, - is_training=True): - """Constructs a new feature extractor. - - Args: - depth_multiplier: float depth multiplier for feature extractor - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - is_training: whether the network is in training mode. - - Returns: - an ssd_meta_arch.SSDFeatureExtractor object. - """ - min_depth = 32 - return (embedded_ssd_mobilenet_v1_feature_extractor. - EmbeddedSSDMobileNetV1FeatureExtractor( - is_training, depth_multiplier, min_depth, pad_to_multiple, - self.conv_hyperparams_fn, - override_base_feature_extractor_hyperparams=True)) - - def test_extract_features_returns_correct_shapes_256(self): - image_height = 256 - image_width = 256 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 16, 16, 512), (2, 8, 8, 1024), - (2, 4, 4, 512), (2, 2, 2, 256), - (2, 1, 1, 256)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self): - image_height = 256 - image_width = 256 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 16, 16, 512), (2, 8, 8, 1024), - (2, 4, 4, 512), (2, 2, 2, 256), - (2, 1, 1, 256)] - self.check_extract_features_returns_correct_shapes_with_dynamic_inputs( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): - image_height = 256 - image_width = 256 - depth_multiplier = 0.5**12 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 16, 16, 32), (2, 8, 8, 32), (2, 4, 4, 32), - (2, 2, 2, 32), (2, 1, 1, 32)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_with_pad_to_multiple_of_1( - self): - image_height = 256 - image_width = 256 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 16, 16, 512), (2, 8, 8, 1024), - (2, 4, 4, 512), (2, 2, 2, 256), - (2, 1, 1, 256)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_raises_error_with_pad_to_multiple_not_1(self): - depth_multiplier = 1.0 - pad_to_multiple = 2 - with self.assertRaises(ValueError): - _ = self._create_feature_extractor(depth_multiplier, pad_to_multiple) - - def test_extract_features_raises_error_with_invalid_image_size(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1.0 - pad_to_multiple = 1 - self.check_extract_features_raises_error_with_invalid_image_size( - image_height, image_width, depth_multiplier, pad_to_multiple) - - def test_preprocess_returns_correct_value_range(self): - image_height = 256 - image_width = 256 - depth_multiplier = 1 - 
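The value-range test being set up here (its body continues below) exercises preprocess(), which across these extractors is the same affine map (2/255)*x - 1 from [0, 255] to [-1, 1]. Its endpoints can be verified by hand without TensorFlow:

    scale = lambda x: (2.0 / 255.0) * x - 1.0
    assert scale(0.0) == -1.0                # Darkest pixel -> -1.
    assert abs(scale(255.0) - 1.0) < 1e-12   # Brightest pixel -> +1.
    assert abs(scale(127.5)) < 1e-12         # Midpoint -> roughly 0.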
pad_to_multiple = 1 - test_image = np.random.rand(4, image_height, image_width, 3) - feature_extractor = self._create_feature_extractor(depth_multiplier, - pad_to_multiple) - preprocessed_image = feature_extractor.preprocess(test_image) - self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0))) - - def test_variables_only_created_in_scope(self): - depth_multiplier = 1 - pad_to_multiple = 1 - scope_name = 'MobilenetV1' - self.check_feature_extractor_variables_under_scope( - depth_multiplier, pad_to_multiple, scope_name) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor.py deleted file mode 100644 index 30b3dd4eccb29b60add46a0c91673c00bcbbdf01..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor.py +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Inception Resnet v2 Faster R-CNN implementation. - -See "Inception-v4, Inception-ResNet and the Impact of Residual Connections on -Learning" by Szegedy et al. (https://arxiv.org/abs/1602.07261) -as well as -"Speed/accuracy trade-offs for modern convolutional object detectors" by -Huang et al. (https://arxiv.org/abs/1611.10012) -""" - -import tensorflow as tf - -from object_detection.meta_architectures import faster_rcnn_meta_arch -from nets import inception_resnet_v2 - -slim = tf.contrib.slim - - -class FasterRCNNInceptionResnetV2FeatureExtractor( - faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): - """Faster R-CNN with Inception Resnet v2 feature extractor implementation.""" - - def __init__(self, - is_training, - first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0): - """Constructor. - - Args: - is_training: See base class. - first_stage_features_stride: See base class. - batch_norm_trainable: See base class. - reuse_weights: See base class. - weight_decay: See base class. - - Raises: - ValueError: If `first_stage_features_stride` is not 8 or 16. - """ - if first_stage_features_stride != 8 and first_stage_features_stride != 16: - raise ValueError('`first_stage_features_stride` must be 8 or 16.') - super(FasterRCNNInceptionResnetV2FeatureExtractor, self).__init__( - is_training, first_stage_features_stride, batch_norm_trainable, - reuse_weights, weight_decay) - - def preprocess(self, resized_inputs): - """Faster R-CNN with Inception Resnet v2 preprocessing. - - Maps pixel values to the range [-1, 1]. - - Args: - resized_inputs: A [batch, height_in, width_in, channels] float32 tensor - representing a batch of images with values between 0 and 255.0. 
- - Returns: - preprocessed_inputs: A [batch, height_out, width_out, channels] float32 - tensor representing a batch of images. - - """ - return (2.0 / 255.0) * resized_inputs - 1.0 - - def _extract_proposal_features(self, preprocessed_inputs, scope): - """Extracts first stage RPN features. - - Extracts features using the first half of the Inception Resnet v2 network. - We construct the network in `align_feature_maps=True` mode, which means - that all VALID paddings in the network are changed to SAME padding so that - the feature maps are aligned. - - Args: - preprocessed_inputs: A [batch, height, width, channels] float32 tensor - representing a batch of images. - scope: A scope name. - - Returns: - rpn_feature_map: A tensor with shape [batch, height, width, depth] - Raises: - InvalidArgumentError: If the spatial size of `preprocessed_inputs` - (height or width) is less than 33. - ValueError: If the created network is missing the required activation. - """ - if len(preprocessed_inputs.get_shape().as_list()) != 4: - raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a ' - 'tensor of shape %s' % preprocessed_inputs.get_shape()) - - with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope( - weight_decay=self._weight_decay)): - # Forces is_training to False to disable batch norm update. - with slim.arg_scope([slim.batch_norm], - is_training=self._train_batch_norm): - with tf.variable_scope('InceptionResnetV2', - reuse=self._reuse_weights) as scope: - return inception_resnet_v2.inception_resnet_v2_base( - preprocessed_inputs, final_endpoint='PreAuxLogits', - scope=scope, output_stride=self._first_stage_features_stride, - align_feature_maps=True) - - def _extract_box_classifier_features(self, proposal_feature_maps, scope): - """Extracts second stage box classifier features. - - This function reconstructs the "second half" of the Inception ResNet v2 - network after the part defined in `_extract_proposal_features`. - - Args: - proposal_feature_maps: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, crop_height, crop_width, depth] - representing the feature map cropped to each proposal. - scope: A scope name. - - Returns: - proposal_classifier_features: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, height, width, depth] - representing box classifier features for each proposal. - """ - with tf.variable_scope('InceptionResnetV2', reuse=self._reuse_weights): - with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope( - weight_decay=self._weight_decay)): - # Forces is_training to False to disable batch norm update. 
- with slim.arg_scope([slim.batch_norm], - is_training=self._train_batch_norm): - with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], - stride=1, padding='SAME'): - with tf.variable_scope('Mixed_7a'): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(proposal_feature_maps, - 256, 1, scope='Conv2d_0a_1x1') - tower_conv_1 = slim.conv2d( - tower_conv, 384, 3, stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - tower_conv1 = slim.conv2d( - proposal_feature_maps, 256, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d( - tower_conv1, 288, 3, stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - tower_conv2 = slim.conv2d( - proposal_feature_maps, 256, 1, scope='Conv2d_0a_1x1') - tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3, - scope='Conv2d_0b_3x3') - tower_conv2_2 = slim.conv2d( - tower_conv2_1, 320, 3, stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_3'): - tower_pool = slim.max_pool2d( - proposal_feature_maps, 3, stride=2, padding='VALID', - scope='MaxPool_1a_3x3') - net = tf.concat( - [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3) - net = slim.repeat(net, 9, inception_resnet_v2.block8, scale=0.20) - net = inception_resnet_v2.block8(net, activation_fn=None) - proposal_classifier_features = slim.conv2d( - net, 1536, 1, scope='Conv2d_7b_1x1') - return proposal_classifier_features - - def restore_from_classification_checkpoint_fn( - self, - first_stage_feature_extractor_scope, - second_stage_feature_extractor_scope): - """Returns a map of variables to load from a foreign checkpoint. - - Note that this overrides the default implementation in - faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for - InceptionResnetV2 checkpoints. - - TODO(jonathanhuang,rathodv): revisit whether it's possible to force the - `Repeat` namescope as created in `_extract_box_classifier_features` to - start counting at 2 (e.g. `Repeat_2`) so that the default restore_fn can - be used. - - Args: - first_stage_feature_extractor_scope: A scope name for the first stage - feature extractor. - second_stage_feature_extractor_scope: A scope name for the second stage - feature extractor. - - Returns: - A dict mapping variable names (to load from a checkpoint) to variables in - the model graph. 
- """ - - variables_to_restore = {} - for variable in tf.global_variables(): - if variable.op.name.startswith( - first_stage_feature_extractor_scope): - var_name = variable.op.name.replace( - first_stage_feature_extractor_scope + '/', '') - variables_to_restore[var_name] = variable - if variable.op.name.startswith( - second_stage_feature_extractor_scope): - var_name = variable.op.name.replace( - second_stage_feature_extractor_scope - + '/InceptionResnetV2/Repeat', 'InceptionResnetV2/Repeat_2') - var_name = var_name.replace( - second_stage_feature_extractor_scope + '/', '') - variables_to_restore[var_name] = variable - return variables_to_restore - diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py deleted file mode 100644 index 1d9f088f3cbcb434f7305341cb32c5eb2ce35bf5..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
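The restore mapping above strips the detection-graph scope prefixes and shifts the `Repeat` scope so that classification-checkpoint names line up with detection-graph variables. An illustrative pair of substitutions, assuming the meta-architecture's usual first/second-stage scope names (the exact variable paths are examples, not values from this diff):

    # First stage: drop the detection prefix.
    # 'FirstStageFeatureExtractor/InceptionResnetV2/Conv2d_1a_3x3/weights'
    #     -> 'InceptionResnetV2/Conv2d_1a_3x3/weights'
    # Second stage: drop the prefix and map Repeat -> Repeat_2, since the
    # box-classifier head re-creates cells that the classification
    # checkpoint stored under 'InceptionResnetV2/Repeat_2'.
    # 'SecondStageFeatureExtractor/InceptionResnetV2/Repeat/block8_1/Conv2d_1x1/weights'
    #     -> 'InceptionResnetV2/Repeat_2/block8_1/Conv2d_1x1/weights'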
-# ============================================================================== - -"""Tests for models.faster_rcnn_inception_resnet_v2_feature_extractor.""" - -import tensorflow as tf - -from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res - - -class FasterRcnnInceptionResnetV2FeatureExtractorTest(tf.test.TestCase): - - def _build_feature_extractor(self, first_stage_features_stride): - return frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor( - is_training=False, - first_stage_features_stride=first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0) - - def test_extract_proposal_features_returns_expected_size(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [1, 299, 299, 3], maxval=255, dtype=tf.float32) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 19, 19, 1088]) - - def test_extract_proposal_features_stride_eight(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=8) - preprocessed_inputs = tf.random_uniform( - [1, 224, 224, 3], maxval=255, dtype=tf.float32) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 28, 28, 1088]) - - def test_extract_proposal_features_half_size_input(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [1, 112, 112, 3], maxval=255, dtype=tf.float32) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 7, 7, 1088]) - - def test_extract_proposal_features_dies_on_invalid_stride(self): - with self.assertRaises(ValueError): - self._build_feature_extractor(first_stage_features_stride=99) - - def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [224, 224, 3], maxval=255, dtype=tf.float32) - with self.assertRaises(ValueError): - feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - - def test_extract_box_classifier_features_returns_expected_size(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - proposal_feature_maps = tf.random_uniform( - [2, 17, 17, 1088], maxval=255, dtype=tf.float32) - proposal_classifier_features = ( - feature_extractor.extract_box_classifier_features( - proposal_feature_maps, scope='TestScope')) - features_shape = tf.shape(proposal_classifier_features) - - init_op = tf.global_variables_initializer() - with 
self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [2, 8, 8, 1536]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_inception_v2_feature_extractor.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_inception_v2_feature_extractor.py deleted file mode 100644 index fe0675169bfd69611851680d5da81e002fe1b959..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_inception_v2_feature_extractor.py +++ /dev/null @@ -1,254 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Inception V2 Faster R-CNN implementation. - -See "Rethinking the Inception Architecture for Computer Vision" -https://arxiv.org/abs/1512.00567 -""" -import tensorflow as tf - -from object_detection.meta_architectures import faster_rcnn_meta_arch -from nets import inception_v2 - -slim = tf.contrib.slim - - -def _batch_norm_arg_scope(list_ops, - use_batch_norm=True, - batch_norm_decay=0.9997, - batch_norm_epsilon=0.001, - batch_norm_scale=False, - train_batch_norm=False): - """Slim arg scope for InceptionV2 batch norm.""" - if use_batch_norm: - batch_norm_params = { - 'is_training': train_batch_norm, - 'scale': batch_norm_scale, - 'decay': batch_norm_decay, - 'epsilon': batch_norm_epsilon - } - normalizer_fn = slim.batch_norm - else: - normalizer_fn = None - batch_norm_params = None - - return slim.arg_scope(list_ops, - normalizer_fn=normalizer_fn, - normalizer_params=batch_norm_params) - - -class FasterRCNNInceptionV2FeatureExtractor( - faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): - """Faster R-CNN Inception V2 feature extractor implementation.""" - - def __init__(self, - is_training, - first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0, - depth_multiplier=1.0, - min_depth=16): - """Constructor. - - Args: - is_training: See base class. - first_stage_features_stride: See base class. - batch_norm_trainable: See base class. - reuse_weights: See base class. - weight_decay: See base class. - depth_multiplier: float depth multiplier for feature extractor. - min_depth: minimum feature extractor depth. - - Raises: - ValueError: If `first_stage_features_stride` is not 8 or 16. - """ - if first_stage_features_stride != 8 and first_stage_features_stride != 16: - raise ValueError('`first_stage_features_stride` must be 8 or 16.') - self._depth_multiplier = depth_multiplier - self._min_depth = min_depth - super(FasterRCNNInceptionV2FeatureExtractor, self).__init__( - is_training, first_stage_features_stride, batch_norm_trainable, - reuse_weights, weight_decay) - - def preprocess(self, resized_inputs): - """Faster R-CNN Inception V2 preprocessing. 
- - Maps pixel values to the range [-1, 1]. - - Args: - resized_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - """ - return (2.0 / 255.0) * resized_inputs - 1.0 - - def _extract_proposal_features(self, preprocessed_inputs, scope): - """Extracts first stage RPN features. - - Args: - preprocessed_inputs: A [batch, height, width, channels] float32 tensor - representing a batch of images. - scope: A scope name. - - Returns: - rpn_feature_map: A tensor with shape [batch, height, width, depth] - activations: A dictionary mapping feature extractor tensor names to - tensors - - Raises: - InvalidArgumentError: If the spatial size of `preprocessed_inputs` - (height or width) is less than 33. - ValueError: If the created network is missing the required activation. - """ - - preprocessed_inputs.get_shape().assert_has_rank(4) - shape_assert = tf.Assert( - tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33), - tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)), - ['image size must at least be 33 in both height and width.']) - - with tf.control_dependencies([shape_assert]): - with tf.variable_scope('InceptionV2', - reuse=self._reuse_weights) as scope: - with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d], - batch_norm_scale=True, - train_batch_norm=self._train_batch_norm): - _, activations = inception_v2.inception_v2_base( - preprocessed_inputs, - final_endpoint='Mixed_4e', - min_depth=self._min_depth, - depth_multiplier=self._depth_multiplier, - scope=scope) - - return activations['Mixed_4e'], activations - - def _extract_box_classifier_features(self, proposal_feature_maps, scope): - """Extracts second stage box classifier features. - - Args: - proposal_feature_maps: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, crop_height, crop_width, depth] - representing the feature map cropped to each proposal. - scope: A scope name (unused). - - Returns: - proposal_classifier_features: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, height, width, depth] - representing box classifier features for each proposal. 
- """ - net = proposal_feature_maps - - depth = lambda d: max(int(d * self._depth_multiplier), self._min_depth) - trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) - - data_format = 'NHWC' - concat_dim = 3 if data_format == 'NHWC' else 1 - - with tf.variable_scope('InceptionV2', reuse=self._reuse_weights): - with slim.arg_scope( - [slim.conv2d, slim.max_pool2d, slim.avg_pool2d], - stride=1, - padding='SAME', - data_format=data_format): - with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d], - batch_norm_scale=True, - train_batch_norm=self._train_batch_norm): - - with tf.variable_scope('Mixed_5a'): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d( - net, depth(128), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_0 = slim.conv2d(branch_0, depth(192), [3, 3], stride=2, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d( - net, depth(192), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], - scope='Conv2d_0b_3x3') - branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], stride=2, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.max_pool2d(net, [3, 3], stride=2, - scope='MaxPool_1a_3x3') - net = tf.concat([branch_0, branch_1, branch_2], concat_dim) - - with tf.variable_scope('Mixed_5b'): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(352), [1, 1], - scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d( - net, depth(192), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(320), [3, 3], - scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d( - net, depth(160), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], - scope='Conv2d_0b_3x3') - branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d( - branch_3, depth(128), [1, 1], - weights_initializer=trunc_normal(0.1), - scope='Conv2d_0b_1x1') - net = tf.concat([branch_0, branch_1, branch_2, branch_3], - concat_dim) - - with tf.variable_scope('Mixed_5c'): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(352), [1, 1], - scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d( - net, depth(192), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(320), [3, 3], - scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d( - net, depth(192), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], - scope='Conv2d_0b_3x3') - branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') - branch_3 = slim.conv2d( - branch_3, depth(128), [1, 1], - weights_initializer=trunc_normal(0.1), - scope='Conv2d_0b_1x1') - proposal_classifier_features = tf.concat( - [branch_0, branch_1, branch_2, branch_3], concat_dim) - - return proposal_classifier_features diff --git 
a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_inception_v2_feature_extractor_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_inception_v2_feature_extractor_test.py deleted file mode 100644 index 6b5bc2f9be3c569d1fc6e5400833066aa42e7559..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_inception_v2_feature_extractor_test.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for faster_rcnn_inception_v2_feature_extractor.""" - -import numpy as np -import tensorflow as tf - -from object_detection.models import faster_rcnn_inception_v2_feature_extractor as faster_rcnn_inception_v2 - - -class FasterRcnnInceptionV2FeatureExtractorTest(tf.test.TestCase): - - def _build_feature_extractor(self, first_stage_features_stride): - return faster_rcnn_inception_v2.FasterRCNNInceptionV2FeatureExtractor( - is_training=False, - first_stage_features_stride=first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0) - - def test_extract_proposal_features_returns_expected_size(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [4, 224, 224, 3], maxval=255, dtype=tf.float32) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [4, 14, 14, 576]) - - def test_extract_proposal_features_stride_eight(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=8) - preprocessed_inputs = tf.random_uniform( - [4, 224, 224, 3], maxval=255, dtype=tf.float32) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [4, 14, 14, 576]) - - def test_extract_proposal_features_half_size_input(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [1, 112, 112, 3], maxval=255, dtype=tf.float32) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = 
sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 7, 7, 576]) - - def test_extract_proposal_features_dies_on_invalid_stride(self): - with self.assertRaises(ValueError): - self._build_feature_extractor(first_stage_features_stride=99) - - def test_extract_proposal_features_dies_on_very_small_images(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3)) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - with self.assertRaises(tf.errors.InvalidArgumentError): - sess.run( - features_shape, - feed_dict={preprocessed_inputs: np.random.rand(4, 32, 32, 3)}) - - def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [224, 224, 3], maxval=255, dtype=tf.float32) - with self.assertRaises(ValueError): - feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - - def test_extract_box_classifier_features_returns_expected_size(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - proposal_feature_maps = tf.random_uniform( - [3, 14, 14, 576], maxval=255, dtype=tf.float32) - proposal_classifier_features = ( - feature_extractor.extract_box_classifier_features( - proposal_feature_maps, scope='TestScope')) - features_shape = tf.shape(proposal_classifier_features) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [3, 7, 7, 1024]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_mobilenet_v1_feature_extractor.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_mobilenet_v1_feature_extractor.py deleted file mode 100644 index 52c744b8293b114941edada92c91f9c3f5d8dcd9..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_mobilenet_v1_feature_extractor.py +++ /dev/null @@ -1,194 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Mobilenet v1 Faster R-CNN implementation.""" -import numpy as np - -import tensorflow as tf - -from object_detection.meta_architectures import faster_rcnn_meta_arch -from object_detection.utils import shape_utils -from nets import mobilenet_v1 - -slim = tf.contrib.slim - - -def _get_mobilenet_conv_no_last_stride_defs(conv_depth_ratio_in_percentage): - if conv_depth_ratio_in_percentage not in [25, 50, 75, 100]: - raise ValueError( - 'Only the following ratio percentages are supported: 25, 50, 75, 100') - conv_depth_ratio_in_percentage = float(conv_depth_ratio_in_percentage) / 100.0 - channels = np.array([ - 32, 64, 128, 128, 256, 256, 512, 512, 512, 512, 512, 512, 1024, 1024 - ], dtype=np.float32) - channels = (channels * conv_depth_ratio_in_percentage).astype(np.int32) - return [ - mobilenet_v1.Conv(kernel=[3, 3], stride=2, depth=channels[0]), - mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[1]), - mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=channels[2]), - mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[3]), - mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=channels[4]), - mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[5]), - mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=channels[6]), - mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[7]), - mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[8]), - mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[9]), - mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[10]), - mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[11]), - mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[12]), - mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[13]) - ] - - -class FasterRCNNMobilenetV1FeatureExtractor( - faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): - """Faster R-CNN Mobilenet V1 feature extractor implementation.""" - - def __init__(self, - is_training, - first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0, - depth_multiplier=1.0, - min_depth=16, - skip_last_stride=False, - conv_depth_ratio_in_percentage=100): - """Constructor. - - Args: - is_training: See base class. - first_stage_features_stride: See base class. - batch_norm_trainable: See base class. - reuse_weights: See base class. - weight_decay: See base class. - depth_multiplier: float depth multiplier for feature extractor. - min_depth: minimum feature extractor depth. - skip_last_stride: Skip the last stride if True. - conv_depth_ratio_in_percentage: Conv depth ratio in percentage. Only - applied if skip_last_stride is True. - - Raises: - ValueError: If `first_stage_features_stride` is not 8 or 16. - """ - if first_stage_features_stride != 8 and first_stage_features_stride != 16: - raise ValueError('`first_stage_features_stride` must be 8 or 16.') - self._depth_multiplier = depth_multiplier - self._min_depth = min_depth - self._skip_last_stride = skip_last_stride - self._conv_depth_ratio_in_percentage = conv_depth_ratio_in_percentage - super(FasterRCNNMobilenetV1FeatureExtractor, self).__init__( - is_training, first_stage_features_stride, batch_norm_trainable, - reuse_weights, weight_decay) - - def preprocess(self, resized_inputs): - """Faster R-CNN Mobilenet V1 preprocessing. - - Maps pixel values to the range [-1, 1]. 
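The helper _get_mobilenet_conv_no_last_stride_defs above scales every entry of the fixed channel table by the requested percentage before building the conv defs; a quick standalone check of that arithmetic at the 25% setting:

    import numpy as np

    channels = np.array([32, 64, 128, 128, 256, 256, 512, 512, 512, 512,
                         512, 512, 1024, 1024], dtype=np.float32)
    scaled = (channels * 0.25).astype(np.int32)
    assert scaled[0] == 8 and scaled[-1] == 256   # 32 -> 8, 1024 -> 256.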
- - Args: - resized_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - """ - return (2.0 / 255.0) * resized_inputs - 1.0 - - def _extract_proposal_features(self, preprocessed_inputs, scope): - """Extracts first stage RPN features. - - Args: - preprocessed_inputs: A [batch, height, width, channels] float32 tensor - representing a batch of images. - scope: A scope name. - - Returns: - rpn_feature_map: A tensor with shape [batch, height, width, depth] - activations: A dictionary mapping feature extractor tensor names to - tensors - - Raises: - InvalidArgumentError: If the spatial size of `preprocessed_inputs` - (height or width) is less than 33. - ValueError: If the created network is missing the required activation. - """ - - preprocessed_inputs.get_shape().assert_has_rank(4) - preprocessed_inputs = shape_utils.check_min_image_dim( - min_dim=33, image_tensor=preprocessed_inputs) - - with slim.arg_scope( - mobilenet_v1.mobilenet_v1_arg_scope( - is_training=self._train_batch_norm, - weight_decay=self._weight_decay)): - with tf.variable_scope('MobilenetV1', - reuse=self._reuse_weights) as scope: - params = {} - if self._skip_last_stride: - params['conv_defs'] = _get_mobilenet_conv_no_last_stride_defs( - conv_depth_ratio_in_percentage=self. - _conv_depth_ratio_in_percentage) - _, activations = mobilenet_v1.mobilenet_v1_base( - preprocessed_inputs, - final_endpoint='Conv2d_11_pointwise', - min_depth=self._min_depth, - depth_multiplier=self._depth_multiplier, - scope=scope, - **params) - return activations['Conv2d_11_pointwise'], activations - - def _extract_box_classifier_features(self, proposal_feature_maps, scope): - """Extracts second stage box classifier features. - - Args: - proposal_feature_maps: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, crop_height, crop_width, depth] - representing the feature map cropped to each proposal. - scope: A scope name (unused). - - Returns: - proposal_classifier_features: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, height, width, depth] - representing box classifier features for each proposal. 
- """ - net = proposal_feature_maps - - conv_depth = 1024 - if self._skip_last_stride: - conv_depth_ratio = float(self._conv_depth_ratio_in_percentage) / 100.0 - conv_depth = int(float(conv_depth) * conv_depth_ratio) - - depth = lambda d: max(int(d * 1.0), 16) - with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights): - with slim.arg_scope( - mobilenet_v1.mobilenet_v1_arg_scope( - is_training=self._train_batch_norm, - weight_decay=self._weight_decay)): - with slim.arg_scope( - [slim.conv2d, slim.separable_conv2d], padding='SAME'): - net = slim.separable_conv2d( - net, - depth(conv_depth), [3, 3], - depth_multiplier=1, - stride=2, - scope='Conv2d_12_pointwise') - return slim.separable_conv2d( - net, - depth(conv_depth), [3, 3], - depth_multiplier=1, - stride=1, - scope='Conv2d_13_pointwise') diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_mobilenet_v1_feature_extractor_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_mobilenet_v1_feature_extractor_test.py deleted file mode 100644 index fcefe616f6f938d11ef333559b213a68ed206ec5..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_mobilenet_v1_feature_extractor_test.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
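In the box-classifier head above, the two trailing separable convolutions shrink by the same percentage whenever skip_last_stride is set; the width computation reduces to a one-liner:

    conv_depth = 1024
    conv_depth_ratio = 50 / 100.0   # conv_depth_ratio_in_percentage = 50.
    assert int(conv_depth * conv_depth_ratio) == 512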
-# ============================================================================== - -"""Tests for faster_rcnn_mobilenet_v1_feature_extractor.""" - -import numpy as np -import tensorflow as tf - -from object_detection.models import faster_rcnn_mobilenet_v1_feature_extractor as faster_rcnn_mobilenet_v1 - - -class FasterRcnnMobilenetV1FeatureExtractorTest(tf.test.TestCase): - - def _build_feature_extractor(self, first_stage_features_stride): - return faster_rcnn_mobilenet_v1.FasterRCNNMobilenetV1FeatureExtractor( - is_training=False, - first_stage_features_stride=first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0) - - def test_extract_proposal_features_returns_expected_size(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [4, 224, 224, 3], maxval=255, dtype=tf.float32) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [4, 14, 14, 512]) - - def test_extract_proposal_features_stride_eight(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=8) - preprocessed_inputs = tf.random_uniform( - [4, 224, 224, 3], maxval=255, dtype=tf.float32) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [4, 14, 14, 512]) - - def test_extract_proposal_features_half_size_input(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [1, 112, 112, 3], maxval=255, dtype=tf.float32) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 7, 7, 512]) - - def test_extract_proposal_features_dies_on_invalid_stride(self): - with self.assertRaises(ValueError): - self._build_feature_extractor(first_stage_features_stride=99) - - def test_extract_proposal_features_dies_on_very_small_images(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3)) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - with self.assertRaises(tf.errors.InvalidArgumentError): - sess.run( - features_shape, - feed_dict={preprocessed_inputs: np.random.rand(4, 32, 32, 3)}) - - def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [224, 224, 3], maxval=255, 
dtype=tf.float32) - with self.assertRaises(ValueError): - feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - - def test_extract_box_classifier_features_returns_expected_size(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - proposal_feature_maps = tf.random_uniform( - [3, 14, 14, 576], maxval=255, dtype=tf.float32) - proposal_classifier_features = ( - feature_extractor.extract_box_classifier_features( - proposal_feature_maps, scope='TestScope')) - features_shape = tf.shape(proposal_classifier_features) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [3, 7, 7, 1024]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_nas_feature_extractor.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_nas_feature_extractor.py deleted file mode 100644 index 5fa6bf7531517b42a85521b71eb3025bd93742a9..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_nas_feature_extractor.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""NASNet Faster R-CNN implementation. - -Learning Transferable Architectures for Scalable Image Recognition -Barret Zoph, Vijay Vasudevan, Jonathon Shlens, Quoc V. Le -https://arxiv.org/abs/1707.07012 -""" - -import tensorflow as tf - -from object_detection.meta_architectures import faster_rcnn_meta_arch -from nets.nasnet import nasnet -from nets.nasnet import nasnet_utils - -arg_scope = tf.contrib.framework.arg_scope -slim = tf.contrib.slim - - -def nasnet_large_arg_scope_for_detection(is_batch_norm_training=False): - """Defines the default arg scope for the NASNet-A Large for object detection. - - This provides a small edit to switch batch norm training on and off. - - Args: - is_batch_norm_training: Boolean indicating whether to train with batch norm. - - Returns: - An `arg_scope` to use for the NASNet Large Model. - """ - imagenet_scope = nasnet.nasnet_large_arg_scope() - with arg_scope(imagenet_scope): - with arg_scope([slim.batch_norm], is_training=is_batch_norm_training) as sc: - return sc - - -# Note: This is largely a copy of _build_nasnet_base inside nasnet.py but -# with special edits to remove instantiation of the stem and the special -# ability to receive as input a pair of hidden states. 
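_build_nasnet_base below resumes the NASNet cell stack mid-network, so its cell counters start offset from zero; the offsets used later in this file (start_cell_num = 12, true_cell_num = 15) reconstruct as follows, mirroring the comments in the extractor code:

    start_cell_num = 12               # First cell past 'Cell_11'.
    stem_cells = 2                    # ImageNet stem cells.
    reductions_before_start = 1       # One reduction cell already built.
    true_cell_num = start_cell_num + stem_cells + reductions_before_start
    assert true_cell_num == 15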
-def _build_nasnet_base(hidden_previous, - hidden, - normal_cell, - reduction_cell, - hparams, - true_cell_num, - start_cell_num): - """Constructs a NASNet image model.""" - - # Find where to place the reduction cells or stride normal cells - reduction_indices = nasnet_utils.calc_reduction_layers( - hparams.num_cells, hparams.num_reduction_layers) - - # Note: The None is prepended to match the behavior of _imagenet_stem() - cell_outputs = [None, hidden_previous, hidden] - net = hidden - - # NOTE: In the nasnet.py code, filter_scaling starts at 1.0. We instead - # start at 2.0 because 1 reduction cell has been created which would - # update the filter_scaling to 2.0. - filter_scaling = 2.0 - - # Run the cells - for cell_num in range(start_cell_num, hparams.num_cells): - stride = 1 - if hparams.skip_reduction_layer_input: - prev_layer = cell_outputs[-2] - if cell_num in reduction_indices: - filter_scaling *= hparams.filter_scaling_rate - net = reduction_cell( - net, - scope='reduction_cell_{}'.format(reduction_indices.index(cell_num)), - filter_scaling=filter_scaling, - stride=2, - prev_layer=cell_outputs[-2], - cell_num=true_cell_num) - true_cell_num += 1 - cell_outputs.append(net) - if not hparams.skip_reduction_layer_input: - prev_layer = cell_outputs[-2] - net = normal_cell( - net, - scope='cell_{}'.format(cell_num), - filter_scaling=filter_scaling, - stride=stride, - prev_layer=prev_layer, - cell_num=true_cell_num) - true_cell_num += 1 - cell_outputs.append(net) - - # Final nonlinearity. - # Note that we have dropped the final pooling, dropout and softmax layers - # from the default nasnet version. - with tf.variable_scope('final_layer'): - net = tf.nn.relu(net) - return net - - -# TODO(shlens): Only fixed_shape_resizer is currently supported for NASNet -# featurization. The reason for this is that nasnet.py only supports -# inputs with fully known shapes. We need to update nasnet.py to handle -# shapes not known at compile time. -class FasterRCNNNASFeatureExtractor( - faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): - """Faster R-CNN with NASNet-A feature extractor implementation.""" - - def __init__(self, - is_training, - first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0): - """Constructor. - - Args: - is_training: See base class. - first_stage_features_stride: See base class. - batch_norm_trainable: See base class. - reuse_weights: See base class. - weight_decay: See base class. - - Raises: - ValueError: If `first_stage_features_stride` is not 16. - """ - if first_stage_features_stride != 16: - raise ValueError('`first_stage_features_stride` must be 16.') - super(FasterRCNNNASFeatureExtractor, self).__init__( - is_training, first_stage_features_stride, batch_norm_trainable, - reuse_weights, weight_decay) - - def preprocess(self, resized_inputs): - """Faster R-CNN with NAS preprocessing. - - Maps pixel values to the range [-1, 1]. - - Args: - resized_inputs: A [batch, height_in, width_in, channels] float32 tensor - representing a batch of images with values between 0 and 255.0. - - Returns: - preprocessed_inputs: A [batch, height_out, width_out, channels] float32 - tensor representing a batch of images. - - """ - return (2.0 / 255.0) * resized_inputs - 1.0 - - def _extract_proposal_features(self, preprocessed_inputs, scope): - """Extracts first stage RPN features. - - Extracts features using the first half of the NASNet network. 
- We construct the network in `align_feature_maps=True` mode, which means - that all VALID paddings in the network are changed to SAME padding so that - the feature maps are aligned. - - Args: - preprocessed_inputs: A [batch, height, width, channels] float32 tensor - representing a batch of images. - scope: A scope name. - - Returns: - rpn_feature_map: A tensor with shape [batch, height, width, depth] - end_points: A dictionary mapping feature extractor tensor names to tensors - - Raises: - ValueError: If the created network is missing the required activation. - """ - del scope - - if len(preprocessed_inputs.get_shape().as_list()) != 4: - raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a ' - 'tensor of shape %s' % preprocessed_inputs.get_shape()) - - with slim.arg_scope(nasnet_large_arg_scope_for_detection( - is_batch_norm_training=self._train_batch_norm)): - with arg_scope([slim.conv2d, - slim.batch_norm, - slim.separable_conv2d], - reuse=self._reuse_weights): - _, end_points = nasnet.build_nasnet_large( - preprocessed_inputs, num_classes=None, - is_training=self._is_training, - final_endpoint='Cell_11') - - # Note that both 'Cell_10' and 'Cell_11' have equal depth = 2016. - rpn_feature_map = tf.concat([end_points['Cell_10'], - end_points['Cell_11']], 3) - - # nasnet.py does not maintain the batch size in the first dimension. - # This work around permits us retaining the batch for below. - batch = preprocessed_inputs.get_shape().as_list()[0] - shape_without_batch = rpn_feature_map.get_shape().as_list()[1:] - rpn_feature_map_shape = [batch] + shape_without_batch - rpn_feature_map.set_shape(rpn_feature_map_shape) - - return rpn_feature_map, end_points - - def _extract_box_classifier_features(self, proposal_feature_maps, scope): - """Extracts second stage box classifier features. - - This function reconstructs the "second half" of the NASNet-A - network after the part defined in `_extract_proposal_features`. - - Args: - proposal_feature_maps: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, crop_height, crop_width, depth] - representing the feature map cropped to each proposal. - scope: A scope name. - - Returns: - proposal_classifier_features: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, height, width, depth] - representing box classifier features for each proposal. - """ - del scope - - # Note that we always feed into 2 layers of equal depth - # where the first N channels corresponds to previous hidden layer - # and the second N channels correspond to the final hidden layer. - hidden_previous, hidden = tf.split(proposal_feature_maps, 2, axis=3) - - # Note that what follows is largely a copy of build_nasnet_large() within - # nasnet.py. We are copying to minimize code pollution in slim. - - # TODO(shlens,skornblith): Determine the appropriate drop path schedule. - # For now the schedule is the default (1.0->0.7 over 250,000 train steps). - hparams = nasnet.large_imagenet_config() - if not self._is_training: - hparams.set_hparam('drop_path_keep_prob', 1.0) - - # Calculate the total number of cells in the network - # -- Add 2 for the reduction cells. - total_num_cells = hparams.num_cells + 2 - # -- And add 2 for the stem cells for ImageNet training. 
-    total_num_cells += 2
-
-    normal_cell = nasnet_utils.NasNetANormalCell(
-        hparams.num_conv_filters, hparams.drop_path_keep_prob,
-        total_num_cells, hparams.total_training_steps)
-    reduction_cell = nasnet_utils.NasNetAReductionCell(
-        hparams.num_conv_filters, hparams.drop_path_keep_prob,
-        total_num_cells, hparams.total_training_steps)
-    with arg_scope([slim.dropout, nasnet_utils.drop_path],
-                   is_training=self._is_training):
-      with arg_scope([slim.batch_norm], is_training=self._train_batch_norm):
-        with arg_scope([slim.avg_pool2d,
-                        slim.max_pool2d,
-                        slim.conv2d,
-                        slim.batch_norm,
-                        slim.separable_conv2d,
-                        nasnet_utils.factorized_reduction,
-                        nasnet_utils.global_avg_pool,
-                        nasnet_utils.get_channel_index,
-                        nasnet_utils.get_channel_dim],
-                       data_format=hparams.data_format):
-
-          # This corresponds to the cell number just past 'Cell_11' used by
-          # _extract_proposal_features().
-          start_cell_num = 12
-          # Note that this number equals:
-          #   start_cell_num + 2 stem cells + 1 reduction cell
-          true_cell_num = 15
-
-          with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
-            net = _build_nasnet_base(hidden_previous,
-                                     hidden,
-                                     normal_cell=normal_cell,
-                                     reduction_cell=reduction_cell,
-                                     hparams=hparams,
-                                     true_cell_num=true_cell_num,
-                                     start_cell_num=start_cell_num)
-
-    proposal_classifier_features = net
-    return proposal_classifier_features
-
-  def restore_from_classification_checkpoint_fn(
-      self,
-      first_stage_feature_extractor_scope,
-      second_stage_feature_extractor_scope):
-    """Returns a map of variables to load from a foreign checkpoint.
-
-    Note that this overrides the default implementation in
-    faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for
-    NASNet-A checkpoints.
-
-    Args:
-      first_stage_feature_extractor_scope: A scope name for the first stage
-        feature extractor.
-      second_stage_feature_extractor_scope: A scope name for the second stage
-        feature extractor.
-
-    Returns:
-      A dict mapping variable names (to load from a checkpoint) to variables in
-      the model graph.
-    """
-    # Note that the NAS checkpoint only contains the moving average version of
-    # the Variables so we need to generate an appropriate dictionary mapping.
-    variables_to_restore = {}
-    for variable in tf.global_variables():
-      if variable.op.name.startswith(
-          first_stage_feature_extractor_scope):
-        var_name = variable.op.name.replace(
-            first_stage_feature_extractor_scope + '/', '')
-        var_name += '/ExponentialMovingAverage'
-        variables_to_restore[var_name] = variable
-      if variable.op.name.startswith(
-          second_stage_feature_extractor_scope):
-        var_name = variable.op.name.replace(
-            second_stage_feature_extractor_scope + '/', '')
-        var_name += '/ExponentialMovingAverage'
-        variables_to_restore[var_name] = variable
-    return variables_to_restore
-
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_nas_feature_extractor_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_nas_feature_extractor_test.py
deleted file mode 100644
index cecfc4f8d115e9f154f098bae2bf63f389b1732b..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_nas_feature_extractor_test.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for models.faster_rcnn_nas_feature_extractor.""" - -import tensorflow as tf - -from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas - - -class FasterRcnnNASFeatureExtractorTest(tf.test.TestCase): - - def _build_feature_extractor(self, first_stage_features_stride): - return frcnn_nas.FasterRCNNNASFeatureExtractor( - is_training=False, - first_stage_features_stride=first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0) - - def test_extract_proposal_features_returns_expected_size(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [1, 299, 299, 3], maxval=255, dtype=tf.float32) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 19, 19, 4032]) - - def test_extract_proposal_features_input_size_224(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [1, 224, 224, 3], maxval=255, dtype=tf.float32) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 14, 14, 4032]) - - def test_extract_proposal_features_input_size_112(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [1, 112, 112, 3], maxval=255, dtype=tf.float32) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 7, 7, 4032]) - - def test_extract_proposal_features_dies_on_invalid_stride(self): - with self.assertRaises(ValueError): - self._build_feature_extractor(first_stage_features_stride=99) - - def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [224, 224, 3], maxval=255, dtype=tf.float32) - with self.assertRaises(ValueError): - feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - - def test_extract_box_classifier_features_returns_expected_size(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - 
    proposal_feature_maps = tf.random_uniform(
-        [2, 17, 17, 1088], maxval=255, dtype=tf.float32)
-    proposal_classifier_features = (
-        feature_extractor.extract_box_classifier_features(
-            proposal_feature_maps, scope='TestScope'))
-    features_shape = tf.shape(proposal_classifier_features)
-
-    init_op = tf.global_variables_initializer()
-    with self.test_session() as sess:
-      sess.run(init_op)
-      features_shape_out = sess.run(features_shape)
-      self.assertAllEqual(features_shape_out, [2, 9, 9, 4032])
-
-
-if __name__ == '__main__':
-  tf.test.main()
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_pnas_feature_extractor.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_pnas_feature_extractor.py
deleted file mode 100644
index b5d0f43afd4cff71880bdedd3615e398a8ef60fb..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_pnas_feature_extractor.py
+++ /dev/null
@@ -1,318 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""PNASNet Faster R-CNN implementation.
-
-Based on PNASNet model: https://arxiv.org/abs/1712.00559
-"""
-
-import tensorflow as tf
-
-from object_detection.meta_architectures import faster_rcnn_meta_arch
-from nets.nasnet import nasnet_utils
-from nets.nasnet import pnasnet
-
-arg_scope = tf.contrib.framework.arg_scope
-slim = tf.contrib.slim
-
-
-def pnasnet_large_arg_scope_for_detection(is_batch_norm_training=False):
-  """Defines the default arg scope for the PNASNet Large for object detection.
-
-  This provides a small edit to switch batch norm training on and off.
-
-  Args:
-    is_batch_norm_training: Boolean indicating whether to train with batch
-      norm.
-
-  Returns:
-    An `arg_scope` to use for the PNASNet Large Model.
-  """
-  imagenet_scope = pnasnet.pnasnet_large_arg_scope()
-  with arg_scope(imagenet_scope):
-    with arg_scope([slim.batch_norm],
-                   is_training=is_batch_norm_training) as sc:
-      return sc
-
-
-def _filter_scaling(reduction_indices, start_cell_num):
-  """Computes the expected filter scaling at a given PNASNet cell.
-
-  In the pnasnet.py code, filter_scaling starts at 1.0. Here we instead make
-  it depend on the starting cell: before any reduction cell, filter_scaling
-  is 1.0, and it is multiplied by 2.0 for each reduction cell that precedes
-  start_cell_num.
-
-  Args:
-    reduction_indices: list of int indices.
-    start_cell_num: int.
-
-  Returns:
-    filter_scaling: float.
-  """
-  filter_scaling = 1.0
-  for ind in reduction_indices:
-    if ind < start_cell_num:
-      filter_scaling *= 2.0
-  return filter_scaling
-
-
-# Note: This is largely a copy of _build_pnasnet_base inside pnasnet.py, but
-# with special edits to remove the instantiation of the stem and to add the
-# ability to receive a pair of hidden states as input.
It constructs only -# a sub-network from the original PNASNet model, starting from the -# start_cell_num cell and with modified final layer. -def _build_pnasnet_base( - hidden_previous, hidden, normal_cell, hparams, true_cell_num, - start_cell_num): - """Constructs a PNASNet image model for proposal classifier features.""" - - # Find where to place the reduction cells or stride normal cells - reduction_indices = nasnet_utils.calc_reduction_layers( - hparams.num_cells, hparams.num_reduction_layers) - filter_scaling = _filter_scaling(reduction_indices, start_cell_num) - - # Note: The None is prepended to match the behavior of _imagenet_stem() - cell_outputs = [None, hidden_previous, hidden] - net = hidden - - # Run the cells - for cell_num in range(start_cell_num, hparams.num_cells): - is_reduction = cell_num in reduction_indices - stride = 2 if is_reduction else 1 - if is_reduction: filter_scaling *= hparams.filter_scaling_rate - prev_layer = cell_outputs[-2] - net = normal_cell( - net, - scope='cell_{}'.format(cell_num), - filter_scaling=filter_scaling, - stride=stride, - prev_layer=prev_layer, - cell_num=true_cell_num) - true_cell_num += 1 - cell_outputs.append(net) - - # Final nonlinearity. - # Note that we have dropped the final pooling, dropout and softmax layers - # from the default pnasnet version. - with tf.variable_scope('final_layer'): - net = tf.nn.relu(net) - return net - - -# TODO(shlens): Only fixed_shape_resizer is currently supported for PNASNet -# featurization. The reason for this is that pnasnet.py only supports -# inputs with fully known shapes. We need to update pnasnet.py to handle -# shapes not known at compile time. -class FasterRCNNPNASFeatureExtractor( - faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): - """Faster R-CNN with PNASNet feature extractor implementation.""" - - def __init__(self, - is_training, - first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0): - """Constructor. - - Args: - is_training: See base class. - first_stage_features_stride: See base class. - batch_norm_trainable: See base class. - reuse_weights: See base class. - weight_decay: See base class. - - Raises: - ValueError: If `first_stage_features_stride` is not 16. - """ - if first_stage_features_stride != 16: - raise ValueError('`first_stage_features_stride` must be 16.') - super(FasterRCNNPNASFeatureExtractor, self).__init__( - is_training, first_stage_features_stride, batch_norm_trainable, - reuse_weights, weight_decay) - - def preprocess(self, resized_inputs): - """Faster R-CNN with PNAS preprocessing. - - Maps pixel values to the range [-1, 1]. - - Args: - resized_inputs: A [batch, height_in, width_in, channels] float32 tensor - representing a batch of images with values between 0 and 255.0. - - Returns: - preprocessed_inputs: A [batch, height_out, width_out, channels] float32 - tensor representing a batch of images. - - """ - return (2.0 / 255.0) * resized_inputs - 1.0 - - def _extract_proposal_features(self, preprocessed_inputs, scope): - """Extracts first stage RPN features. - - Extracts features using the first half of the PNASNet network. - We construct the network in `align_feature_maps=True` mode, which means - that all VALID paddings in the network are changed to SAME padding so that - the feature maps are aligned. - - Args: - preprocessed_inputs: A [batch, height, width, channels] float32 tensor - representing a batch of images. - scope: A scope name. 
-
-    Returns:
-      rpn_feature_map: A tensor with shape [batch, height, width, depth]
-      end_points: A dictionary mapping feature extractor tensor names to
-        tensors
-
-    Raises:
-      ValueError: If the created network is missing the required activation.
-    """
-    del scope
-
-    if len(preprocessed_inputs.get_shape().as_list()) != 4:
-      raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a '
-                       'tensor of shape %s' % preprocessed_inputs.get_shape())
-
-    with slim.arg_scope(pnasnet_large_arg_scope_for_detection(
-        is_batch_norm_training=self._train_batch_norm)):
-      with arg_scope([slim.conv2d,
-                      slim.batch_norm,
-                      slim.separable_conv2d],
-                     reuse=self._reuse_weights):
-        _, end_points = pnasnet.build_pnasnet_large(
-            preprocessed_inputs, num_classes=None,
-            is_training=self._is_training,
-            final_endpoint='Cell_7')
-
-    # Note that both 'Cell_6' and 'Cell_7' have equal depth = 2160.
-    # Cell_7 is the last cell before the second reduction.
-    rpn_feature_map = tf.concat([end_points['Cell_6'],
-                                 end_points['Cell_7']], 3)
-
-    # pnasnet.py does not maintain the batch size in the first dimension.
-    # This workaround lets us retain the batch dimension for use below.
-    batch = preprocessed_inputs.get_shape().as_list()[0]
-    shape_without_batch = rpn_feature_map.get_shape().as_list()[1:]
-    rpn_feature_map_shape = [batch] + shape_without_batch
-    rpn_feature_map.set_shape(rpn_feature_map_shape)
-
-    return rpn_feature_map, end_points
-
-  def _extract_box_classifier_features(self, proposal_feature_maps, scope):
-    """Extracts second stage box classifier features.
-
-    This function reconstructs the "second half" of the PNASNet
-    network after the part defined in `_extract_proposal_features`.
-
-    Args:
-      proposal_feature_maps: A 4-D float tensor with shape
-        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
-        representing the feature map cropped to each proposal.
-      scope: A scope name.
-
-    Returns:
-      proposal_classifier_features: A 4-D float tensor with shape
-        [batch_size * self.max_num_proposals, height, width, depth]
-        representing box classifier features for each proposal.
-    """
-    del scope
-
-    # Number of used stem cells.
-    num_stem_cells = 2
-
-    # Note that we always feed into 2 layers of equal depth
-    # where the first N channels correspond to the previous hidden layer
-    # and the second N channels correspond to the final hidden layer.
-    hidden_previous, hidden = tf.split(proposal_feature_maps, 2, axis=3)
-
-    # Note that what follows is largely a copy of build_pnasnet_large() within
-    # pnasnet.py. We are copying to minimize code pollution in slim.
-
-    # TODO(shlens,skornblith): Determine the appropriate drop path schedule.
-    # For now the schedule is the default (1.0->0.7 over 250,000 train steps).
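For context on the TODO above: NASNet-style scheduled drop path decays the keep probability linearly over training, so the default schedule named in the comment can be sketched as below. This is a minimal illustration only, assuming the stated 1.0 -> 0.7 decay over 250,000 steps; the hypothetical helper name is ours, and the additional per-cell rescaling applied inside nasnet_utils is omitted. The `set_hparam('drop_path_keep_prob', 1.0)` call just below pins the keep probability to 1.0 at eval time so inference is deterministic.

def drop_path_keep_prob_at(global_step, total_steps=250000.0,
                           final_keep_prob=0.7):
  # Linear decay: 1.0 at step 0, final_keep_prob once total_steps is reached.
  progress = min(float(global_step) / total_steps, 1.0)
  return 1.0 - progress * (1.0 - final_keep_prob)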
- hparams = pnasnet.large_imagenet_config() - if not self._is_training: - hparams.set_hparam('drop_path_keep_prob', 1.0) - - # Calculate the total number of cells in the network - total_num_cells = hparams.num_cells + num_stem_cells - - normal_cell = pnasnet.PNasNetNormalCell( - hparams.num_conv_filters, hparams.drop_path_keep_prob, - total_num_cells, hparams.total_training_steps) - with arg_scope([slim.dropout, nasnet_utils.drop_path], - is_training=self._is_training): - with arg_scope([slim.batch_norm], is_training=self._train_batch_norm): - with arg_scope([slim.avg_pool2d, - slim.max_pool2d, - slim.conv2d, - slim.batch_norm, - slim.separable_conv2d, - nasnet_utils.factorized_reduction, - nasnet_utils.global_avg_pool, - nasnet_utils.get_channel_index, - nasnet_utils.get_channel_dim], - data_format=hparams.data_format): - - # This corresponds to the cell number just past 'Cell_7' used by - # _extract_proposal_features(). - start_cell_num = 8 - true_cell_num = start_cell_num + num_stem_cells - - with slim.arg_scope(pnasnet.pnasnet_large_arg_scope()): - net = _build_pnasnet_base( - hidden_previous, - hidden, - normal_cell=normal_cell, - hparams=hparams, - true_cell_num=true_cell_num, - start_cell_num=start_cell_num) - - proposal_classifier_features = net - return proposal_classifier_features - - def restore_from_classification_checkpoint_fn( - self, - first_stage_feature_extractor_scope, - second_stage_feature_extractor_scope): - """Returns a map of variables to load from a foreign checkpoint. - - Note that this overrides the default implementation in - faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for - PNASNet checkpoints. - - Args: - first_stage_feature_extractor_scope: A scope name for the first stage - feature extractor. - second_stage_feature_extractor_scope: A scope name for the second stage - feature extractor. - - Returns: - A dict mapping variable names (to load from a checkpoint) to variables in - the model graph. - """ - variables_to_restore = {} - for variable in tf.global_variables(): - if variable.op.name.startswith( - first_stage_feature_extractor_scope): - var_name = variable.op.name.replace( - first_stage_feature_extractor_scope + '/', '') - var_name += '/ExponentialMovingAverage' - variables_to_restore[var_name] = variable - if variable.op.name.startswith( - second_stage_feature_extractor_scope): - var_name = variable.op.name.replace( - second_stage_feature_extractor_scope + '/', '') - var_name += '/ExponentialMovingAverage' - variables_to_restore[var_name] = variable - return variables_to_restore diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_pnas_feature_extractor_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_pnas_feature_extractor_test.py deleted file mode 100644 index 6bb368041ceba98a986eeeea85711c69fc94b288..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_pnas_feature_extractor_test.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for models.faster_rcnn_pnas_feature_extractor.""" - -import tensorflow as tf - -from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas - - -class FasterRcnnPNASFeatureExtractorTest(tf.test.TestCase): - - def _build_feature_extractor(self, first_stage_features_stride): - return frcnn_pnas.FasterRCNNPNASFeatureExtractor( - is_training=False, - first_stage_features_stride=first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0) - - def test_extract_proposal_features_returns_expected_size(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [1, 299, 299, 3], maxval=255, dtype=tf.float32) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 19, 19, 4320]) - - def test_extract_proposal_features_input_size_224(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [1, 224, 224, 3], maxval=255, dtype=tf.float32) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 14, 14, 4320]) - - def test_extract_proposal_features_input_size_112(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [1, 112, 112, 3], maxval=255, dtype=tf.float32) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 7, 7, 4320]) - - def test_extract_proposal_features_dies_on_invalid_stride(self): - with self.assertRaises(ValueError): - self._build_feature_extractor(first_stage_features_stride=99) - - def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [224, 224, 3], maxval=255, dtype=tf.float32) - with self.assertRaises(ValueError): - feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - - def test_extract_box_classifier_features_returns_expected_size(self): - feature_extractor = self._build_feature_extractor( - 
first_stage_features_stride=16) - proposal_feature_maps = tf.random_uniform( - [2, 17, 17, 1088], maxval=255, dtype=tf.float32) - proposal_classifier_features = ( - feature_extractor.extract_box_classifier_features( - proposal_feature_maps, scope='TestScope')) - features_shape = tf.shape(proposal_classifier_features) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [2, 9, 9, 4320]) - - def test_filter_scaling_computation(self): - expected_filter_scaling = { - ((4, 8), 2): 1.0, - ((4, 8), 7): 2.0, - ((4, 8), 8): 2.0, - ((4, 8), 9): 4.0 - } - for args, filter_scaling in expected_filter_scaling.items(): - reduction_indices, start_cell_num = args - self.assertAlmostEqual( - frcnn_pnas._filter_scaling(reduction_indices, start_cell_num), - filter_scaling) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py deleted file mode 100644 index 286deae3de7f8edfb92073fff263e1f1e6369c6d..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Resnet V1 Faster R-CNN implementation. - -See "Deep Residual Learning for Image Recognition" by He et al., 2015. -https://arxiv.org/abs/1512.03385 - -Note: this implementation assumes that the classification checkpoint used -to finetune this model is trained using the same configuration as that of -the MSRA provided checkpoints -(see https://github.com/KaimingHe/deep-residual-networks), e.g., with -same preprocessing, batch norm scaling, etc. -""" -import tensorflow as tf - -from object_detection.meta_architectures import faster_rcnn_meta_arch -from nets import resnet_utils -from nets import resnet_v1 - -slim = tf.contrib.slim - - -class FasterRCNNResnetV1FeatureExtractor( - faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): - """Faster R-CNN Resnet V1 feature extractor implementation.""" - - def __init__(self, - architecture, - resnet_model, - is_training, - first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0): - """Constructor. - - Args: - architecture: Architecture name of the Resnet V1 model. - resnet_model: Definition of the Resnet V1 model. - is_training: See base class. - first_stage_features_stride: See base class. - batch_norm_trainable: See base class. - reuse_weights: See base class. - weight_decay: See base class. - - Raises: - ValueError: If `first_stage_features_stride` is not 8 or 16. 
- """ - if first_stage_features_stride != 8 and first_stage_features_stride != 16: - raise ValueError('`first_stage_features_stride` must be 8 or 16.') - self._architecture = architecture - self._resnet_model = resnet_model - super(FasterRCNNResnetV1FeatureExtractor, self).__init__( - is_training, first_stage_features_stride, batch_norm_trainable, - reuse_weights, weight_decay) - - def preprocess(self, resized_inputs): - """Faster R-CNN Resnet V1 preprocessing. - - VGG style channel mean subtraction as described here: - https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md - - Args: - resized_inputs: A [batch, height_in, width_in, channels] float32 tensor - representing a batch of images with values between 0 and 255.0. - - Returns: - preprocessed_inputs: A [batch, height_out, width_out, channels] float32 - tensor representing a batch of images. - - """ - channel_means = [123.68, 116.779, 103.939] - return resized_inputs - [[channel_means]] - - def _extract_proposal_features(self, preprocessed_inputs, scope): - """Extracts first stage RPN features. - - Args: - preprocessed_inputs: A [batch, height, width, channels] float32 tensor - representing a batch of images. - scope: A scope name. - - Returns: - rpn_feature_map: A tensor with shape [batch, height, width, depth] - activations: A dictionary mapping feature extractor tensor names to - tensors - - Raises: - InvalidArgumentError: If the spatial size of `preprocessed_inputs` - (height or width) is less than 33. - ValueError: If the created network is missing the required activation. - """ - if len(preprocessed_inputs.get_shape().as_list()) != 4: - raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a ' - 'tensor of shape %s' % preprocessed_inputs.get_shape()) - shape_assert = tf.Assert( - tf.logical_and( - tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33), - tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)), - ['image size must at least be 33 in both height and width.']) - - with tf.control_dependencies([shape_assert]): - # Disables batchnorm for fine-tuning with smaller batch sizes. - # TODO(chensun): Figure out if it is needed when image - # batch size is bigger. - with slim.arg_scope( - resnet_utils.resnet_arg_scope( - batch_norm_epsilon=1e-5, - batch_norm_scale=True, - weight_decay=self._weight_decay)): - with tf.variable_scope( - self._architecture, reuse=self._reuse_weights) as var_scope: - _, activations = self._resnet_model( - preprocessed_inputs, - num_classes=None, - is_training=self._train_batch_norm, - global_pool=False, - output_stride=self._first_stage_features_stride, - spatial_squeeze=False, - scope=var_scope) - - handle = scope + '/%s/block3' % self._architecture - return activations[handle], activations - - def _extract_box_classifier_features(self, proposal_feature_maps, scope): - """Extracts second stage box classifier features. - - Args: - proposal_feature_maps: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, crop_height, crop_width, depth] - representing the feature map cropped to each proposal. - scope: A scope name (unused). - - Returns: - proposal_classifier_features: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, height, width, depth] - representing box classifier features for each proposal. 
- """ - with tf.variable_scope(self._architecture, reuse=self._reuse_weights): - with slim.arg_scope( - resnet_utils.resnet_arg_scope( - batch_norm_epsilon=1e-5, - batch_norm_scale=True, - weight_decay=self._weight_decay)): - with slim.arg_scope([slim.batch_norm], - is_training=self._train_batch_norm): - blocks = [ - resnet_utils.Block('block4', resnet_v1.bottleneck, [{ - 'depth': 2048, - 'depth_bottleneck': 512, - 'stride': 1 - }] * 3) - ] - proposal_classifier_features = resnet_utils.stack_blocks_dense( - proposal_feature_maps, blocks) - return proposal_classifier_features - - -class FasterRCNNResnet50FeatureExtractor(FasterRCNNResnetV1FeatureExtractor): - """Faster R-CNN Resnet 50 feature extractor implementation.""" - - def __init__(self, - is_training, - first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0): - """Constructor. - - Args: - is_training: See base class. - first_stage_features_stride: See base class. - batch_norm_trainable: See base class. - reuse_weights: See base class. - weight_decay: See base class. - - Raises: - ValueError: If `first_stage_features_stride` is not 8 or 16, - or if `architecture` is not supported. - """ - super(FasterRCNNResnet50FeatureExtractor, self).__init__( - 'resnet_v1_50', resnet_v1.resnet_v1_50, is_training, - first_stage_features_stride, batch_norm_trainable, - reuse_weights, weight_decay) - - -class FasterRCNNResnet101FeatureExtractor(FasterRCNNResnetV1FeatureExtractor): - """Faster R-CNN Resnet 101 feature extractor implementation.""" - - def __init__(self, - is_training, - first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0): - """Constructor. - - Args: - is_training: See base class. - first_stage_features_stride: See base class. - batch_norm_trainable: See base class. - reuse_weights: See base class. - weight_decay: See base class. - - Raises: - ValueError: If `first_stage_features_stride` is not 8 or 16, - or if `architecture` is not supported. - """ - super(FasterRCNNResnet101FeatureExtractor, self).__init__( - 'resnet_v1_101', resnet_v1.resnet_v1_101, is_training, - first_stage_features_stride, batch_norm_trainable, - reuse_weights, weight_decay) - - -class FasterRCNNResnet152FeatureExtractor(FasterRCNNResnetV1FeatureExtractor): - """Faster R-CNN Resnet 152 feature extractor implementation.""" - - def __init__(self, - is_training, - first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0): - """Constructor. - - Args: - is_training: See base class. - first_stage_features_stride: See base class. - batch_norm_trainable: See base class. - reuse_weights: See base class. - weight_decay: See base class. - - Raises: - ValueError: If `first_stage_features_stride` is not 8 or 16, - or if `architecture` is not supported. 
- """ - super(FasterRCNNResnet152FeatureExtractor, self).__init__( - 'resnet_v1_152', resnet_v1.resnet_v1_152, is_training, - first_stage_features_stride, batch_norm_trainable, - reuse_weights, weight_decay) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_resnet_v1_feature_extractor_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_resnet_v1_feature_extractor_test.py deleted file mode 100644 index e2a336f0a8c3bc595670729bc8262ff40fbc366b..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/faster_rcnn_resnet_v1_feature_extractor_test.py +++ /dev/null @@ -1,137 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.models.faster_rcnn_resnet_v1_feature_extractor.""" - -import numpy as np -import tensorflow as tf - -from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as faster_rcnn_resnet_v1 - - -class FasterRcnnResnetV1FeatureExtractorTest(tf.test.TestCase): - - def _build_feature_extractor(self, - first_stage_features_stride, - architecture='resnet_v1_101'): - feature_extractor_map = { - 'resnet_v1_50': - faster_rcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor, - 'resnet_v1_101': - faster_rcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor, - 'resnet_v1_152': - faster_rcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor - } - return feature_extractor_map[architecture]( - is_training=False, - first_stage_features_stride=first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0) - - def test_extract_proposal_features_returns_expected_size(self): - for architecture in ['resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152']: - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16, architecture=architecture) - preprocessed_inputs = tf.random_uniform( - [4, 224, 224, 3], maxval=255, dtype=tf.float32) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [4, 14, 14, 1024]) - - def test_extract_proposal_features_stride_eight(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=8) - preprocessed_inputs = tf.random_uniform( - [4, 224, 224, 3], maxval=255, dtype=tf.float32) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) 
- self.assertAllEqual(features_shape_out, [4, 28, 28, 1024]) - - def test_extract_proposal_features_half_size_input(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [1, 112, 112, 3], maxval=255, dtype=tf.float32) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 7, 7, 1024]) - - def test_extract_proposal_features_dies_on_invalid_stride(self): - with self.assertRaises(ValueError): - self._build_feature_extractor(first_stage_features_stride=99) - - def test_extract_proposal_features_dies_on_very_small_images(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3)) - rpn_feature_map, _ = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - with self.assertRaises(tf.errors.InvalidArgumentError): - sess.run( - features_shape, - feed_dict={preprocessed_inputs: np.random.rand(4, 32, 32, 3)}) - - def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [224, 224, 3], maxval=255, dtype=tf.float32) - with self.assertRaises(ValueError): - feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - - def test_extract_box_classifier_features_returns_expected_size(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - proposal_feature_maps = tf.random_uniform( - [3, 7, 7, 1024], maxval=255, dtype=tf.float32) - proposal_classifier_features = ( - feature_extractor.extract_box_classifier_features( - proposal_feature_maps, scope='TestScope')) - features_shape = tf.shape(proposal_classifier_features) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [3, 7, 7, 2048]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/feature_map_generators.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/feature_map_generators.py deleted file mode 100644 index 2c72eeb4279c7629fe6894bd6c44d9e97cafca7d..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/feature_map_generators.py +++ /dev/null @@ -1,225 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Functions to generate a list of feature maps based on image features.
-
-Provides several feature map generators that can be used to build object
-detection feature extractors.
-
-Object detection feature extractors are usually built by stacking two
-components: a base feature extractor, such as Inception V3, and a feature map
-generator. Feature map generators build on the base feature extractors and
-produce a list of final feature maps.
-"""
-import collections
-import tensorflow as tf
-from object_detection.utils import ops
-slim = tf.contrib.slim
-
-
-def get_depth_fn(depth_multiplier, min_depth):
-  """Builds a callable to compute depth (output channels) of conv filters.
-
-  Args:
-    depth_multiplier: a multiplier for the nominal depth.
-    min_depth: a lower bound on the depth of filters.
-
-  Returns:
-    A callable that takes in a nominal depth and returns the depth to use.
-  """
-  def multiply_depth(depth):
-    new_depth = int(depth * depth_multiplier)
-    return max(new_depth, min_depth)
-  return multiply_depth
-
-
-def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
-                                  min_depth, insert_1x1_conv, image_features):
-  """Generates multi resolution feature maps from input image features.
-
-  Generates multi-scale feature maps for detection as in the SSD papers by
-  Liu et al: https://arxiv.org/pdf/1512.02325v2.pdf, See Sec 2.1.
-
-  More specifically, it performs the following two tasks:
-  1) If a layer name is provided in the configuration, returns that layer as a
-     feature map.
-  2) If a layer name is left as an empty string, constructs a new feature map
-     based on the spatial shape and depth configuration. Note that the current
-     implementation only supports generating new layers using convolution of
-     stride 2, resulting in a spatial resolution reduction by a factor of 2.
-     By default the convolution kernel size is set to 3, and it can be
-     customized by the caller.
-
-  An example of the configuration for Inception V3:
-  {
-    'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
-    'layer_depth': [-1, -1, -1, 512, 256, 128]
-  }
-
-  Args:
-    feature_map_layout: Dictionary of specifications for the feature map
-      layouts in the following format (Inception V2/V3 respectively):
-      {
-        'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
-        'layer_depth': [-1, -1, -1, 512, 256, 128]
-      }
-      or
-      {
-        'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', '', ''],
-        'layer_depth': [-1, -1, -1, 512, 256, 128]
-      }
-      If 'from_layer' is specified, the specified feature map is directly used
-      as a box predictor layer, and the layer_depth is directly inferred from
-      the feature map (instead of using the provided 'layer_depth' parameter).
-      In this case, our convention is to set 'layer_depth' to -1 for clarity.
-      Otherwise, if 'from_layer' is an empty string, then the box predictor
-      layer will be built from the previous layer using convolution
-      operations. Note that the current implementation only supports
-      generating new layers using convolutions of stride 2 (resulting in a
-      spatial resolution reduction by a factor of 2), and will be extended to
-      a more flexible design. The convolution kernel size is set to 3 by
-      default, and can be customized by the 'conv_kernel_size' parameter
-      (similarly, 'conv_kernel_size' should be set to -1 if 'from_layer' is
-      specified). The created convolution operation will be a normal 2D
-      convolution by default, and a depthwise convolution followed by 1x1
-      convolution if 'use_depthwise' is set to True.
-    depth_multiplier: Depth multiplier for convolutional layers.
-    min_depth: Minimum depth for convolutional layers.
-    insert_1x1_conv: A boolean indicating whether an additional 1x1 convolution
-      should be inserted before shrinking the feature map.
-    image_features: A dictionary of handles to activation tensors from the
-      base feature extractor.
-
-  Returns:
-    feature_maps: an OrderedDict mapping keys (feature map names) to
-      tensors where each tensor has shape [batch, height_i, width_i, depth_i].
-
-  Raises:
-    ValueError: if the number of entries in 'from_layer' and
-      'layer_depth' do not match.
-    ValueError: if the generated layer does not have the same resolution
-      as specified.
-  """
-  depth_fn = get_depth_fn(depth_multiplier, min_depth)
-
-  feature_map_keys = []
-  feature_maps = []
-  base_from_layer = ''
-  use_explicit_padding = False
-  if 'use_explicit_padding' in feature_map_layout:
-    use_explicit_padding = feature_map_layout['use_explicit_padding']
-  use_depthwise = False
-  if 'use_depthwise' in feature_map_layout:
-    use_depthwise = feature_map_layout['use_depthwise']
-  for index, from_layer in enumerate(feature_map_layout['from_layer']):
-    layer_depth = feature_map_layout['layer_depth'][index]
-    conv_kernel_size = 3
-    if 'conv_kernel_size' in feature_map_layout:
-      conv_kernel_size = feature_map_layout['conv_kernel_size'][index]
-    if from_layer:
-      feature_map = image_features[from_layer]
-      base_from_layer = from_layer
-      feature_map_keys.append(from_layer)
-    else:
-      pre_layer = feature_maps[-1]
-      intermediate_layer = pre_layer
-      if insert_1x1_conv:
-        layer_name = '{}_1_Conv2d_{}_1x1_{}'.format(
-            base_from_layer, index, depth_fn(layer_depth / 2))
-        intermediate_layer = slim.conv2d(
-            pre_layer,
-            depth_fn(layer_depth / 2), [1, 1],
-            padding='SAME',
-            stride=1,
-            scope=layer_name)
-      layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format(
-          base_from_layer, index, conv_kernel_size, conv_kernel_size,
-          depth_fn(layer_depth))
-      stride = 2
-      padding = 'SAME'
-      if use_explicit_padding:
-        padding = 'VALID'
-        intermediate_layer = ops.fixed_padding(
-            intermediate_layer, conv_kernel_size)
-      if use_depthwise:
-        feature_map = slim.separable_conv2d(
-            intermediate_layer,
-            None, [conv_kernel_size, conv_kernel_size],
-            depth_multiplier=1,
-            padding=padding,
-            stride=stride,
-            scope=layer_name + '_depthwise')
-        feature_map = slim.conv2d(
-            feature_map,
-            depth_fn(layer_depth), [1, 1],
-            padding='SAME',
-            stride=1,
-            scope=layer_name)
-      else:
-        feature_map = slim.conv2d(
-            intermediate_layer,
-            depth_fn(layer_depth), [conv_kernel_size, conv_kernel_size],
-            padding=padding,
-            stride=stride,
-            scope=layer_name)
-      feature_map_keys.append(layer_name)
-    feature_maps.append(feature_map)
-  return collections.OrderedDict(
-      [(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
-
-
-def fpn_top_down_feature_maps(image_features, depth, scope=None):
-  """Generates `top-down` feature maps for Feature Pyramid Networks.
-
-  See https://arxiv.org/abs/1612.03144 for details.
-
-  Args:
-    image_features: list of tuples of (tensor_name, image_feature_tensor).
-      Spatial resolutions of successive tensors must reduce exactly by a
-      factor of 2.
-    depth: depth of output feature maps.
-    scope: A scope name to wrap this op under.
- - Returns: - feature_maps: an OrderedDict mapping keys (feature map names) to - tensors where each tensor has shape [batch, height_i, width_i, depth_i]. - """ - with tf.name_scope(scope, 'top_down'): - num_levels = len(image_features) - output_feature_maps_list = [] - output_feature_map_keys = [] - with slim.arg_scope( - [slim.conv2d], padding='SAME', stride=1): - top_down = slim.conv2d( - image_features[-1][1], - depth, [1, 1], activation_fn=None, normalizer_fn=None, - scope='projection_%d' % num_levels) - output_feature_maps_list.append(top_down) - output_feature_map_keys.append( - 'top_down_%s' % image_features[-1][0]) - - for level in reversed(range(num_levels - 1)): - top_down = ops.nearest_neighbor_upsampling(top_down, 2) - residual = slim.conv2d( - image_features[level][1], depth, [1, 1], - activation_fn=None, normalizer_fn=None, - scope='projection_%d' % (level + 1)) - top_down += residual - output_feature_maps_list.append(slim.conv2d( - top_down, - depth, [3, 3], - scope='smoothing_%d' % (level + 1))) - output_feature_map_keys.append('top_down_%s' % image_features[level][0]) - return collections.OrderedDict( - reversed(zip(output_feature_map_keys, output_feature_maps_list))) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/feature_map_generators_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/feature_map_generators_test.py deleted file mode 100644 index 540bc4efce83a3ec50f581aee01426064a1bc065..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/feature_map_generators_test.py +++ /dev/null @@ -1,179 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for feature map generators.""" - -import tensorflow as tf - -from object_detection.models import feature_map_generators - -INCEPTION_V2_LAYOUT = { - 'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''], - 'layer_depth': [-1, -1, -1, 512, 256, 256], - 'anchor_strides': [16, 32, 64, -1, -1, -1], - 'layer_target_norm': [20.0, -1, -1, -1, -1, -1], -} - -INCEPTION_V3_LAYOUT = { - 'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''], - 'layer_depth': [-1, -1, -1, 512, 256, 128], - 'anchor_strides': [16, 32, 64, -1, -1, -1], - 'aspect_ratios': [1.0, 2.0, 1.0/2, 3.0, 1.0/3] -} - -EMBEDDED_SSD_MOBILENET_V1_LAYOUT = { - 'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''], - 'layer_depth': [-1, -1, 512, 256, 256], - 'conv_kernel_size': [-1, -1, 3, 3, 2], -} - - -# TODO(rathodv): add tests with different anchor strides. 
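To make the layout constants above concrete, here is a minimal sketch of the call the first test below performs, reusing the INCEPTION_V2_LAYOUT dict defined above. It is an illustration only, assuming a TF 1.x / tf.contrib environment; the extra layout keys ('anchor_strides', 'layer_target_norm') are ignored by the generator.

import tensorflow as tf

from object_detection.models import feature_map_generators

# Stand-in Inception V2 endpoints with the spatial shapes the tests use.
image_features = {
    'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
    'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
    'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32),
}
feature_maps = feature_map_generators.multi_resolution_feature_maps(
    feature_map_layout=INCEPTION_V2_LAYOUT,
    depth_multiplier=1,
    min_depth=32,
    insert_1x1_conv=True,
    image_features=image_features)
# feature_maps is an OrderedDict: the three reused endpoints come first,
# followed by new stride-2 layers at 4x4, 2x2 and 1x1 spatial resolution.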
-class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase): - - def test_get_expected_feature_map_shapes_with_inception_v2(self): - image_features = { - 'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32), - 'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32), - 'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32) - } - feature_maps = feature_map_generators.multi_resolution_feature_maps( - feature_map_layout=INCEPTION_V2_LAYOUT, - depth_multiplier=1, - min_depth=32, - insert_1x1_conv=True, - image_features=image_features) - - expected_feature_map_shapes = { - 'Mixed_3c': (4, 28, 28, 256), - 'Mixed_4c': (4, 14, 14, 576), - 'Mixed_5c': (4, 7, 7, 1024), - 'Mixed_5c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512), - 'Mixed_5c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256), - 'Mixed_5c_2_Conv2d_5_3x3_s2_256': (4, 1, 1, 256)} - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - out_feature_maps = sess.run(feature_maps) - out_feature_map_shapes = dict( - (key, value.shape) for key, value in out_feature_maps.items()) - self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes) - - def test_get_expected_feature_map_shapes_with_inception_v3(self): - image_features = { - 'Mixed_5d': tf.random_uniform([4, 35, 35, 256], dtype=tf.float32), - 'Mixed_6e': tf.random_uniform([4, 17, 17, 576], dtype=tf.float32), - 'Mixed_7c': tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32) - } - - feature_maps = feature_map_generators.multi_resolution_feature_maps( - feature_map_layout=INCEPTION_V3_LAYOUT, - depth_multiplier=1, - min_depth=32, - insert_1x1_conv=True, - image_features=image_features) - - expected_feature_map_shapes = { - 'Mixed_5d': (4, 35, 35, 256), - 'Mixed_6e': (4, 17, 17, 576), - 'Mixed_7c': (4, 8, 8, 1024), - 'Mixed_7c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512), - 'Mixed_7c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256), - 'Mixed_7c_2_Conv2d_5_3x3_s2_128': (4, 1, 1, 128)} - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - out_feature_maps = sess.run(feature_maps) - out_feature_map_shapes = dict( - (key, value.shape) for key, value in out_feature_maps.items()) - self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes) - - def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1( - self): - image_features = { - 'Conv2d_11_pointwise': tf.random_uniform([4, 16, 16, 512], - dtype=tf.float32), - 'Conv2d_13_pointwise': tf.random_uniform([4, 8, 8, 1024], - dtype=tf.float32), - } - - feature_maps = feature_map_generators.multi_resolution_feature_maps( - feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT, - depth_multiplier=1, - min_depth=32, - insert_1x1_conv=True, - image_features=image_features) - - expected_feature_map_shapes = { - 'Conv2d_11_pointwise': (4, 16, 16, 512), - 'Conv2d_13_pointwise': (4, 8, 8, 1024), - 'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_512': (4, 4, 4, 512), - 'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 2, 2, 256), - 'Conv2d_13_pointwise_2_Conv2d_4_2x2_s2_256': (4, 1, 1, 256)} - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - out_feature_maps = sess.run(feature_maps) - out_feature_map_shapes = dict( - (key, value.shape) for key, value in out_feature_maps.items()) - self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes) - - -class FPNFeatureMapGeneratorTest(tf.test.TestCase): - - def test_get_expected_feature_map_shapes(self): - 
image_features = [ - ('block2', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)), - ('block3', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)), - ('block4', tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)), - ('block5', tf.random_uniform([4, 1, 1, 256], dtype=tf.float32)) - ] - feature_maps = feature_map_generators.fpn_top_down_feature_maps( - image_features=image_features, depth=128) - - expected_feature_map_shapes = { - 'top_down_block2': (4, 8, 8, 128), - 'top_down_block3': (4, 4, 4, 128), - 'top_down_block4': (4, 2, 2, 128), - 'top_down_block5': (4, 1, 1, 128) - } - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - out_feature_maps = sess.run(feature_maps) - out_feature_map_shapes = {key: value.shape - for key, value in out_feature_maps.items()} - self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes) - - -class GetDepthFunctionTest(tf.test.TestCase): - - def test_return_min_depth_when_multiplier_is_small(self): - depth_fn = feature_map_generators.get_depth_fn(depth_multiplier=0.5, - min_depth=16) - self.assertEqual(depth_fn(16), 16) - - def test_return_correct_depth_with_multiplier(self): - depth_fn = feature_map_generators.get_depth_fn(depth_multiplier=0.5, - min_depth=16) - self.assertEqual(depth_fn(64), 32) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_feature_extractor_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_feature_extractor_test.py deleted file mode 100644 index 899214b2c40bf1c92570cd2d8432dcdfc7b569d2..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_feature_extractor_test.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Base test class SSDFeatureExtractors.""" - -from abc import abstractmethod - -import itertools -import numpy as np -import tensorflow as tf - -from object_detection.utils import test_case - - -class SsdFeatureExtractorTestBase(test_case.TestCase): - - def conv_hyperparams_fn(self): - with tf.contrib.slim.arg_scope([]) as sc: - return sc - - @abstractmethod - def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, - use_explicit_padding=False): - """Constructs a new feature extractor. - - Args: - depth_multiplier: float depth multiplier for feature extractor - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - use_explicit_padding: use 'VALID' padding for convolutions, but prepad - inputs so that the output dimensions are the same as if 'SAME' padding - were used. - Returns: - an ssd_meta_arch.SSDFeatureExtractor object. 
- """ - pass - - def check_extract_features_returns_correct_shape( - self, batch_size, image_height, image_width, depth_multiplier, - pad_to_multiple, expected_feature_map_shapes, use_explicit_padding=False): - def graph_fn(image_tensor): - feature_extractor = self._create_feature_extractor(depth_multiplier, - pad_to_multiple, - use_explicit_padding) - feature_maps = feature_extractor.extract_features(image_tensor) - return feature_maps - - image_tensor = np.random.rand(batch_size, image_height, image_width, - 3).astype(np.float32) - feature_maps = self.execute(graph_fn, [image_tensor]) - for feature_map, expected_shape in itertools.izip( - feature_maps, expected_feature_map_shapes): - self.assertAllEqual(feature_map.shape, expected_shape) - - def check_extract_features_returns_correct_shapes_with_dynamic_inputs( - self, batch_size, image_height, image_width, depth_multiplier, - pad_to_multiple, expected_feature_map_shapes, use_explicit_padding=False): - def graph_fn(image_height, image_width): - feature_extractor = self._create_feature_extractor(depth_multiplier, - pad_to_multiple, - use_explicit_padding) - image_tensor = tf.random_uniform([batch_size, image_height, image_width, - 3], dtype=tf.float32) - feature_maps = feature_extractor.extract_features(image_tensor) - return feature_maps - - feature_maps = self.execute_cpu(graph_fn, [ - np.array(image_height, dtype=np.int32), - np.array(image_width, dtype=np.int32) - ]) - for feature_map, expected_shape in itertools.izip( - feature_maps, expected_feature_map_shapes): - self.assertAllEqual(feature_map.shape, expected_shape) - - def check_extract_features_raises_error_with_invalid_image_size( - self, image_height, image_width, depth_multiplier, pad_to_multiple): - feature_extractor = self._create_feature_extractor(depth_multiplier, - pad_to_multiple) - preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3)) - feature_maps = feature_extractor.extract_features(preprocessed_inputs) - test_preprocessed_image = np.random.rand(4, image_height, image_width, 3) - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - with self.assertRaises(tf.errors.InvalidArgumentError): - sess.run(feature_maps, - feed_dict={preprocessed_inputs: test_preprocessed_image}) - - def check_feature_extractor_variables_under_scope( - self, depth_multiplier, pad_to_multiple, scope_name): - g = tf.Graph() - with g.as_default(): - feature_extractor = self._create_feature_extractor( - depth_multiplier, pad_to_multiple) - preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3)) - feature_extractor.extract_features(preprocessed_inputs) - variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) - for variable in variables: - self.assertTrue(variable.name.startswith(scope_name)) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_inception_v2_feature_extractor.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_inception_v2_feature_extractor.py deleted file mode 100644 index b97b0f2bc9923a4fe9207a4dba28f69db3f52101..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_inception_v2_feature_extractor.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""SSDFeatureExtractor for InceptionV2 features.""" -import tensorflow as tf - -from object_detection.meta_architectures import ssd_meta_arch -from object_detection.models import feature_map_generators -from object_detection.utils import ops -from object_detection.utils import shape_utils -from nets import inception_v2 - -slim = tf.contrib.slim - - -class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): - """SSD Feature Extractor using InceptionV2 features.""" - - def __init__(self, - is_training, - depth_multiplier, - min_depth, - pad_to_multiple, - conv_hyperparams_fn, - reuse_weights=None, - use_explicit_padding=False, - use_depthwise=False, - override_base_feature_extractor_hyperparams=False): - """InceptionV2 Feature Extractor for SSD Models. - - Args: - is_training: whether the network is in training mode. - depth_multiplier: float depth multiplier for feature extractor. - min_depth: minimum feature extractor depth. - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d - and separable_conv2d ops in the layers that are added on top of the - base feature extractor. - reuse_weights: Whether to reuse variables. Default is None. - use_explicit_padding: Whether to use explicit padding when extracting - features. Default is False. - use_depthwise: Whether to use depthwise convolutions. Default is False. - override_base_feature_extractor_hyperparams: Whether to override - hyperparameters of the base feature extractor with the one from - `conv_hyperparams_fn`. - - Raises: - ValueError: If `override_base_feature_extractor_hyperparams` is False. - """ - super(SSDInceptionV2FeatureExtractor, self).__init__( - is_training, depth_multiplier, min_depth, pad_to_multiple, - conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise, - override_base_feature_extractor_hyperparams) - if not self._override_base_feature_extractor_hyperparams: - raise ValueError('SSD Inception V2 feature extractor always uses' - 'scope returned by `conv_hyperparams_fn` for both the ' - 'base feature extractor and the additional layers ' - 'added since there is no arg_scope defined for the base ' - 'feature extractor.') - - def preprocess(self, resized_inputs): - """SSD preprocessing. - - Maps pixel values to the range [-1, 1]. - - Args: - resized_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - """ - return (2.0 / 255.0) * resized_inputs - 1.0 - - def extract_features(self, preprocessed_inputs): - """Extract features from preprocessed inputs. - - Args: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. 
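The `preprocess` method above maps [0, 255] pixel values into [-1, 1]. The same affine transform is easy to sanity-check outside the class:

```python
import numpy as np

def preprocess(resized_inputs):
    # Same affine map as SSDInceptionV2FeatureExtractor.preprocess above:
    # 0 -> -1.0, 127.5 -> 0.0, 255 -> 1.0.
    return (2.0 / 255.0) * resized_inputs - 1.0

images = np.random.uniform(0.0, 255.0, size=(2, 128, 128, 3)).astype(np.float32)
out = preprocess(images)
assert out.min() >= -1.0 and out.max() <= 1.0
```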
- - Returns: - feature_maps: a list of tensors where the ith tensor has shape - [batch, height_i, width_i, depth_i] - """ - preprocessed_inputs = shape_utils.check_min_image_dim( - 33, preprocessed_inputs) - - feature_map_layout = { - 'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''], - 'layer_depth': [-1, -1, 512, 256, 256, 128], - 'use_explicit_padding': self._use_explicit_padding, - 'use_depthwise': self._use_depthwise, - } - - with slim.arg_scope(self._conv_hyperparams_fn()): - with tf.variable_scope('InceptionV2', - reuse=self._reuse_weights) as scope: - _, image_features = inception_v2.inception_v2_base( - ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), - final_endpoint='Mixed_5c', - min_depth=self._min_depth, - depth_multiplier=self._depth_multiplier, - scope=scope) - feature_maps = feature_map_generators.multi_resolution_feature_maps( - feature_map_layout=feature_map_layout, - depth_multiplier=self._depth_multiplier, - min_depth=self._min_depth, - insert_1x1_conv=True, - image_features=image_features) - - return feature_maps.values() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_inception_v2_feature_extractor_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_inception_v2_feature_extractor_test.py deleted file mode 100644 index 054dcc4e4e4057113762bc2b83e6b678911ce662..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_inception_v2_feature_extractor_test.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.models.ssd_inception_v2_feature_extractor.""" -import numpy as np -import tensorflow as tf - -from object_detection.models import ssd_feature_extractor_test -from object_detection.models import ssd_inception_v2_feature_extractor - - -class SsdInceptionV2FeatureExtractorTest( - ssd_feature_extractor_test.SsdFeatureExtractorTestBase): - - def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, - is_training=True): - """Constructs a SsdInceptionV2FeatureExtractor. - - Args: - depth_multiplier: float depth multiplier for feature extractor - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - is_training: whether the network is in training mode. - - Returns: - an ssd_inception_v2_feature_extractor.SsdInceptionV2FeatureExtractor. 
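In the layout above, the two named endpoints pass through unchanged (`layer_depth` of -1), while the four empty entries request new stride-2 convolutions of depth 512, 256, 256 and 128. With 'SAME' padding each new layer halves the spatial size (with a floor of 1), which is exactly the 4-2-1-1-1 progression the shape tests below expect from the 4x4 Mixed_5c map of a 128x128 input. A quick sketch:

```python
def stride2_size(size):
    # Output size of a stride-2 'SAME' convolution: ceil(size / 2).
    return max((size + 1) // 2, 1)

sizes = [4]  # Mixed_5c is 4x4 for a 128x128 input (output stride 32)
for _ in range(4):  # the four appended layers in the layout above
    sizes.append(stride2_size(sizes[-1]))
print(sizes)  # [4, 2, 1, 1, 1]
```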
- """ - min_depth = 32 - return ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor( - is_training, depth_multiplier, min_depth, pad_to_multiple, - self.conv_hyperparams_fn, - override_base_feature_extractor_hyperparams=True) - - def test_extract_features_returns_correct_shapes_128(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1024), - (2, 2, 2, 512), (2, 1, 1, 256), - (2, 1, 1, 256), (2, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1024), - (2, 2, 2, 512), (2, 1, 1, 256), - (2, 1, 1, 256), (2, 1, 1, 128)] - self.check_extract_features_returns_correct_shapes_with_dynamic_inputs( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_299(self): - image_height = 299 - image_width = 299 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 19, 19, 576), (2, 10, 10, 1024), - (2, 5, 5, 512), (2, 3, 3, 256), - (2, 2, 2, 256), (2, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): - image_height = 299 - image_width = 299 - depth_multiplier = 0.5**12 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 19, 19, 128), (2, 10, 10, 128), - (2, 5, 5, 32), (2, 3, 3, 32), - (2, 2, 2, 32), (2, 1, 1, 32)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self): - image_height = 299 - image_width = 299 - depth_multiplier = 1.0 - pad_to_multiple = 32 - expected_feature_map_shape = [(2, 20, 20, 576), (2, 10, 10, 1024), - (2, 5, 5, 512), (2, 3, 3, 256), - (2, 2, 2, 256), (2, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_raises_error_with_invalid_image_size(self): - image_height = 32 - image_width = 32 - depth_multiplier = 1.0 - pad_to_multiple = 1 - self.check_extract_features_raises_error_with_invalid_image_size( - image_height, image_width, depth_multiplier, pad_to_multiple) - - def test_preprocess_returns_correct_value_range(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1 - pad_to_multiple = 1 - test_image = np.random.rand(4, image_height, image_width, 3) - feature_extractor = self._create_feature_extractor(depth_multiplier, - pad_to_multiple) - preprocessed_image = feature_extractor.preprocess(test_image) - self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0))) - - def test_variables_only_created_in_scope(self): - depth_multiplier = 1 - pad_to_multiple = 1 - scope_name = 'InceptionV2' - self.check_feature_extractor_variables_under_scope( - depth_multiplier, pad_to_multiple, scope_name) - - -if __name__ == '__main__': - tf.test.main() diff --git 
a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_inception_v3_feature_extractor.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_inception_v3_feature_extractor.py deleted file mode 100644 index 5d97e7b5c9c130edc8173f16a3d1deaa02ffa364..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_inception_v3_feature_extractor.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""SSDFeatureExtractor for InceptionV3 features.""" -import tensorflow as tf - -from object_detection.meta_architectures import ssd_meta_arch -from object_detection.models import feature_map_generators -from object_detection.utils import ops -from object_detection.utils import shape_utils -from nets import inception_v3 - -slim = tf.contrib.slim - - -class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): - """SSD Feature Extractor using InceptionV3 features.""" - - def __init__(self, - is_training, - depth_multiplier, - min_depth, - pad_to_multiple, - conv_hyperparams_fn, - reuse_weights=None, - use_explicit_padding=False, - use_depthwise=False, - override_base_feature_extractor_hyperparams=False): - """InceptionV3 Feature Extractor for SSD Models. - - Args: - is_training: whether the network is in training mode. - depth_multiplier: float depth multiplier for feature extractor. - min_depth: minimum feature extractor depth. - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d - and separable_conv2d ops in the layers that are added on top of the - base feature extractor. - reuse_weights: Whether to reuse variables. Default is None. - use_explicit_padding: Whether to use explicit padding when extracting - features. Default is False. - use_depthwise: Whether to use depthwise convolutions. Default is False. - override_base_feature_extractor_hyperparams: Whether to override - hyperparameters of the base feature extractor with the one from - `conv_hyperparams_fn`. - - Raises: - ValueError: If `override_base_feature_extractor_hyperparams` is False. - """ - super(SSDInceptionV3FeatureExtractor, self).__init__( - is_training, depth_multiplier, min_depth, pad_to_multiple, - conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise, - override_base_feature_extractor_hyperparams) - - if not self._override_base_feature_extractor_hyperparams: - raise ValueError('SSD Inception V3 feature extractor always uses' - 'scope returned by `conv_hyperparams_fn` for both the ' - 'base feature extractor and the additional layers ' - 'added since there is no arg_scope defined for the base ' - 'feature extractor.') - - def preprocess(self, resized_inputs): - """SSD preprocessing. 
- - Maps pixel values to the range [-1, 1]. - - Args: - resized_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - """ - return (2.0 / 255.0) * resized_inputs - 1.0 - - def extract_features(self, preprocessed_inputs): - """Extract features from preprocessed inputs. - - Args: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - feature_maps: a list of tensors where the ith tensor has shape - [batch, height_i, width_i, depth_i] - """ - preprocessed_inputs = shape_utils.check_min_image_dim( - 33, preprocessed_inputs) - - feature_map_layout = { - 'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''], - 'layer_depth': [-1, -1, -1, 512, 256, 128], - 'use_explicit_padding': self._use_explicit_padding, - 'use_depthwise': self._use_depthwise, - } - - with slim.arg_scope(self._conv_hyperparams_fn()): - with tf.variable_scope('InceptionV3', reuse=self._reuse_weights) as scope: - _, image_features = inception_v3.inception_v3_base( - ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), - final_endpoint='Mixed_7c', - min_depth=self._min_depth, - depth_multiplier=self._depth_multiplier, - scope=scope) - feature_maps = feature_map_generators.multi_resolution_feature_maps( - feature_map_layout=feature_map_layout, - depth_multiplier=self._depth_multiplier, - min_depth=self._min_depth, - insert_1x1_conv=True, - image_features=image_features) - - return feature_maps.values() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_inception_v3_feature_extractor_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_inception_v3_feature_extractor_test.py deleted file mode 100644 index fe3d3520d737942efefcc9623a6f0b0c07b72f55..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_inception_v3_feature_extractor_test.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.models.ssd_inception_v3_feature_extractor.""" -import numpy as np -import tensorflow as tf - -from object_detection.models import ssd_feature_extractor_test -from object_detection.models import ssd_inception_v3_feature_extractor - - -class SsdInceptionV3FeatureExtractorTest( - ssd_feature_extractor_test.SsdFeatureExtractorTestBase): - - def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, - is_training=True): - """Constructs a SsdInceptionV3FeatureExtractor. - - Args: - depth_multiplier: float depth multiplier for feature extractor - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. 
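The keys generated for the appended layers follow the convention visible in the generator tests at the top of this section ('Mixed_7c_2_Conv2d_3_3x3_s2_512' and friends): the most recent named base layer, the entry's index in the layout, the kernel and stride, and the requested depth. A reconstruction of just that naming logic, inferred from those expected keys (a sketch, not the library code):

```python
def layer_names(from_layer, layer_depth):
    base, names = '', []
    for index, (layer, depth) in enumerate(zip(from_layer, layer_depth)):
        if layer:                 # reuse a base-network endpoint as-is
            base = layer
            names.append(layer)
        else:                     # a new stride-2 3x3 conv is appended
            names.append('{}_2_Conv2d_{}_3x3_s2_{}'.format(base, index, depth))
    return names

print(layer_names(['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
                  [-1, -1, -1, 512, 256, 128]))
# ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', 'Mixed_7c_2_Conv2d_3_3x3_s2_512',
#  'Mixed_7c_2_Conv2d_4_3x3_s2_256', 'Mixed_7c_2_Conv2d_5_3x3_s2_128']
```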
- is_training: whether the network is in training mode. - - Returns: - an ssd_inception_v3_feature_extractor.SsdInceptionV3FeatureExtractor. - """ - min_depth = 32 - return ssd_inception_v3_feature_extractor.SSDInceptionV3FeatureExtractor( - is_training, depth_multiplier, min_depth, pad_to_multiple, - self.conv_hyperparams_fn, - override_base_feature_extractor_hyperparams=True) - - def test_extract_features_returns_correct_shapes_128(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 13, 13, 288), (2, 6, 6, 768), - (2, 2, 2, 2048), (2, 1, 1, 512), - (2, 1, 1, 256), (2, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 13, 13, 288), (2, 6, 6, 768), - (2, 2, 2, 2048), (2, 1, 1, 512), - (2, 1, 1, 256), (2, 1, 1, 128)] - self.check_extract_features_returns_correct_shapes_with_dynamic_inputs( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_299(self): - image_height = 299 - image_width = 299 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 35, 35, 288), (2, 17, 17, 768), - (2, 8, 8, 2048), (2, 4, 4, 512), - (2, 2, 2, 256), (2, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): - image_height = 299 - image_width = 299 - depth_multiplier = 0.5**12 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 35, 35, 128), (2, 17, 17, 128), - (2, 8, 8, 192), (2, 4, 4, 32), - (2, 2, 2, 32), (2, 1, 1, 32)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self): - image_height = 299 - image_width = 299 - depth_multiplier = 1.0 - pad_to_multiple = 32 - expected_feature_map_shape = [(2, 37, 37, 288), (2, 18, 18, 768), - (2, 8, 8, 2048), (2, 4, 4, 512), - (2, 2, 2, 256), (2, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_raises_error_with_invalid_image_size(self): - image_height = 32 - image_width = 32 - depth_multiplier = 1.0 - pad_to_multiple = 1 - self.check_extract_features_raises_error_with_invalid_image_size( - image_height, image_width, depth_multiplier, pad_to_multiple) - - def test_preprocess_returns_correct_value_range(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1 - pad_to_multiple = 1 - test_image = np.random.rand(4, image_height, image_width, 3) - feature_extractor = self._create_feature_extractor(depth_multiplier, - pad_to_multiple) - preprocessed_image = feature_extractor.preprocess(test_image) - self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0))) - - def test_variables_only_created_in_scope(self): - depth_multiplier = 1 - pad_to_multiple = 1 - scope_name = 'InceptionV3' - self.check_feature_extractor_variables_under_scope( - 
depth_multiplier, pad_to_multiple, scope_name) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_mobilenet_v1_feature_extractor.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_mobilenet_v1_feature_extractor.py deleted file mode 100644 index aada1111ed659cbaa0180a0535ea6095d0f1d9c9..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_mobilenet_v1_feature_extractor.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""SSDFeatureExtractor for MobilenetV1 features.""" - -import tensorflow as tf - -from object_detection.meta_architectures import ssd_meta_arch -from object_detection.models import feature_map_generators -from object_detection.utils import context_manager -from object_detection.utils import ops -from object_detection.utils import shape_utils -from nets import mobilenet_v1 - -slim = tf.contrib.slim - - -class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): - """SSD Feature Extractor using MobilenetV1 features.""" - - def __init__(self, - is_training, - depth_multiplier, - min_depth, - pad_to_multiple, - conv_hyperparams_fn, - reuse_weights=None, - use_explicit_padding=False, - use_depthwise=False, - override_base_feature_extractor_hyperparams=False): - """MobileNetV1 Feature Extractor for SSD Models. - - Args: - is_training: whether the network is in training mode. - depth_multiplier: float depth multiplier for feature extractor. - min_depth: minimum feature extractor depth. - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d - and separable_conv2d ops in the layers that are added on top of the - base feature extractor. - reuse_weights: Whether to reuse variables. Default is None. - use_explicit_padding: Use 'VALID' padding for convolutions, but prepad - inputs so that the output dimensions are the same as if 'SAME' padding - were used. - use_depthwise: Whether to use depthwise convolutions. Default is False. - override_base_feature_extractor_hyperparams: Whether to override - hyperparameters of the base feature extractor with the one from - `conv_hyperparams_fn`. - """ - super(SSDMobileNetV1FeatureExtractor, self).__init__( - is_training, depth_multiplier, min_depth, pad_to_multiple, - conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise, - override_base_feature_extractor_hyperparams) - - def preprocess(self, resized_inputs): - """SSD preprocessing. - - Maps pixel values to the range [-1, 1]. - - Args: - resized_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. 
- - Returns: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - """ - return (2.0 / 255.0) * resized_inputs - 1.0 - - def extract_features(self, preprocessed_inputs): - """Extract features from preprocessed inputs. - - Args: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - feature_maps: a list of tensors where the ith tensor has shape - [batch, height_i, width_i, depth_i] - """ - preprocessed_inputs = shape_utils.check_min_image_dim( - 33, preprocessed_inputs) - - feature_map_layout = { - 'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', - '', ''], - 'layer_depth': [-1, -1, 512, 256, 256, 128], - 'use_explicit_padding': self._use_explicit_padding, - 'use_depthwise': self._use_depthwise, - } - - with tf.variable_scope('MobilenetV1', - reuse=self._reuse_weights) as scope: - with slim.arg_scope( - mobilenet_v1.mobilenet_v1_arg_scope( - is_training=None, regularize_depthwise=True)): - with (slim.arg_scope(self._conv_hyperparams_fn()) - if self._override_base_feature_extractor_hyperparams - else context_manager.IdentityContextManager()): - _, image_features = mobilenet_v1.mobilenet_v1_base( - ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), - final_endpoint='Conv2d_13_pointwise', - min_depth=self._min_depth, - depth_multiplier=self._depth_multiplier, - use_explicit_padding=self._use_explicit_padding, - scope=scope) - with slim.arg_scope(self._conv_hyperparams_fn()): - feature_maps = feature_map_generators.multi_resolution_feature_maps( - feature_map_layout=feature_map_layout, - depth_multiplier=self._depth_multiplier, - min_depth=self._min_depth, - insert_1x1_conv=True, - image_features=image_features) - - return feature_maps.values() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_mobilenet_v1_feature_extractor_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_mobilenet_v1_feature_extractor_test.py deleted file mode 100644 index d3a9542b3596712aa1d85da6b9d68416c23c3ff3..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_mobilenet_v1_feature_extractor_test.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for ssd_mobilenet_v1_feature_extractor.""" -import numpy as np -import tensorflow as tf - -from object_detection.models import ssd_feature_extractor_test -from object_detection.models import ssd_mobilenet_v1_feature_extractor - -slim = tf.contrib.slim - - -class SsdMobilenetV1FeatureExtractorTest( - ssd_feature_extractor_test.SsdFeatureExtractorTestBase): - - def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, - is_training=True, use_explicit_padding=False): - """Constructs a new feature extractor. 
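`extract_features` above selects between the arg_scope built by `conv_hyperparams_fn` and `context_manager.IdentityContextManager`, depending on `override_base_feature_extractor_hyperparams`. The idiom is simply a conditional context manager; a self-contained sketch of the same pattern (all names here are illustrative stand-ins, not the library's):

```python
import contextlib

@contextlib.contextmanager
def identity_context():
    # No-op stand-in for context_manager.IdentityContextManager.
    yield

@contextlib.contextmanager
def hyperparams_scope():
    # Stand-in for the slim arg_scope built by conv_hyperparams_fn.
    print('overriding base extractor hyperparams')
    yield

override_base_feature_extractor_hyperparams = False
with (hyperparams_scope() if override_base_feature_extractor_hyperparams
      else identity_context()):
    pass  # the base network would be built here
```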
- - Args: - depth_multiplier: float depth multiplier for feature extractor - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - is_training: whether the network is in training mode. - use_explicit_padding: Use 'VALID' padding for convolutions, but prepad - inputs so that the output dimensions are the same as if 'SAME' padding - were used. - Returns: - an ssd_meta_arch.SSDFeatureExtractor object. - """ - min_depth = 32 - return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor( - is_training, depth_multiplier, min_depth, pad_to_multiple, - self.conv_hyperparams_fn, - use_explicit_padding=use_explicit_padding) - - def test_extract_features_returns_correct_shapes_128(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 8, 8, 512), (2, 4, 4, 1024), - (2, 2, 2, 512), (2, 1, 1, 256), - (2, 1, 1, 256), (2, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape, use_explicit_padding=False) - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape, use_explicit_padding=True) - - def test_extract_features_returns_correct_shapes_299(self): - image_height = 299 - image_width = 299 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 19, 19, 512), (2, 10, 10, 1024), - (2, 5, 5, 512), (2, 3, 3, 256), - (2, 2, 2, 256), (2, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape, use_explicit_padding=False) - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape, use_explicit_padding=True) - - def test_extract_features_with_dynamic_image_shape(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 8, 8, 512), (2, 4, 4, 1024), - (2, 2, 2, 512), (2, 1, 1, 256), - (2, 1, 1, 256), (2, 1, 1, 128)] - self.check_extract_features_returns_correct_shapes_with_dynamic_inputs( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape, use_explicit_padding=False) - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape, use_explicit_padding=True) - - def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): - image_height = 299 - image_width = 299 - depth_multiplier = 0.5**12 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 19, 19, 32), (2, 10, 10, 32), - (2, 5, 5, 32), (2, 3, 3, 32), - (2, 2, 2, 32), (2, 1, 1, 32)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape, use_explicit_padding=False) - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape, use_explicit_padding=True) - - def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self): - image_height = 299 - image_width = 299 - depth_multiplier = 1.0 - pad_to_multiple = 32 - expected_feature_map_shape = [(2, 20, 20, 512), (2, 10, 10, 1024), - (2, 5, 5, 512), (2, 3, 3, 256), - 
(2, 2, 2, 256), (2, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape, use_explicit_padding=False) - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape, use_explicit_padding=True) - - def test_extract_features_raises_error_with_invalid_image_size(self): - image_height = 32 - image_width = 32 - depth_multiplier = 1.0 - pad_to_multiple = 1 - self.check_extract_features_raises_error_with_invalid_image_size( - image_height, image_width, depth_multiplier, pad_to_multiple) - - def test_preprocess_returns_correct_value_range(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1 - pad_to_multiple = 1 - test_image = np.random.rand(2, image_height, image_width, 3) - feature_extractor = self._create_feature_extractor(depth_multiplier, - pad_to_multiple) - preprocessed_image = feature_extractor.preprocess(test_image) - self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0))) - - def test_variables_only_created_in_scope(self): - depth_multiplier = 1 - pad_to_multiple = 1 - scope_name = 'MobilenetV1' - self.check_feature_extractor_variables_under_scope( - depth_multiplier, pad_to_multiple, scope_name) - - def test_has_fused_batchnorm(self): - image_height = 40 - image_width = 40 - depth_multiplier = 1 - pad_to_multiple = 1 - image_placeholder = tf.placeholder(tf.float32, - [1, image_height, image_width, 3]) - feature_extractor = self._create_feature_extractor(depth_multiplier, - pad_to_multiple) - preprocessed_image = feature_extractor.preprocess(image_placeholder) - _ = feature_extractor.extract_features(preprocessed_image) - self.assertTrue(any(op.type == 'FusedBatchNorm' - for op in tf.get_default_graph().get_operations())) - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_mobilenet_v2_feature_extractor.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_mobilenet_v2_feature_extractor.py deleted file mode 100644 index 014b93a8e293608fde713f556979f8ecffc9a52d..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_mobilenet_v2_feature_extractor.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
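`test_has_fused_batchnorm` above verifies the op type by scanning the default graph's operations. The same scan can be reproduced on any small TF 1.x graph; a sketch, assuming TF 1.x where fused batch normalization lowers to a 'FusedBatchNorm' op (later TF versions emit versioned op names):

```python
import tensorflow as tf  # TF 1.x, as used throughout these files

g = tf.Graph()
with g.as_default():
    images = tf.placeholder(tf.float32, [1, 40, 40, 3])
    tf.layers.batch_normalization(images, fused=True)

# The same graph scan the test above performs:
print(any(op.type == 'FusedBatchNorm' for op in g.get_operations()))  # True
```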
-# ============================================================================== - -"""SSDFeatureExtractor for MobilenetV2 features.""" - -import tensorflow as tf - -from object_detection.meta_architectures import ssd_meta_arch -from object_detection.models import feature_map_generators -from object_detection.utils import context_manager -from object_detection.utils import ops -from object_detection.utils import shape_utils -from nets.mobilenet import mobilenet -from nets.mobilenet import mobilenet_v2 - -slim = tf.contrib.slim - - -class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): - """SSD Feature Extractor using MobilenetV2 features.""" - - def __init__(self, - is_training, - depth_multiplier, - min_depth, - pad_to_multiple, - conv_hyperparams_fn, - reuse_weights=None, - use_explicit_padding=False, - use_depthwise=False, - override_base_feature_extractor_hyperparams=False): - """MobileNetV2 Feature Extractor for SSD Models. - - Mobilenet v2 (experimental), designed by sandler@. More details can be found - in //knowledge/cerebra/brain/compression/mobilenet/mobilenet_experimental.py - - Args: - is_training: whether the network is in training mode. - depth_multiplier: float depth multiplier for feature extractor. - min_depth: minimum feature extractor depth. - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d - and separable_conv2d ops in the layers that are added on top of the - base feature extractor. - reuse_weights: Whether to reuse variables. Default is None. - use_explicit_padding: Whether to use explicit padding when extracting - features. Default is False. - use_depthwise: Whether to use depthwise convolutions. Default is False. - override_base_feature_extractor_hyperparams: Whether to override - hyperparameters of the base feature extractor with the one from - `conv_hyperparams_fn`. - """ - super(SSDMobileNetV2FeatureExtractor, self).__init__( - is_training, depth_multiplier, min_depth, pad_to_multiple, - conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise, - override_base_feature_extractor_hyperparams) - - def preprocess(self, resized_inputs): - """SSD preprocessing. - - Maps pixel values to the range [-1, 1]. - - Args: - resized_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - """ - return (2.0 / 255.0) * resized_inputs - 1.0 - - def extract_features(self, preprocessed_inputs): - """Extract features from preprocessed inputs. - - Args: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. 
- - Returns: - feature_maps: a list of tensors where the ith tensor has shape - [batch, height_i, width_i, depth_i] - """ - preprocessed_inputs = shape_utils.check_min_image_dim( - 33, preprocessed_inputs) - - feature_map_layout = { - 'from_layer': ['layer_15/expansion_output', 'layer_19', '', '', '', ''], - 'layer_depth': [-1, -1, 512, 256, 256, 128], - 'use_depthwise': self._use_depthwise, - 'use_explicit_padding': self._use_explicit_padding, - } - - with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope: - with slim.arg_scope( - mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \ - slim.arg_scope( - [mobilenet.depth_multiplier], min_depth=self._min_depth): - with (slim.arg_scope(self._conv_hyperparams_fn()) - if self._override_base_feature_extractor_hyperparams else - context_manager.IdentityContextManager()): - _, image_features = mobilenet_v2.mobilenet_base( - ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), - final_endpoint='layer_19', - depth_multiplier=self._depth_multiplier, - use_explicit_padding=self._use_explicit_padding, - scope=scope) - with slim.arg_scope(self._conv_hyperparams_fn()): - feature_maps = feature_map_generators.multi_resolution_feature_maps( - feature_map_layout=feature_map_layout, - depth_multiplier=self._depth_multiplier, - min_depth=self._min_depth, - insert_1x1_conv=True, - image_features=image_features) - - return feature_maps.values() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_mobilenet_v2_feature_extractor_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_mobilenet_v2_feature_extractor_test.py deleted file mode 100644 index 0b374749f2947f848692275bd79129d9f5d4bd76..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_mobilenet_v2_feature_extractor_test.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for ssd_mobilenet_v2_feature_extractor.""" -import numpy as np -import tensorflow as tf - -from object_detection.models import ssd_feature_extractor_test -from object_detection.models import ssd_mobilenet_v2_feature_extractor - -slim = tf.contrib.slim - - -class SsdMobilenetV2FeatureExtractorTest( - ssd_feature_extractor_test.SsdFeatureExtractorTestBase): - - def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, - use_explicit_padding=False): - """Constructs a new feature extractor. - - Args: - depth_multiplier: float depth multiplier for feature extractor - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - use_explicit_padding: use 'VALID' padding for convolutions, but prepad - inputs so that the output dimensions are the same as if 'SAME' padding - were used. - Returns: - an ssd_meta_arch.SSDFeatureExtractor object. 
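The two base endpoints in the layout above sit at output strides 16 and 32, which pins down the first two map shapes for the 128x128 images in the tests that follow (the strides and the 6x bottleneck expansion are standard MobileNetV2 facts, assumed here rather than stated in this file):

```python
input_size = 128
# (endpoint, output stride, channels) for the two base maps in the layout.
endpoints = [('layer_15/expansion_output', 16, 96 * 6),  # 6x-expanded bottleneck
             ('layer_19', 32, 1280)]
for name, stride, depth in endpoints:
    side = input_size // stride
    print(name, (2, side, side, depth))
# layer_15/expansion_output (2, 8, 8, 576)
# layer_19 (2, 4, 4, 1280)
```

The min-depth test's 192-channel first map is consistent with the same reasoning: the clamped 32-channel bottleneck expands sixfold to 192.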
- """ - min_depth = 32 - return ssd_mobilenet_v2_feature_extractor.SSDMobileNetV2FeatureExtractor( - False, - depth_multiplier, - min_depth, - pad_to_multiple, - self.conv_hyperparams_fn, - use_explicit_padding=use_explicit_padding) - - def test_extract_features_returns_correct_shapes_128(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1280), - (2, 2, 2, 512), (2, 1, 1, 256), - (2, 1, 1, 256), (2, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1280), - (2, 2, 2, 512), (2, 1, 1, 256), - (2, 1, 1, 256), (2, 1, 1, 128)] - self.check_extract_features_returns_correct_shapes_with_dynamic_inputs( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_299(self): - image_height = 299 - image_width = 299 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 19, 19, 576), (2, 10, 10, 1280), - (2, 5, 5, 512), (2, 3, 3, 256), - (2, 2, 2, 256), (2, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): - image_height = 299 - image_width = 299 - depth_multiplier = 0.5**12 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 19, 19, 192), (2, 10, 10, 32), - (2, 5, 5, 32), (2, 3, 3, 32), - (2, 2, 2, 32), (2, 1, 1, 32)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self): - image_height = 299 - image_width = 299 - depth_multiplier = 1.0 - pad_to_multiple = 32 - expected_feature_map_shape = [(2, 20, 20, 576), (2, 10, 10, 1280), - (2, 5, 5, 512), (2, 3, 3, 256), - (2, 2, 2, 256), (2, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_raises_error_with_invalid_image_size(self): - image_height = 32 - image_width = 32 - depth_multiplier = 1.0 - pad_to_multiple = 1 - self.check_extract_features_raises_error_with_invalid_image_size( - image_height, image_width, depth_multiplier, pad_to_multiple) - - def test_preprocess_returns_correct_value_range(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1 - pad_to_multiple = 1 - test_image = np.random.rand(4, image_height, image_width, 3) - feature_extractor = self._create_feature_extractor(depth_multiplier, - pad_to_multiple) - preprocessed_image = feature_extractor.preprocess(test_image) - self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0))) - - def test_variables_only_created_in_scope(self): - depth_multiplier = 1 - pad_to_multiple = 1 - scope_name = 'MobilenetV2' - self.check_feature_extractor_variables_under_scope( - depth_multiplier, pad_to_multiple, scope_name) - - def test_has_fused_batchnorm(self): - image_height = 40 - image_width = 40 - depth_multiplier = 1 - 
pad_to_multiple = 1 - image_placeholder = tf.placeholder(tf.float32, - [1, image_height, image_width, 3]) - feature_extractor = self._create_feature_extractor(depth_multiplier, - pad_to_multiple) - preprocessed_image = feature_extractor.preprocess(image_placeholder) - _ = feature_extractor.extract_features(preprocessed_image) - self.assertTrue(any(op.type == 'FusedBatchNorm' - for op in tf.get_default_graph().get_operations())) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_resnet_v1_fpn_feature_extractor.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_resnet_v1_fpn_feature_extractor.py deleted file mode 100644 index 65bda3f4fc3a33e9bf6f356438ebb35c51bbae59..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_resnet_v1_fpn_feature_extractor.py +++ /dev/null @@ -1,290 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""SSD Feature Pyramid Network (FPN) feature extractors based on Resnet v1. - -See https://arxiv.org/abs/1708.02002 for details. -""" - -import tensorflow as tf - -from object_detection.meta_architectures import ssd_meta_arch -from object_detection.models import feature_map_generators -from object_detection.utils import context_manager -from object_detection.utils import ops -from object_detection.utils import shape_utils -from nets import resnet_v1 - -slim = tf.contrib.slim - - -class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): - """SSD FPN feature extractor based on Resnet v1 architecture.""" - - def __init__(self, - is_training, - depth_multiplier, - min_depth, - pad_to_multiple, - conv_hyperparams_fn, - resnet_base_fn, - resnet_scope_name, - fpn_scope_name, - reuse_weights=None, - use_explicit_padding=False, - use_depthwise=False, - override_base_feature_extractor_hyperparams=False): - """SSD FPN feature extractor based on Resnet v1 architecture. - - Args: - is_training: whether the network is in training mode. - depth_multiplier: float depth multiplier for feature extractor. - UNUSED currently. - min_depth: minimum feature extractor depth. UNUSED Currently. - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d - and separable_conv2d ops in the layers that are added on top of the - base feature extractor. - resnet_base_fn: base resnet network to use. - resnet_scope_name: scope name under which to construct resnet - fpn_scope_name: scope name under which to construct the feature pyramid - network. - reuse_weights: Whether to reuse variables. Default is None. - use_explicit_padding: Whether to use explicit padding when extracting - features. Default is False. UNUSED currently. 
- use_depthwise: Whether to use depthwise convolutions. UNUSED currently. - override_base_feature_extractor_hyperparams: Whether to override - hyperparameters of the base feature extractor with the one from - `conv_hyperparams_fn`. - - Raises: - ValueError: On supplying invalid arguments for unused arguments. - """ - super(_SSDResnetV1FpnFeatureExtractor, self).__init__( - is_training, depth_multiplier, min_depth, pad_to_multiple, - conv_hyperparams_fn, reuse_weights, use_explicit_padding, - override_base_feature_extractor_hyperparams) - if self._depth_multiplier != 1.0: - raise ValueError('Only depth 1.0 is supported, found: {}'. - format(self._depth_multiplier)) - if self._use_explicit_padding is True: - raise ValueError('Explicit padding is not a valid option.') - self._resnet_base_fn = resnet_base_fn - self._resnet_scope_name = resnet_scope_name - self._fpn_scope_name = fpn_scope_name - - def preprocess(self, resized_inputs): - """SSD preprocessing. - - VGG style channel mean subtraction as described here: - https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-mdnge. - - Args: - resized_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - """ - channel_means = [123.68, 116.779, 103.939] - return resized_inputs - [[channel_means]] - - def _filter_features(self, image_features): - # TODO(rathodv): Change resnet endpoint to strip scope prefixes instead - # of munging the scope here. - filtered_image_features = dict({}) - for key, feature in image_features.items(): - feature_name = key.split('/')[-1] - if feature_name in ['block2', 'block3', 'block4']: - filtered_image_features[feature_name] = feature - return filtered_image_features - - def extract_features(self, preprocessed_inputs): - """Extract features from preprocessed inputs. - - Args: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - feature_maps: a list of tensors where the ith tensor has shape - [batch, height_i, width_i, depth_i] - - Raises: - ValueError: depth multiplier is not supported. 
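Unlike the other extractors in this section, the resnet FPN extractor preprocesses with VGG-style per-channel mean subtraction, as `preprocess` above shows. A standalone sketch of that operation:

```python
import numpy as np

CHANNEL_MEANS = [123.68, 116.779, 103.939]  # RGB means, as in preprocess above

def preprocess(resized_inputs):
    # Broadcasts a (1, 1, 3) mean against [batch, height, width, 3] inputs.
    return resized_inputs - [[CHANNEL_MEANS]]

image = np.full((1, 2, 2, 3), 124.0, dtype=np.float32)
print(preprocess(image)[0, 0, 0])  # ~[0.32, 7.221, 20.061]
```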
- """ - if self._depth_multiplier != 1.0: - raise ValueError('Depth multiplier not supported.') - - preprocessed_inputs = shape_utils.check_min_image_dim( - 129, preprocessed_inputs) - - with tf.variable_scope( - self._resnet_scope_name, reuse=self._reuse_weights) as scope: - with slim.arg_scope(resnet_v1.resnet_arg_scope()): - with (slim.arg_scope(self._conv_hyperparams_fn()) - if self._override_base_feature_extractor_hyperparams else - context_manager.IdentityContextManager()): - _, image_features = self._resnet_base_fn( - inputs=ops.pad_to_multiple(preprocessed_inputs, - self._pad_to_multiple), - num_classes=None, - is_training=None, - global_pool=False, - output_stride=None, - store_non_strided_activations=True, - scope=scope) - image_features = self._filter_features(image_features) - with slim.arg_scope(self._conv_hyperparams_fn()): - with tf.variable_scope(self._fpn_scope_name, - reuse=self._reuse_weights): - fpn_features = feature_map_generators.fpn_top_down_feature_maps( - [(key, image_features[key]) - for key in ['block2', 'block3', 'block4']], - depth=256) - last_feature_map = fpn_features['top_down_block4'] - coarse_features = {} - for i in range(5, 7): - last_feature_map = slim.conv2d( - last_feature_map, - num_outputs=256, - kernel_size=[3, 3], - stride=2, - padding='SAME', - scope='bottom_up_block{}'.format(i)) - coarse_features['bottom_up_block{}'.format(i)] = last_feature_map - return [fpn_features['top_down_block2'], - fpn_features['top_down_block3'], - fpn_features['top_down_block4'], - coarse_features['bottom_up_block5'], - coarse_features['bottom_up_block6']] - - -class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor): - - def __init__(self, - is_training, - depth_multiplier, - min_depth, - pad_to_multiple, - conv_hyperparams_fn, - reuse_weights=None, - use_explicit_padding=False, - use_depthwise=False, - override_base_feature_extractor_hyperparams=False): - """SSD Resnet50 V1 FPN feature extractor based on Resnet v1 architecture. - - Args: - is_training: whether the network is in training mode. - depth_multiplier: float depth multiplier for feature extractor. - UNUSED currently. - min_depth: minimum feature extractor depth. UNUSED Currently. - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d - and separable_conv2d ops in the layers that are added on top of the - base feature extractor. - reuse_weights: Whether to reuse variables. Default is None. - use_explicit_padding: Whether to use explicit padding when extracting - features. Default is False. UNUSED currently. - use_depthwise: Whether to use depthwise convolutions. UNUSED currently. - override_base_feature_extractor_hyperparams: Whether to override - hyperparameters of the base feature extractor with the one from - `conv_hyperparams_fn`. 
- """ - super(SSDResnet50V1FpnFeatureExtractor, self).__init__( - is_training, depth_multiplier, min_depth, pad_to_multiple, - conv_hyperparams_fn, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn', - reuse_weights, use_explicit_padding, - override_base_feature_extractor_hyperparams) - - -class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor): - - def __init__(self, - is_training, - depth_multiplier, - min_depth, - pad_to_multiple, - conv_hyperparams_fn, - reuse_weights=None, - use_explicit_padding=False, - use_depthwise=False, - override_base_feature_extractor_hyperparams=False): - """SSD Resnet101 V1 FPN feature extractor based on Resnet v1 architecture. - - Args: - is_training: whether the network is in training mode. - depth_multiplier: float depth multiplier for feature extractor. - UNUSED currently. - min_depth: minimum feature extractor depth. UNUSED Currently. - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d - and separable_conv2d ops in the layers that are added on top of the - base feature extractor. - reuse_weights: Whether to reuse variables. Default is None. - use_explicit_padding: Whether to use explicit padding when extracting - features. Default is False. UNUSED currently. - use_depthwise: Whether to use depthwise convolutions. UNUSED currently. - override_base_feature_extractor_hyperparams: Whether to override - hyperparameters of the base feature extractor with the one from - `conv_hyperparams_fn`. - """ - super(SSDResnet101V1FpnFeatureExtractor, self).__init__( - is_training, depth_multiplier, min_depth, pad_to_multiple, - conv_hyperparams_fn, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn', - reuse_weights, use_explicit_padding, - override_base_feature_extractor_hyperparams) - - -class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor): - - def __init__(self, - is_training, - depth_multiplier, - min_depth, - pad_to_multiple, - conv_hyperparams_fn, - reuse_weights=None, - use_explicit_padding=False, - use_depthwise=False, - override_base_feature_extractor_hyperparams=False): - """SSD Resnet152 V1 FPN feature extractor based on Resnet v1 architecture. - - Args: - is_training: whether the network is in training mode. - depth_multiplier: float depth multiplier for feature extractor. - UNUSED currently. - min_depth: minimum feature extractor depth. UNUSED Currently. - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d - and separable_conv2d ops in the layers that are added on top of the - base feature extractor. - reuse_weights: Whether to reuse variables. Default is None. - use_explicit_padding: Whether to use explicit padding when extracting - features. Default is False. UNUSED currently. - use_depthwise: Whether to use depthwise convolutions. UNUSED currently. - override_base_feature_extractor_hyperparams: Whether to override - hyperparameters of the base feature extractor with the one from - `conv_hyperparams_fn`. 
- """ - super(SSDResnet152V1FpnFeatureExtractor, self).__init__( - is_training, depth_multiplier, min_depth, pad_to_multiple, - conv_hyperparams_fn, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn', - reuse_weights, use_explicit_padding, - override_base_feature_extractor_hyperparams) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_test.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_test.py deleted file mode 100644 index 5f406359ac15f5b6cfaaad2cde196a7529d0404a..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_test.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for ssd resnet v1 FPN feature extractors.""" -import tensorflow as tf - -from object_detection.models import ssd_resnet_v1_fpn_feature_extractor -from object_detection.models import ssd_resnet_v1_fpn_feature_extractor_testbase - - -class SSDResnet50V1FeatureExtractorTest( - ssd_resnet_v1_fpn_feature_extractor_testbase. - SSDResnetFPNFeatureExtractorTestBase): - """SSDResnet50v1Fpn feature extractor test.""" - - def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, - use_explicit_padding=False): - min_depth = 32 - is_training = True - return ssd_resnet_v1_fpn_feature_extractor.SSDResnet50V1FpnFeatureExtractor( - is_training, depth_multiplier, min_depth, pad_to_multiple, - self.conv_hyperparams_fn, use_explicit_padding=use_explicit_padding) - - def _resnet_scope_name(self): - return 'resnet_v1_50' - - -class SSDResnet101V1FeatureExtractorTest( - ssd_resnet_v1_fpn_feature_extractor_testbase. - SSDResnetFPNFeatureExtractorTestBase): - """SSDResnet101v1Fpn feature extractor test.""" - - def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, - use_explicit_padding=False): - min_depth = 32 - is_training = True - return ( - ssd_resnet_v1_fpn_feature_extractor.SSDResnet101V1FpnFeatureExtractor( - is_training, - depth_multiplier, - min_depth, - pad_to_multiple, - self.conv_hyperparams_fn, - use_explicit_padding=use_explicit_padding)) - - def _resnet_scope_name(self): - return 'resnet_v1_101' - - -class SSDResnet152V1FeatureExtractorTest( - ssd_resnet_v1_fpn_feature_extractor_testbase. 
- SSDResnetFPNFeatureExtractorTestBase): - """SSDResnet152v1Fpn feature extractor test.""" - - def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, - use_explicit_padding=False): - min_depth = 32 - is_training = True - return ( - ssd_resnet_v1_fpn_feature_extractor.SSDResnet152V1FpnFeatureExtractor( - is_training, - depth_multiplier, - min_depth, - pad_to_multiple, - self.conv_hyperparams_fn, - use_explicit_padding=use_explicit_padding)) - - def _resnet_scope_name(self): - return 'resnet_v1_152' - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_testbase.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_testbase.py deleted file mode 100644 index 186f2b1748b8038ebbe3236b3ba35be24b33afea..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_testbase.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for ssd resnet v1 FPN feature extractors.""" -import abc -import numpy as np -import tensorflow as tf - -from object_detection.models import ssd_feature_extractor_test - - -class SSDResnetFPNFeatureExtractorTestBase( - ssd_feature_extractor_test.SsdFeatureExtractorTestBase): - """Helper test class for SSD Resnet v1 FPN feature extractors.""" - - @abc.abstractmethod - def _resnet_scope_name(self): - pass - - @abc.abstractmethod - def _fpn_scope_name(self): - return 'fpn' - - def test_extract_features_returns_correct_shapes_256(self): - image_height = 256 - image_width = 256 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 32, 32, 256), (2, 16, 16, 256), - (2, 8, 8, 256), (2, 4, 4, 256), - (2, 2, 2, 256)] - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self): - image_height = 256 - image_width = 256 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(2, 32, 32, 256), (2, 16, 16, 256), - (2, 8, 8, 256), (2, 4, 4, 256), - (2, 2, 2, 256)] - self.check_extract_features_returns_correct_shapes_with_dynamic_inputs( - 2, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self): - image_height = 254 - image_width = 254 - depth_multiplier = 1.0 - pad_to_multiple = 32 - expected_feature_map_shape = [(2, 32, 32, 256), (2, 16, 16, 256), - (2, 8, 8, 256), (2, 4, 4, 256), - (2, 2, 2, 256)] - - self.check_extract_features_returns_correct_shape( - 2, image_height, image_width, depth_multiplier, 
pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_raises_error_with_invalid_image_size(self): - image_height = 32 - image_width = 32 - depth_multiplier = 1.0 - pad_to_multiple = 1 - self.check_extract_features_raises_error_with_invalid_image_size( - image_height, image_width, depth_multiplier, pad_to_multiple) - - def test_preprocess_returns_correct_value_range(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1 - pad_to_multiple = 1 - test_image = np.random.rand(4, image_height, image_width, 3) - feature_extractor = self._create_feature_extractor(depth_multiplier, - pad_to_multiple) - preprocessed_image = feature_extractor.preprocess(test_image) - self.assertAllClose(preprocessed_image, - test_image - [[123.68, 116.779, 103.939]]) - - def test_variables_only_created_in_scope(self): - depth_multiplier = 1 - pad_to_multiple = 1 - g = tf.Graph() - with g.as_default(): - feature_extractor = self._create_feature_extractor( - depth_multiplier, pad_to_multiple) - preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3)) - feature_extractor.extract_features(preprocessed_inputs) - variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) - for variable in variables: - self.assertTrue( - variable.name.startswith(self._resnet_scope_name()) - or variable.name.startswith(self._fpn_scope_name())) - - diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/object_detection_tutorial.ipynb b/research/mlperf_object_detection/Mask_RCNN/object_detection/object_detection_tutorial.ipynb deleted file mode 100644 index b5acce97818a61fba0dfb1efbe3fe5cc4edc48ef..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/object_detection_tutorial.ipynb +++ /dev/null @@ -1,343 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Object Detection Demo\n", - "Welcome to the object detection inference walkthrough! This notebook will walk you step by step through the process of using a pre-trained model to detect objects in an image. Make sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md) before you start." 
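Taken together, the extractor and its testbase (both deleted above) pin down a simple contract: lateral features from `block2`/`block3`/`block4` are projected to depth 256 and merged top-down, then two stride-2 `bottom_up_block` layers extend the pyramid. A minimal NumPy sketch of that shape arithmetic (the `upsample2x` helper and the slicing stand-ins for the 3x3, stride-2 convs are illustrative, not part of the deleted code):

```python
import numpy as np

def upsample2x(x):
    # nearest-neighbour upsampling over the H and W axes of an NHWC array
    return x.repeat(2, axis=1).repeat(2, axis=2)

# Lateral maps already projected to depth 256 (a 256x256 input gives
# strides 8/16/32 for block2/block3/block4).
c2 = np.zeros((2, 32, 32, 256))
c3 = np.zeros((2, 16, 16, 256))
c4 = np.zeros((2, 8, 8, 256))

p4 = c4                    # top of the top-down path
p3 = c3 + upsample2x(p4)   # merge with the next lateral map
p2 = c2 + upsample2x(p3)
p5 = p4[:, ::2, ::2, :]    # stand-ins for the stride-2 3x3 convs
p6 = p5[:, ::2, ::2, :]    # ('bottom_up_block5', 'bottom_up_block6')

print([p.shape for p in (p2, p3, p4, p5, p6)])
# [(2, 32, 32, 256), (2, 16, 16, 256), (2, 8, 8, 256),
#  (2, 4, 4, 256), (2, 2, 2, 256)]
```

With a 256x256 input this reproduces exactly the five feature-map shapes asserted by `test_extract_features_returns_correct_shapes_256` above.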
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Imports" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import os\n", - "import six.moves.urllib as urllib\n", - "import sys\n", - "import tarfile\n", - "import tensorflow as tf\n", - "import zipfile\n", - "\n", - "from collections import defaultdict\n", - "from io import StringIO\n", - "from matplotlib import pyplot as plt\n", - "from PIL import Image\n", - "\n", - "# This is needed since the notebook is stored in the object_detection folder.\n", - "sys.path.append(\"..\")\n", - "from object_detection.utils import ops as utils_ops\n", - "\n", - "if tf.__version__ < '1.4.0':\n", - " raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Env setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# This is needed to display the images.\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Object detection imports\n", - "Here are the imports from the object detection module." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import label_map_util\n", - "\n", - "from utils import visualization_utils as vis_util" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Model preparation " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Variables\n", - "\n", - "Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file. \n", - "\n", - "By default we use an \"SSD with Mobilenet\" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# What model to download.\n", - "MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'\n", - "MODEL_FILE = MODEL_NAME + '.tar.gz'\n", - "DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'\n", - "\n", - "# Path to frozen detection graph. This is the actual model that is used for the object detection.\n", - "PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'\n", - "\n", - "# List of the strings that is used to add correct label for each box.\n", - "PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')\n", - "\n", - "NUM_CLASSES = 90" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Download Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "opener = urllib.request.URLopener()\n", - "opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)\n", - "tar_file = tarfile.open(MODEL_FILE)\n", - "for file in tar_file.getmembers():\n", - " file_name = os.path.basename(file.name)\n", - " if 'frozen_inference_graph.pb' in file_name:\n", - " tar_file.extract(file, os.getcwd())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load a (frozen) Tensorflow model into memory." 
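The download cell above is Python 2 era (`six.moves.urllib` and the long-deprecated `URLopener`). For reference, a standard-library Python 3 sketch of the same fetch-and-extract step, reusing the notebook's constants:

```python
import os
import tarfile
import urllib.request

MODEL_FILE = 'ssd_mobilenet_v1_coco_2017_11_17.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Fetch the tarball and pull out only the frozen graph, as the cell above does.
urllib.request.urlretrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
with tarfile.open(MODEL_FILE) as tar:
    for member in tar.getmembers():
        if 'frozen_inference_graph.pb' in os.path.basename(member.name):
            tar.extract(member, os.getcwd())
```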
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "detection_graph = tf.Graph()\n", - "with detection_graph.as_default():\n", - " od_graph_def = tf.GraphDef()\n", - " with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:\n", - " serialized_graph = fid.read()\n", - " od_graph_def.ParseFromString(serialized_graph)\n", - " tf.import_graph_def(od_graph_def, name='')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Loading label map\n", - "Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "label_map = label_map_util.load_labelmap(PATH_TO_LABELS)\n", - "categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)\n", - "category_index = label_map_util.create_category_index(categories)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Helper code" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def load_image_into_numpy_array(image):\n", - " (im_width, im_height) = image.size\n", - " return np.array(image.getdata()).reshape(\n", - " (im_height, im_width, 3)).astype(np.uint8)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Detection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# For the sake of simplicity we will use only 2 images:\n", - "# image1.jpg\n", - "# image2.jpg\n", - "# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.\n", - "PATH_TO_TEST_IMAGES_DIR = 'test_images'\n", - "TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 3) ]\n", - "\n", - "# Size, in inches, of the output images.\n", - "IMAGE_SIZE = (12, 8)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def run_inference_for_single_image(image, graph):\n", - " with graph.as_default():\n", - " with tf.Session() as sess:\n", - " # Get handles to input and output tensors\n", - " ops = tf.get_default_graph().get_operations()\n", - " all_tensor_names = {output.name for op in ops for output in op.outputs}\n", - " tensor_dict = {}\n", - " for key in [\n", - " 'num_detections', 'detection_boxes', 'detection_scores',\n", - " 'detection_classes', 'detection_masks'\n", - " ]:\n", - " tensor_name = key + ':0'\n", - " if tensor_name in all_tensor_names:\n", - " tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(\n", - " tensor_name)\n", - " if 'detection_masks' in tensor_dict:\n", - " # The following processing is only for single image\n", - " detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])\n", - " detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])\n", - " # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.\n", - " real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)\n", - " detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])\n", - " detection_masks = 
tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])\n", - " detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(\n", - " detection_masks, detection_boxes, image.shape[0], image.shape[1])\n", - " detection_masks_reframed = tf.cast(\n", - " tf.greater(detection_masks_reframed, 0.5), tf.uint8)\n", - " # Follow the convention by adding back the batch dimension\n", - " tensor_dict['detection_masks'] = tf.expand_dims(\n", - " detection_masks_reframed, 0)\n", - " image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')\n", - "\n", - " # Run inference\n", - " output_dict = sess.run(tensor_dict,\n", - " feed_dict={image_tensor: np.expand_dims(image, 0)})\n", - "\n", - " # all outputs are float32 numpy arrays, so convert types as appropriate\n", - " output_dict['num_detections'] = int(output_dict['num_detections'][0])\n", - " output_dict['detection_classes'] = output_dict[\n", - " 'detection_classes'][0].astype(np.uint8)\n", - " output_dict['detection_boxes'] = output_dict['detection_boxes'][0]\n", - " output_dict['detection_scores'] = output_dict['detection_scores'][0]\n", - " if 'detection_masks' in output_dict:\n", - " output_dict['detection_masks'] = output_dict['detection_masks'][0]\n", - " return output_dict" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "for image_path in TEST_IMAGE_PATHS:\n", - " image = Image.open(image_path)\n", - " # the array based representation of the image will be used later in order to prepare the\n", - " # result image with boxes and labels on it.\n", - " image_np = load_image_into_numpy_array(image)\n", - " # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n", - " image_np_expanded = np.expand_dims(image_np, axis=0)\n", - " # Actual detection.\n", - " output_dict = run_inference_for_single_image(image_np, detection_graph)\n", - " # Visualization of the results of a detection.\n", - " vis_util.visualize_boxes_and_labels_on_image_array(\n", - " image_np,\n", - " output_dict['detection_boxes'],\n", - " output_dict['detection_classes'],\n", - " output_dict['detection_scores'],\n", - " category_index,\n", - " instance_masks=output_dict.get('detection_masks'),\n", - " use_normalized_coordinates=True,\n", - " line_thickness=8)\n", - " plt.figure(figsize=IMAGE_SIZE)\n", - " plt.imshow(image_np)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "colab": { - "version": "0.3.2" - }, - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.10" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/__init__.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/anchor_generator.proto b/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/anchor_generator.proto deleted file mode 100644 index 
c47b558f0689e40b66d097994a23f08b4bde03b5..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/anchor_generator.proto +++ /dev/null @@ -1,17 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -import "object_detection/protos/grid_anchor_generator.proto"; -import "object_detection/protos/ssd_anchor_generator.proto"; -import "object_detection/protos/multiscale_anchor_generator.proto"; - -// Configuration proto for the anchor generator to use in the object detection -// pipeline. See core/anchor_generator.py for details. -message AnchorGenerator { - oneof anchor_generator_oneof { - GridAnchorGenerator grid_anchor_generator = 1; - SsdAnchorGenerator ssd_anchor_generator = 2; - MultiscaleAnchorGenerator multiscale_anchor_generator = 3; - } -} diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/anchor_generator_pb2.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/anchor_generator_pb2.py deleted file mode 100644 index bfb80a76b249a1f6072612d7b2e81ac80e215807..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/anchor_generator_pb2.py +++ /dev/null @@ -1,102 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/anchor_generator.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from object_detection.protos import grid_anchor_generator_pb2 as object__detection_dot_protos_dot_grid__anchor__generator__pb2 -from object_detection.protos import ssd_anchor_generator_pb2 as object__detection_dot_protos_dot_ssd__anchor__generator__pb2 -from object_detection.protos import multiscale_anchor_generator_pb2 as object__detection_dot_protos_dot_multiscale__anchor__generator__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/anchor_generator.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n.object_detection/protos/anchor_generator.proto\x12\x17object_detection.protos\x1a\x33object_detection/protos/grid_anchor_generator.proto\x1a\x32object_detection/protos/ssd_anchor_generator.proto\x1a\x39object_detection/protos/multiscale_anchor_generator.proto\"\xa2\x02\n\x0f\x41nchorGenerator\x12M\n\x15grid_anchor_generator\x18\x01 \x01(\x0b\x32,.object_detection.protos.GridAnchorGeneratorH\x00\x12K\n\x14ssd_anchor_generator\x18\x02 \x01(\x0b\x32+.object_detection.protos.SsdAnchorGeneratorH\x00\x12Y\n\x1bmultiscale_anchor_generator\x18\x03 \x01(\x0b\x32\x32.object_detection.protos.MultiscaleAnchorGeneratorH\x00\x42\x18\n\x16\x61nchor_generator_oneof') - , - dependencies=[object__detection_dot_protos_dot_grid__anchor__generator__pb2.DESCRIPTOR,object__detection_dot_protos_dot_ssd__anchor__generator__pb2.DESCRIPTOR,object__detection_dot_protos_dot_multiscale__anchor__generator__pb2.DESCRIPTOR,]) -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - - - - -_ANCHORGENERATOR = _descriptor.Descriptor( - name='AnchorGenerator', - full_name='object_detection.protos.AnchorGenerator', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - 
_descriptor.FieldDescriptor( - name='grid_anchor_generator', full_name='object_detection.protos.AnchorGenerator.grid_anchor_generator', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='ssd_anchor_generator', full_name='object_detection.protos.AnchorGenerator.ssd_anchor_generator', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='multiscale_anchor_generator', full_name='object_detection.protos.AnchorGenerator.multiscale_anchor_generator', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='anchor_generator_oneof', full_name='object_detection.protos.AnchorGenerator.anchor_generator_oneof', - index=0, containing_type=None, fields=[]), - ], - serialized_start=240, - serialized_end=530, -) - -_ANCHORGENERATOR.fields_by_name['grid_anchor_generator'].message_type = object__detection_dot_protos_dot_grid__anchor__generator__pb2._GRIDANCHORGENERATOR -_ANCHORGENERATOR.fields_by_name['ssd_anchor_generator'].message_type = object__detection_dot_protos_dot_ssd__anchor__generator__pb2._SSDANCHORGENERATOR -_ANCHORGENERATOR.fields_by_name['multiscale_anchor_generator'].message_type = object__detection_dot_protos_dot_multiscale__anchor__generator__pb2._MULTISCALEANCHORGENERATOR -_ANCHORGENERATOR.oneofs_by_name['anchor_generator_oneof'].fields.append( - _ANCHORGENERATOR.fields_by_name['grid_anchor_generator']) -_ANCHORGENERATOR.fields_by_name['grid_anchor_generator'].containing_oneof = _ANCHORGENERATOR.oneofs_by_name['anchor_generator_oneof'] -_ANCHORGENERATOR.oneofs_by_name['anchor_generator_oneof'].fields.append( - _ANCHORGENERATOR.fields_by_name['ssd_anchor_generator']) -_ANCHORGENERATOR.fields_by_name['ssd_anchor_generator'].containing_oneof = _ANCHORGENERATOR.oneofs_by_name['anchor_generator_oneof'] -_ANCHORGENERATOR.oneofs_by_name['anchor_generator_oneof'].fields.append( - _ANCHORGENERATOR.fields_by_name['multiscale_anchor_generator']) -_ANCHORGENERATOR.fields_by_name['multiscale_anchor_generator'].containing_oneof = _ANCHORGENERATOR.oneofs_by_name['anchor_generator_oneof'] -DESCRIPTOR.message_types_by_name['AnchorGenerator'] = _ANCHORGENERATOR - -AnchorGenerator = _reflection.GeneratedProtocolMessageType('AnchorGenerator', (_message.Message,), dict( - DESCRIPTOR = _ANCHORGENERATOR, - __module__ = 'object_detection.protos.anchor_generator_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.AnchorGenerator) - )) -_sym_db.RegisterMessage(AnchorGenerator) - - -# @@protoc_insertion_point(module_scope) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/argmax_matcher.proto b/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/argmax_matcher.proto deleted file mode 100644 index 947fcb983dcbe3bcb0b39b7c8bd48a50b1667edf..0000000000000000000000000000000000000000 --- 
a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/argmax_matcher.proto +++ /dev/null @@ -1,29 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Configuration proto for ArgMaxMatcher. See -// matchers/argmax_matcher.py for details. -message ArgMaxMatcher { - // Threshold for positive matches. - optional float matched_threshold = 1 [default = 0.5]; - - // Threshold for negative matches. - optional float unmatched_threshold = 2 [default = 0.5]; - - // Whether to construct ArgMaxMatcher without thresholds. - optional bool ignore_thresholds = 3 [default = false]; - - // If True then negative matches are the ones below the unmatched_threshold, - // whereas ignored matches are in between the matched and unmatched - // threshold. If False, then negative matches are in between the matched - // and unmatched threshold, and everything lower than unmatched is ignored. - optional bool negatives_lower_than_unmatched = 4 [default = true]; - - // Whether to ensure each row is matched to at least one column. - optional bool force_match_for_each_row = 5 [default = false]; - - // Force constructed match objects to use matrix multiplication based gather - // instead of standard tf.gather - optional bool use_matmul_gather = 6 [default = false]; -} diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/argmax_matcher_pb2.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/argmax_matcher_pb2.py deleted file mode 100644 index 08209d5d0ca2785b6ea80a4ccdbd4609dec7e929..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/argmax_matcher_pb2.py +++ /dev/null @@ -1,104 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/argmax_matcher.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/argmax_matcher.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n,object_detection/protos/argmax_matcher.proto\x12\x17object_detection.protos\"\xec\x01\n\rArgMaxMatcher\x12\x1e\n\x11matched_threshold\x18\x01 \x01(\x02:\x03\x30.5\x12 \n\x13unmatched_threshold\x18\x02 \x01(\x02:\x03\x30.5\x12 \n\x11ignore_thresholds\x18\x03 \x01(\x08:\x05\x66\x61lse\x12,\n\x1enegatives_lower_than_unmatched\x18\x04 \x01(\x08:\x04true\x12\'\n\x18\x66orce_match_for_each_row\x18\x05 \x01(\x08:\x05\x66\x61lse\x12 \n\x11use_matmul_gather\x18\x06 \x01(\x08:\x05\x66\x61lse') -) -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - - - - -_ARGMAXMATCHER = _descriptor.Descriptor( - name='ArgMaxMatcher', - full_name='object_detection.protos.ArgMaxMatcher', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='matched_threshold', full_name='object_detection.protos.ArgMaxMatcher.matched_threshold', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.5), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), -
_descriptor.FieldDescriptor( - name='unmatched_threshold', full_name='object_detection.protos.ArgMaxMatcher.unmatched_threshold', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.5), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='ignore_thresholds', full_name='object_detection.protos.ArgMaxMatcher.ignore_thresholds', index=2, - number=3, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='negatives_lower_than_unmatched', full_name='object_detection.protos.ArgMaxMatcher.negatives_lower_than_unmatched', index=3, - number=4, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='force_match_for_each_row', full_name='object_detection.protos.ArgMaxMatcher.force_match_for_each_row', index=4, - number=5, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='use_matmul_gather', full_name='object_detection.protos.ArgMaxMatcher.use_matmul_gather', index=5, - number=6, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=74, - serialized_end=310, -) - -DESCRIPTOR.message_types_by_name['ArgMaxMatcher'] = _ARGMAXMATCHER - -ArgMaxMatcher = _reflection.GeneratedProtocolMessageType('ArgMaxMatcher', (_message.Message,), dict( - DESCRIPTOR = _ARGMAXMATCHER, - __module__ = 'object_detection.protos.argmax_matcher_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.ArgMaxMatcher) - )) -_sym_db.RegisterMessage(ArgMaxMatcher) - - -# @@protoc_insertion_point(module_scope) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/bipartite_matcher.proto b/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/bipartite_matcher.proto deleted file mode 100644 index 175ecdd109653ae1c0b37a9655873b72161e963e..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/bipartite_matcher.proto +++ /dev/null @@ -1,11 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Configuration proto for bipartite matcher. See -// matchers/bipartite_matcher.py for details. 
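The two thresholds in `ArgMaxMatcher` above amount to a few lines of logic: each anchor (a column of the similarity matrix) is assigned its best-overlapping groundtruth row, and anything below the thresholds becomes a negative or an ignored match. A toy NumPy rendering of that rule (the -1/-2 labels mirror the unmatched/ignored convention of `matchers/argmax_matcher.py`, but the function itself is illustrative and omits `force_match_for_each_row`):

```python
import numpy as np

def argmax_match(similarity, matched_thr, unmatched_thr,
                 negatives_lower_than_unmatched=True):
    """Toy ArgMax matching: one label per anchor (column).

    Returns the matched row index, or -1 for a negative match and
    -2 for an ignored one, mimicking the proto's threshold semantics.
    """
    labels = similarity.argmax(axis=0)
    best = similarity.max(axis=0)
    between = (best >= unmatched_thr) & (best < matched_thr)
    if negatives_lower_than_unmatched:
        labels[between] = -2                # ignored
        labels[best < unmatched_thr] = -1   # negative
    else:
        labels[between] = -1                # negative
        labels[best < unmatched_thr] = -2   # ignored
    return labels

sim = np.array([[0.9, 0.2, 0.35],   # IoU of groundtruth 0 vs 3 anchors
                [0.1, 0.6, 0.30]])  # IoU of groundtruth 1 vs 3 anchors
print(argmax_match(sim, matched_thr=0.7, unmatched_thr=0.4))
# [ 0 -2 -1]: anchor 0 matched to row 0, anchor 1 ignored, anchor 2 negative
```

The `BipartiteMatcher` defined next sidesteps thresholds entirely by computing a one-to-one assignment between groundtruth rows and anchors.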
-message BipartiteMatcher { - // Force constructed match objects to use matrix multiplication based gather - // instead of standard tf.gather - optional bool use_matmul_gather = 6 [default = false]; -} diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/bipartite_matcher_pb2.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/bipartite_matcher_pb2.py deleted file mode 100644 index d311068f1dc3bf26d2461898235124cde0b9f49e..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/bipartite_matcher_pb2.py +++ /dev/null @@ -1,69 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/bipartite_matcher.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/bipartite_matcher.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n/object_detection/protos/bipartite_matcher.proto\x12\x17object_detection.protos\"4\n\x10\x42ipartiteMatcher\x12 \n\x11use_matmul_gather\x18\x06 \x01(\x08:\x05\x66\x61lse') -) -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - - - - -_BIPARTITEMATCHER = _descriptor.Descriptor( - name='BipartiteMatcher', - full_name='object_detection.protos.BipartiteMatcher', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='use_matmul_gather', full_name='object_detection.protos.BipartiteMatcher.use_matmul_gather', index=0, - number=6, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=76, - serialized_end=128, -) - -DESCRIPTOR.message_types_by_name['BipartiteMatcher'] = _BIPARTITEMATCHER - -BipartiteMatcher = _reflection.GeneratedProtocolMessageType('BipartiteMatcher', (_message.Message,), dict( - DESCRIPTOR = _BIPARTITEMATCHER, - __module__ = 'object_detection.protos.bipartite_matcher_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.BipartiteMatcher) - )) -_sym_db.RegisterMessage(BipartiteMatcher) - - -# @@protoc_insertion_point(module_scope) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/box_coder.proto b/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/box_coder.proto deleted file mode 100644 index 79b818125a33c39022262b9fc7754f4081f6b169..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/box_coder.proto +++ /dev/null @@ -1,19 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -import "object_detection/protos/faster_rcnn_box_coder.proto"; -import "object_detection/protos/keypoint_box_coder.proto"; -import "object_detection/protos/mean_stddev_box_coder.proto"; -import "object_detection/protos/square_box_coder.proto"; - -// 
Configuration proto for the box coder to be used in the object detection -// pipeline. See core/box_coder.py for details. -message BoxCoder { - oneof box_coder_oneof { - FasterRcnnBoxCoder faster_rcnn_box_coder = 1; - MeanStddevBoxCoder mean_stddev_box_coder = 2; - SquareBoxCoder square_box_coder = 3; - KeypointBoxCoder keypoint_box_coder = 4; - } -} diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/box_coder_pb2.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/box_coder_pb2.py deleted file mode 100644 index 3cf2a2e5496a915682a070430e746cfecee5c89f..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/box_coder_pb2.py +++ /dev/null @@ -1,114 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/box_coder.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from object_detection.protos import faster_rcnn_box_coder_pb2 as object__detection_dot_protos_dot_faster__rcnn__box__coder__pb2 -from object_detection.protos import keypoint_box_coder_pb2 as object__detection_dot_protos_dot_keypoint__box__coder__pb2 -from object_detection.protos import mean_stddev_box_coder_pb2 as object__detection_dot_protos_dot_mean__stddev__box__coder__pb2 -from object_detection.protos import square_box_coder_pb2 as object__detection_dot_protos_dot_square__box__coder__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/box_coder.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n\'object_detection/protos/box_coder.proto\x12\x17object_detection.protos\x1a\x33object_detection/protos/faster_rcnn_box_coder.proto\x1a\x30object_detection/protos/keypoint_box_coder.proto\x1a\x33object_detection/protos/mean_stddev_box_coder.proto\x1a.object_detection/protos/square_box_coder.proto\"\xc7\x02\n\x08\x42oxCoder\x12L\n\x15\x66\x61ster_rcnn_box_coder\x18\x01 \x01(\x0b\x32+.object_detection.protos.FasterRcnnBoxCoderH\x00\x12L\n\x15mean_stddev_box_coder\x18\x02 \x01(\x0b\x32+.object_detection.protos.MeanStddevBoxCoderH\x00\x12\x43\n\x10square_box_coder\x18\x03 \x01(\x0b\x32\'.object_detection.protos.SquareBoxCoderH\x00\x12G\n\x12keypoint_box_coder\x18\x04 \x01(\x0b\x32).object_detection.protos.KeypointBoxCoderH\x00\x42\x11\n\x0f\x62ox_coder_oneof') - , - dependencies=[object__detection_dot_protos_dot_faster__rcnn__box__coder__pb2.DESCRIPTOR,object__detection_dot_protos_dot_keypoint__box__coder__pb2.DESCRIPTOR,object__detection_dot_protos_dot_mean__stddev__box__coder__pb2.DESCRIPTOR,object__detection_dot_protos_dot_square__box__coder__pb2.DESCRIPTOR,]) -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - - - - -_BOXCODER = _descriptor.Descriptor( - name='BoxCoder', - full_name='object_detection.protos.BoxCoder', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='faster_rcnn_box_coder', full_name='object_detection.protos.BoxCoder.faster_rcnn_box_coder', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, 
enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='mean_stddev_box_coder', full_name='object_detection.protos.BoxCoder.mean_stddev_box_coder', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='square_box_coder', full_name='object_detection.protos.BoxCoder.square_box_coder', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='keypoint_box_coder', full_name='object_detection.protos.BoxCoder.keypoint_box_coder', index=3, - number=4, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='box_coder_oneof', full_name='object_detection.protos.BoxCoder.box_coder_oneof', - index=0, containing_type=None, fields=[]), - ], - serialized_start=273, - serialized_end=600, -) - -_BOXCODER.fields_by_name['faster_rcnn_box_coder'].message_type = object__detection_dot_protos_dot_faster__rcnn__box__coder__pb2._FASTERRCNNBOXCODER -_BOXCODER.fields_by_name['mean_stddev_box_coder'].message_type = object__detection_dot_protos_dot_mean__stddev__box__coder__pb2._MEANSTDDEVBOXCODER -_BOXCODER.fields_by_name['square_box_coder'].message_type = object__detection_dot_protos_dot_square__box__coder__pb2._SQUAREBOXCODER -_BOXCODER.fields_by_name['keypoint_box_coder'].message_type = object__detection_dot_protos_dot_keypoint__box__coder__pb2._KEYPOINTBOXCODER -_BOXCODER.oneofs_by_name['box_coder_oneof'].fields.append( - _BOXCODER.fields_by_name['faster_rcnn_box_coder']) -_BOXCODER.fields_by_name['faster_rcnn_box_coder'].containing_oneof = _BOXCODER.oneofs_by_name['box_coder_oneof'] -_BOXCODER.oneofs_by_name['box_coder_oneof'].fields.append( - _BOXCODER.fields_by_name['mean_stddev_box_coder']) -_BOXCODER.fields_by_name['mean_stddev_box_coder'].containing_oneof = _BOXCODER.oneofs_by_name['box_coder_oneof'] -_BOXCODER.oneofs_by_name['box_coder_oneof'].fields.append( - _BOXCODER.fields_by_name['square_box_coder']) -_BOXCODER.fields_by_name['square_box_coder'].containing_oneof = _BOXCODER.oneofs_by_name['box_coder_oneof'] -_BOXCODER.oneofs_by_name['box_coder_oneof'].fields.append( - _BOXCODER.fields_by_name['keypoint_box_coder']) -_BOXCODER.fields_by_name['keypoint_box_coder'].containing_oneof = _BOXCODER.oneofs_by_name['box_coder_oneof'] -DESCRIPTOR.message_types_by_name['BoxCoder'] = _BOXCODER - -BoxCoder = _reflection.GeneratedProtocolMessageType('BoxCoder', (_message.Message,), dict( - DESCRIPTOR = _BOXCODER, - __module__ = 'object_detection.protos.box_coder_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.BoxCoder) - )) -_sym_db.RegisterMessage(BoxCoder) - - -# @@protoc_insertion_point(module_scope) diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/box_predictor.proto 
b/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/box_predictor.proto deleted file mode 100644 index f5ceae683f854ef0262d230d45c1a879684e72ab..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/box_predictor.proto +++ /dev/null @@ -1,153 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -import "object_detection/protos/hyperparams.proto"; - - -// Configuration proto for box predictor. See core/box_predictor.py for details. -message BoxPredictor { - oneof box_predictor_oneof { - ConvolutionalBoxPredictor convolutional_box_predictor = 1; - MaskRCNNBoxPredictor mask_rcnn_box_predictor = 2; - RfcnBoxPredictor rfcn_box_predictor = 3; - WeightSharedConvolutionalBoxPredictor weight_shared_convolutional_box_predictor = 4; - } -} - -// Configuration proto for Convolutional box predictor. -message ConvolutionalBoxPredictor { - // Hyperparameters for convolution ops used in the box predictor. - optional Hyperparams conv_hyperparams = 1; - - // Minimum feature depth prior to predicting box encodings and class - // predictions. - optional int32 min_depth = 2 [default = 0]; - - // Maximum feature depth prior to predicting box encodings and class - // predictions. If max_depth is set to 0, no additional feature map will be - // inserted before location and class predictions. - optional int32 max_depth = 3 [default = 0]; - - // Number of the additional conv layers before the predictor. - optional int32 num_layers_before_predictor = 4 [default = 0]; - - // Whether to use dropout for class prediction. - optional bool use_dropout = 5 [default = true]; - - // Keep probability for dropout. - optional float dropout_keep_probability = 6 [default = 0.8]; - - // Size of final convolution kernel. If the spatial resolution of the feature - // map is smaller than the kernel size, then the kernel size is set to - // min(feature_width, feature_height). - optional int32 kernel_size = 7 [default = 1]; - - // Size of the encoding for boxes. - optional int32 box_code_size = 8 [default = 4]; - - // Whether to apply sigmoid to the output of class predictions. - // TODO(jonathanhuang): Do we need this since we have a post-processing - // module? - optional bool apply_sigmoid_to_scores = 9 [default = false]; - - optional float class_prediction_bias_init = 10 [default = 0.0]; - - // Whether to use depthwise separable convolution for box predictor layers. - optional bool use_depthwise = 11 [default = false]; -} - -// Configuration proto for weight shared convolutional box predictor. -message WeightSharedConvolutionalBoxPredictor { - // Hyperparameters for convolution ops used in the box predictor. - optional Hyperparams conv_hyperparams = 1; - - // Number of the additional conv layers before the predictor. - optional int32 num_layers_before_predictor = 4 [default = 0]; - - // Output depth for the convolution ops prior to predicting box encodings - // and class predictions. - optional int32 depth = 2 [default = 0]; - - // Size of final convolution kernel. If the spatial resolution of the feature - // map is smaller than the kernel size, then the kernel size is set to - // min(feature_width, feature_height). - optional int32 kernel_size = 7 [default = 3]; - - // Size of the encoding for boxes. - optional int32 box_code_size = 8 [default = 4]; - - // Bias initialization for class prediction. It has been shown to stabilize - // training when there is a large number of negative boxes. See - // https://arxiv.org/abs/1708.02002 for details.
- optional float class_prediction_bias_init = 10 [default = 0.0]; - - // Whether to use dropout for class prediction. - optional bool use_dropout = 11 [default = false]; - - // Keep probability for dropout. - optional float dropout_keep_probability = 12 [default = 0.8]; -} - -message MaskRCNNBoxPredictor { - // Hyperparameters for fully connected ops used in the box predictor. - optional Hyperparams fc_hyperparams = 1; - - // Whether to use a dropout op prior to both the box and class predictions. - optional bool use_dropout = 2 [default = false]; - - // Keep probability for dropout. This is only used if use_dropout is true. - optional float dropout_keep_probability = 3 [default = 0.5]; - - // Size of the encoding for the boxes. - optional int32 box_code_size = 4 [default = 4]; - - // Hyperparameters for convolution ops used in the box predictor. - optional Hyperparams conv_hyperparams = 5; - - // Whether to predict instance masks inside detection boxes. - optional bool predict_instance_masks = 6 [default = false]; - - // The depth for the first conv2d_transpose op applied to the - // image_features in the mask prediction branch. If set to 0, the value - // will be set automatically based on the number of channels in the image - // features and the number of classes. - optional int32 mask_prediction_conv_depth = 7 [default = 256]; - - // Whether to predict keypoints inside detection boxes. - optional bool predict_keypoints = 8 [default = false]; - - // The height and the width of the predicted mask. - optional int32 mask_height = 9 [default = 15]; - optional int32 mask_width = 10 [default = 15]; - - // The number of convolutions applied to image_features in the mask prediction - // branch. - optional int32 mask_prediction_num_conv_layers = 11 [default = 2]; - optional bool masks_are_class_agnostic = 12 [default = false]; - - // Whether to use one box for all classes rather than a different box for each - // class. - optional bool share_box_across_classes = 13 [default = false]; -} - -message RfcnBoxPredictor { - // Hyperparameters for convolution ops used in the box predictor. - optional Hyperparams conv_hyperparams = 1; - - // Bin sizes for RFCN crops. - optional int32 num_spatial_bins_height = 2 [default = 3]; - - optional int32 num_spatial_bins_width = 3 [default = 3]; - - // Target depth to reduce the input image features to. - optional int32 depth = 4 [default = 1024]; - - // Size of the encoding for the boxes. - optional int32 box_code_size = 5 [default = 4]; - - // Size to resize the rfcn crops to. - optional int32 crop_height = 6 [default = 12]; - - optional int32 crop_width = 7 [default = 12]; -} diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/box_predictor_pb2.py b/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/box_predictor_pb2.py deleted file mode 100644 index 8434aea1585391dc3ef5d7ee58e31e1bb6b8a35b..0000000000000000000000000000000000000000 --- a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/box_predictor_pb2.py +++ /dev/null @@ -1,517 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT!
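The `box_predictor_pb2.py` bindings below are protoc output, but they are also the interface a pipeline config is parsed into. A short usage sketch, assuming the generated `object_detection.protos` package is importable:

```python
from object_detection.protos import box_predictor_pb2

predictor = box_predictor_pb2.BoxPredictor()
# Writing to a submessage of the oneof selects that branch.
mask_head = predictor.mask_rcnn_box_predictor
mask_head.predict_instance_masks = True
mask_head.mask_height = 14
mask_head.mask_width = 14
mask_head.mask_prediction_num_conv_layers = 3

print(predictor.WhichOneof('box_predictor_oneof'))  # mask_rcnn_box_predictor
print(mask_head.dropout_keep_probability)           # 0.5 (proto2 default)
```

Setting any field of `mask_rcnn_box_predictor` selects that branch of `box_predictor_oneof`, which is how the config loader distinguishes the four predictor types.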
-# source: object_detection/protos/box_predictor.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from object_detection.protos import hyperparams_pb2 as object__detection_dot_protos_dot_hyperparams__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/box_predictor.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n+object_detection/protos/box_predictor.proto\x12\x17object_detection.protos\x1a)object_detection/protos/hyperparams.proto\"\x90\x03\n\x0c\x42oxPredictor\x12Y\n\x1b\x63onvolutional_box_predictor\x18\x01 \x01(\x0b\x32\x32.object_detection.protos.ConvolutionalBoxPredictorH\x00\x12P\n\x17mask_rcnn_box_predictor\x18\x02 \x01(\x0b\x32-.object_detection.protos.MaskRCNNBoxPredictorH\x00\x12G\n\x12rfcn_box_predictor\x18\x03 \x01(\x0b\x32).object_detection.protos.RfcnBoxPredictorH\x00\x12s\n)weight_shared_convolutional_box_predictor\x18\x04 \x01(\x0b\x32>.object_detection.protos.WeightSharedConvolutionalBoxPredictorH\x00\x42\x15\n\x13\x62ox_predictor_oneof\"\x90\x03\n\x19\x43onvolutionalBoxPredictor\x12>\n\x10\x63onv_hyperparams\x18\x01 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12\x14\n\tmin_depth\x18\x02 \x01(\x05:\x01\x30\x12\x14\n\tmax_depth\x18\x03 \x01(\x05:\x01\x30\x12&\n\x1bnum_layers_before_predictor\x18\x04 \x01(\x05:\x01\x30\x12\x19\n\x0buse_dropout\x18\x05 \x01(\x08:\x04true\x12%\n\x18\x64ropout_keep_probability\x18\x06 \x01(\x02:\x03\x30.8\x12\x16\n\x0bkernel_size\x18\x07 \x01(\x05:\x01\x31\x12\x18\n\rbox_code_size\x18\x08 \x01(\x05:\x01\x34\x12&\n\x17\x61pply_sigmoid_to_scores\x18\t \x01(\x08:\x05\x66\x61lse\x12%\n\x1a\x63lass_prediction_bias_init\x18\n \x01(\x02:\x01\x30\x12\x1c\n\ruse_depthwise\x18\x0b \x01(\x08:\x05\x66\x61lse\"\xbd\x02\n%WeightSharedConvolutionalBoxPredictor\x12>\n\x10\x63onv_hyperparams\x18\x01 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12&\n\x1bnum_layers_before_predictor\x18\x04 \x01(\x05:\x01\x30\x12\x10\n\x05\x64\x65pth\x18\x02 \x01(\x05:\x01\x30\x12\x16\n\x0bkernel_size\x18\x07 \x01(\x05:\x01\x33\x12\x18\n\rbox_code_size\x18\x08 \x01(\x05:\x01\x34\x12%\n\x1a\x63lass_prediction_bias_init\x18\n \x01(\x02:\x01\x30\x12\x1a\n\x0buse_dropout\x18\x0b \x01(\x08:\x05\x66\x61lse\x12%\n\x18\x64ropout_keep_probability\x18\x0c \x01(\x02:\x03\x30.8\"\x92\x04\n\x14MaskRCNNBoxPredictor\x12<\n\x0e\x66\x63_hyperparams\x18\x01 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12\x1a\n\x0buse_dropout\x18\x02 \x01(\x08:\x05\x66\x61lse\x12%\n\x18\x64ropout_keep_probability\x18\x03 \x01(\x02:\x03\x30.5\x12\x18\n\rbox_code_size\x18\x04 \x01(\x05:\x01\x34\x12>\n\x10\x63onv_hyperparams\x18\x05 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12%\n\x16predict_instance_masks\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\'\n\x1amask_prediction_conv_depth\x18\x07 \x01(\x05:\x03\x32\x35\x36\x12 \n\x11predict_keypoints\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x17\n\x0bmask_height\x18\t \x01(\x05:\x02\x31\x35\x12\x16\n\nmask_width\x18\n \x01(\x05:\x02\x31\x35\x12*\n\x1fmask_prediction_num_conv_layers\x18\x0b \x01(\x05:\x01\x32\x12\'\n\x18masks_are_class_agnostic\x18\x0c 
\x01(\x08:\x05\x66\x61lse\x12\'\n\x18share_box_across_classes\x18\r \x01(\x08:\x05\x66\x61lse\"\xf9\x01\n\x10RfcnBoxPredictor\x12>\n\x10\x63onv_hyperparams\x18\x01 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12\"\n\x17num_spatial_bins_height\x18\x02 \x01(\x05:\x01\x33\x12!\n\x16num_spatial_bins_width\x18\x03 \x01(\x05:\x01\x33\x12\x13\n\x05\x64\x65pth\x18\x04 \x01(\x05:\x04\x31\x30\x32\x34\x12\x18\n\rbox_code_size\x18\x05 \x01(\x05:\x01\x34\x12\x17\n\x0b\x63rop_height\x18\x06 \x01(\x05:\x02\x31\x32\x12\x16\n\ncrop_width\x18\x07 \x01(\x05:\x02\x31\x32') - , - dependencies=[object__detection_dot_protos_dot_hyperparams__pb2.DESCRIPTOR,]) -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - - - - -_BOXPREDICTOR = _descriptor.Descriptor( - name='BoxPredictor', - full_name='object_detection.protos.BoxPredictor', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='convolutional_box_predictor', full_name='object_detection.protos.BoxPredictor.convolutional_box_predictor', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='mask_rcnn_box_predictor', full_name='object_detection.protos.BoxPredictor.mask_rcnn_box_predictor', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='rfcn_box_predictor', full_name='object_detection.protos.BoxPredictor.rfcn_box_predictor', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='weight_shared_convolutional_box_predictor', full_name='object_detection.protos.BoxPredictor.weight_shared_convolutional_box_predictor', index=3, - number=4, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='box_predictor_oneof', full_name='object_detection.protos.BoxPredictor.box_predictor_oneof', - index=0, containing_type=None, fields=[]), - ], - serialized_start=116, - serialized_end=516, -) - - -_CONVOLUTIONALBOXPREDICTOR = _descriptor.Descriptor( - name='ConvolutionalBoxPredictor', - full_name='object_detection.protos.ConvolutionalBoxPredictor', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='conv_hyperparams', full_name='object_detection.protos.ConvolutionalBoxPredictor.conv_hyperparams', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='min_depth', full_name='object_detection.protos.ConvolutionalBoxPredictor.min_depth', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - 
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='max_depth', full_name='object_detection.protos.ConvolutionalBoxPredictor.max_depth', index=2,
-      number=3, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='num_layers_before_predictor', full_name='object_detection.protos.ConvolutionalBoxPredictor.num_layers_before_predictor', index=3,
-      number=4, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='use_dropout', full_name='object_detection.protos.ConvolutionalBoxPredictor.use_dropout', index=4,
-      number=5, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=True,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='dropout_keep_probability', full_name='object_detection.protos.ConvolutionalBoxPredictor.dropout_keep_probability', index=5,
-      number=6, type=2, cpp_type=6, label=1,
-      has_default_value=True, default_value=float(0.8),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='kernel_size', full_name='object_detection.protos.ConvolutionalBoxPredictor.kernel_size', index=6,
-      number=7, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=1,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='box_code_size', full_name='object_detection.protos.ConvolutionalBoxPredictor.box_code_size', index=7,
-      number=8, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=4,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='apply_sigmoid_to_scores', full_name='object_detection.protos.ConvolutionalBoxPredictor.apply_sigmoid_to_scores', index=8,
-      number=9, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='class_prediction_bias_init', full_name='object_detection.protos.ConvolutionalBoxPredictor.class_prediction_bias_init', index=9,
-      number=10, type=2, cpp_type=6, label=1,
-      has_default_value=True, default_value=float(0),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='use_depthwise', full_name='object_detection.protos.ConvolutionalBoxPredictor.use_depthwise', index=10,
-      number=11, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=519,
-  serialized_end=919,
-)
-
-
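As a quick illustration of what these generated descriptors encode (a minimal sketch, not part of the deleted file): the proto2 [default=...] values from box_predictor.proto are baked into each FieldDescriptor above and can be read back at runtime, assuming the generated module is importable as object_detection.protos.box_predictor_pb2.

# Sketch only: inspect the defaults carried by the generated descriptor.
from object_detection.protos import box_predictor_pb2

desc = box_predictor_pb2.ConvolutionalBoxPredictor.DESCRIPTOR
# These mirror the [default=...] annotations in the .proto source.
print(desc.fields_by_name['use_dropout'].default_value)               # True
print(desc.fields_by_name['dropout_keep_probability'].default_value)  # 0.8
print(desc.fields_by_name['kernel_size'].default_value)               # 1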
-_WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR = _descriptor.Descriptor(
-  name='WeightSharedConvolutionalBoxPredictor',
-  full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='conv_hyperparams', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.conv_hyperparams', index=0,
-      number=1, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='num_layers_before_predictor', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.num_layers_before_predictor', index=1,
-      number=4, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='depth', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.depth', index=2,
-      number=2, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='kernel_size', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.kernel_size', index=3,
-      number=7, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=3,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='box_code_size', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.box_code_size', index=4,
-      number=8, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=4,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='class_prediction_bias_init', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.class_prediction_bias_init', index=5,
-      number=10, type=2, cpp_type=6, label=1,
-      has_default_value=True, default_value=float(0),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='use_dropout', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.use_dropout', index=6,
-      number=11, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='dropout_keep_probability', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.dropout_keep_probability', index=7,
-      number=12, type=2, cpp_type=6, label=1,
-      has_default_value=True, default_value=float(0.8),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=922,
-  serialized_end=1239,
-)
-
-
-_MASKRCNNBOXPREDICTOR = _descriptor.Descriptor(
-  name='MaskRCNNBoxPredictor',
-  full_name='object_detection.protos.MaskRCNNBoxPredictor',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='fc_hyperparams', full_name='object_detection.protos.MaskRCNNBoxPredictor.fc_hyperparams', index=0,
-      number=1, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='use_dropout', full_name='object_detection.protos.MaskRCNNBoxPredictor.use_dropout', index=1,
-      number=2, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='dropout_keep_probability', full_name='object_detection.protos.MaskRCNNBoxPredictor.dropout_keep_probability', index=2,
-      number=3, type=2, cpp_type=6, label=1,
-      has_default_value=True, default_value=float(0.5),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='box_code_size', full_name='object_detection.protos.MaskRCNNBoxPredictor.box_code_size', index=3,
-      number=4, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=4,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='conv_hyperparams', full_name='object_detection.protos.MaskRCNNBoxPredictor.conv_hyperparams', index=4,
-      number=5, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='predict_instance_masks', full_name='object_detection.protos.MaskRCNNBoxPredictor.predict_instance_masks', index=5,
-      number=6, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='mask_prediction_conv_depth', full_name='object_detection.protos.MaskRCNNBoxPredictor.mask_prediction_conv_depth', index=6,
-      number=7, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=256,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='predict_keypoints', full_name='object_detection.protos.MaskRCNNBoxPredictor.predict_keypoints', index=7,
-      number=8, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='mask_height', full_name='object_detection.protos.MaskRCNNBoxPredictor.mask_height', index=8,
-      number=9, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=15,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='mask_width', full_name='object_detection.protos.MaskRCNNBoxPredictor.mask_width', index=9,
-      number=10, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=15,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='mask_prediction_num_conv_layers', full_name='object_detection.protos.MaskRCNNBoxPredictor.mask_prediction_num_conv_layers', index=10,
-      number=11, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=2,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='masks_are_class_agnostic', full_name='object_detection.protos.MaskRCNNBoxPredictor.masks_are_class_agnostic', index=11,
-      number=12, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='share_box_across_classes', full_name='object_detection.protos.MaskRCNNBoxPredictor.share_box_across_classes', index=12,
-      number=13, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=1242,
-  serialized_end=1772,
-)
-
-
-_RFCNBOXPREDICTOR = _descriptor.Descriptor(
-  name='RfcnBoxPredictor',
-  full_name='object_detection.protos.RfcnBoxPredictor',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='conv_hyperparams', full_name='object_detection.protos.RfcnBoxPredictor.conv_hyperparams', index=0,
-      number=1, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='num_spatial_bins_height', full_name='object_detection.protos.RfcnBoxPredictor.num_spatial_bins_height', index=1,
-      number=2, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=3,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='num_spatial_bins_width', full_name='object_detection.protos.RfcnBoxPredictor.num_spatial_bins_width', index=2,
-      number=3, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=3,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='depth', full_name='object_detection.protos.RfcnBoxPredictor.depth', index=3,
-      number=4, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=1024,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='box_code_size', full_name='object_detection.protos.RfcnBoxPredictor.box_code_size', index=4,
-      number=5, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=4,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='crop_height', full_name='object_detection.protos.RfcnBoxPredictor.crop_height', index=5,
-      number=6, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=12,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='crop_width', full_name='object_detection.protos.RfcnBoxPredictor.crop_width', index=6,
-      number=7, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=12,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=1775,
-  serialized_end=2024,
-)
-
-_BOXPREDICTOR.fields_by_name['convolutional_box_predictor'].message_type = _CONVOLUTIONALBOXPREDICTOR
-_BOXPREDICTOR.fields_by_name['mask_rcnn_box_predictor'].message_type = _MASKRCNNBOXPREDICTOR
-_BOXPREDICTOR.fields_by_name['rfcn_box_predictor'].message_type = _RFCNBOXPREDICTOR
-_BOXPREDICTOR.fields_by_name['weight_shared_convolutional_box_predictor'].message_type = _WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR
-_BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'].fields.append(
-  _BOXPREDICTOR.fields_by_name['convolutional_box_predictor'])
-_BOXPREDICTOR.fields_by_name['convolutional_box_predictor'].containing_oneof = _BOXPREDICTOR.oneofs_by_name['box_predictor_oneof']
-_BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'].fields.append(
-  _BOXPREDICTOR.fields_by_name['mask_rcnn_box_predictor'])
-_BOXPREDICTOR.fields_by_name['mask_rcnn_box_predictor'].containing_oneof = _BOXPREDICTOR.oneofs_by_name['box_predictor_oneof']
-_BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'].fields.append(
-  _BOXPREDICTOR.fields_by_name['rfcn_box_predictor'])
-_BOXPREDICTOR.fields_by_name['rfcn_box_predictor'].containing_oneof = _BOXPREDICTOR.oneofs_by_name['box_predictor_oneof']
-_BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'].fields.append(
-  _BOXPREDICTOR.fields_by_name['weight_shared_convolutional_box_predictor'])
-_BOXPREDICTOR.fields_by_name['weight_shared_convolutional_box_predictor'].containing_oneof = _BOXPREDICTOR.oneofs_by_name['box_predictor_oneof']
-_CONVOLUTIONALBOXPREDICTOR.fields_by_name['conv_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS
-_WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR.fields_by_name['conv_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS
-_MASKRCNNBOXPREDICTOR.fields_by_name['fc_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS
-_MASKRCNNBOXPREDICTOR.fields_by_name['conv_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS
-_RFCNBOXPREDICTOR.fields_by_name['conv_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS
-DESCRIPTOR.message_types_by_name['BoxPredictor'] = _BOXPREDICTOR
-DESCRIPTOR.message_types_by_name['ConvolutionalBoxPredictor'] = _CONVOLUTIONALBOXPREDICTOR
-DESCRIPTOR.message_types_by_name['WeightSharedConvolutionalBoxPredictor'] = _WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR
-DESCRIPTOR.message_types_by_name['MaskRCNNBoxPredictor'] = _MASKRCNNBOXPREDICTOR
-DESCRIPTOR.message_types_by_name['RfcnBoxPredictor'] = _RFCNBOXPREDICTOR
-
-BoxPredictor = _reflection.GeneratedProtocolMessageType('BoxPredictor', (_message.Message,), dict(
-  DESCRIPTOR = _BOXPREDICTOR,
-  __module__ = 'object_detection.protos.box_predictor_pb2'
-  # @@protoc_insertion_point(class_scope:object_detection.protos.BoxPredictor)
-  ))
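For context, a minimal usage sketch (not part of the deleted file) of how a generated module like this is typically consumed: the pipeline .config files are text-format protos, and parsing a box_predictor block into the BoxPredictor message sets exactly one branch of box_predictor_oneof. This assumes the module is importable as object_detection.protos.box_predictor_pb2; the field values below are illustrative.

# Sketch only: parse a text-format box_predictor block and inspect the oneof.
from google.protobuf import text_format
from object_detection.protos import box_predictor_pb2

config_text = """
mask_rcnn_box_predictor {
  use_dropout: false
  dropout_keep_probability: 1.0
  predict_instance_masks: true
  mask_height: 14
  mask_width: 14
}
"""

box_predictor = box_predictor_pb2.BoxPredictor()
text_format.Merge(config_text, box_predictor)

# The oneof records which predictor variant was configured.
assert box_predictor.WhichOneof('box_predictor_oneof') == 'mask_rcnn_box_predictor'
print(box_predictor.mask_rcnn_box_predictor.mask_height)  # 14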
-_sym_db.RegisterMessage(BoxPredictor)
-
-ConvolutionalBoxPredictor = _reflection.GeneratedProtocolMessageType('ConvolutionalBoxPredictor', (_message.Message,), dict(
-  DESCRIPTOR = _CONVOLUTIONALBOXPREDICTOR,
-  __module__ = 'object_detection.protos.box_predictor_pb2'
-  # @@protoc_insertion_point(class_scope:object_detection.protos.ConvolutionalBoxPredictor)
-  ))
-_sym_db.RegisterMessage(ConvolutionalBoxPredictor)
-
-WeightSharedConvolutionalBoxPredictor = _reflection.GeneratedProtocolMessageType('WeightSharedConvolutionalBoxPredictor', (_message.Message,), dict(
-  DESCRIPTOR = _WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR,
-  __module__ = 'object_detection.protos.box_predictor_pb2'
-  # @@protoc_insertion_point(class_scope:object_detection.protos.WeightSharedConvolutionalBoxPredictor)
-  ))
-_sym_db.RegisterMessage(WeightSharedConvolutionalBoxPredictor)
-
-MaskRCNNBoxPredictor = _reflection.GeneratedProtocolMessageType('MaskRCNNBoxPredictor', (_message.Message,), dict(
-  DESCRIPTOR = _MASKRCNNBOXPREDICTOR,
-  __module__ = 'object_detection.protos.box_predictor_pb2'
-  # @@protoc_insertion_point(class_scope:object_detection.protos.MaskRCNNBoxPredictor)
-  ))
-_sym_db.RegisterMessage(MaskRCNNBoxPredictor)
-
-RfcnBoxPredictor = _reflection.GeneratedProtocolMessageType('RfcnBoxPredictor', (_message.Message,), dict(
-  DESCRIPTOR = _RFCNBOXPREDICTOR,
-  __module__ = 'object_detection.protos.box_predictor_pb2'
-  # @@protoc_insertion_point(class_scope:object_detection.protos.RfcnBoxPredictor)
-  ))
-_sym_db.RegisterMessage(RfcnBoxPredictor)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/eval.proto b/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/eval.proto
deleted file mode 100644
index 2108ddf41f0f343381d66c9fd52e74a102237c2c..0000000000000000000000000000000000000000
--- a/research/mlperf_object_detection/Mask_RCNN/object_detection/protos/eval.proto
+++ /dev/null
@@ -1,78 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-// Message for configuring DetectionModel evaluation jobs (eval.py).
-message EvalConfig {
-  // Number of visualization images to generate.
-  optional uint32 num_visualizations = 1 [default=10];
-
-  // Number of examples to process for evaluation.
-  optional uint32 num_examples = 2 [default=5000];
-
-  // How often to run evaluation.
-  optional uint32 eval_interval_secs = 3 [default=300];
-
-  // Maximum number of times to run evaluation. If set to 0, will run forever.
-  optional uint32 max_evals = 4 [default=0];
-
-  // Whether the TensorFlow graph used for evaluation should be saved to disk.
-  optional bool save_graph = 5 [default=false];
-
-  // Path to directory to store visualizations in. If empty, visualization
-  // images are not exported (only shown on Tensorboard).
-  optional string visualization_export_dir = 6 [default=""];
-
-  // BNS name of the TensorFlow master.
-  optional string eval_master = 7 [default=""];
-
-  // Type of metrics to use for evaluation.
-  repeated string metrics_set = 8;
-
-  // Path to export detections to in COCO-compatible JSON format.
-  optional string export_path = 9 [default=''];
-
-  // Option to not read groundtruth labels and only export detections to a
-  // COCO-compatible JSON file.
-  optional bool ignore_groundtruth = 10 [default=false];
-
-  // Use exponential moving averages of variables for evaluation.
-  // TODO(rathodv): When this is false make sure the model is constructed
-  // without moving averages in restore_fn.
-  optional bool use_moving_averages = 11 [default=false];
-
-  // Whether to evaluate instance masks.
-  // Note that since there is currently no evaluation code for instance
-  // segmentation, this option is unused.
-  optional bool eval_instance_masks = 12 [default=false];
-
-  // Minimum score threshold for a detected object box to be visualized.
-  optional float min_score_threshold = 13 [default=0.5];
-
-  // Maximum number of detections to visualize.
-  optional int32 max_num_boxes_to_visualize = 14 [default=20];
-
-  // When drawing a single detection, each label is by default visualized as
-  //