Unverified Commit 97760186 authored by Jonathan Huang, committed by GitHub

Merge pull request #4460 from pkulzc/master

Release evaluation code for OI Challenge 2018 and minor fixes. 
parents ed901b73 a703fc0c
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Utilities for creating TFRecords of TF examples for the Open Images dataset.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
def open_sharded_output_tfrecords(exit_stack, base_path, num_shards):
"""Opens all TFRecord shards for writing and adds them to an exit stack.
Args:
exit_stack: A contextlib2.ExitStack used to automatically close the TFRecords
opened in this function.
base_path: The base path for all shards.
num_shards: The number of shards.
Returns:
The list of opened TFRecords. Position k in the list corresponds to shard k.
"""
tf_record_output_filenames = [
'{}-{:05d}-of-{:05d}'.format(base_path, idx, num_shards)
for idx in range(num_shards)
]
tfrecords = [
exit_stack.enter_context(tf.python_io.TFRecordWriter(file_name))
for file_name in tf_record_output_filenames
]
return tfrecords
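For orientation, a small usage sketch of the helper above (the output path and record payloads are made up; a real pipeline would write serialized `tf.train.Example` protos):

```python
import contextlib2

from object_detection.dataset_tools import tf_record_creation_util

# Hypothetical sketch: shard the output 10 ways and round-robin records across
# shards. The ExitStack closes every TFRecordWriter when the block exits.
with contextlib2.ExitStack() as tf_record_close_stack:
  output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
      tf_record_close_stack, '/tmp/example_dataset.tfrecord', num_shards=10)
  for index, serialized_example in enumerate([b'rec_a', b'rec_b', b'rec_c']):
    output_tfrecords[index % 10].write(serialized_example)
```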
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tf_record_creation_util.py."""
import os
import contextlib2
import tensorflow as tf
from object_detection.dataset_tools import tf_record_creation_util
class OpenOutputTfrecordsTests(tf.test.TestCase):
def test_sharded_tfrecord_writes(self):
with contextlib2.ExitStack() as tf_record_close_stack:
output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
tf_record_close_stack,
os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), 10)
for idx in range(10):
output_tfrecords[idx].write('test_{}'.format(idx))
for idx in range(10):
tf_record_path = '{}-{:05d}-of-00010'.format(
os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), idx)
records = list(tf.python_io.tf_record_iterator(tf_record_path))
self.assertAllEqual(records, ['test_{}'.format(idx)])
if __name__ == '__main__':
tf.test.main()
@@ -39,12 +39,14 @@ EVAL_METRICS_CLASS_DICT = {
         object_detection_evaluation.PascalInstanceSegmentationEvaluator,
     'weighted_pascal_voc_instance_segmentation_metrics':
         object_detection_evaluation.WeightedPascalInstanceSegmentationEvaluator,
-    'open_images_detection_metrics':
+    'open_images_V2_detection_metrics':
         object_detection_evaluation.OpenImagesDetectionEvaluator,
     'coco_detection_metrics':
         coco_evaluation.CocoDetectionEvaluator,
     'coco_mask_metrics':
         coco_evaluation.CocoMaskEvaluator,
+    'oid_challenge_object_detection_metrics':
+        object_detection_evaluation.OpenImagesDetectionChallengeEvaluator,
 }

 EVAL_DEFAULT_METRIC = 'pascal_voc_detection_metrics'
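As a rough sketch of how this registry is consumed (the evaluator constructor arguments are an assumption here and are not part of this hunk): the `metrics_set` string from `EvalConfig` selects a class from the dictionary, which is then instantiated with the dataset's category list.

```python
# Sketch only; assumes the evaluator classes accept a list of category dicts,
# which is not shown in this diff.
categories = [{'id': 1, 'name': '/m/01g317'}]  # hypothetical single category
evaluator_cls = EVAL_METRICS_CLASS_DICT['oid_challenge_object_detection_metrics']
evaluator = evaluator_cls(categories)
```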
@@ -4,12 +4,14 @@ The Tensorflow Object Detection API currently supports three evaluation protocol
 that can be configured in `EvalConfig` by setting `metrics_set` to the
 corresponding value.

-## PASCAL VOC 2007 detection metric
+## PASCAL VOC 2010 detection metric

 `EvalConfig.metrics_set='pascal_voc_detection_metrics'`

-The commonly used mAP metric for evaluating the quality of object detectors, computed according to the protocol of the PASCAL VOC Challenge 2007.
-The protocol is available [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/devkit_doc_07-Jun-2007.pdf).
+The commonly used mAP metric for evaluating the quality of object detectors,
+computed according to the protocol of the PASCAL VOC Challenge 2010-2012. The
+protocol is available
+[here](http://host.robots.ox.ac.uk/pascal/VOC/voc2010/devkit_doc_08-May-2010.pdf).

 ## Weighted PASCAL VOC detection metric
@@ -20,33 +22,36 @@ precision when treating all classes as a single class. In comparison,
 PASCAL metrics computes the mean average precision as the mean of the
 per-class average precisions.

-For example, the test set consists of two classes, "cat" and "dog", and there are ten times more boxes of "cat" than those of "dog".
-According to PASCAL VOC 2007 metric, performance on each of the two classes would contribute equally towards the final mAP value,
-while for the Weighted PASCAL VOC metric the final mAP value will be influenced by frequency of each class.
+For example, the test set consists of two classes, "cat" and "dog", and there
+are ten times more boxes of "cat" than those of "dog". According to PASCAL VOC
+2010 metric, performance on each of the two classes would contribute equally
+towards the final mAP value, while for the Weighted PASCAL VOC metric the final
+mAP value will be influenced by frequency of each class.

-## PASCAL VOC 2007 instance segmentation metric
+## PASCAL VOC 2010 instance segmentation metric

 `EvalConfig.metrics_set='pascal_voc_instance_segmentation_metrics'`

-Similar to pascal voc 2007 detection metric, but computes the intersection over
+Similar to Pascal VOC 2010 detection metric, but computes the intersection over
 union based on the object masks instead of object boxes.

 ## Weighted PASCAL VOC instance segmentation metric

 `EvalConfig.metrics_set='weighted_pascal_voc_instance_segmentation_metrics'`

-Similar to the weighted pascal voc 2007 detection metric, but computes the
+Similar to the weighted pascal voc 2010 detection metric, but computes the
 intersection over union based on the object masks instead of object boxes.

-## Open Images detection metric {#open-images}
+## Open Images V2 detection metric

-`EvalConfig.metrics_set='open_images_metrics'`
+`EvalConfig.metrics_set='open_images_V2_detection_metrics'`

-This metric is defined originally for evaluating detector performance on [Open Images V2 dataset](https://github.com/openimages/dataset)
-and is fairly similar to the PASCAL VOC 2007 metric mentioned above.
-It computes interpolated average precision (AP) for each class and averages it among all classes (mAP).
+This metric is defined originally for evaluating detector performance on [Open
+Images V2 dataset](https://github.com/openimages/dataset) and is fairly similar
+to the PASCAL VOC 2010 metric mentioned above. It computes interpolated average
+precision (AP) for each class and averages it among all classes (mAP).

-The difference to the PASCAL VOC 2007 metric is the following: Open Images
+The difference to the PASCAL VOC 2010 metric is the following: Open Images
 annotations contain `group-of` ground-truth boxes (see [Open Images data
 description](https://github.com/openimages/dataset#annotations-human-bboxcsv)),
 that are treated differently for the purpose of deciding whether detections are
@@ -61,7 +66,7 @@ such that:
   box is greater than the IoU threshold (default value 0.5). \
   Illustration of handling non-group-of boxes: \
   ![alt
-  groupof_case_eval](img/nongroupof_case_eval.png "illustration of handling non-group-of boxes: yellow box - ground truth bounding box; green box - true positive; red box - false positives."){width="500" height="270"}
+  groupof_case_eval](img/nongroupof_case_eval.png "illustration of handling non-group-of boxes: yellow box - ground truth bounding box; green box - true positive; red box - false positives.")

   * yellow box - ground-truth box;
   * green box - true positive;
@@ -80,7 +85,7 @@ ground-truth box such that:
   ground-truth box. \
   Illustration of handling `group-of` boxes: \
   ![alt
-  groupof_case_eval](img/groupof_case_eval.png "illustration of handling group-of boxes: yellow box - ground truth bounding box; grey boxes - two detections of cars, that are ignored; red box - false positive."){width="500" height="270"}
+  groupof_case_eval](img/groupof_case_eval.png "illustration of handling group-of boxes: yellow box - ground truth bounding box; grey boxes - two detections of cars, that are ignored; red box - false positive.")

   * yellow box - ground-truth box;
   * grey boxes - two detections on cars, that are ignored;
@@ -105,3 +110,20 @@ other kind of car is annotated as "car" (for example, a sedan). Given this
 convention, the evaluation software treats all classes independently, ignoring
 the hierarchy. To achieve high performance values, object detectors should
 output bounding-boxes labelled in the same manner.
+
+## OID Challenge Object Detection Metric 2018
+
+`EvalConfig.metrics_set='oid_challenge_object_detection_metrics'`
+
+The metric for the OID Challenge Object Detection Metric 2018, Object Detection
+track. The description is provided on the [Open Images Challenge
+website](https://storage.googleapis.com/openimages/web/challenge.html).
+
+## OID Challenge Visual Relationship Detection Metric 2018
+
+The metric for the OID Challenge Visual Relationship Detection Metric 2018, Visual
+Relationship Detection track. The description is provided on the [Open Images
+Challenge
+website](https://storage.googleapis.com/openimages/web/challenge.html). Note:
+this is currently a stand-alone metric, that can be used only through the
+`metrics/oid_vrd_challenge_evaluation.py` util.
@@ -93,7 +93,7 @@ mkdir ${SPLIT}_tfrecords
 PYTHONPATH=$PYTHONPATH:$(readlink -f ..) \
 python -m object_detection/dataset_tools/create_oid_tf_record \
-  --input_annotations_csv 2017_07/$SPLIT/annotations-human-bbox.csv \
+  --input_box_annotations_csv 2017_07/$SPLIT/annotations-human-bbox.csv \
   --input_images_directory raw_images_${SPLIT} \
   --input_label_map ../object_detection/data/oid_bbox_trainable_label_map.pbtxt \
   --output_tf_record_path_prefix ${SPLIT}_tfrecords/$SPLIT.tfrecord \
@@ -214,7 +214,7 @@ tf_record_input_reader: { input_path: '${SPLIT}_detections.tfrecord@${NUM_SHARDS
 " > ${SPLIT}_eval_metrics/${SPLIT}_input_config.pbtxt

 echo "
-metrics_set: 'open_images_metrics'
+metrics_set: 'open_images_V2_detection_metrics'
 " > ${SPLIT}_eval_metrics/${SPLIT}_eval_config.pbtxt
 ```
@@ -56,12 +56,15 @@ def transform_input_data(tensor_dict,
   """A single function that is responsible for all input data transformations.

   Data transformation functions are applied in the following order.
-  1. data_augmentation_fn (optional): applied on tensor_dict.
-  2. model_preprocess_fn: applied only on image tensor in tensor_dict.
-  3. image_resizer_fn: applied on original image and instance mask tensor in
+  1. If key fields.InputDataFields.image_additional_channels is present in
+     tensor_dict, the additional channels will be merged into
+     fields.InputDataFields.image.
+  2. data_augmentation_fn (optional): applied on tensor_dict.
+  3. model_preprocess_fn: applied only on image tensor in tensor_dict.
+  4. image_resizer_fn: applied on original image and instance mask tensor in
      tensor_dict.
-  4. one_hot_encoding: applied to classes tensor in tensor_dict.
-  5. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
+  5. one_hot_encoding: applied to classes tensor in tensor_dict.
+  6. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
      same they can be merged into a single box with an associated k-hot class
      label.
@@ -88,6 +91,11 @@ def transform_input_data(tensor_dict,
     A dictionary keyed by fields.InputDataFields containing the tensors obtained
     after applying all the transformations.
   """
+  if fields.InputDataFields.image_additional_channels in tensor_dict:
+    channels = tensor_dict[fields.InputDataFields.image_additional_channels]
+    tensor_dict[fields.InputDataFields.image] = tf.concat(
+        [tensor_dict[fields.InputDataFields.image], channels], axis=2)
+
   if retain_original_image:
     tensor_dict[fields.InputDataFields.original_image] = tf.cast(
         tensor_dict[fields.InputDataFields.image], tf.uint8)
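A self-contained sketch (toy shapes, not the library function) of what the new step 1 above does: any additional channels are appended to the image along the depth axis before further preprocessing.

```python
import tensorflow as tf

image = tf.ones([4, 4, 3], dtype=tf.float32)    # e.g. an RGB image
extra = tf.zeros([4, 4, 2], dtype=tf.float32)   # two hypothetical extra channels
merged = tf.concat([image, extra], axis=2)      # resulting shape: [4, 4, 5]

with tf.Session() as sess:
  print(sess.run(tf.shape(merged)))  # [4 4 5]
```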
@@ -398,6 +398,33 @@ def _fake_image_resizer_fn(image, mask):
 class DataTransformationFnTest(tf.test.TestCase):

+  def test_combine_additional_channels_if_present(self):
+    image = np.random.rand(4, 4, 3).astype(np.float32)
+    additional_channels = np.random.rand(4, 4, 2).astype(np.float32)
+    tensor_dict = {
+        fields.InputDataFields.image:
+            tf.constant(image),
+        fields.InputDataFields.image_additional_channels:
+            tf.constant(additional_channels),
+        fields.InputDataFields.groundtruth_classes:
+            tf.constant(np.array([1, 1], np.int32))
+    }
+
+    input_transformation_fn = functools.partial(
+        inputs.transform_input_data,
+        model_preprocess_fn=_fake_model_preprocessor_fn,
+        image_resizer_fn=_fake_image_resizer_fn,
+        num_classes=1)
+    with self.test_session() as sess:
+      transformed_inputs = sess.run(
+          input_transformation_fn(tensor_dict=tensor_dict))
+    self.assertAllEqual(transformed_inputs[fields.InputDataFields.image].dtype,
+                        tf.float32)
+    self.assertAllEqual(transformed_inputs[fields.InputDataFields.image].shape,
+                        [4, 4, 5])
+    self.assertAllClose(transformed_inputs[fields.InputDataFields.image],
+                        np.concatenate((image, additional_channels), axis=2))
+
   def test_returns_correct_class_label_encodings(self):
     tensor_dict = {
         fields.InputDataFields.image:
@@ -1199,7 +1199,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
     if self._is_training:
       proposal_boxes = tf.stop_gradient(proposal_boxes)
       if not self._hard_example_miner:
-        (groundtruth_boxlists, groundtruth_classes_with_background_list,
+        (groundtruth_boxlists, groundtruth_classes_with_background_list, _,
          _) = self._format_groundtruth_data(true_image_shapes)
         (proposal_boxes, proposal_scores,
          num_proposals) = self._unpad_proposals_and_sample_box_classifier_batch(
@@ -1358,9 +1358,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
         resized_masks_list.append(resized_mask)

       groundtruth_masks_list = resized_masks_list
+    groundtruth_weights_list = None
+    if self.groundtruth_has_field(fields.BoxListFields.weights):
+      groundtruth_weights_list = self.groundtruth_lists(
+          fields.BoxListFields.weights)

     return (groundtruth_boxlists, groundtruth_classes_with_background_list,
-            groundtruth_masks_list)
+            groundtruth_masks_list, groundtruth_weights_list)

   def _sample_box_classifier_minibatch(self,
                                        proposal_boxlist,
@@ -1586,14 +1590,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
     """
     with tf.name_scope(scope, 'Loss', prediction_dict.values()):
       (groundtruth_boxlists, groundtruth_classes_with_background_list,
-       groundtruth_masks_list) = self._format_groundtruth_data(
-           true_image_shapes)
+       groundtruth_masks_list, groundtruth_weights_list
+      ) = self._format_groundtruth_data(true_image_shapes)
       loss_dict = self._loss_rpn(
           prediction_dict['rpn_box_encodings'],
           prediction_dict['rpn_objectness_predictions_with_background'],
-          prediction_dict['anchors'],
-          groundtruth_boxlists,
-          groundtruth_classes_with_background_list)
+          prediction_dict['anchors'], groundtruth_boxlists,
+          groundtruth_classes_with_background_list, groundtruth_weights_list)
       if self._number_of_stages > 1:
         loss_dict.update(
             self._loss_box_classifier(
@@ -1603,18 +1606,17 @@ class FasterRCNNMetaArch(model.DetectionModel):
                 prediction_dict['num_proposals'],
                 groundtruth_boxlists,
                 groundtruth_classes_with_background_list,
+                groundtruth_weights_list,
                 prediction_dict['image_shape'],
                 prediction_dict.get('mask_predictions'),
                 groundtruth_masks_list,
             ))
     return loss_dict

-  def _loss_rpn(self,
-                rpn_box_encodings,
-                rpn_objectness_predictions_with_background,
-                anchors,
-                groundtruth_boxlists,
-                groundtruth_classes_with_background_list):
+  def _loss_rpn(self, rpn_box_encodings,
+                rpn_objectness_predictions_with_background, anchors,
+                groundtruth_boxlists, groundtruth_classes_with_background_list,
+                groundtruth_weights_list):
     """Computes scalar RPN loss tensors.

     Uses self._proposal_target_assigner to obtain regression and classification
@@ -1637,6 +1639,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
       groundtruth_classes_with_background_list: A list of 2-D one-hot
         (or k-hot) tensors of shape [num_boxes, num_classes+1] containing the
         class targets with the 0th index assumed to map to the background class.
+      groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
+        [num_boxes] containing weights for groundtruth boxes.

     Returns:
       a dictionary mapping loss keys (`first_stage_localization_loss`,
@@ -1647,7 +1651,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
     (batch_cls_targets, batch_cls_weights, batch_reg_targets,
      batch_reg_weights, _) = target_assigner.batch_assign_targets(
          self._proposal_target_assigner, box_list.BoxList(anchors),
-         groundtruth_boxlists, len(groundtruth_boxlists)*[None])
+         groundtruth_boxlists,
+         len(groundtruth_boxlists) * [None], groundtruth_weights_list)
     batch_cls_targets = tf.squeeze(batch_cls_targets, axis=2)

     def _minibatch_subsample_fn(inputs):
@@ -1695,6 +1700,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
                            num_proposals,
                            groundtruth_boxlists,
                            groundtruth_classes_with_background_list,
+                           groundtruth_weights_list,
                            image_shape,
                            prediction_masks=None,
                            groundtruth_masks_list=None):
@@ -1731,6 +1737,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
       groundtruth_classes_with_background_list: a list of 2-D one-hot
         (or k-hot) tensors of shape [num_boxes, num_classes + 1] containing the
         class targets with the 0th index assumed to map to the background class.
+      groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
+        [num_boxes] containing weights for groundtruth boxes.
       image_shape: a 1-D tensor of shape [4] representing the image shape.
       prediction_masks: an optional 4-D tensor with shape [total_num_proposals,
         num_classes, mask_height, mask_width] containing the instance masks for
@@ -1765,7 +1773,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
     (batch_cls_targets_with_background, batch_cls_weights, batch_reg_targets,
      batch_reg_weights, _) = target_assigner.batch_assign_targets(
          self._detector_target_assigner, proposal_boxlists,
-         groundtruth_boxlists, groundtruth_classes_with_background_list)
+         groundtruth_boxlists, groundtruth_classes_with_background_list,
+         groundtruth_weights_list)

     class_predictions_with_background = tf.reshape(
         class_predictions_with_background,
@@ -1847,8 +1856,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
           unmatched_cls_target=tf.zeros(image_shape[1:3], dtype=tf.float32))
       (batch_mask_targets, _, _,
        batch_mask_target_weights, _) = target_assigner.batch_assign_targets(
-           mask_target_assigner, proposal_boxlists,
-           groundtruth_boxlists, groundtruth_masks_list)
+           mask_target_assigner, proposal_boxlists, groundtruth_boxlists,
+           groundtruth_masks_list, groundtruth_weights_list)

       # Pad the prediction_masks with to add zeros for background class to be
       # consistent with class predictions.
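To make the new `groundtruth_weights_list` plumbing concrete, here is a toy NumPy illustration (not the actual loss code) of how a per-box weight of zero drops that box's contribution from a weighted loss sum; the updated test below exercises this with a `[1, 0]` weight vector on the third image.

```python
import numpy as np

per_box_loss = np.array([0.7, 1.3], dtype=np.float32)  # hypothetical per-box losses
weights = np.array([1.0, 0.0], dtype=np.float32)       # second box is ignored
print(np.sum(per_box_loss * weights))                   # 0.7
```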
@@ -26,6 +26,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
 from object_detection.protos import box_predictor_pb2
 from object_detection.protos import hyperparams_pb2
 from object_detection.protos import post_processing_pb2
+from object_detection.utils import test_utils

 slim = tf.contrib.slim
 BOX_CODE_SIZE = 4
@@ -650,8 +651,11 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
     with self.test_session() as sess:
       proposals_out = sess.run(proposals)
-      self.assertAllClose(proposals_out['detection_boxes'],
-                          expected_proposal_boxes)
+      for image_idx in range(batch_size):
+        self.assertTrue(
+            test_utils.first_rows_close_as_set(
+                proposals_out['detection_boxes'][image_idx].tolist(),
+                expected_proposal_boxes[image_idx]))
       self.assertAllClose(proposals_out['detection_scores'],
                           expected_proposal_scores)
       self.assertAllEqual(proposals_out['num_detections'],
@@ -810,7 +814,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
   def test_loss_full(self):
     model = self._build_model(
         is_training=True, number_of_stages=2, second_stage_batch_size=6)
-    batch_size = 2
+    batch_size = 3
     anchors = tf.constant(
         [[0, 0, 16, 16],
          [0, 16, 16, 32],
@@ -822,31 +826,25 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
          BOX_CODE_SIZE], dtype=tf.float32)
     # use different numbers for the objectness category to break ties in
     # order of boxes returned by NMS
-    rpn_objectness_predictions_with_background = tf.constant([
-        [[-10, 13],
-         [10, -10],
-         [10, -11],
-         [-10, 12]],
-        [[10, -10],
-         [-10, 13],
-         [-10, 12],
-         [10, -11]]], dtype=tf.float32)
+    rpn_objectness_predictions_with_background = tf.constant(
+        [[[-10, 13], [10, -10], [10, -11], [-10, 12]], [[10, -10], [-10, 13], [
+            -10, 12
+        ], [10, -11]], [[10, -10], [-10, 13], [-10, 12], [10, -11]]],
+        dtype=tf.float32)
     image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)

-    num_proposals = tf.constant([6, 6], dtype=tf.int32)
+    num_proposals = tf.constant([6, 6, 6], dtype=tf.int32)
     proposal_boxes = tf.constant(
-        2 * [[[0, 0, 16, 16],
-              [0, 16, 16, 32],
-              [16, 0, 32, 16],
-              [16, 16, 32, 32],
-              [0, 0, 16, 16],
-              [0, 16, 16, 32]]], dtype=tf.float32)
+        3 * [[[0, 0, 16, 16], [0, 16, 16, 32], [16, 0, 32, 16],
+              [16, 16, 32, 32], [0, 0, 16, 16], [0, 16, 16, 32]]],
+        dtype=tf.float32)
     refined_box_encodings = tf.zeros(
         (batch_size * model.max_num_proposals,
          model.num_classes,
         BOX_CODE_SIZE), dtype=tf.float32)
     class_predictions_with_background = tf.constant(
-        [[-10, 10, -10],  # first image
+        [
+            [-10, 10, -10],  # first image
             [10, -10, -10],
             [10, -10, -10],
             [-10, -10, 10],
@@ -857,7 +855,15 @@
             [-10, 10, -10],
             [10, -10, -10],
             [10, -10, -10],
-         [-10, 10, -10]], dtype=tf.float32)
+            [-10, 10, -10],
+            [10, -10, -10],  # third image
+            [-10, 10, -10],
+            [-10, 10, -10],
+            [10, -10, -10],
+            [10, -10, -10],
+            [-10, 10, -10]
+        ],
+        dtype=tf.float32)

     mask_predictions_logits = 20 * tf.ones((batch_size *
                                             model.max_num_proposals,
@@ -867,18 +873,29 @@
     groundtruth_boxes_list = [
         tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32),
-        tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)]
-    groundtruth_classes_list = [tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
-                                tf.constant([[1, 0], [1, 0]], dtype=tf.float32)]
+        tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32),
+        tf.constant([[0, .5, .5, 1], [.5, 0, 1, 1]], dtype=tf.float32)
+    ]
+    groundtruth_classes_list = [
+        tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
+        tf.constant([[1, 0], [1, 0]], dtype=tf.float32),
+        tf.constant([[1, 0], [0, 1]], dtype=tf.float32)
+    ]

     # Set all elements of groundtruth mask to 1.0. In this case all proposal
     # crops of the groundtruth masks should return a mask that covers the entire
     # proposal. Thus, if mask_predictions_logits element values are all greater
     # than 20, the loss should be zero.
-    groundtruth_masks_list = [tf.convert_to_tensor(np.ones((2, 32, 32)),
-                                                   dtype=tf.float32),
-                              tf.convert_to_tensor(np.ones((2, 32, 32)),
-                                                   dtype=tf.float32)]
+    groundtruth_masks_list = [
+        tf.convert_to_tensor(np.ones((2, 32, 32)), dtype=tf.float32),
+        tf.convert_to_tensor(np.ones((2, 32, 32)), dtype=tf.float32),
+        tf.convert_to_tensor(np.ones((2, 32, 32)), dtype=tf.float32)
+    ]
+    groundtruth_weights_list = [
+        tf.constant([1, 1], dtype=tf.float32),
+        tf.constant([1, 1], dtype=tf.float32),
+        tf.constant([1, 0], dtype=tf.float32)
+    ]
     prediction_dict = {
         'rpn_box_encodings': rpn_box_encodings,
         'rpn_objectness_predictions_with_background':
@@ -892,9 +909,11 @@
         'mask_predictions': mask_predictions_logits
     }
     _, true_image_shapes = model.preprocess(tf.zeros(image_shape))
-    model.provide_groundtruth(groundtruth_boxes_list,
-                              groundtruth_classes_list,
-                              groundtruth_masks_list)
+    model.provide_groundtruth(
+        groundtruth_boxes_list,
+        groundtruth_classes_list,
+        groundtruth_masks_list,
+        groundtruth_weights_list=groundtruth_weights_list)
     loss_dict = model.loss(prediction_dict, true_image_shapes)

     with self.test_session() as sess:
@@ -402,8 +402,9 @@ class SSDMetaArch(model.DetectionModel):
               im_width=image_shape[2]))
     prediction_dict = self._box_predictor.predict(
         feature_maps, self._anchor_generator.num_anchors_per_location())
-    box_encodings = tf.squeeze(
-        tf.concat(prediction_dict['box_encodings'], axis=1), axis=2)
+    box_encodings = tf.concat(prediction_dict['box_encodings'], axis=1)
+    if box_encodings.shape.ndims == 4 and box_encodings.shape[2] == 1:
+      box_encodings = tf.squeeze(box_encodings, axis=2)
     class_predictions_with_background = tf.concat(
         prediction_dict['class_predictions_with_background'], axis=1)
     predictions_dict = {
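A standalone sketch (toy shapes only) of the new conditional squeeze above: a `[batch, num_anchors, 1, box_code_size]` encoding is reduced to `[batch, num_anchors, box_code_size]`, while an already 3-D encoding is left untouched.

```python
import tensorflow as tf

box_encodings = tf.zeros([2, 8, 1, 4])  # hypothetical predictor output
if box_encodings.shape.ndims == 4 and box_encodings.shape[2] == 1:
  box_encodings = tf.squeeze(box_encodings, axis=2)

with tf.Session() as sess:
  print(sess.run(tf.shape(box_encodings)))  # [2 8 4]
```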
@@ -243,21 +243,24 @@ class SsdMetaArchTest(test_case.TestCase):
                     (batch_size, None, None, 3),
                     (None, None, None, 3)]

-    expected_boxes = np.array([[[0, 0, .5, .5],
+    expected_boxes = [
+        [
+            [0, 0, .5, .5],
             [0, .5, .5, 1],
             [.5, 0, 1, .5],
             [0, 0, 0, 0],  # pruned prediction
-            [0, 0, 0, 0]],  # padding
-            [[0, 0, .5, .5],
+            [0, 0, 0, 0]
+        ],  # padding
+        [
+            [0, 0, .5, .5],
             [0, .5, .5, 1],
             [.5, 0, 1, .5],
             [0, 0, 0, 0],  # pruned prediction
-            [0, 0, 0, 0]]  # padding
-        ])
-    expected_scores = np.array([[0, 0, 0, 0, 0],
-                                [0, 0, 0, 0, 0]])
-    expected_classes = np.array([[0, 0, 0, 0, 0],
-                                 [0, 0, 0, 0, 0]])
+            [0, 0, 0, 0]
+        ]
+    ]  # padding
+    expected_scores = [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]
+    expected_classes = [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]
     expected_num_detections = np.array([3, 3])

     for input_shape in input_shapes:
@@ -282,7 +285,11 @@ class SsdMetaArchTest(test_case.TestCase):
                            input_placeholder:
                                np.random.uniform(
                                    size=(batch_size, 2, 2, 3))})
-      self.assertAllClose(detections_out['detection_boxes'], expected_boxes)
+      for image_idx in range(batch_size):
+        self.assertTrue(
+            test_utils.first_rows_close_as_set(
+                detections_out['detection_boxes'][image_idx].tolist(),
+                expected_boxes[image_idx]))
       self.assertAllClose(detections_out['detection_scores'], expected_scores)
       self.assertAllClose(detections_out['detection_classes'], expected_classes)
       self.assertAllClose(detections_out['num_detections'],
@@ -429,7 +436,7 @@ class SsdMetaArchTest(test_case.TestCase):
   def test_restore_map_for_detection_ckpt(self):
     model, _, _, _ = self._create_model()
-    model.predict(tf.constant(np.array([[[0, 0], [1, 1]], [[1, 0], [0, 1]]],
+    model.predict(tf.constant(np.array([[[[0, 0], [1, 1]], [[1, 0], [0, 1]]]],
                                        dtype=np.float32)),
                   true_image_shapes=None)
     init_op = tf.global_variables_initializer()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Runs evaluation using OpenImages groundtruth and predictions.
Example usage:
python third_party/tensorflow_models/object_detection/\
metrics/oid_vrd_challenge_evaluation.py \
--input_annotations_boxes=/path/to/input/annotations-human-bbox.csv \
--input_annotations_labels=/path/to/input/annotations-label.csv \
--input_class_labelmap=/path/to/input/class_labelmap.pbtxt \
--input_relationship_labelmap=/path/to/input/relationship_labelmap.pbtxt \
--input_predictions=/path/to/input/predictions.csv \
--output_metrics=/path/to/output/metric.csv \
CSVs with bounding box annotations and image labels (including the image URLs)
can be downloaded from the Open Images Challenge website:
https://storage.googleapis.com/openimages/web/challenge.html
The format of the input CSVs and the metrics themselves are described on the
challenge website.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import pandas as pd
from google.protobuf import text_format
from object_detection.metrics import oid_vrd_challenge_evaluation_utils as utils
from object_detection.protos import string_int_label_map_pb2
from object_detection.utils import vrd_evaluation
def _load_labelmap(labelmap_path):
"""Loads labelmap from the labelmap path.
Args:
labelmap_path: Path to the labelmap.
Returns:
A dictionary mapping class name to class numerical id.
"""
label_map = string_int_label_map_pb2.StringIntLabelMap()
with open(labelmap_path, 'r') as fid:
label_map_string = fid.read()
text_format.Merge(label_map_string, label_map)
labelmap_dict = {}
for item in label_map.item:
labelmap_dict[item.name] = item.id
return labelmap_dict
def _swap_labelmap_dict(labelmap_dict):
"""Swaps keys and values in the labelmap.
Args:
labelmap_dict: Input dictionary mapping class name to class numerical id.
Returns:
A dictionary mapping class numerical id to class name.
"""
return dict((v, k) for k, v in labelmap_dict.items())
def main(parsed_args):
all_box_annotations = pd.read_csv(parsed_args.input_annotations_boxes)
all_label_annotations = pd.read_csv(parsed_args.input_annotations_labels)
all_annotations = pd.concat([all_box_annotations, all_label_annotations])
class_label_map = _load_labelmap(parsed_args.input_class_labelmap)
relationship_label_map = _load_labelmap(
parsed_args.input_relationship_labelmap)
relation_evaluator = vrd_evaluation.VRDRelationDetectionEvaluator()
phrase_evaluator = vrd_evaluation.VRDPhraseDetectionEvaluator()
for _, groundtruth in enumerate(all_annotations.groupby('ImageID')):
image_id, image_groundtruth = groundtruth
groundtruth_dictionary = utils.build_groundtruth_vrd_dictionary(
image_groundtruth, class_label_map, relationship_label_map)
relation_evaluator.add_single_ground_truth_image_info(
image_id, groundtruth_dictionary)
phrase_evaluator.add_single_ground_truth_image_info(image_id,
groundtruth_dictionary)
all_predictions = pd.read_csv(parsed_args.input_predictions)
for _, prediction_data in enumerate(all_predictions.groupby('ImageID')):
image_id, image_predictions = prediction_data
prediction_dictionary = utils.build_predictions_vrd_dictionary(
image_predictions, class_label_map, relationship_label_map)
relation_evaluator.add_single_detected_image_info(image_id,
prediction_dictionary)
phrase_evaluator.add_single_detected_image_info(image_id,
prediction_dictionary)
relation_metrics = relation_evaluator.evaluate()
phrase_metrics = phrase_evaluator.evaluate()
with open(parsed_args.output_metrics, 'w') as fid:
utils.write_csv(fid, relation_metrics)
utils.write_csv(fid, phrase_metrics)
if __name__ == '__main__':
parser = argparse.ArgumentParser(
description=
'Evaluate Open Images Visual Relationship Detection predictions.')
parser.add_argument(
'--input_annotations_boxes',
required=True,
help='File with groundtruth vrd annotations.')
parser.add_argument(
'--input_annotations_labels',
required=True,
help='File with groundtruth labels annotations')
parser.add_argument(
'--input_predictions',
required=True,
help="""File with detection predictions; NOTE: no postprocessing is
applied in the evaluation script.""")
parser.add_argument(
'--input_class_labelmap',
required=True,
help="""OpenImages Challenge labelmap; note: it is expected to include
attributes.""")
parser.add_argument(
'--input_relationship_labelmap',
required=True,
help="""OpenImages Challenge relationship labelmap.""")
parser.add_argument(
'--output_metrics', required=True, help='Output file with csv metrics')
args = parser.parse_args()
main(args)
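As a quick sanity check of `_load_labelmap` above, a toy `StringIntLabelMap` text proto (the entries here are made up) parses the same way the script expects:

```python
from google.protobuf import text_format
from object_detection.protos import string_int_label_map_pb2

labelmap_text = """
item { name: "/m/01g317" id: 1 }
item { name: "/m/0k4j" id: 2 }
"""
label_map = string_int_label_map_pb2.StringIntLabelMap()
text_format.Merge(labelmap_text, label_map)
print({item.name: item.id for item in label_map.item})  # {'/m/01g317': 1, '/m/0k4j': 2}
```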
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Converts data from CSV format to the VRDDetectionEvaluator format."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import csv
import numpy as np
from object_detection.core import standard_fields
from object_detection.utils import vrd_evaluation
def build_groundtruth_vrd_dictionary(data, class_label_map,
relationship_label_map):
"""Builds a groundtruth dictionary from groundtruth data in CSV file.
Args:
data: Pandas DataFrame with the groundtruth data for a single image.
class_label_map: Class labelmap from string label name to an integer.
relationship_label_map: Relationship type labelmap from string name to an
integer.
Returns:
A dictionary with keys suitable for passing to
VRDDetectionEvaluator.add_single_ground_truth_image_info:
standard_fields.InputDataFields.groundtruth_boxes: A numpy array
of structures with the shape [M, 1], representing M tuples, each tuple
containing the same number of named bounding boxes.
Each box is of the format [y_min, x_min, y_max, x_max] (see
datatype vrd_box_data_type, single_box_data_type above).
standard_fields.InputDataFields.groundtruth_classes: A numpy array of
structures shape [M, 1], representing the class labels of the
corresponding bounding boxes and possibly additional classes (see
datatype label_data_type above).
standard_fields.InputDataFields.verified_labels: numpy array
of shape [K] containing verified labels.
"""
data_boxes = data[data.LabelName.isnull()]
data_labels = data[data.LabelName1.isnull()]
boxes = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.vrd_box_data_type)
boxes['subject'] = data_boxes[['YMin1', 'XMin1', 'YMax1',
'XMax1']].as_matrix()
boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].as_matrix()
labels = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.label_data_type)
labels['subject'] = data_boxes['LabelName1'].map(lambda x: class_label_map[x])
labels['object'] = data_boxes['LabelName2'].map(lambda x: class_label_map[x])
labels['relation'] = data_boxes['RelationshipLabel'].map(
lambda x: relationship_label_map[x])
return {
standard_fields.InputDataFields.groundtruth_boxes:
boxes,
standard_fields.InputDataFields.groundtruth_classes:
labels,
standard_fields.InputDataFields.verified_labels:
data_labels['LabelName'].map(lambda x: class_label_map[x]),
}
def build_predictions_vrd_dictionary(data, class_label_map,
relationship_label_map):
"""Builds a predictions dictionary from predictions data in CSV file.
Args:
data: Pandas DataFrame with the predictions data for a single image.
class_label_map: Class labelmap from string label name to an integer.
relationship_label_map: Relationship type labelmap from string name to an
integer.
Returns:
Dictionary with keys suitable for passing to
VRDDetectionEvaluator.add_single_detected_image_info:
standard_fields.DetectionResultFields.detection_boxes: A numpy array of
structures with shape [N, 1], representing N tuples, each tuple
containing the same number of named bounding boxes.
Each box is of the format [y_min, x_min, y_max, x_max] (as an example
see datatype vrd_box_data_type, single_box_data_type above).
standard_fields.DetectionResultFields.detection_scores: float32 numpy
array of shape [N] containing detection scores for the boxes.
standard_fields.DetectionResultFields.detection_classes: A numpy array
of structures shape [N, 1], representing the class labels of the
corresponding bounding boxes and possibly additional classes (see
datatype label_data_type above).
"""
data_boxes = data
boxes = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.vrd_box_data_type)
boxes['subject'] = data_boxes[['YMin1', 'XMin1', 'YMax1',
'XMax1']].as_matrix()
boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].as_matrix()
labels = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.label_data_type)
labels['subject'] = data_boxes['LabelName1'].map(lambda x: class_label_map[x])
labels['object'] = data_boxes['LabelName2'].map(lambda x: class_label_map[x])
labels['relation'] = data_boxes['RelationshipLabel'].map(
lambda x: relationship_label_map[x])
return {
standard_fields.DetectionResultFields.detection_boxes:
boxes,
standard_fields.DetectionResultFields.detection_classes:
labels,
standard_fields.DetectionResultFields.detection_scores:
data_boxes['Score'].as_matrix()
}
def write_csv(fid, metrics):
"""Writes metrics key-value pairs to CSV file.
Args:
fid: File identifier of an opened file.
metrics: A dictionary with metrics to be written.
"""
metrics_writer = csv.writer(fid, delimiter=',')
for metric_name, metric_value in metrics.items():
metrics_writer.writerow([metric_name, str(metric_value)])
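A minimal usage sketch of `build_predictions_vrd_dictionary` above, using a single made-up prediction row; the column names mirror the ones the function reads, and the label maps are toy stand-ins for the real challenge label maps.

```python
import pandas as pd

from object_detection.core import standard_fields
from object_detection.metrics import oid_vrd_challenge_evaluation_utils as utils

# One hypothetical relationship prediction for a single image.
predictions = pd.DataFrame({
    'ImageID': ['img1'],
    'LabelName1': ['/m/01g317'], 'LabelName2': ['/m/0k4j'],
    'RelationshipLabel': ['at'],
    'YMin1': [0.1], 'XMin1': [0.1], 'YMax1': [0.5], 'XMax1': [0.5],
    'YMin2': [0.3], 'XMin2': [0.3], 'YMax2': [0.9], 'XMax2': [0.9],
    'Score': [0.8],
})
class_label_map = {'/m/01g317': 1, '/m/0k4j': 2}  # toy label maps
relationship_label_map = {'at': 1}

prediction_dict = utils.build_predictions_vrd_dictionary(
    predictions, class_label_map, relationship_label_map)
print(prediction_dict[standard_fields.DetectionResultFields.detection_scores])
```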
@@ -113,7 +113,9 @@ class TfExampleDetectionAndGTParser(data_parser.DataToNumpyParser):
         fields.InputDataFields.groundtruth_difficult:
             Int64Parser(fields.TfExampleFields.object_difficult),
         fields.InputDataFields.groundtruth_group_of:
-            Int64Parser(fields.TfExampleFields.object_group_of)
+            Int64Parser(fields.TfExampleFields.object_group_of),
+        fields.InputDataFields.verified_labels:
+            Int64Parser(fields.TfExampleFields.image_class_label),
     }

   def parse(self, tf_example):
@@ -44,6 +44,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
     object_class_label = [1, 1, 2]
     object_difficult = [1, 0, 0]
     object_group_of = [0, 0, 1]
+    verified_labels = [1, 2, 3, 4]
     detection_class_label = [2, 1]
     detection_score = [0.5, 0.3]
     features = {
@@ -113,10 +114,19 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example = tf.train.Example(features=tf.train.Features(feature=features))
     results_dict = parser.parse(example)
     self.assertIsNotNone(results_dict)
-    np_testing.assert_almost_equal(
+    np_testing.assert_equal(
         object_group_of,
         results_dict[fields.InputDataFields.groundtruth_group_of])

+    features[fields.TfExampleFields.image_class_label] = (
+        self._Int64Feature(verified_labels))
+
+    example = tf.train.Example(features=tf.train.Features(feature=features))
+    results_dict = parser.parse(example)
+    self.assertIsNotNone(results_dict)
+    np_testing.assert_equal(
+        verified_labels, results_dict[fields.InputDataFields.verified_labels])
+
   def testParseString(self):
     string_val = 'abc'
     features = {'string': self._BytesFeature(string_val)}
@@ -147,8 +147,6 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
     with (slim.arg_scope(self._conv_hyperparams_fn())
           if self._override_base_feature_extractor_hyperparams
           else context_manager.IdentityContextManager()):
-      # TODO(skligys): Enable fused batch norm once quantization supports it.
-      with slim.arg_scope([slim.batch_norm], fused=False):
       _, image_features = mobilenet_v1.mobilenet_v1_base(
           ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
           final_endpoint='Conv2d_13_pointwise',
@@ -157,8 +155,6 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
           use_explicit_padding=self._use_explicit_padding,
           scope=scope)
     with slim.arg_scope(self._conv_hyperparams_fn()):
-      # TODO(skligys): Enable fused batch norm once quantization supports it.
-      with slim.arg_scope([slim.batch_norm], fused=False):
       feature_maps = feature_map_generators.multi_resolution_feature_maps(
           feature_map_layout=feature_map_layout,
           depth_multiplier=self._depth_multiplier,
@@ -110,8 +110,6 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
     with (slim.arg_scope(self._conv_hyperparams_fn())
           if self._override_base_feature_extractor_hyperparams
           else context_manager.IdentityContextManager()):
-      # TODO(skligys): Enable fused batch norm once quantization supports it.
-      with slim.arg_scope([slim.batch_norm], fused=False):
       _, image_features = mobilenet_v1.mobilenet_v1_base(
           ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
           final_endpoint='Conv2d_13_pointwise',
@@ -120,8 +118,6 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
           use_explicit_padding=self._use_explicit_padding,
           scope=scope)
     with slim.arg_scope(self._conv_hyperparams_fn()):
-      # TODO(skligys): Enable fused batch norm once quantization supports it.
-      with slim.arg_scope([slim.batch_norm], fused=False):
       feature_maps = feature_map_generators.multi_resolution_feature_maps(
           feature_map_layout=feature_map_layout,
           depth_multiplier=self._depth_multiplier,
@@ -148,7 +148,7 @@ class SsdMobilenetV1FeatureExtractorTest(
     self.check_feature_extractor_variables_under_scope(
         depth_multiplier, pad_to_multiple, scope_name)

-  def test_nofused_batchnorm(self):
+  def test_has_fused_batchnorm(self):
     image_height = 40
     image_width = 40
     depth_multiplier = 1
@@ -159,7 +159,7 @@ class SsdMobilenetV1FeatureExtractorTest(
                            pad_to_multiple)
     preprocessed_image = feature_extractor.preprocess(image_placeholder)
     _ = feature_extractor.extract_features(preprocessed_image)
-    self.assertFalse(any(op.type == 'FusedBatchNorm'
+    self.assertTrue(any(op.type == 'FusedBatchNorm'
                         for op in tf.get_default_graph().get_operations()))

 if __name__ == '__main__':