Unverified commit 62ce5d2a, authored by pkulzc and committed by GitHub
Browse files

Object detection and slim changes (#5843)

Add more eval metrics to model_main and support group norm for mobilenet v1 based models.
parents 7d032ea3 a1337e01
......@@ -23,6 +23,44 @@ message Loss {
// If not left to default, applies random example sampling.
optional RandomExampleSampler random_example_sampler = 6;
// Equalization loss.
// Configuration for the equalization loss.
message EqualizationLoss {
  // Equalization loss strength. The loss is disabled when left at 0.0.
  optional float weight = 1 [default = 0.0];

  // When computing equalization loss, ops whose names start with any of
  // these prefixes are ignored. Only used when weight > 0.
  repeated string exclude_prefixes = 2;
}
optional EqualizationLoss equalization_loss = 7;
enum ExpectedLossWeights {
// Do not compute expected loss weights; explicit sampling is used instead
// (see the expected_loss_weights field comment below).
NONE = 0;
// Use expected_classification_loss_by_expected_sampling
// from third_party/tensorflow_models/object_detection/utils/ops.py
EXPECTED_SAMPLING = 1;
// Use expected_classification_loss_by_reweighting_unmatched_anchors
// from third_party/tensorflow_models/object_detection/utils/ops.py
REWEIGHTING_UNMATCHED_ANCHORS = 2;
}
// Method to compute expected loss weights with respect to balanced
// positive/negative sampling scheme. If NONE, use explicit sampling.
// TODO(birdbrain): Move under ExpectedLossWeights.
optional ExpectedLossWeights expected_loss_weights = 18 [default = NONE];
// Minimum number of effective negative samples.
// Only applies if expected_loss_weights is not NONE.
// TODO(birdbrain): Move under ExpectedLossWeights.
optional float min_num_negative_samples = 19 [default=0];
// Desired number of effective negative samples per positive sample.
// Only applies if expected_loss_weights is not NONE.
// TODO(birdbrain): Move under ExpectedLossWeights.
optional float desired_negative_sampling_ratio = 20 [default=3];
}
// Configuration for bounding box localization loss function.
......
......@@ -166,13 +166,13 @@ message RandomCropImage {
message RandomPadImage {
// Minimum dimensions for padded image. If unset, will use original image
// dimension as a lower bound.
optional float min_image_height = 1;
optional float min_image_width = 2;
optional int32 min_image_height = 1;
optional int32 min_image_width = 2;
// Maximum dimensions for padded image. If unset, will use double the original
// image dimension as an upper bound.
optional float max_image_height = 3;
optional float max_image_width = 4;
optional int32 max_image_height = 3;
optional int32 max_image_width = 4;
// Color of the padding. If unset, will pad using average color of the input
// image.
......
......@@ -12,7 +12,7 @@ import "object_detection/protos/post_processing.proto";
import "object_detection/protos/region_similarity_calculator.proto";
// Configuration for Single Shot Detection (SSD) models.
// Next id: 22
// Next id: 26
message Ssd {
// Number of classes to predict.
......@@ -35,7 +35,7 @@ message Ssd {
// Whether background targets are to be encoded as an all
// zeros vector or a one-hot vector (where background is the 0th class).
optional bool encode_background_as_zeros = 12 [default=false];
optional bool encode_background_as_zeros = 12 [default = false];
// classification weight to be associated to negative
// anchors (default: 1.0). The weight must be in [0., 1.].
......@@ -52,11 +52,11 @@ message Ssd {
// Whether to normalize the loss by number of groundtruth boxes that match to
// the anchors.
optional bool normalize_loss_by_num_matches = 10 [default=true];
optional bool normalize_loss_by_num_matches = 10 [default = true];
// Whether to normalize the localization loss by the code size of the box
// encodings. This is applied along with other normalization factors.
optional bool normalize_loc_loss_by_codesize = 14 [default=false];
optional bool normalize_loc_loss_by_codesize = 14 [default = false];
// Loss configuration for training.
optional Loss loss = 11;
......@@ -82,29 +82,66 @@ message Ssd {
// to update the batch norm moving average parameters.
optional bool inplace_batchnorm_update = 15 [default = false];
// Whether to weight the regression loss by the score of the ground truth box
// the anchor matches to.
optional bool weight_regression_loss_by_score = 17 [default=false];
// Whether to add an implicit background class to one-hot encodings of
// groundtruth labels. Set to false if training a single
// class model or using an explicit background class.
optional bool add_background_class = 21 [default = true];
// Whether to compute expected loss with respect to balanced positive/negative
// sampling scheme. If false, use explicit sampling.
optional bool use_expected_classification_loss_under_sampling = 18 [default=false];
// Whether to use an explicit background class. Set to true if using
// groundtruth labels with an explicit background class, as in multiclass
// scores.
optional bool explicit_background_class = 24 [default = false];
// Minimum number of effective negative samples.
// Only applies if use_expected_classification_loss_under_sampling is true.
optional float min_num_negative_samples = 19 [default=0];
optional bool use_confidences_as_targets = 22 [default = false];
// Desired number of effective negative samples per positive sample.
// Only applies if use_expected_classification_loss_under_sampling is true.
optional float desired_negative_sampling_ratio = 20 [default=3];
optional float implicit_example_weight = 23 [default = 1.0];
// Whether to add an implicit background class to one-hot encodings of
// groundtruth labels. Set to false if using groundtruth labels with an
// explicit background class, using multiclass scores, or if training a single
// class model.
optional bool add_background_class = 21 [default = true];
}
// Configuration proto for MaskHead.
// Next id: 11
message MaskHead {
  // The height and the width of the predicted mask. Only used when
  // predict_instance_masks is true.
  optional int32 mask_height = 1 [default = 15];
  optional int32 mask_width = 2 [default = 15];

  // Whether to predict class agnostic masks. Only used when
  // predict_instance_masks is true.
  optional bool masks_are_class_agnostic = 3 [default = true];

  // The depth for the first conv2d_transpose op applied to the
  // image_features in the mask prediction branch. If set to 0, the value
  // will be set automatically based on the number of channels in the image
  // features and the number of classes.
  optional int32 mask_prediction_conv_depth = 4 [default = 256];

  // The number of convolutions applied to image_features in the mask
  // prediction branch.
  optional int32 mask_prediction_num_conv_layers = 5 [default = 2];

  // Whether to apply convolutions on mask features before upsampling using
  // nearest neighbor resizing.
  // By default, mask features are resized to [`mask_height`, `mask_width`]
  // before applying convolutions and predicting masks.
  optional bool convolve_then_upsample_masks = 6 [default = false];

  // Mask loss weight.
  optional float mask_loss_weight = 7 [default = 5.0];

  // Number of boxes to be generated at training time for computing mask loss.
  optional int32 mask_loss_sample_size = 8 [default = 16];

  // Hyperparameters for convolution ops used in the box predictor.
  optional Hyperparams conv_hyperparams = 9;

  // Output size (width and height are set to be the same) of the initial
  // bilinear interpolation based cropping during ROI pooling. Only used when
  // we have second stage prediction head enabled (e.g. mask head).
  optional int32 initial_crop_size = 10 [default = 15];
}
// Configs for mask head.
optional MaskHead mask_head_config = 25;
}
message SsdFeatureExtractor {
reserved 6;
......@@ -113,10 +150,10 @@ message SsdFeatureExtractor {
optional string type = 1;
// The factor to alter the depth of the channels in the feature extractor.
optional float depth_multiplier = 2 [default=1.0];
optional float depth_multiplier = 2 [default = 1.0];
// Minimum number of the channels in the feature extractor.
optional int32 min_depth = 3 [default=16];
optional int32 min_depth = 3 [default = 16];
// Hyperparameters that affect the layers of feature extractor added on top
// of the base feature extractor.
......@@ -128,7 +165,8 @@ message SsdFeatureExtractor {
// layers while base feature extractor uses its own default hyperparams. If
// this value is set to true, the base feature extractor's hyperparams will be
// overridden with the `conv_hyperparams`.
optional bool override_base_feature_extractor_hyperparams = 9 [default = false];
optional bool override_base_feature_extractor_hyperparams = 9
[default = false];
// The nearest multiple to zero-pad the input height and width dimensions to.
// For example, if pad_to_multiple = 2, input dimensions are zero-padded
......@@ -138,11 +176,11 @@ message SsdFeatureExtractor {
// Whether to use explicit padding when extracting SSD multiresolution
// features. This will also apply to the base feature extractor if a MobileNet
// architecture is used.
optional bool use_explicit_padding = 7 [default=false];
optional bool use_explicit_padding = 7 [default = false];
// Whether to use depthwise separable convolutions for to extract additional
// feature maps added by SSD.
optional bool use_depthwise = 8 [default=false];
optional bool use_depthwise = 8 [default = false];
// Feature Pyramid Networks config.
optional FeaturePyramidNetworks fpn = 10;
......@@ -173,4 +211,3 @@ message FeaturePyramidNetworks {
// channel depth for additional coarse feature layers.
optional int32 additional_layer_depth = 3 [default = 256];
}
......@@ -20,7 +20,7 @@ message TrainConfig {
optional bool sync_replicas = 3 [default=false];
// How frequently to keep checkpoints.
optional uint32 keep_checkpoint_every_n_hours = 4 [default=1000];
optional float keep_checkpoint_every_n_hours = 4 [default=10000.0];
// Optimizer used to train the DetectionModel.
optional Optimizer optimizer = 5;
......
......@@ -33,6 +33,7 @@ import collections
import logging
import unicodedata
import numpy as np
import tensorflow as tf
from object_detection.core import standard_fields
from object_detection.utils import label_map_util
......@@ -126,6 +127,7 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
categories,
matching_iou_threshold=0.5,
evaluate_corlocs=False,
evaluate_precision_recall=False,
metric_prefix=None,
use_weighted_mean_ap=False,
evaluate_masks=False,
......@@ -140,6 +142,8 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
boxes to detection boxes.
evaluate_corlocs: (optional) boolean which determines if corloc scores
are to be returned or not.
evaluate_precision_recall: (optional) boolean which determines if
precision and recall values are to be returned or not.
metric_prefix: (optional) string prefix for metric name; if None, no
prefix is used.
use_weighted_mean_ap: (optional) boolean which determines if the mean
......@@ -174,7 +178,50 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
group_of_weight=self._group_of_weight)
self._image_ids = set([])
self._evaluate_corlocs = evaluate_corlocs
self._evaluate_precision_recall = evaluate_precision_recall
self._metric_prefix = (metric_prefix + '_') if metric_prefix else ''
self._expected_keys = set([
standard_fields.InputDataFields.key,
standard_fields.InputDataFields.groundtruth_boxes,
standard_fields.InputDataFields.groundtruth_classes,
standard_fields.InputDataFields.groundtruth_difficult,
standard_fields.InputDataFields.groundtruth_instance_masks,
standard_fields.DetectionResultFields.detection_boxes,
standard_fields.DetectionResultFields.detection_scores,
standard_fields.DetectionResultFields.detection_classes,
standard_fields.DetectionResultFields.detection_masks
])
self._build_metric_names()
def _build_metric_names(self):
  """Builds self._metric_names: mAP, optional CorLoc, and per-category names.

  The list order matters: index 0 is the mAP metric name and, when
  self._evaluate_corlocs is set, index 1 is the mean CorLoc metric name.
  evaluate() and get_estimator_eval_metric_ops() rely on this ordering.
  """
  self._metric_names = [
      self._metric_prefix + 'Precision/mAP@{}IOU'.format(
          self._matching_iou_threshold)
  ]
  if self._evaluate_corlocs:
    self._metric_names.append(
        self._metric_prefix +
        'Precision/meanCorLoc@{}IOU'.format(self._matching_iou_threshold))

  category_index = label_map_util.create_category_index(self._categories)
  for idx in range(self._num_classes):
    if idx + self._label_id_offset in category_index:
      category_name = category_index[idx + self._label_id_offset]['name']
      try:
        # Python 2: decode byte strings to unicode before NFKD-normalizing.
        category_name = unicode(category_name, 'utf-8')
      except (TypeError, NameError):
        # TypeError: the name is already a unicode string.
        # NameError: Python 3, where `unicode` does not exist and str is
        # already text. (The original code only caught TypeError, which
        # crashed this method under Python 3.)
        pass
      # ASCII-fold the category name so it is safe to embed in metric keys.
      category_name = unicodedata.normalize('NFKD', category_name).encode(
          'ascii', 'ignore')
      self._metric_names.append(
          self._metric_prefix + 'PerformanceByCategory/AP@{}IOU/{}'.format(
              self._matching_iou_threshold, category_name))
      if self._evaluate_corlocs:
        self._metric_names.append(
            self._metric_prefix + 'PerformanceByCategory/CorLoc@{}IOU/{}'
            .format(self._matching_iou_threshold, category_name))
def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
"""Adds groundtruth for a single image to be used for evaluation.
......@@ -283,22 +330,19 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
A dictionary of metrics with the following fields -
1. summary_metrics:
'Precision/mAP@<matching_iou_threshold>IOU': mean average precision at
the specified IOU threshold.
'<prefix if not empty>_Precision/mAP@<matching_iou_threshold>IOU': mean
average precision at the specified IOU threshold.
2. per_category_ap: category specific results with keys of the form
'PerformanceByCategory/mAP@<matching_iou_threshold>IOU/category'.
'<prefix if not empty>_PerformanceByCategory/
mAP@<matching_iou_threshold>IOU/category'.
"""
(per_class_ap, mean_ap, _, _, per_class_corloc, mean_corloc) = (
self._evaluation.evaluate())
pascal_metrics = {
self._metric_prefix +
'Precision/mAP@{}IOU'.format(self._matching_iou_threshold):
mean_ap
}
(per_class_ap, mean_ap, per_class_precision, per_class_recall,
per_class_corloc, mean_corloc) = (
self._evaluation.evaluate())
pascal_metrics = {self._metric_names[0]: mean_ap}
if self._evaluate_corlocs:
pascal_metrics[self._metric_prefix + 'Precision/meanCorLoc@{}IOU'.format(
self._matching_iou_threshold)] = mean_corloc
pascal_metrics[self._metric_names[1]] = mean_corloc
category_index = label_map_util.create_category_index(self._categories)
for idx in range(per_class_ap.size):
if idx + self._label_id_offset in category_index:
......@@ -314,6 +358,19 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
self._matching_iou_threshold, category_name))
pascal_metrics[display_name] = per_class_ap[idx]
# Optionally add precision and recall values
if self._evaluate_precision_recall:
display_name = (
self._metric_prefix +
'PerformanceByCategory/Precision@{}IOU/{}'.format(
self._matching_iou_threshold, category_name))
pascal_metrics[display_name] = per_class_precision[idx]
display_name = (
self._metric_prefix +
'PerformanceByCategory/Recall@{}IOU/{}'.format(
self._matching_iou_threshold, category_name))
pascal_metrics[display_name] = per_class_recall[idx]
# Optionally add CorLoc metrics.
if self._evaluate_corlocs:
display_name = (
......@@ -332,6 +389,74 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
label_id_offset=self._label_id_offset)
self._image_ids.clear()
def get_estimator_eval_metric_ops(self, eval_dict):
  """Returns dict of metrics to use with `tf.estimator.EstimatorSpec`.

  Note that this must only be implemented if performing evaluation with a
  `tf.estimator.Estimator`.

  Args:
    eval_dict: A dictionary that holds tensors for evaluating an object
      detection model, returned from
      eval_util.result_dict_for_single_example(). It must contain
      standard_fields.InputDataFields.key.

  Returns:
    A dictionary of metric names to tuple of value_op and update_op that can
    be used as eval metric ops in `tf.estimator.EstimatorSpec`.
  """
  # remove unexpected fields
  eval_dict_filtered = dict()
  for key, value in eval_dict.items():
    if key in self._expected_keys:
      eval_dict_filtered[key] = value

  # Captured by update_op below and re-zipped against the batched values.
  eval_dict_keys = eval_dict_filtered.keys()

  def update_op(image_id, *eval_dict_batched_as_list):
    """Update operation that adds batch of images to ObjectDetectionEvaluator.

    Args:
      image_id: image id (single id or an array)
      *eval_dict_batched_as_list: the values of the dictionary of tensors.
    """
    if np.isscalar(image_id):
      # Unbatched case: the tensors describe exactly one example.
      single_example_dict = dict(
          zip(eval_dict_keys, eval_dict_batched_as_list))
      self.add_single_ground_truth_image_info(image_id, single_example_dict)
      self.add_single_detected_image_info(image_id, single_example_dict)
    else:
      # Batched case: unzip along the batch dimension and feed one example
      # at a time; the per-example image id is recovered from the key field.
      for unzipped_tuple in zip(*eval_dict_batched_as_list):
        single_example_dict = dict(zip(eval_dict_keys, unzipped_tuple))
        image_id = single_example_dict[standard_fields.InputDataFields.key]
        self.add_single_ground_truth_image_info(image_id, single_example_dict)
        self.add_single_detected_image_info(image_id, single_example_dict)

  # args = [key tensor] + all filtered values; note the key tensor appears
  # twice (as image_id and again inside the values), which update_op expects.
  args = [eval_dict_filtered[standard_fields.InputDataFields.key]]
  args.extend(eval_dict_filtered.values())
  update_op = tf.py_func(update_op, args, [])

  def first_value_func():
    # Runs the full evaluation once, caches all metrics, and resets state.
    self._metrics = self.evaluate()
    self.clear()
    return np.float32(self._metrics[self._metric_names[0]])

  def value_func_factory(metric_name):
    # Factory binds metric_name so each value_op reads its own cached entry.
    def value_func():
      return np.float32(self._metrics[metric_name])
    return value_func

  # Ensure that the metrics are only evaluated once.
  first_value_op = tf.py_func(first_value_func, [], tf.float32)
  eval_metric_ops = {self._metric_names[0]: (first_value_op, update_op)}
  # Remaining value_ops depend on first_value_op so the cache is populated
  # before they read from self._metrics.
  with tf.control_dependencies([first_value_op]):
    for metric_name in self._metric_names[1:]:
      eval_metric_ops[metric_name] = (tf.py_func(
          value_func_factory(metric_name), [], np.float32), update_op)
  return eval_metric_ops
class PascalDetectionEvaluator(ObjectDetectionEvaluator):
"""A class to evaluate detections using PASCAL metrics."""
......@@ -442,6 +567,15 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
evaluate_corlocs,
metric_prefix=metric_prefix,
group_of_weight=group_of_weight)
self._expected_keys = set([
standard_fields.InputDataFields.key,
standard_fields.InputDataFields.groundtruth_boxes,
standard_fields.InputDataFields.groundtruth_classes,
standard_fields.InputDataFields.groundtruth_group_of,
standard_fields.DetectionResultFields.detection_boxes,
standard_fields.DetectionResultFields.detection_scores,
standard_fields.DetectionResultFields.detection_classes,
])
def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
"""Adds groundtruth for a single image to be used for evaluation.
......@@ -535,6 +669,16 @@ class OpenImagesDetectionChallengeEvaluator(OpenImagesDetectionEvaluator):
group_of_weight=group_of_weight)
self._evaluatable_labels = {}
self._expected_keys = set([
standard_fields.InputDataFields.key,
standard_fields.InputDataFields.groundtruth_boxes,
standard_fields.InputDataFields.groundtruth_classes,
standard_fields.InputDataFields.groundtruth_group_of,
standard_fields.InputDataFields.groundtruth_image_classes,
standard_fields.DetectionResultFields.detection_boxes,
standard_fields.DetectionResultFields.detection_scores,
standard_fields.DetectionResultFields.detection_classes,
])
def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
"""Adds groundtruth for a single image to be used for evaluation.
......@@ -890,15 +1034,14 @@ class ObjectDetectionEvaluation(object):
if self.use_weighted_mean_ap:
all_scores = np.append(all_scores, scores)
all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels)
logging.info('Scores and tpfp per class label: %d', class_index)
logging.info(tp_fp_labels)
logging.info(scores)
precision, recall = metrics.compute_precision_recall(
scores, tp_fp_labels, self.num_gt_instances_per_class[class_index])
self.precisions_per_class[class_index] = precision
self.recalls_per_class[class_index] = recall
average_precision = metrics.compute_average_precision(precision, recall)
self.average_precision_per_class[class_index] = average_precision
logging.info('average_precision: %f', average_precision)
self.corloc_per_class = metrics.compute_cor_loc(
self.num_gt_imgs_per_class,
......
......@@ -15,9 +15,10 @@
"""Tests for object_detection.utils.object_detection_evaluation."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from object_detection import eval_util
from object_detection.core import standard_fields
from object_detection.utils import object_detection_evaluation
......@@ -683,5 +684,141 @@ class ObjectDetectionEvaluationTest(tf.test.TestCase):
self.assertAlmostEqual(expected_mean_corloc, mean_corloc)
class ObjectDetectionEvaluatorTest(tf.test.TestCase, parameterized.TestCase):
def setUp(self):
  # Three-category label map shared by every test in this class.
  self.categories = [{
      'id': 1,
      'name': 'person'
  }, {
      'id': 2,
      'name': 'dog'
  }, {
      'id': 3,
      'name': 'cat'
  }]
  # Evaluator under test, constructed with default options.
  self.od_eval = object_detection_evaluation.ObjectDetectionEvaluator(
      categories=self.categories)
def _make_evaluation_dict(self,
                          resized_groundtruth_masks=False,
                          batch_size=1,
                          max_gt_boxes=None,
                          scale_to_absolute=False):
  """Builds a synthetic eval_dict of detection/groundtruth tensors.

  Constructs a batch of 20x20 zero images where every example has one
  groundtruth box of class 1 covering the full image; the last example's
  detection covers only the top-left quarter while all others cover the
  full image.

  Args:
    resized_groundtruth_masks: if True, groundtruth masks are 10x10
      instead of 20x20.
    batch_size: number of examples; 1 uses the single-example result dict,
      >1 uses the batched result dict.
    max_gt_boxes: forwarded to result_dict_for_batched_example (batched
      case only).
    scale_to_absolute: forwarded to the eval_util result-dict builder.

  Returns:
    The dict produced by eval_util.result_dict_for_single_example (when
    batch_size == 1) or eval_util.result_dict_for_batched_example.
  """
  input_data_fields = standard_fields.InputDataFields
  detection_fields = standard_fields.DetectionResultFields

  image = tf.zeros(shape=[batch_size, 20, 20, 3], dtype=tf.uint8)
  if batch_size == 1:
    key = tf.constant('image1')
  else:
    key = tf.constant([str(i) for i in range(batch_size)])
  # First batch_size - 1 detections span the whole image; the last spans
  # only [0, 0, 0.5, 0.5].
  detection_boxes = tf.concat([
      tf.tile(
          tf.constant([[[0., 0., 1., 1.]]]), multiples=[batch_size - 1, 1, 1
          ]),
      tf.constant([[[0., 0., 0.5, 0.5]]])
  ],
                              axis=0)
  detection_scores = tf.concat([
      tf.tile(tf.constant([[0.5]]), multiples=[batch_size - 1, 1]),
      tf.constant([[0.8]])
  ],
                               axis=0)
  detection_classes = tf.tile(tf.constant([[0]]), multiples=[batch_size, 1])
  detection_masks = tf.tile(
      tf.ones(shape=[1, 2, 20, 20], dtype=tf.float32),
      multiples=[batch_size, 1, 1, 1])
  groundtruth_boxes = tf.constant([[0., 0., 1., 1.]])
  groundtruth_classes = tf.constant([1])
  groundtruth_instance_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8)
  num_detections = tf.ones([batch_size])
  if resized_groundtruth_masks:
    groundtruth_instance_masks = tf.ones(shape=[1, 10, 10], dtype=tf.uint8)

  if batch_size > 1:
    # Replicate the single groundtruth example across the batch dimension.
    groundtruth_boxes = tf.tile(
        tf.expand_dims(groundtruth_boxes, 0), multiples=[batch_size, 1, 1])
    groundtruth_classes = tf.tile(
        tf.expand_dims(groundtruth_classes, 0), multiples=[batch_size, 1])
    groundtruth_instance_masks = tf.tile(
        tf.expand_dims(groundtruth_instance_masks, 0),
        multiples=[batch_size, 1, 1, 1])

  detections = {
      detection_fields.detection_boxes: detection_boxes,
      detection_fields.detection_scores: detection_scores,
      detection_fields.detection_classes: detection_classes,
      detection_fields.detection_masks: detection_masks,
      detection_fields.num_detections: num_detections
  }
  groundtruth = {
      input_data_fields.groundtruth_boxes:
          groundtruth_boxes,
      input_data_fields.groundtruth_classes:
          groundtruth_classes,
      input_data_fields.groundtruth_instance_masks:
          groundtruth_instance_masks,
  }
  if batch_size > 1:
    return eval_util.result_dict_for_batched_example(
        image,
        key,
        detections,
        groundtruth,
        scale_to_absolute=scale_to_absolute,
        max_gt_boxes=max_gt_boxes)
  else:
    return eval_util.result_dict_for_single_example(
        image,
        key,
        detections,
        groundtruth,
        scale_to_absolute=scale_to_absolute)
@parameterized.parameters({
    'batch_size': 1,
    'expected_map': 0,
    'max_gt_boxes': None,
    'scale_to_absolute': True
}, {
    'batch_size': 8,
    'expected_map': 0.765625,
    'max_gt_boxes': [1],
    'scale_to_absolute': True
}, {
    'batch_size': 1,
    'expected_map': 0,
    'max_gt_boxes': None,
    'scale_to_absolute': False
}, {
    'batch_size': 8,
    'expected_map': 0.765625,
    'max_gt_boxes': [1],
    'scale_to_absolute': False
})
def test_get_estimator_eval_metric_ops(self,
                                       batch_size=1,
                                       expected_map=1,
                                       max_gt_boxes=None,
                                       scale_to_absolute=False):
  """Checks mAP from estimator metric ops for batched/unbatched eval dicts."""
  eval_dict = self._make_evaluation_dict(
      batch_size=batch_size,
      max_gt_boxes=max_gt_boxes,
      scale_to_absolute=scale_to_absolute)
  tf.logging.info('eval_dict: {}'.format(eval_dict))
  metric_ops = self.od_eval.get_estimator_eval_metric_ops(eval_dict)
  _, update_op = metric_ops['Precision/mAP@0.5IOU']

  with self.test_session() as sess:
    metrics = {}
    # Note: `.items()` (not the Python 2-only `.iteritems()`) so this test
    # also runs under Python 3.
    for key, (value_op, _) in metric_ops.items():
      metrics[key] = value_op
    # A single update feeds the whole batch via the py_func update op.
    sess.run(update_op)
    metrics = sess.run(metrics)
    self.assertAlmostEqual(expected_map, metrics['Precision/mAP@0.5IOU'])
if __name__ == '__main__':
tf.test.main()
......@@ -14,6 +14,7 @@
# ==============================================================================
"""A module for helper tensorflow ops."""
import collections
import math
import numpy as np
import six
......@@ -1087,81 +1088,10 @@ def native_crop_and_resize(image, boxes, crop_size, scope=None):
return tf.reshape(cropped_regions, final_shape)
def expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses, unmatched_cls_losses,
desired_negative_sampling_ratio, min_num_negative_samples):
"""Computes classification loss by background/foreground weighting.
The weighting is such that the effective background/foreground weight ratio
is the desired_negative_sampling_ratio. if p_i is the foreground probability
of anchor a_i, L(a_i) is the anchors loss, N is the number of anchors, M
is the sum of foreground probabilities across anchors, and K is the desired
ratio between the number of negative and positive samples, then the total loss
L is calculated as:
beta = K*M/(N-M)
L = sum_{i=1}^N [p_i * L_p(a_i) + beta * (1 - p_i) * L_n(a_i)]
where L_p(a_i) is the loss against target assuming the anchor was matched,
otherwise zero, and L_n(a_i) is the loss against the background target
assuming the anchor was unmatched, otherwise zero.
Args:
batch_cls_targets: A tensor with shape [batch_size, num_anchors, num_classes
+ 1], where 0'th index is the background class, containing the class
distrubution for the target assigned to a given anchor.
cls_losses: Float tensor of shape [batch_size, num_anchors] representing
anchorwise classification losses.
unmatched_cls_losses: loss for each anchor against the unmatched class
target.
desired_negative_sampling_ratio: The desired background/foreground weight
ratio.
min_num_negative_samples: Minimum number of effective negative samples.
Used only when there are no positive examples.
Returns:
The classification loss.
"""
num_anchors = tf.cast(tf.shape(batch_cls_targets)[1], tf.float32)
# find the p_i
foreground_probabilities = 1 - batch_cls_targets[:, :, 0]
foreground_sum = tf.reduce_sum(foreground_probabilities, axis=-1)
# for each anchor, expected_j is the expected number of positive anchors
# given that this anchor was sampled as negative.
tiled_foreground_sum = tf.tile(
tf.reshape(foreground_sum, [-1, 1]),
[1, tf.cast(num_anchors, tf.int32)])
expected_j = tiled_foreground_sum - foreground_probabilities
k = desired_negative_sampling_ratio
# compute beta
expected_negatives = tf.to_float(num_anchors) - expected_j
desired_negatives = k * expected_j
desired_negatives = tf.where(
tf.greater(desired_negatives, expected_negatives), expected_negatives,
desired_negatives)
# probability that an anchor is sampled for the loss computation given that it
# is negative.
beta = desired_negatives / expected_negatives
# where the foreground sum is zero, use a minimum negative weight.
min_negative_weight = 1.0 * min_num_negative_samples / num_anchors
beta = tf.where(
tf.equal(tiled_foreground_sum, 0),
min_negative_weight * tf.ones_like(beta), beta)
foreground_weights = foreground_probabilities
background_weights = (1 - foreground_weights) * beta
weighted_foreground_losses = foreground_weights * cls_losses
weighted_background_losses = background_weights * unmatched_cls_losses
EqualizationLossConfig = collections.namedtuple('EqualizationLossConfig',
['weight', 'exclude_prefixes'])
cls_losses = tf.reduce_sum(
weighted_foreground_losses, axis=-1) + tf.reduce_sum(
weighted_background_losses, axis=-1)
return cls_losses
......@@ -21,6 +21,8 @@ from object_detection.core import standard_fields as fields
from object_detection.utils import ops
from object_detection.utils import test_case
slim = tf.contrib.slim
class NormalizedToImageCoordinatesTest(tf.test.TestCase):
......@@ -1466,189 +1468,9 @@ class OpsTestCropAndResize(test_case.TestCase):
self.assertAllClose(crop_output, expected_output)
class OpsTestExpectedClassificationLoss(test_case.TestCase):
def testExpectedClassificationLossUnderSamplingWithHardLabels(self):
def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples):
return ops.expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples)
batch_cls_targets = np.array(
[[[1., 0, 0], [0, 1., 0]], [[1., 0, 0], [0, 1., 0]]], dtype=np.float32)
cls_losses = np.array([[1, 2], [3, 4]], dtype=np.float32)
unmatched_cls_losses = np.array([[10, 20], [30, 40]], dtype=np.float32)
negative_to_positive_ratio = np.array([2], dtype=np.float32)
min_num_negative_samples = np.array([1], dtype=np.float32)
classification_loss = self.execute(graph_fn, [
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples
])
# expected_foreground_sum = [1,1]
# expected_expected_j = [[1, 0], [1, 0]]
# expected_expected_negatives = [[1, 2], [1, 2]]
# expected_desired_negatives = [[2, 0], [2, 0]]
# expected_beta = [[1, 0], [1, 0]]
# expected_foreground_weights = [[0, 1], [0, 1]]
# expected_background_weights = [[1, 0], [1, 0]]
# expected_weighted_foreground_losses = [[0, 2], [0, 4]]
# expected_weighted_background_losses = [[10, 0], [30, 0]]
# expected_classification_loss_under_sampling = [6, 40]
expected_classification_loss_under_sampling = [2 + 10, 4 + 30]
self.assertAllClose(expected_classification_loss_under_sampling,
classification_loss)
def testExpectedClassificationLossUnderSamplingWithHardLabelsMoreNegatives(
self):
def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples):
return ops.expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples)
batch_cls_targets = np.array(
[[[1., 0, 0], [0, 1., 0], [1., 0, 0], [1., 0, 0], [1., 0, 0]]],
dtype=np.float32)
cls_losses = np.array([[1, 2, 3, 4, 5]], dtype=np.float32)
unmatched_cls_losses = np.array([[10, 20, 30, 40, 50]], dtype=np.float32)
negative_to_positive_ratio = np.array([2], dtype=np.float32)
min_num_negative_samples = np.array([1], dtype=np.float32)
classification_loss = self.execute(graph_fn, [
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples
])
# expected_foreground_sum = [1]
# expected_expected_j = [[1, 0, 1, 1, 1]]
# expected_expected_negatives = [[4, 5, 4, 4, 4]]
# expected_desired_negatives = [[2, 0, 2, 2, 2]]
# expected_beta = [[.5, 0, .5, .5, .5]]
# expected_foreground_weights = [[0, 1, 0, 0, 0]]
# expected_background_weights = [[.5, 0, .5, .5, .5]]
# expected_weighted_foreground_losses = [[0, 2, 0, 0, 0]]
# expected_weighted_background_losses = [[10*.5, 0, 30*.5, 40*.5, 50*.5]]
# expected_classification_loss_under_sampling = [5+2+15+20+25]
expected_classification_loss_under_sampling = [5 + 2 + 15 + 20 + 25]
self.assertAllClose(expected_classification_loss_under_sampling,
classification_loss)
def testExpectedClassificationLossUnderSamplingWithAllNegative(self):
def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses):
return ops.expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples)
batch_cls_targets = np.array(
[[[1, 0, 0], [1, 0, 0]], [[1, 0, 0], [1, 0, 0]]], dtype=np.float32)
cls_losses = np.array([[1, 2], [3, 4]], dtype=np.float32)
unmatched_cls_losses = np.array([[10, 20], [30, 40]], dtype=np.float32)
negative_to_positive_ratio = np.array([2], dtype=np.float32)
min_num_negative_samples = np.array([1], dtype=np.float32)
classification_loss = self.execute(
graph_fn, [batch_cls_targets, cls_losses, unmatched_cls_losses])
# expected_foreground_sum = [0,0]
# expected_expected_j = [[0, 0], [0, 0]]
# expected_expected_negatives = [[2, 2], [2, 2]]
# expected_desired_negatives = [[0, 0], [0, 0]]
# expected_beta = [[0, 0],[0, 0]]
# expected_foreground_weights = [[0, 0], [0, 0]]
# expected_background_weights = [[.5, .5], [.5, .5]]
# expected_weighted_foreground_losses = [[0, 0], [0, 0]]
# expected_weighted_background_losses = [[5, 10], [15, 20]]
# expected_classification_loss_under_sampling = [15, 35]
expected_classification_loss_under_sampling = [
10 * .5 + 20 * .5, 30 * .5 + 40 * .5
]
self.assertAllClose(expected_classification_loss_under_sampling,
classification_loss)
def testExpectedClassificationLossUnderSamplingWithAllPositive(self):
def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses):
return ops.expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples)
batch_cls_targets = np.array(
[[[0, 1., 0], [0, 1., 0]], [[0, 1, 0], [0, 0, 1]]], dtype=np.float32)
cls_losses = np.array([[1, 2], [3, 4]], dtype=np.float32)
unmatched_cls_losses = np.array([[10, 20], [30, 40]], dtype=np.float32)
negative_to_positive_ratio = np.array([2], dtype=np.float32)
min_num_negative_samples = np.array([1], dtype=np.float32)
classification_loss = self.execute(
graph_fn, [batch_cls_targets, cls_losses, unmatched_cls_losses])
# expected_foreground_sum = [2,2]
# expected_expected_j = [[1, 1], [1, 1]]
# expected_expected_negatives = [[1, 1], [1, 1]]
# expected_desired_negatives = [[1, 1], [1, 1]]
# expected_beta = [[1, 1],[1, 1]]
# expected_foreground_weights = [[1, 1], [1, 1]]
# expected_background_weights = [[0, 0], [0, 0]]
# expected_weighted_foreground_losses = [[1, 2], [3, 4]]
# expected_weighted_background_losses = [[0, 0], [0, 0]]
# expected_classification_loss_under_sampling = [15, 35]
expected_classification_loss_under_sampling = [1 + 2, 3 + 4]
self.assertAllClose(expected_classification_loss_under_sampling,
classification_loss)
def testExpectedClassificationLossUnderSamplingWithSoftLabels(self):
def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples):
return ops.expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples)
batch_cls_targets = np.array([[[.75, .25, 0], [0.25, .75, 0], [.75, .25, 0],
[0.25, .75, 0], [1., 0, 0]]],
dtype=np.float32)
cls_losses = np.array([[1, 2, 3, 4, 5]], dtype=np.float32)
unmatched_cls_losses = np.array([[10, 20, 30, 40, 50]], dtype=np.float32)
negative_to_positive_ratio = np.array([2], dtype=np.float32)
min_num_negative_samples = np.array([1], dtype=np.float32)
classification_loss = self.execute(graph_fn, [
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples
])
# expected_foreground_sum = [2]
# expected_expected_j = [[1.75, 1.25, 1.75, 1.25, 2]]
# expected_expected_negatives = [[3.25, 3.75, 3.25, 3.75, 3]]
# expected_desired_negatives = [[3.25, 2.5, 3.25, 2.5, 3]]
# expected_beta = [[1, 2/3, 1, 2/3, 1]]
# expected_foreground_weights = [[0.25, .75, .25, .75, 0]]
# expected_background_weights = [[[.75, 1/6., .75, 1/6., 1]]]
# expected_weighted_foreground_losses = [[.25*1, .75*2, .25*3, .75*4, 0*5]]
# expected_weighted_background_losses = [[
# .75*10, 1/6.*20, .75*30, 1/6.*40, 1*50]]
# expected_classification_loss_under_sampling = sum([
# .25*1, .75*2, .25*3, .75*4, 0, .75*10, 1/6.*20, .75*30,
# 1/6.*40, 1*50])
expected_classification_loss_under_sampling = [
sum([
.25 * 1, .75 * 2, .25 * 3, .75 * 4, 0, .75 * 10, 1 / 6. * 20,
.75 * 30, 1 / 6. * 40, 1 * 50
])
]
self.assertAllClose(expected_classification_loss_under_sampling,
classification_loss)
if __name__ == '__main__':
......
......@@ -42,14 +42,25 @@ class MockBoxCoder(box_coder.BoxCoder):
return box_list.BoxList(rel_codes + anchors.get())
class MockMaskHead(object):
  """Stub mask head whose predictions are always all-zero masks."""

  def __init__(self, num_classes):
    self._num_classes = num_classes

  def predict(self, features):
    """Returns a zero tensor shaped like real mask-head output.

    Args:
      features: a feature tensor whose leading dimension is the batch size.

    Returns:
      A float32 zero tensor of shape
      [batch_size, 1, num_classes, DEFAULT_MASK_SIZE, DEFAULT_MASK_SIZE].
    """
    num_images = tf.shape(features)[0]
    mask_shape = (num_images, 1, self._num_classes, DEFAULT_MASK_SIZE,
                  DEFAULT_MASK_SIZE)
    return tf.zeros(mask_shape, dtype=tf.float32)
class MockBoxPredictor(box_predictor.BoxPredictor):
"""Simple box predictor that ignores inputs and outputs all zeros."""
def __init__(self, is_training, num_classes, add_background_class=True,
predict_mask=False):
def __init__(self, is_training, num_classes, add_background_class=True):
super(MockBoxPredictor, self).__init__(is_training, num_classes)
self._add_background_class = add_background_class
self._predict_mask = predict_mask
def _predict(self, image_features, num_predictions_per_location):
image_feature = image_features[0]
......@@ -66,31 +77,22 @@ class MockBoxPredictor(box_predictor.BoxPredictor):
(batch_size, num_anchors, 1, code_size), dtype=tf.float32)
class_predictions_with_background = zero + tf.zeros(
(batch_size, num_anchors, num_class_slots), dtype=tf.float32)
masks = zero + tf.zeros(
(batch_size, num_anchors, self.num_classes, DEFAULT_MASK_SIZE,
DEFAULT_MASK_SIZE),
dtype=tf.float32)
predictions_dict = {
box_predictor.BOX_ENCODINGS:
box_encodings,
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND:
class_predictions_with_background
}
if self._predict_mask:
predictions_dict[box_predictor.MASK_PREDICTIONS] = masks
return predictions_dict
class MockKerasBoxPredictor(box_predictor.KerasBoxPredictor):
"""Simple box predictor that ignores inputs and outputs all zeros."""
def __init__(self, is_training, num_classes, add_background_class=True,
predict_mask=False):
def __init__(self, is_training, num_classes, add_background_class=True):
super(MockKerasBoxPredictor, self).__init__(
is_training, num_classes, False, False)
self._add_background_class = add_background_class
self._predict_mask = predict_mask
def _predict(self, image_features, **kwargs):
image_feature = image_features[0]
......@@ -107,18 +109,12 @@ class MockKerasBoxPredictor(box_predictor.KerasBoxPredictor):
(batch_size, num_anchors, 1, code_size), dtype=tf.float32)
class_predictions_with_background = zero + tf.zeros(
(batch_size, num_anchors, num_class_slots), dtype=tf.float32)
masks = zero + tf.zeros(
(batch_size, num_anchors, self.num_classes, DEFAULT_MASK_SIZE,
DEFAULT_MASK_SIZE),
dtype=tf.float32)
predictions_dict = {
box_predictor.BOX_ENCODINGS:
box_encodings,
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND:
class_predictions_with_background
}
if self._predict_mask:
predictions_dict[box_predictor.MASK_PREDICTIONS] = masks
return predictions_dict
......
......@@ -19,8 +19,7 @@ These functions often receive an image, perform some visualization on the image.
The functions do not return a value, instead they modify the image itself.
"""
from abc import ABCMeta
from abc import abstractmethod
import abc
import collections
import functools
# Set headless-friendly backend.
......@@ -35,7 +34,7 @@ import six
import tensorflow as tf
from object_detection.core import standard_fields as fields
from object_detection.utils import shape_utils
_TITLE_LEFT_MARGIN = 10
_TITLE_TOP_MARGIN = 10
......@@ -309,11 +308,23 @@ def _visualize_boxes_and_masks_and_keypoints(
**kwargs)
def _resize_original_image(image, image_shape):
  """Nearest-neighbor resizes a single image to `image_shape` as uint8.

  Args:
    image: a [H, W, C] image tensor.
    image_shape: a [2] tensor holding the target (height, width).

  Returns:
    A uint8 [new_H, new_W, C] tensor resized with aligned corners.
  """
  batched = tf.expand_dims(image, 0)
  resized = tf.image.resize_images(
      batched,
      image_shape,
      method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
      align_corners=True)
  return tf.cast(tf.squeeze(resized, 0), tf.uint8)
def draw_bounding_boxes_on_image_tensors(images,
boxes,
classes,
scores,
category_index,
original_image_spatial_shape=None,
true_image_shape=None,
instance_masks=None,
keypoints=None,
max_boxes_to_draw=20,
......@@ -323,13 +334,18 @@ def draw_bounding_boxes_on_image_tensors(images,
Args:
images: A 4D uint8 image tensor of shape [N, H, W, C]. If C > 3, additional
channels will be ignored.
channels will be ignored. If C = 1, then we convert the images to RGB
images.
boxes: [N, max_detections, 4] float32 tensor of detection boxes.
classes: [N, max_detections] int tensor of detection classes. Note that
classes are 1-indexed.
scores: [N, max_detections] float32 tensor of detection scores.
category_index: a dict that maps integer ids to category dicts. e.g.
{1: {1: 'dog'}, 2: {2: 'cat'}, ...}
original_image_spatial_shape: [N, 2] tensor containing the spatial size of
the original image.
true_image_shape: [N, 3] tensor containing the spatial size of unpadded
original_image.
instance_masks: A 4D uint8 tensor of shape [N, max_detection, H, W] with
instance masks.
keypoints: A 4D float32 tensor of shape [N, max_detection, num_keypoints, 2]
......@@ -344,7 +360,10 @@ def draw_bounding_boxes_on_image_tensors(images,
4D image tensor of type uint8, with boxes drawn on top.
"""
# Additional channels are being ignored.
images = images[:, :, :, 0:3]
if images.shape[3] > 3:
images = images[:, :, :, 0:3]
elif images.shape[3] == 1:
images = tf.image.grayscale_to_rgb(images)
visualization_keyword_args = {
'use_normalized_coordinates': use_normalized_coordinates,
'max_boxes_to_draw': max_boxes_to_draw,
......@@ -352,35 +371,61 @@ def draw_bounding_boxes_on_image_tensors(images,
'agnostic_mode': False,
'line_thickness': 4
}
if true_image_shape is None:
true_shapes = tf.constant(-1, shape=[images.shape.as_list()[0], 3])
else:
true_shapes = true_image_shape
if original_image_spatial_shape is None:
original_shapes = tf.constant(-1, shape=[images.shape.as_list()[0], 2])
else:
original_shapes = original_image_spatial_shape
if instance_masks is not None and keypoints is None:
visualize_boxes_fn = functools.partial(
_visualize_boxes_and_masks,
category_index=category_index,
**visualization_keyword_args)
elems = [images, boxes, classes, scores, instance_masks]
elems = [
true_shapes, original_shapes, images, boxes, classes, scores,
instance_masks
]
elif instance_masks is None and keypoints is not None:
visualize_boxes_fn = functools.partial(
_visualize_boxes_and_keypoints,
category_index=category_index,
**visualization_keyword_args)
elems = [images, boxes, classes, scores, keypoints]
elems = [
true_shapes, original_shapes, images, boxes, classes, scores, keypoints
]
elif instance_masks is not None and keypoints is not None:
visualize_boxes_fn = functools.partial(
_visualize_boxes_and_masks_and_keypoints,
category_index=category_index,
**visualization_keyword_args)
elems = [images, boxes, classes, scores, instance_masks, keypoints]
elems = [
true_shapes, original_shapes, images, boxes, classes, scores,
instance_masks, keypoints
]
else:
visualize_boxes_fn = functools.partial(
_visualize_boxes,
category_index=category_index,
**visualization_keyword_args)
elems = [images, boxes, classes, scores]
elems = [
true_shapes, original_shapes, images, boxes, classes, scores
]
def draw_boxes(image_and_detections):
"""Draws boxes on image."""
image_with_boxes = tf.py_func(visualize_boxes_fn, image_and_detections,
true_shape = image_and_detections[0]
original_shape = image_and_detections[1]
if true_image_shape is not None:
image = shape_utils.pad_or_clip_nd(image_and_detections[2],
[true_shape[0], true_shape[1], 3])
if original_image_spatial_shape is not None:
image_and_detections[2] = _resize_original_image(image, original_shape)
image_with_boxes = tf.py_func(visualize_boxes_fn, image_and_detections[2:],
tf.uint8)
return image_with_boxes
......@@ -400,6 +445,7 @@ def draw_side_by_side_evaluation_image(eval_dict,
Args:
eval_dict: The evaluation dictionary returned by
eval_util.result_dict_for_batched_example() or
eval_util.result_dict_for_single_example().
category_index: A category index (dictionary) produced from a labelmap.
max_boxes_to_draw: The maximum number of boxes to draw for detections.
......@@ -409,53 +455,85 @@ def draw_side_by_side_evaluation_image(eval_dict,
Default is True.
Returns:
A [1, H, 2 * W, C] uint8 tensor. The subimage on the left corresponds to
detections, while the subimage on the right corresponds to groundtruth.
A list of [1, H, 2 * W, C] uint8 tensor. The subimage on the left
corresponds to detections, while the subimage on the right corresponds to
groundtruth.
"""
detection_fields = fields.DetectionResultFields()
input_data_fields = fields.InputDataFields()
instance_masks = None
if detection_fields.detection_masks in eval_dict:
instance_masks = tf.cast(
tf.expand_dims(eval_dict[detection_fields.detection_masks], axis=0),
tf.uint8)
keypoints = None
if detection_fields.detection_keypoints in eval_dict:
keypoints = tf.expand_dims(
eval_dict[detection_fields.detection_keypoints], axis=0)
groundtruth_instance_masks = None
if input_data_fields.groundtruth_instance_masks in eval_dict:
groundtruth_instance_masks = tf.cast(
images_with_detections_list = []
# Add the batch dimension if the eval_dict is for single example.
if len(eval_dict[detection_fields.detection_classes].shape) == 1:
for key in eval_dict:
if key != input_data_fields.original_image:
eval_dict[key] = tf.expand_dims(eval_dict[key], 0)
for indx in range(eval_dict[input_data_fields.original_image].shape[0]):
instance_masks = None
if detection_fields.detection_masks in eval_dict:
instance_masks = tf.cast(
tf.expand_dims(
eval_dict[detection_fields.detection_masks][indx], axis=0),
tf.uint8)
keypoints = None
if detection_fields.detection_keypoints in eval_dict:
keypoints = tf.expand_dims(
eval_dict[detection_fields.detection_keypoints][indx], axis=0)
groundtruth_instance_masks = None
if input_data_fields.groundtruth_instance_masks in eval_dict:
groundtruth_instance_masks = tf.cast(
tf.expand_dims(
eval_dict[input_data_fields.groundtruth_instance_masks][indx],
axis=0), tf.uint8)
images_with_detections = draw_bounding_boxes_on_image_tensors(
tf.expand_dims(
eval_dict[input_data_fields.groundtruth_instance_masks], axis=0),
tf.uint8)
images_with_detections = draw_bounding_boxes_on_image_tensors(
eval_dict[input_data_fields.original_image],
tf.expand_dims(eval_dict[detection_fields.detection_boxes], axis=0),
tf.expand_dims(eval_dict[detection_fields.detection_classes], axis=0),
tf.expand_dims(eval_dict[detection_fields.detection_scores], axis=0),
category_index,
instance_masks=instance_masks,
keypoints=keypoints,
max_boxes_to_draw=max_boxes_to_draw,
min_score_thresh=min_score_thresh,
use_normalized_coordinates=use_normalized_coordinates)
images_with_groundtruth = draw_bounding_boxes_on_image_tensors(
eval_dict[input_data_fields.original_image],
tf.expand_dims(eval_dict[input_data_fields.groundtruth_boxes], axis=0),
tf.expand_dims(eval_dict[input_data_fields.groundtruth_classes], axis=0),
tf.expand_dims(
tf.ones_like(
eval_dict[input_data_fields.groundtruth_classes],
dtype=tf.float32),
axis=0),
category_index,
instance_masks=groundtruth_instance_masks,
keypoints=None,
max_boxes_to_draw=None,
min_score_thresh=0.0,
use_normalized_coordinates=use_normalized_coordinates)
return tf.concat([images_with_detections, images_with_groundtruth], axis=2)
eval_dict[input_data_fields.original_image][indx], axis=0),
tf.expand_dims(
eval_dict[detection_fields.detection_boxes][indx], axis=0),
tf.expand_dims(
eval_dict[detection_fields.detection_classes][indx], axis=0),
tf.expand_dims(
eval_dict[detection_fields.detection_scores][indx], axis=0),
category_index,
original_image_spatial_shape=tf.expand_dims(
eval_dict[input_data_fields.original_image_spatial_shape][indx],
axis=0),
true_image_shape=tf.expand_dims(
eval_dict[input_data_fields.true_image_shape][indx], axis=0),
instance_masks=instance_masks,
keypoints=keypoints,
max_boxes_to_draw=max_boxes_to_draw,
min_score_thresh=min_score_thresh,
use_normalized_coordinates=use_normalized_coordinates)
images_with_groundtruth = draw_bounding_boxes_on_image_tensors(
tf.expand_dims(
eval_dict[input_data_fields.original_image][indx], axis=0),
tf.expand_dims(
eval_dict[input_data_fields.groundtruth_boxes][indx], axis=0),
tf.expand_dims(
eval_dict[input_data_fields.groundtruth_classes][indx], axis=0),
tf.expand_dims(
tf.ones_like(
eval_dict[input_data_fields.groundtruth_classes][indx],
dtype=tf.float32),
axis=0),
category_index,
original_image_spatial_shape=tf.expand_dims(
eval_dict[input_data_fields.original_image_spatial_shape][indx],
axis=0),
true_image_shape=tf.expand_dims(
eval_dict[input_data_fields.true_image_shape][indx], axis=0),
instance_masks=groundtruth_instance_masks,
keypoints=None,
max_boxes_to_draw=None,
min_score_thresh=0.0,
use_normalized_coordinates=use_normalized_coordinates)
images_with_detections_list.append(
tf.concat([images_with_detections, images_with_groundtruth], axis=2))
return images_with_detections_list
def draw_keypoints_on_image_array(image,
......@@ -744,7 +822,7 @@ class EvalMetricOpsVisualization(object):
responsible for accruing images (with overlaid detections and groundtruth)
and returning a dictionary that can be passed to `eval_metric_ops`.
"""
__metaclass__ = ABCMeta
__metaclass__ = abc.ABCMeta
def __init__(self,
category_index,
......@@ -792,26 +870,33 @@ class EvalMetricOpsVisualization(object):
Args:
eval_dict: A dictionary that holds an image, groundtruth, and detections
for a single example. See eval_util.result_dict_for_single_example() for
a convenient method for constructing such a dictionary. The dictionary
for a batched example. Note that, we use only the first example for
visualization. See eval_util.result_dict_for_batched_example() for a
convenient method for constructing such a dictionary. The dictionary
contains
fields.InputDataFields.original_image: [1, H, W, 3] image.
fields.InputDataFields.groundtruth_boxes - [num_boxes, 4] float32
tensor with groundtruth boxes in range [0.0, 1.0].
fields.InputDataFields.groundtruth_classes - [num_boxes] int64
tensor with 1-indexed groundtruth classes.
fields.InputDataFields.original_image: [batch_size, H, W, 3] image.
fields.InputDataFields.original_image_spatial_shape: [batch_size, 2]
tensor containing the size of the original image.
fields.InputDataFields.true_image_shape: [batch_size, 3]
tensor containing the spatial size of the upadded original image.
fields.InputDataFields.groundtruth_boxes - [batch_size, num_boxes, 4]
float32 tensor with groundtruth boxes in range [0.0, 1.0].
fields.InputDataFields.groundtruth_classes - [batch_size, num_boxes]
int64 tensor with 1-indexed groundtruth classes.
fields.InputDataFields.groundtruth_instance_masks - (optional)
[num_boxes, H, W] int64 tensor with instance masks.
fields.DetectionResultFields.detection_boxes - [max_num_boxes, 4]
float32 tensor with detection boxes in range [0.0, 1.0].
fields.DetectionResultFields.detection_classes - [max_num_boxes]
int64 tensor with 1-indexed detection classes.
fields.DetectionResultFields.detection_scores - [max_num_boxes]
float32 tensor with detection scores.
fields.DetectionResultFields.detection_masks - (optional)
[max_num_boxes, H, W] float32 tensor of binarized masks.
[batch_size, num_boxes, H, W] int64 tensor with instance masks.
fields.DetectionResultFields.detection_boxes - [batch_size,
max_num_boxes, 4] float32 tensor with detection boxes in range [0.0,
1.0].
fields.DetectionResultFields.detection_classes - [batch_size,
max_num_boxes] int64 tensor with 1-indexed detection classes.
fields.DetectionResultFields.detection_scores - [batch_size,
max_num_boxes] float32 tensor with detection scores.
fields.DetectionResultFields.detection_masks - (optional) [batch_size,
max_num_boxes, H, W] float32 tensor of binarized masks.
fields.DetectionResultFields.detection_keypoints - (optional)
[max_num_boxes, num_keypoints, 2] float32 tensor with keypooints.
[batch_size, max_num_boxes, num_keypoints, 2] float32 tensor with
keypoints.
Returns:
A dictionary of image summary names to tuple of (value_op, update_op). The
......@@ -820,6 +905,8 @@ class EvalMetricOpsVisualization(object):
groundtruth. Each `value_op` holds the tf.summary.image string for a given
image.
"""
if self._max_examples_to_draw == 0:
return {}
images = self.images_from_evaluation_dict(eval_dict)
def get_images():
......@@ -837,7 +924,7 @@ class EvalMetricOpsVisualization(object):
lambda: tf.summary.image(summary_name, image),
lambda: tf.constant(''))
update_op = tf.py_func(self.add_images, [images], [])
update_op = tf.py_func(self.add_images, [[images[0]]], [])
image_tensors = tf.py_func(
get_images, [], [tf.uint8] * self._max_examples_to_draw)
eval_metric_ops = {}
......@@ -847,7 +934,7 @@ class EvalMetricOpsVisualization(object):
eval_metric_ops[summary_name] = (value_op, update_op)
return eval_metric_ops
@abstractmethod
@abc.abstractmethod
def images_from_evaluation_dict(self, eval_dict):
"""Converts evaluation dictionary into a list of image tensors.
......@@ -882,9 +969,6 @@ class VisualizeSingleFrameDetections(EvalMetricOpsVisualization):
summary_name_prefix=summary_name_prefix)
def images_from_evaluation_dict(self, eval_dict):
return [draw_side_by_side_evaluation_image(
eval_dict,
self._category_index,
self._max_boxes_to_draw,
self._min_score_thresh,
self._use_normalized_coordinates)]
return draw_side_by_side_evaluation_image(
eval_dict, self._category_index, self._max_boxes_to_draw,
self._min_score_thresh, self._use_normalized_coordinates)
......@@ -52,6 +52,9 @@ class VisualizationUtilsTest(tf.test.TestCase):
def create_test_image_with_five_channels(self):
return np.full([100, 200, 5], 255, dtype=np.uint8)
def create_test_grayscale_image(self):
return np.full([100, 200, 1], 255, dtype=np.uint8)
def test_draw_bounding_box_on_image(self):
test_image = self.create_colorful_test_image()
test_image = Image.fromarray(test_image)
......@@ -119,9 +122,11 @@ class VisualizationUtilsTest(tf.test.TestCase):
fname = os.path.join(_TESTDATA_PATH, 'image1.jpg')
image_np = np.array(Image.open(fname))
images_np = np.stack((image_np, image_np), axis=0)
original_image_shape = [[636, 512], [636, 512]]
with tf.Graph().as_default():
images_tensor = tf.constant(value=images_np, dtype=tf.uint8)
image_shape = tf.constant(original_image_shape, dtype=tf.int32)
boxes = tf.constant([[[0.4, 0.25, 0.75, 0.75], [0.5, 0.3, 0.6, 0.9]],
[[0.25, 0.25, 0.75, 0.75], [0.1, 0.3, 0.6, 1.0]]])
classes = tf.constant([[1, 1], [1, 2]], dtype=tf.int64)
......@@ -133,6 +138,8 @@ class VisualizationUtilsTest(tf.test.TestCase):
classes,
scores,
category_index,
original_image_spatial_shape=image_shape,
true_image_shape=image_shape,
min_score_thresh=0.2))
with self.test_session() as sess:
......@@ -140,7 +147,10 @@ class VisualizationUtilsTest(tf.test.TestCase):
# Write output images for visualization.
images_with_boxes_np = sess.run(images_with_boxes)
self.assertEqual(images_np.shape, images_with_boxes_np.shape)
self.assertEqual(images_np.shape[0], images_with_boxes_np.shape[0])
self.assertEqual(images_np.shape[3], images_with_boxes_np.shape[3])
self.assertEqual(
tuple(original_image_shape[0]), images_with_boxes_np.shape[1:3])
for i in range(images_with_boxes_np.shape[0]):
img_name = 'image_' + str(i) + '.png'
output_file = os.path.join(self.get_temp_dir(), img_name)
......@@ -174,6 +184,35 @@ class VisualizationUtilsTest(tf.test.TestCase):
final_images_np = sess.run(images_with_boxes)
self.assertEqual((2, 100, 200, 3), final_images_np.shape)
def test_draw_bounding_boxes_on_image_tensors_grayscale(self):
"""Tests the case where input image tensor has one channel."""
category_index = {1: {'id': 1, 'name': 'dog'}}
image_np = self.create_test_grayscale_image()
images_np = np.stack((image_np, image_np), axis=0)
with tf.Graph().as_default():
images_tensor = tf.constant(value=images_np, dtype=tf.uint8)
image_shape = tf.constant([[100, 200], [100, 200]], dtype=tf.int32)
boxes = tf.constant(0, dtype=tf.float32, shape=[2, 0, 4])
classes = tf.constant(0, dtype=tf.int64, shape=[2, 0])
scores = tf.constant(0, dtype=tf.float32, shape=[2, 0])
images_with_boxes = (
visualization_utils.draw_bounding_boxes_on_image_tensors(
images_tensor,
boxes,
classes,
scores,
category_index,
original_image_spatial_shape=image_shape,
true_image_shape=image_shape,
min_score_thresh=0.2))
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
final_images_np = sess.run(images_with_boxes)
self.assertEqual((2, 100, 200, 3), final_images_np.shape)
def test_draw_keypoints_on_image(self):
test_image = self.create_colorful_test_image()
test_image = Image.fromarray(test_image)
......@@ -234,34 +273,46 @@ class VisualizationUtilsTest(tf.test.TestCase):
category_index,
max_examples_to_draw=max_examples_to_draw,
summary_name_prefix=metric_op_base)
original_image = tf.placeholder(tf.uint8, [1, None, None, 3])
detection_boxes = tf.random_uniform([20, 4],
original_image = tf.placeholder(tf.uint8, [4, None, None, 3])
original_image_spatial_shape = tf.placeholder(tf.int32, [4, 2])
true_image_shape = tf.placeholder(tf.int32, [4, 3])
detection_boxes = tf.random_uniform([4, 20, 4],
minval=0.0,
maxval=1.0,
dtype=tf.float32)
detection_classes = tf.random_uniform([20],
detection_classes = tf.random_uniform([4, 20],
minval=1,
maxval=3,
dtype=tf.int64)
detection_scores = tf.random_uniform([20],
detection_scores = tf.random_uniform([4, 20],
minval=0.,
maxval=1.,
dtype=tf.float32)
groundtruth_boxes = tf.random_uniform([8, 4],
groundtruth_boxes = tf.random_uniform([4, 8, 4],
minval=0.0,
maxval=1.0,
dtype=tf.float32)
groundtruth_classes = tf.random_uniform([8],
groundtruth_classes = tf.random_uniform([4, 8],
minval=1,
maxval=3,
dtype=tf.int64)
eval_dict = {
fields.DetectionResultFields.detection_boxes: detection_boxes,
fields.DetectionResultFields.detection_classes: detection_classes,
fields.DetectionResultFields.detection_scores: detection_scores,
fields.InputDataFields.original_image: original_image,
fields.InputDataFields.groundtruth_boxes: groundtruth_boxes,
fields.InputDataFields.groundtruth_classes: groundtruth_classes}
fields.DetectionResultFields.detection_boxes:
detection_boxes,
fields.DetectionResultFields.detection_classes:
detection_classes,
fields.DetectionResultFields.detection_scores:
detection_scores,
fields.InputDataFields.original_image:
original_image,
fields.InputDataFields.original_image_spatial_shape: (
original_image_spatial_shape),
fields.InputDataFields.true_image_shape: (true_image_shape),
fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes,
fields.InputDataFields.groundtruth_classes:
groundtruth_classes
}
metric_ops = eval_metric_ops.get_estimator_eval_metric_ops(eval_dict)
_, update_op = metric_ops[metric_ops.keys()[0]]
......@@ -274,12 +325,20 @@ class VisualizationUtilsTest(tf.test.TestCase):
# First run enough update steps to surpass `max_examples_to_draw`.
for i in range(max_examples_to_draw):
# Use a unique image shape on each eval image.
sess.run(update_op, feed_dict={
original_image: np.random.randint(low=0,
high=256,
size=(1, 6 + i, 7 + i, 3),
dtype=np.uint8)
})
sess.run(
update_op,
feed_dict={
original_image:
np.random.randint(
low=0,
high=256,
size=(4, 6 + i, 7 + i, 3),
dtype=np.uint8),
original_image_spatial_shape: [[6 + i, 7 + i], [6 + i, 7 + i],
[6 + i, 7 + i], [6 + i, 7 + i]],
true_image_shape: [[6 + i, 7 + i, 3], [6 + i, 7 + i, 3],
[6 + i, 7 + i, 3], [6 + i, 7 + i, 3]]
})
value_ops_out = sess.run(value_ops)
for key, value_op in value_ops_out.iteritems():
self.assertNotEqual('', value_op)
......@@ -289,12 +348,20 @@ class VisualizationUtilsTest(tf.test.TestCase):
# produced.
for i in range(max_examples_to_draw - 1):
# Use a unique image shape on each eval image.
sess.run(update_op, feed_dict={
original_image: np.random.randint(low=0,
high=256,
size=(1, 6 + i, 7 + i, 3),
dtype=np.uint8)
})
sess.run(
update_op,
feed_dict={
original_image:
np.random.randint(
low=0,
high=256,
size=(4, 6 + i, 7 + i, 3),
dtype=np.uint8),
original_image_spatial_shape: [[6 + i, 7 + i], [6 + i, 7 + i],
[6 + i, 7 + i], [6 + i, 7 + i]],
true_image_shape: [[6 + i, 7 + i, 3], [6 + i, 7 + i, 3],
[6 + i, 7 + i, 3], [6 + i, 7 + i, 3]]
})
value_ops_out = sess.run(value_ops)
self.assertEqual(
'',
......
......@@ -63,7 +63,8 @@ def cyclegan_arg_scope(instance_norm_center=True,
return sc
def cyclegan_upsample(net, num_outputs, stride, method='conv2d_transpose'):
def cyclegan_upsample(net, num_outputs, stride, method='conv2d_transpose',
pad_mode='REFLECT', align_corners=False):
"""Upsamples the given inputs.
Args:
......@@ -75,6 +76,10 @@ def cyclegan_upsample(net, num_outputs, stride, method='conv2d_transpose'):
times the input size.
method: The upsampling method: 'nn_upsample_conv', 'bilinear_upsample_conv',
or 'conv2d_transpose'.
pad_mode: mode for tf.pad, one of "CONSTANT", "REFLECT", or "SYMMETRIC".
align_corners: option for method, 'bilinear_upsample_conv'. If true, the
centers of the 4 corner pixels of the input and output tensors are
aligned, preserving the values at the corner pixels.
Returns:
A Tensor which was upsampled using the specified method.
......@@ -95,12 +100,13 @@ def cyclegan_upsample(net, num_outputs, stride, method='conv2d_transpose'):
if method == 'nn_upsample_conv':
net = tf.image.resize_nearest_neighbor(
net, [stride[0] * height, stride[1] * width])
net = tf.pad(net, spatial_pad_1, 'REFLECT')
net = tf.pad(net, spatial_pad_1, pad_mode)
net = layers.conv2d(net, num_outputs, kernel_size=[3, 3], padding='valid')
elif method == 'bilinear_upsample_conv':
net = tf.image.resize_bilinear(
net, [stride[0] * height, stride[1] * width])
net = tf.pad(net, spatial_pad_1, 'REFLECT')
net, [stride[0] * height, stride[1] * width],
align_corners=align_corners)
net = tf.pad(net, spatial_pad_1, pad_mode)
net = layers.conv2d(net, num_outputs, kernel_size=[3, 3], padding='valid')
elif method == 'conv2d_transpose':
# This corrects 1 pixel offset for images with even width and height.
......@@ -111,7 +117,7 @@ def cyclegan_upsample(net, num_outputs, stride, method='conv2d_transpose'):
net, num_outputs, kernel_size=[3, 3], stride=stride, padding='valid')
net = net[:, 1:, 1:, :]
else:
raise ValueError('Unknown method: [%s]', method)
raise ValueError('Unknown method: [%s]' % method)
return net
......
......@@ -370,7 +370,8 @@ def inception_resnet_v2_arg_scope(
batch_norm_decay=0.9997,
batch_norm_epsilon=0.001,
activation_fn=tf.nn.relu,
batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS):
batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS,
batch_norm_scale=False):
"""Returns the scope with the default parameters for inception_resnet_v2.
Args:
......@@ -380,6 +381,8 @@ def inception_resnet_v2_arg_scope(
activation_fn: Activation function for conv2d.
batch_norm_updates_collections: Collection for the update ops for
batch norm.
batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
activations in the batch normalization layer.
Returns:
a arg_scope with the parameters needed for inception_resnet_v2.
......@@ -394,6 +397,7 @@ def inception_resnet_v2_arg_scope(
'epsilon': batch_norm_epsilon,
'updates_collections': batch_norm_updates_collections,
'fused': None, # Use fused batch norm if possible.
'scale': batch_norm_scale,
}
# Set activation_fn and parameters for batch_norm.
with slim.arg_scope([slim.conv2d], activation_fn=activation_fn,
......
......@@ -306,6 +306,29 @@ class InceptionTest(tf.test.TestCase):
output = sess.run(predictions)
self.assertEquals(output.shape, (eval_batch_size,))
def testNoBatchNormScaleByDefault(self):
height, width = 299, 299
num_classes = 1000
inputs = tf.placeholder(tf.float32, (1, height, width, 3))
with tf.contrib.slim.arg_scope(inception.inception_resnet_v2_arg_scope()):
inception.inception_resnet_v2(inputs, num_classes, is_training=False)
self.assertEqual(tf.global_variables('.*/BatchNorm/gamma:0$'), [])
def testBatchNormScale(self):
height, width = 299, 299
num_classes = 1000
inputs = tf.placeholder(tf.float32, (1, height, width, 3))
with tf.contrib.slim.arg_scope(
inception.inception_resnet_v2_arg_scope(batch_norm_scale=True)):
inception.inception_resnet_v2(inputs, num_classes, is_training=False)
gamma_names = set(
v.op.name for v in tf.global_variables('.*/BatchNorm/gamma:0$'))
self.assertGreater(len(gamma_names), 0)
for v in tf.global_variables('.*/BatchNorm/moving_mean:0$'):
self.assertIn(v.op.name[:-len('moving_mean')] + 'gamma', gamma_names)
if __name__ == '__main__':
tf.test.main()
......@@ -34,7 +34,8 @@ def inception_arg_scope(weight_decay=0.00004,
batch_norm_decay=0.9997,
batch_norm_epsilon=0.001,
activation_fn=tf.nn.relu,
batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS):
batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS,
batch_norm_scale=False):
"""Defines the default arg scope for inception models.
Args:
......@@ -46,6 +47,8 @@ def inception_arg_scope(weight_decay=0.00004,
activation_fn: Activation function for conv2d.
batch_norm_updates_collections: Collection for the update ops for
batch norm.
batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
activations in the batch normalization layer.
Returns:
An `arg_scope` to use for the inception models.
......@@ -59,6 +62,7 @@ def inception_arg_scope(weight_decay=0.00004,
'updates_collections': batch_norm_updates_collections,
# use fused batch norm if possible.
'fused': None,
'scale': batch_norm_scale,
}
if use_batch_norm:
normalizer_fn = slim.batch_norm
......
......@@ -237,6 +237,29 @@ class InceptionV1Test(tf.test.TestCase):
logits_out = sess.run(logits)
self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])
def testNoBatchNormScaleByDefault(self):
  """The default arg scope must not create batch-norm gamma variables."""
  images = tf.placeholder(tf.float32, (1, 224, 224, 3))
  with slim.arg_scope(inception.inception_v1_arg_scope()):
    inception.inception_v1(images, 1000, is_training=False)
  # batch_norm_scale defaults to False, so no gamma variable should exist.
  self.assertEqual(tf.global_variables('.*/BatchNorm/gamma:0$'), [])
def testBatchNormScale(self):
  """Enabling batch_norm_scale creates a gamma per batch-norm layer."""
  images = tf.placeholder(tf.float32, (1, 224, 224, 3))
  scale_scope = inception.inception_v1_arg_scope(batch_norm_scale=True)
  with slim.arg_scope(scale_scope):
    inception.inception_v1(images, 1000, is_training=False)
  # Names of every gamma (scale) variable the network created.
  gamma_names = {v.op.name
                 for v in tf.global_variables('.*/BatchNorm/gamma:0$')}
  self.assertGreater(len(gamma_names), 0)
  # Each batch-norm layer (found via its moving_mean) must own a gamma.
  trim = len('moving_mean')
  for mean_var in tf.global_variables('.*/BatchNorm/moving_mean:0$'):
    self.assertIn(mean_var.op.name[:-trim] + 'gamma', gamma_names)
# Standard TensorFlow test entry point: discover and run all test cases.
if __name__ == '__main__':
  tf.test.main()
......@@ -351,6 +351,29 @@ class InceptionV2Test(tf.test.TestCase):
logits_out = sess.run(logits)
self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])
def testNoBatchNormScaleByDefault(self):
  """The default arg scope must not create batch-norm gamma variables."""
  images = tf.placeholder(tf.float32, (1, 224, 224, 3))
  with slim.arg_scope(inception.inception_v2_arg_scope()):
    inception.inception_v2(images, 1000, is_training=False)
  # batch_norm_scale defaults to False, so no gamma variable should exist.
  self.assertEqual(tf.global_variables('.*/BatchNorm/gamma:0$'), [])
def testBatchNormScale(self):
  """Enabling batch_norm_scale creates a gamma per batch-norm layer."""
  images = tf.placeholder(tf.float32, (1, 224, 224, 3))
  scale_scope = inception.inception_v2_arg_scope(batch_norm_scale=True)
  with slim.arg_scope(scale_scope):
    inception.inception_v2(images, 1000, is_training=False)
  # Names of every gamma (scale) variable the network created.
  gamma_names = {v.op.name
                 for v in tf.global_variables('.*/BatchNorm/gamma:0$')}
  self.assertGreater(len(gamma_names), 0)
  # Each batch-norm layer (found via its moving_mean) must own a gamma.
  trim = len('moving_mean')
  for mean_var in tf.global_variables('.*/BatchNorm/moving_mean:0$'):
    self.assertIn(mean_var.op.name[:-trim] + 'gamma', gamma_names)
# Standard TensorFlow test entry point: discover and run all test cases.
if __name__ == '__main__':
  tf.test.main()
......@@ -318,6 +318,29 @@ class InceptionV3Test(tf.test.TestCase):
logits_out = sess.run(logits)
self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])
def testNoBatchNormScaleByDefault(self):
  """The default arg scope must not create batch-norm gamma variables."""
  images = tf.placeholder(tf.float32, (1, 299, 299, 3))
  with slim.arg_scope(inception.inception_v3_arg_scope()):
    inception.inception_v3(images, 1000, is_training=False)
  # batch_norm_scale defaults to False, so no gamma variable should exist.
  self.assertEqual(tf.global_variables('.*/BatchNorm/gamma:0$'), [])
def testBatchNormScale(self):
  """Enabling batch_norm_scale creates a gamma per batch-norm layer."""
  images = tf.placeholder(tf.float32, (1, 299, 299, 3))
  scale_scope = inception.inception_v3_arg_scope(batch_norm_scale=True)
  with slim.arg_scope(scale_scope):
    inception.inception_v3(images, 1000, is_training=False)
  # Names of every gamma (scale) variable the network created.
  gamma_names = {v.op.name
                 for v in tf.global_variables('.*/BatchNorm/gamma:0$')}
  self.assertGreater(len(gamma_names), 0)
  # Each batch-norm layer (found via its moving_mean) must own a gamma.
  trim = len('moving_mean')
  for mean_var in tf.global_variables('.*/BatchNorm/moving_mean:0$'):
    self.assertIn(mean_var.op.name[:-trim] + 'gamma', gamma_names)
# Standard TensorFlow test entry point: discover and run all test cases.
if __name__ == '__main__':
  tf.test.main()
......@@ -255,6 +255,29 @@ class InceptionTest(tf.test.TestCase):
output = sess.run(predictions)
self.assertEquals(output.shape, (eval_batch_size,))
def testNoBatchNormScaleByDefault(self):
  """The default arg scope must not create batch-norm gamma variables."""
  images = tf.placeholder(tf.float32, (1, 299, 299, 3))
  with tf.contrib.slim.arg_scope(inception.inception_v4_arg_scope()):
    inception.inception_v4(images, 1000, is_training=False)
  # batch_norm_scale defaults to False, so no gamma variable should exist.
  self.assertEqual(tf.global_variables('.*/BatchNorm/gamma:0$'), [])
def testBatchNormScale(self):
  """Enabling batch_norm_scale creates a gamma per batch-norm layer."""
  images = tf.placeholder(tf.float32, (1, 299, 299, 3))
  scale_scope = inception.inception_v4_arg_scope(batch_norm_scale=True)
  with tf.contrib.slim.arg_scope(scale_scope):
    inception.inception_v4(images, 1000, is_training=False)
  # Names of every gamma (scale) variable the network created.
  gamma_names = {v.op.name
                 for v in tf.global_variables('.*/BatchNorm/gamma:0$')}
  self.assertGreater(len(gamma_names), 0)
  # Each batch-norm layer (found via its moving_mean) must own a gamma.
  trim = len('moving_mean')
  for mean_var in tf.global_variables('.*/BatchNorm/moving_mean:0$'):
    self.assertIn(mean_var.op.name[:-trim] + 'gamma', gamma_names)
# Standard TensorFlow test entry point: discover and run all test cases.
if __name__ == '__main__':
  tf.test.main()
......@@ -263,7 +263,6 @@ def mobilenet_v1_base(inputs,
net = _fixed_padding(net, conv_def.kernel)
net = slim.conv2d(net, depth(conv_def.depth), conv_def.kernel,
stride=conv_def.stride,
normalizer_fn=slim.batch_norm,
scope=end_point)
end_points[end_point] = net
if end_point == final_endpoint:
......@@ -280,7 +279,6 @@ def mobilenet_v1_base(inputs,
depth_multiplier=1,
stride=layer_stride,
rate=layer_rate,
normalizer_fn=slim.batch_norm,
scope=end_point)
end_points[end_point] = net
......@@ -291,7 +289,6 @@ def mobilenet_v1_base(inputs,
net = slim.conv2d(net, depth(conv_def.depth), [1, 1],
stride=1,
normalizer_fn=slim.batch_norm,
scope=end_point)
end_points[end_point] = net
......@@ -432,7 +429,8 @@ def mobilenet_v1_arg_scope(
regularize_depthwise=False,
batch_norm_decay=0.9997,
batch_norm_epsilon=0.001,
batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS):
batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS,
normalizer_fn=slim.batch_norm):
"""Defines the default MobilenetV1 arg scope.
Args:
......@@ -446,6 +444,7 @@ def mobilenet_v1_arg_scope(
in batch norm.
batch_norm_updates_collections: Collection for the update ops for
batch norm.
normalizer_fn: Normalization function to apply after convolution.
Returns:
An `arg_scope` to use for the mobilenet v1 model.
......@@ -469,7 +468,7 @@ def mobilenet_v1_arg_scope(
depthwise_regularizer = None
with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
weights_initializer=weights_init,
activation_fn=tf.nn.relu6, normalizer_fn=slim.batch_norm):
activation_fn=tf.nn.relu6, normalizer_fn=normalizer_fn):
with slim.arg_scope([slim.batch_norm], **batch_norm_params):
with slim.arg_scope([slim.conv2d], weights_regularizer=regularizer):
with slim.arg_scope([slim.separable_conv2d],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment