Unverified Commit 62ce5d2a authored by pkulzc's avatar pkulzc Committed by GitHub
Browse files

Object detection and slim changes (#5843)

Add more eval metrics to model_main and support group norm for mobilenet v1 based models.
parents 7d032ea3 a1337e01
...@@ -23,6 +23,44 @@ message Loss { ...@@ -23,6 +23,44 @@ message Loss {
// If not left to default, applies random example sampling. // If not left to default, applies random example sampling.
optional RandomExampleSampler random_example_sampler = 6; optional RandomExampleSampler random_example_sampler = 6;
// Configuration for the equalization loss term.
message EqualizationLoss {
// Strength (multiplier) of the equalization loss. The default of 0.0
// disables the loss entirely.
optional float weight = 1 [default=0.0];
// When computing the equalization loss, ops whose names start with any of
// these prefixes are ignored. Only consulted when `weight` > 0.
repeated string exclude_prefixes = 2;
}
// Equalization loss options; inactive at the default weight of 0.
optional EqualizationLoss equalization_loss = 7;
// Strategies for weighting the classification loss by expected sampling
// outcomes instead of explicitly sampling anchors.
// NOTE(review): values are not prefixed with the enum name (e.g.
// EXPECTED_LOSS_WEIGHTS_NONE); renaming them now would break generated
// code, so they are kept as-is.
enum ExpectedLossWeights {
// Do not compute expected loss weights; fall back to explicit sampling.
NONE = 0;
// Use expected_classification_loss_by_expected_sampling
// from third_party/tensorflow_models/object_detection/utils/ops.py
EXPECTED_SAMPLING = 1;
// Use expected_classification_loss_by_reweighting_unmatched_anchors
// from third_party/tensorflow_models/object_detection/utils/ops.py
REWEIGHTING_UNMATCHED_ANCHORS = 2;
}
// Method to compute expected loss weights with respect to balanced
// positive/negative sampling scheme. If NONE, use explicit sampling.
// TODO(birdbrain): Move under ExpectedLossWeights.
optional ExpectedLossWeights expected_loss_weights = 18 [default = NONE];
// Minimum number of effective negative samples.
// Only applies if expected_loss_weights is not NONE.
// TODO(birdbrain): Move under ExpectedLossWeights.
optional float min_num_negative_samples = 19 [default=0];
// Desired number of effective negative samples per positive sample.
// Only applies if expected_loss_weights is not NONE.
// TODO(birdbrain): Move under ExpectedLossWeights.
optional float desired_negative_sampling_ratio = 20 [default=3];
} }
// Configuration for bounding box localization loss function. // Configuration for bounding box localization loss function.
......
...@@ -166,13 +166,13 @@ message RandomCropImage { ...@@ -166,13 +166,13 @@ message RandomCropImage {
message RandomPadImage { message RandomPadImage {
// Minimum dimensions for padded image. If unset, will use original image // Minimum dimensions for padded image. If unset, will use original image
// dimension as a lower bound. // dimension as a lower bound.
optional float min_image_height = 1; optional int32 min_image_height = 1;
optional float min_image_width = 2; optional int32 min_image_width = 2;
// Maximum dimensions for padded image. If unset, will use double the original // Maximum dimensions for padded image. If unset, will use double the original
// image dimension as a lower bound. // image dimension as a lower bound.
optional float max_image_height = 3; optional int32 max_image_height = 3;
optional float max_image_width = 4; optional int32 max_image_width = 4;
// Color of the padding. If unset, will pad using average color of the input // Color of the padding. If unset, will pad using average color of the input
// image. // image.
......
...@@ -12,7 +12,7 @@ import "object_detection/protos/post_processing.proto"; ...@@ -12,7 +12,7 @@ import "object_detection/protos/post_processing.proto";
import "object_detection/protos/region_similarity_calculator.proto"; import "object_detection/protos/region_similarity_calculator.proto";
// Configuration for Single Shot Detection (SSD) models. // Configuration for Single Shot Detection (SSD) models.
// Next id: 22 // Next id: 26
message Ssd { message Ssd {
// Number of classes to predict. // Number of classes to predict.
...@@ -35,7 +35,7 @@ message Ssd { ...@@ -35,7 +35,7 @@ message Ssd {
// Whether background targets are to be encoded as an all // Whether background targets are to be encoded as an all
// zeros vector or a one-hot vector (where background is the 0th class). // zeros vector or a one-hot vector (where background is the 0th class).
optional bool encode_background_as_zeros = 12 [default=false]; optional bool encode_background_as_zeros = 12 [default = false];
// classification weight to be associated to negative // classification weight to be associated to negative
// anchors (default: 1.0). The weight must be in [0., 1.]. // anchors (default: 1.0). The weight must be in [0., 1.].
...@@ -52,11 +52,11 @@ message Ssd { ...@@ -52,11 +52,11 @@ message Ssd {
// Whether to normalize the loss by number of groundtruth boxes that match to // Whether to normalize the loss by number of groundtruth boxes that match to
// the anchors. // the anchors.
optional bool normalize_loss_by_num_matches = 10 [default=true]; optional bool normalize_loss_by_num_matches = 10 [default = true];
// Whether to normalize the localization loss by the code size of the box // Whether to normalize the localization loss by the code size of the box
// encodings. This is applied along with other normalization factors. // encodings. This is applied along with other normalization factors.
optional bool normalize_loc_loss_by_codesize = 14 [default=false]; optional bool normalize_loc_loss_by_codesize = 14 [default = false];
// Loss configuration for training. // Loss configuration for training.
optional Loss loss = 11; optional Loss loss = 11;
...@@ -82,29 +82,66 @@ message Ssd { ...@@ -82,29 +82,66 @@ message Ssd {
// to update the batch norm moving average parameters. // to update the batch norm moving average parameters.
optional bool inplace_batchnorm_update = 15 [default = false]; optional bool inplace_batchnorm_update = 15 [default = false];
// Whether to weight the regression loss by the score of the ground truth box // Whether to add an implicit background class to one-hot encodings of
// the anchor matches to. // groundtruth labels. Set to false if training a single
optional bool weight_regression_loss_by_score = 17 [default=false]; // class model or using an explicit background class.
optional bool add_background_class = 21 [default = true];
// Whether to compute expected loss with respect to balanced positive/negative // Whether to use an explicit background class. Set to true if using
// sampling scheme. If false, use explicit sampling. // groundtruth labels with an explicit background class, as in multiclass
optional bool use_expected_classification_loss_under_sampling = 18 [default=false]; // scores.
optional bool explicit_background_class = 24 [default = false];
// Minimum number of effective negative samples. optional bool use_confidences_as_targets = 22 [default = false];
// Only applies if use_expected_classification_loss_under_sampling is true.
optional float min_num_negative_samples = 19 [default=0];
// Desired number of effective negative samples per positive sample. optional float implicit_example_weight = 23 [default = 1.0];
// Only applies if use_expected_classification_loss_under_sampling is true.
optional float desired_negative_sampling_ratio = 20 [default=3];
// Whether to add an implicit background class to one-hot encodings of // Configuration proto for MaskHead.
// groundtruth labels. Set to false if using groundtruth labels with an // Next id: 11
// explicit background class, using multiclass scores, or if training a single message MaskHead {
// class model. // The height and the width of the predicted mask. Only used when
optional bool add_background_class = 21 [default = true]; // predict_instance_masks is true.
} optional int32 mask_height = 1 [default = 15];
optional int32 mask_width = 2 [default = 15];
// Whether to predict class agnostic masks. Only used when
// predict_instance_masks is true.
optional bool masks_are_class_agnostic = 3 [default = true];
// The depth for the first conv2d_transpose op applied to the
// image_features in the mask prediction branch. If set to 0, the value
// will be set automatically based on the number of channels in the image
// features and the number of classes.
optional int32 mask_prediction_conv_depth = 4 [default = 256];
// The number of convolutions applied to image_features in the mask prediction
// branch.
optional int32 mask_prediction_num_conv_layers = 5 [default = 2];
// Whether to apply convolutions on mask features before upsampling using
// nearest neighbor resizing.
// By default, mask features are resized to [`mask_height`, `mask_width`]
// before applying convolutions and predicting masks.
optional bool convolve_then_upsample_masks = 6 [default = false];
// Mask loss weight.
optional float mask_loss_weight = 7 [default=5.0];
// Number of boxes to be generated at training time for computing mask loss.
optional int32 mask_loss_sample_size = 8 [default=16];
// Hyperparameters for convolution ops used in the box predictor.
optional Hyperparams conv_hyperparams = 9;
// Output size (width and height are set to be the same) of the initial
// bilinear interpolation based cropping during ROI pooling. Only used when
// we have second stage prediction head enabled (e.g. mask head).
optional int32 initial_crop_size = 10 [default = 15];
}
// Configs for mask head.
optional MaskHead mask_head_config = 25;
}
message SsdFeatureExtractor { message SsdFeatureExtractor {
reserved 6; reserved 6;
...@@ -113,10 +150,10 @@ message SsdFeatureExtractor { ...@@ -113,10 +150,10 @@ message SsdFeatureExtractor {
optional string type = 1; optional string type = 1;
// The factor to alter the depth of the channels in the feature extractor. // The factor to alter the depth of the channels in the feature extractor.
optional float depth_multiplier = 2 [default=1.0]; optional float depth_multiplier = 2 [default = 1.0];
// Minimum number of the channels in the feature extractor. // Minimum number of the channels in the feature extractor.
optional int32 min_depth = 3 [default=16]; optional int32 min_depth = 3 [default = 16];
// Hyperparameters that affect the layers of feature extractor added on top // Hyperparameters that affect the layers of feature extractor added on top
// of the base feature extractor. // of the base feature extractor.
...@@ -128,7 +165,8 @@ message SsdFeatureExtractor { ...@@ -128,7 +165,8 @@ message SsdFeatureExtractor {
// layers while base feature extractor uses its own default hyperparams. If // layers while base feature extractor uses its own default hyperparams. If
// this value is set to true, the base feature extractor's hyperparams will be // this value is set to true, the base feature extractor's hyperparams will be
// overridden with the `conv_hyperparams`. // overridden with the `conv_hyperparams`.
optional bool override_base_feature_extractor_hyperparams = 9 [default = false]; optional bool override_base_feature_extractor_hyperparams = 9
[default = false];
// The nearest multiple to zero-pad the input height and width dimensions to. // The nearest multiple to zero-pad the input height and width dimensions to.
// For example, if pad_to_multiple = 2, input dimensions are zero-padded // For example, if pad_to_multiple = 2, input dimensions are zero-padded
...@@ -138,11 +176,11 @@ message SsdFeatureExtractor { ...@@ -138,11 +176,11 @@ message SsdFeatureExtractor {
// Whether to use explicit padding when extracting SSD multiresolution // Whether to use explicit padding when extracting SSD multiresolution
// features. This will also apply to the base feature extractor if a MobileNet // features. This will also apply to the base feature extractor if a MobileNet
// architecture is used. // architecture is used.
optional bool use_explicit_padding = 7 [default=false]; optional bool use_explicit_padding = 7 [default = false];
// Whether to use depthwise separable convolutions for to extract additional // Whether to use depthwise separable convolutions for to extract additional
// feature maps added by SSD. // feature maps added by SSD.
optional bool use_depthwise = 8 [default=false]; optional bool use_depthwise = 8 [default = false];
// Feature Pyramid Networks config. // Feature Pyramid Networks config.
optional FeaturePyramidNetworks fpn = 10; optional FeaturePyramidNetworks fpn = 10;
...@@ -173,4 +211,3 @@ message FeaturePyramidNetworks { ...@@ -173,4 +211,3 @@ message FeaturePyramidNetworks {
// channel depth for additional coarse feature layers. // channel depth for additional coarse feature layers.
optional int32 additional_layer_depth = 3 [default = 256]; optional int32 additional_layer_depth = 3 [default = 256];
} }
...@@ -20,7 +20,7 @@ message TrainConfig { ...@@ -20,7 +20,7 @@ message TrainConfig {
optional bool sync_replicas = 3 [default=false]; optional bool sync_replicas = 3 [default=false];
// How frequently to keep checkpoints. // How frequently to keep checkpoints.
optional uint32 keep_checkpoint_every_n_hours = 4 [default=1000]; optional float keep_checkpoint_every_n_hours = 4 [default=10000.0];
// Optimizer used to train the DetectionModel. // Optimizer used to train the DetectionModel.
optional Optimizer optimizer = 5; optional Optimizer optimizer = 5;
......
...@@ -33,6 +33,7 @@ import collections ...@@ -33,6 +33,7 @@ import collections
import logging import logging
import unicodedata import unicodedata
import numpy as np import numpy as np
import tensorflow as tf
from object_detection.core import standard_fields from object_detection.core import standard_fields
from object_detection.utils import label_map_util from object_detection.utils import label_map_util
...@@ -126,6 +127,7 @@ class ObjectDetectionEvaluator(DetectionEvaluator): ...@@ -126,6 +127,7 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
categories, categories,
matching_iou_threshold=0.5, matching_iou_threshold=0.5,
evaluate_corlocs=False, evaluate_corlocs=False,
evaluate_precision_recall=False,
metric_prefix=None, metric_prefix=None,
use_weighted_mean_ap=False, use_weighted_mean_ap=False,
evaluate_masks=False, evaluate_masks=False,
...@@ -140,6 +142,8 @@ class ObjectDetectionEvaluator(DetectionEvaluator): ...@@ -140,6 +142,8 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
boxes to detection boxes. boxes to detection boxes.
evaluate_corlocs: (optional) boolean which determines if corloc scores evaluate_corlocs: (optional) boolean which determines if corloc scores
are to be returned or not. are to be returned or not.
evaluate_precision_recall: (optional) boolean which determines if
precision and recall values are to be returned or not.
metric_prefix: (optional) string prefix for metric name; if None, no metric_prefix: (optional) string prefix for metric name; if None, no
prefix is used. prefix is used.
use_weighted_mean_ap: (optional) boolean which determines if the mean use_weighted_mean_ap: (optional) boolean which determines if the mean
...@@ -174,7 +178,50 @@ class ObjectDetectionEvaluator(DetectionEvaluator): ...@@ -174,7 +178,50 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
group_of_weight=self._group_of_weight) group_of_weight=self._group_of_weight)
self._image_ids = set([]) self._image_ids = set([])
self._evaluate_corlocs = evaluate_corlocs self._evaluate_corlocs = evaluate_corlocs
self._evaluate_precision_recall = evaluate_precision_recall
self._metric_prefix = (metric_prefix + '_') if metric_prefix else '' self._metric_prefix = (metric_prefix + '_') if metric_prefix else ''
self._expected_keys = set([
standard_fields.InputDataFields.key,
standard_fields.InputDataFields.groundtruth_boxes,
standard_fields.InputDataFields.groundtruth_classes,
standard_fields.InputDataFields.groundtruth_difficult,
standard_fields.InputDataFields.groundtruth_instance_masks,
standard_fields.DetectionResultFields.detection_boxes,
standard_fields.DetectionResultFields.detection_scores,
standard_fields.DetectionResultFields.detection_classes,
standard_fields.DetectionResultFields.detection_masks
])
self._build_metric_names()
  def _build_metric_names(self):
    """Builds the ordered list of metric display names.

    Populates `self._metric_names` with, in order:
      1. mAP at the configured IOU threshold. This is always index 0; other
         methods (e.g. `get_estimator_eval_metric_ops`) rely on
         `self._metric_names[0]` being the mAP key.
      2. mean CorLoc, if `self._evaluate_corlocs` is set.
      3. A per-category AP entry (and, optionally, a CorLoc entry) for each
         class id present in the category index.
    """
    self._metric_names = [
        self._metric_prefix + 'Precision/mAP@{}IOU'.format(
            self._matching_iou_threshold)
    ]
    if self._evaluate_corlocs:
      self._metric_names.append(
          self._metric_prefix +
          'Precision/meanCorLoc@{}IOU'.format(self._matching_iou_threshold))
    category_index = label_map_util.create_category_index(self._categories)
    for idx in range(self._num_classes):
      # Class indices are offset by the label id offset when looking up
      # category metadata; ids absent from the index get no metric name.
      if idx + self._label_id_offset in category_index:
        category_name = category_index[idx + self._label_id_offset]['name']
        try:
          # Python 2: decode byte strings to unicode. unicode() raises
          # TypeError when category_name is already a unicode object.
          category_name = unicode(category_name, 'utf-8')
        except TypeError:
          pass
        # Normalize to plain ASCII so the name is safe inside metric keys.
        category_name = unicodedata.normalize('NFKD', category_name).encode(
            'ascii', 'ignore')
        self._metric_names.append(
            self._metric_prefix + 'PerformanceByCategory/AP@{}IOU/{}'.format(
                self._matching_iou_threshold, category_name))
        if self._evaluate_corlocs:
          self._metric_names.append(
              self._metric_prefix + 'PerformanceByCategory/CorLoc@{}IOU/{}'
              .format(self._matching_iou_threshold, category_name))
def add_single_ground_truth_image_info(self, image_id, groundtruth_dict): def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
"""Adds groundtruth for a single image to be used for evaluation. """Adds groundtruth for a single image to be used for evaluation.
...@@ -283,22 +330,19 @@ class ObjectDetectionEvaluator(DetectionEvaluator): ...@@ -283,22 +330,19 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
A dictionary of metrics with the following fields - A dictionary of metrics with the following fields -
1. summary_metrics: 1. summary_metrics:
'Precision/mAP@<matching_iou_threshold>IOU': mean average precision at '<prefix if not empty>_Precision/mAP@<matching_iou_threshold>IOU': mean
the specified IOU threshold. average precision at the specified IOU threshold.
2. per_category_ap: category specific results with keys of the form 2. per_category_ap: category specific results with keys of the form
'PerformanceByCategory/mAP@<matching_iou_threshold>IOU/category'. '<prefix if not empty>_PerformanceByCategory/
mAP@<matching_iou_threshold>IOU/category'.
""" """
(per_class_ap, mean_ap, _, _, per_class_corloc, mean_corloc) = ( (per_class_ap, mean_ap, per_class_precision, per_class_recall,
self._evaluation.evaluate()) per_class_corloc, mean_corloc) = (
pascal_metrics = { self._evaluation.evaluate())
self._metric_prefix + pascal_metrics = {self._metric_names[0]: mean_ap}
'Precision/mAP@{}IOU'.format(self._matching_iou_threshold):
mean_ap
}
if self._evaluate_corlocs: if self._evaluate_corlocs:
pascal_metrics[self._metric_prefix + 'Precision/meanCorLoc@{}IOU'.format( pascal_metrics[self._metric_names[1]] = mean_corloc
self._matching_iou_threshold)] = mean_corloc
category_index = label_map_util.create_category_index(self._categories) category_index = label_map_util.create_category_index(self._categories)
for idx in range(per_class_ap.size): for idx in range(per_class_ap.size):
if idx + self._label_id_offset in category_index: if idx + self._label_id_offset in category_index:
...@@ -314,6 +358,19 @@ class ObjectDetectionEvaluator(DetectionEvaluator): ...@@ -314,6 +358,19 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
self._matching_iou_threshold, category_name)) self._matching_iou_threshold, category_name))
pascal_metrics[display_name] = per_class_ap[idx] pascal_metrics[display_name] = per_class_ap[idx]
# Optionally add precision and recall values
if self._evaluate_precision_recall:
display_name = (
self._metric_prefix +
'PerformanceByCategory/Precision@{}IOU/{}'.format(
self._matching_iou_threshold, category_name))
pascal_metrics[display_name] = per_class_precision[idx]
display_name = (
self._metric_prefix +
'PerformanceByCategory/Recall@{}IOU/{}'.format(
self._matching_iou_threshold, category_name))
pascal_metrics[display_name] = per_class_recall[idx]
# Optionally add CorLoc metrics.classes # Optionally add CorLoc metrics.classes
if self._evaluate_corlocs: if self._evaluate_corlocs:
display_name = ( display_name = (
...@@ -332,6 +389,74 @@ class ObjectDetectionEvaluator(DetectionEvaluator): ...@@ -332,6 +389,74 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
label_id_offset=self._label_id_offset) label_id_offset=self._label_id_offset)
self._image_ids.clear() self._image_ids.clear()
  def get_estimator_eval_metric_ops(self, eval_dict):
    """Returns dict of metrics to use with `tf.estimator.EstimatorSpec`.

    Note that this must only be implemented if performing evaluation with a
    `tf.estimator.Estimator`.

    Args:
      eval_dict: A dictionary that holds tensors for evaluating an object
        detection model, returned from
        eval_util.result_dict_for_single_example(). It must contain
        standard_fields.InputDataFields.key.

    Returns:
      A dictionary of metric names to tuple of value_op and update_op that can
      be used as eval metric ops in `tf.estimator.EstimatorSpec`. The shared
      update_op feeds each batch into this evaluator via a py_func; the value
      ops read metric values cached by a single evaluate()/clear() cycle.
    """
    # remove unexpected fields: only tensors this evaluator knows how to
    # consume are forwarded to add_single_*_image_info.
    eval_dict_filtered = dict()
    for key, value in eval_dict.items():
      if key in self._expected_keys:
        eval_dict_filtered[key] = value

    # NOTE(review): keys() and values() are taken from the same dict without
    # intervening mutation, so their iteration orders correspond.
    eval_dict_keys = eval_dict_filtered.keys()

    def update_op(image_id, *eval_dict_batched_as_list):
      """Update operation that adds batch of images to ObjectDetectionEvaluator.

      Args:
        image_id: image id (single id or an array)
        *eval_dict_batched_as_list: the values of the dictionary of tensors.
      """
      if np.isscalar(image_id):
        # Unbatched case: feed the single example straight through.
        single_example_dict = dict(
            zip(eval_dict_keys, eval_dict_batched_as_list))
        self.add_single_ground_truth_image_info(image_id, single_example_dict)
        self.add_single_detected_image_info(image_id, single_example_dict)
      else:
        # Batched case: unzip along the batch dimension and feed examples one
        # at a time, recovering each example's id from its own key field.
        for unzipped_tuple in zip(*eval_dict_batched_as_list):
          single_example_dict = dict(zip(eval_dict_keys, unzipped_tuple))
          image_id = single_example_dict[standard_fields.InputDataFields.key]
          self.add_single_ground_truth_image_info(image_id, single_example_dict)
          self.add_single_detected_image_info(image_id, single_example_dict)

    # First positional arg is the key tensor (drives the scalar-vs-batched
    # check above); the rest mirror eval_dict_keys order.
    args = [eval_dict_filtered[standard_fields.InputDataFields.key]]
    args.extend(eval_dict_filtered.values())
    update_op = tf.py_func(update_op, args, [])

    def first_value_func():
      # Running the first value op performs the (expensive) evaluation once,
      # caches all metric values, and resets accumulated per-image state.
      self._metrics = self.evaluate()
      self.clear()
      return np.float32(self._metrics[self._metric_names[0]])

    def value_func_factory(metric_name):
      # Factory closure so each metric name is bound at definition time
      # rather than sharing the loop variable.
      def value_func():
        return np.float32(self._metrics[metric_name])
      return value_func

    # Ensure that the metrics are only evaluated once: every other value op
    # depends on first_value_op, which is what populates self._metrics.
    first_value_op = tf.py_func(first_value_func, [], tf.float32)
    eval_metric_ops = {self._metric_names[0]: (first_value_op, update_op)}
    with tf.control_dependencies([first_value_op]):
      for metric_name in self._metric_names[1:]:
        eval_metric_ops[metric_name] = (tf.py_func(
            value_func_factory(metric_name), [], np.float32), update_op)
    return eval_metric_ops
class PascalDetectionEvaluator(ObjectDetectionEvaluator): class PascalDetectionEvaluator(ObjectDetectionEvaluator):
"""A class to evaluate detections using PASCAL metrics.""" """A class to evaluate detections using PASCAL metrics."""
...@@ -442,6 +567,15 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator): ...@@ -442,6 +567,15 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
evaluate_corlocs, evaluate_corlocs,
metric_prefix=metric_prefix, metric_prefix=metric_prefix,
group_of_weight=group_of_weight) group_of_weight=group_of_weight)
self._expected_keys = set([
standard_fields.InputDataFields.key,
standard_fields.InputDataFields.groundtruth_boxes,
standard_fields.InputDataFields.groundtruth_classes,
standard_fields.InputDataFields.groundtruth_group_of,
standard_fields.DetectionResultFields.detection_boxes,
standard_fields.DetectionResultFields.detection_scores,
standard_fields.DetectionResultFields.detection_classes,
])
def add_single_ground_truth_image_info(self, image_id, groundtruth_dict): def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
"""Adds groundtruth for a single image to be used for evaluation. """Adds groundtruth for a single image to be used for evaluation.
...@@ -535,6 +669,16 @@ class OpenImagesDetectionChallengeEvaluator(OpenImagesDetectionEvaluator): ...@@ -535,6 +669,16 @@ class OpenImagesDetectionChallengeEvaluator(OpenImagesDetectionEvaluator):
group_of_weight=group_of_weight) group_of_weight=group_of_weight)
self._evaluatable_labels = {} self._evaluatable_labels = {}
self._expected_keys = set([
standard_fields.InputDataFields.key,
standard_fields.InputDataFields.groundtruth_boxes,
standard_fields.InputDataFields.groundtruth_classes,
standard_fields.InputDataFields.groundtruth_group_of,
standard_fields.InputDataFields.groundtruth_image_classes,
standard_fields.DetectionResultFields.detection_boxes,
standard_fields.DetectionResultFields.detection_scores,
standard_fields.DetectionResultFields.detection_classes,
])
def add_single_ground_truth_image_info(self, image_id, groundtruth_dict): def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
"""Adds groundtruth for a single image to be used for evaluation. """Adds groundtruth for a single image to be used for evaluation.
...@@ -890,15 +1034,14 @@ class ObjectDetectionEvaluation(object): ...@@ -890,15 +1034,14 @@ class ObjectDetectionEvaluation(object):
if self.use_weighted_mean_ap: if self.use_weighted_mean_ap:
all_scores = np.append(all_scores, scores) all_scores = np.append(all_scores, scores)
all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels) all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels)
logging.info('Scores and tpfp per class label: %d', class_index)
logging.info(tp_fp_labels)
logging.info(scores)
precision, recall = metrics.compute_precision_recall( precision, recall = metrics.compute_precision_recall(
scores, tp_fp_labels, self.num_gt_instances_per_class[class_index]) scores, tp_fp_labels, self.num_gt_instances_per_class[class_index])
self.precisions_per_class[class_index] = precision self.precisions_per_class[class_index] = precision
self.recalls_per_class[class_index] = recall self.recalls_per_class[class_index] = recall
average_precision = metrics.compute_average_precision(precision, recall) average_precision = metrics.compute_average_precision(precision, recall)
self.average_precision_per_class[class_index] = average_precision self.average_precision_per_class[class_index] = average_precision
logging.info('average_precision: %f', average_precision)
self.corloc_per_class = metrics.compute_cor_loc( self.corloc_per_class = metrics.compute_cor_loc(
self.num_gt_imgs_per_class, self.num_gt_imgs_per_class,
......
...@@ -15,9 +15,10 @@ ...@@ -15,9 +15,10 @@
"""Tests for object_detection.utils.object_detection_evaluation.""" """Tests for object_detection.utils.object_detection_evaluation."""
from absl.testing import parameterized
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from object_detection import eval_util
from object_detection.core import standard_fields from object_detection.core import standard_fields
from object_detection.utils import object_detection_evaluation from object_detection.utils import object_detection_evaluation
...@@ -683,5 +684,141 @@ class ObjectDetectionEvaluationTest(tf.test.TestCase): ...@@ -683,5 +684,141 @@ class ObjectDetectionEvaluationTest(tf.test.TestCase):
self.assertAlmostEqual(expected_mean_corloc, mean_corloc) self.assertAlmostEqual(expected_mean_corloc, mean_corloc)
class ObjectDetectionEvaluatorTest(tf.test.TestCase, parameterized.TestCase):
  """Tests ObjectDetectionEvaluator.get_estimator_eval_metric_ops."""

  def setUp(self):
    # Three-category label map shared by every test case.
    self.categories = [{
        'id': 1,
        'name': 'person'
    }, {
        'id': 2,
        'name': 'dog'
    }, {
        'id': 3,
        'name': 'cat'
    }]
    self.od_eval = object_detection_evaluation.ObjectDetectionEvaluator(
        categories=self.categories)

  def _make_evaluation_dict(self,
                            resized_groundtruth_masks=False,
                            batch_size=1,
                            max_gt_boxes=None,
                            scale_to_absolute=False):
    """Builds a synthetic eval_dict of detection and groundtruth tensors.

    Args:
      resized_groundtruth_masks: if True, groundtruth masks are 10x10 instead
        of the 20x20 image size.
      batch_size: number of examples; 1 produces an unbatched result dict.
      max_gt_boxes: forwarded to eval_util.result_dict_for_batched_example
        (batched case only).
      scale_to_absolute: forwarded to the eval_util result-dict helpers.

    Returns:
      The dict produced by eval_util.result_dict_for_single_example (when
      batch_size == 1) or eval_util.result_dict_for_batched_example.
    """
    input_data_fields = standard_fields.InputDataFields
    detection_fields = standard_fields.DetectionResultFields
    image = tf.zeros(shape=[batch_size, 20, 20, 3], dtype=tf.uint8)
    if batch_size == 1:
      key = tf.constant('image1')
    else:
      key = tf.constant([str(i) for i in range(batch_size)])
    # All but the last example predict the full-image box with score 0.5; the
    # last example predicts a quarter-area box with score 0.8.
    detection_boxes = tf.concat([
        tf.tile(
            tf.constant([[[0., 0., 1., 1.]]]),
            multiples=[batch_size - 1, 1, 1]),
        tf.constant([[[0., 0., 0.5, 0.5]]])
    ],
                                axis=0)
    detection_scores = tf.concat([
        tf.tile(tf.constant([[0.5]]), multiples=[batch_size - 1, 1]),
        tf.constant([[0.8]])
    ],
                                 axis=0)
    detection_classes = tf.tile(tf.constant([[0]]), multiples=[batch_size, 1])
    detection_masks = tf.tile(
        tf.ones(shape=[1, 2, 20, 20], dtype=tf.float32),
        multiples=[batch_size, 1, 1, 1])
    # Every example shares the same single full-image groundtruth box of
    # class 1.
    groundtruth_boxes = tf.constant([[0., 0., 1., 1.]])
    groundtruth_classes = tf.constant([1])
    groundtruth_instance_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8)
    num_detections = tf.ones([batch_size])
    if resized_groundtruth_masks:
      groundtruth_instance_masks = tf.ones(shape=[1, 10, 10], dtype=tf.uint8)
    if batch_size > 1:
      # Tile the single-example groundtruth along a new batch dimension.
      groundtruth_boxes = tf.tile(
          tf.expand_dims(groundtruth_boxes, 0), multiples=[batch_size, 1, 1])
      groundtruth_classes = tf.tile(
          tf.expand_dims(groundtruth_classes, 0), multiples=[batch_size, 1])
      groundtruth_instance_masks = tf.tile(
          tf.expand_dims(groundtruth_instance_masks, 0),
          multiples=[batch_size, 1, 1, 1])
    detections = {
        detection_fields.detection_boxes: detection_boxes,
        detection_fields.detection_scores: detection_scores,
        detection_fields.detection_classes: detection_classes,
        detection_fields.detection_masks: detection_masks,
        detection_fields.num_detections: num_detections
    }
    groundtruth = {
        input_data_fields.groundtruth_boxes:
            groundtruth_boxes,
        input_data_fields.groundtruth_classes:
            groundtruth_classes,
        input_data_fields.groundtruth_instance_masks:
            groundtruth_instance_masks,
    }
    if batch_size > 1:
      return eval_util.result_dict_for_batched_example(
          image,
          key,
          detections,
          groundtruth,
          scale_to_absolute=scale_to_absolute,
          max_gt_boxes=max_gt_boxes)
    else:
      return eval_util.result_dict_for_single_example(
          image,
          key,
          detections,
          groundtruth,
          scale_to_absolute=scale_to_absolute)

  @parameterized.parameters({
      'batch_size': 1,
      'expected_map': 0,
      'max_gt_boxes': None,
      'scale_to_absolute': True
  }, {
      'batch_size': 8,
      'expected_map': 0.765625,
      'max_gt_boxes': [1],
      'scale_to_absolute': True
  }, {
      'batch_size': 1,
      'expected_map': 0,
      'max_gt_boxes': None,
      'scale_to_absolute': False
  }, {
      'batch_size': 8,
      'expected_map': 0.765625,
      'max_gt_boxes': [1],
      'scale_to_absolute': False
  })
  def test_get_estimator_eval_metric_ops(self,
                                         batch_size=1,
                                         expected_map=1,
                                         max_gt_boxes=None,
                                         scale_to_absolute=False):
    # Build the eval dict, run the shared update_op once, then fetch every
    # value op and check the headline mAP metric against the expected value.
    eval_dict = self._make_evaluation_dict(
        batch_size=batch_size,
        max_gt_boxes=max_gt_boxes,
        scale_to_absolute=scale_to_absolute)
    tf.logging.info('eval_dict: {}'.format(eval_dict))
    metric_ops = self.od_eval.get_estimator_eval_metric_ops(eval_dict)
    _, update_op = metric_ops['Precision/mAP@0.5IOU']

    with self.test_session() as sess:
      metrics = {}
      # NOTE(review): iteritems() is Python 2 only, consistent with the rest
      # of this file.
      for key, (value_op, _) in metric_ops.iteritems():
        metrics[key] = value_op
      sess.run(update_op)
      metrics = sess.run(metrics)
      self.assertAlmostEqual(expected_map, metrics['Precision/mAP@0.5IOU'])
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
# ============================================================================== # ==============================================================================
"""A module for helper tensorflow ops.""" """A module for helper tensorflow ops."""
import collections
import math import math
import numpy as np import numpy as np
import six import six
...@@ -1087,81 +1088,10 @@ def native_crop_and_resize(image, boxes, crop_size, scope=None): ...@@ -1087,81 +1088,10 @@ def native_crop_and_resize(image, boxes, crop_size, scope=None):
return tf.reshape(cropped_regions, final_shape) return tf.reshape(cropped_regions, final_shape)
def expected_classification_loss_under_sampling(
    batch_cls_targets, cls_losses, unmatched_cls_losses,
    desired_negative_sampling_ratio, min_num_negative_samples):
  """Computes classification loss by background/foreground weighting.

  The weighting is such that the effective background/foreground weight ratio
  is the desired_negative_sampling_ratio. If p_i is the foreground probability
  of anchor a_i, L(a_i) is the anchor's loss, N is the number of anchors, M is
  the sum of foreground probabilities across anchors, and K is the desired
  ratio between the number of negative and positive samples, then the total
  loss L is calculated as:

    beta = K*M/(N-M)
    L = sum_{i=1}^N [p_i * L_p(a_i) + beta * (1 - p_i) * L_n(a_i)]

  where L_p(a_i) is the loss against target assuming the anchor was matched,
  otherwise zero, and L_n(a_i) is the loss against the background target
  assuming the anchor was unmatched, otherwise zero.

  Args:
    batch_cls_targets: A tensor with shape [batch_size, num_anchors,
      num_classes + 1], where the 0'th index is the background class,
      containing the class distribution for the target assigned to a given
      anchor.
    cls_losses: Float tensor of shape [batch_size, num_anchors] representing
      anchorwise classification losses.
    unmatched_cls_losses: loss for each anchor against the unmatched class
      target.
    desired_negative_sampling_ratio: The desired background/foreground weight
      ratio.
    min_num_negative_samples: Minimum number of effective negative samples.
      Used only when there are no positive examples.

  Returns:
    The classification loss, a [batch_size] float tensor.
  """
  num_anchors = tf.cast(tf.shape(batch_cls_targets)[1], tf.float32)

  # Foreground probability p_i of each anchor: one minus the probability mass
  # assigned to the background (0'th) class.
  foreground_probabilities = 1 - batch_cls_targets[:, :, 0]
  foreground_sum = tf.reduce_sum(foreground_probabilities, axis=-1)

  # For each anchor, expected_j is the expected number of positive anchors
  # given that this anchor was sampled as negative.
  tiled_foreground_sum = tf.tile(
      tf.reshape(foreground_sum, [-1, 1]),
      [1, tf.cast(num_anchors, tf.int32)])
  expected_j = tiled_foreground_sum - foreground_probabilities
  k = desired_negative_sampling_ratio

  # Compute beta. num_anchors is already float32 (cast above), so the
  # deprecated tf.to_float() the original code applied here was a no-op.
  expected_negatives = num_anchors - expected_j
  desired_negatives = k * expected_j
  # Cap the desired number of negatives at the number actually available.
  desired_negatives = tf.where(
      tf.greater(desired_negatives, expected_negatives), expected_negatives,
      desired_negatives)

  # Probability that an anchor is sampled for the loss computation given that
  # it is negative.
  beta = desired_negatives / expected_negatives

  # Where the foreground sum is zero (no positive anchors at all), fall back
  # to a minimum negative weight so the loss is not identically zero.
  min_negative_weight = 1.0 * min_num_negative_samples / num_anchors
  beta = tf.where(
      tf.equal(tiled_foreground_sum, 0),
      min_negative_weight * tf.ones_like(beta), beta)

  foreground_weights = foreground_probabilities
  background_weights = (1 - foreground_weights) * beta
  weighted_foreground_losses = foreground_weights * cls_losses
  weighted_background_losses = background_weights * unmatched_cls_losses

  cls_losses = tf.reduce_sum(
      weighted_foreground_losses, axis=-1) + tf.reduce_sum(
          weighted_background_losses, axis=-1)
  return cls_losses
...@@ -21,6 +21,8 @@ from object_detection.core import standard_fields as fields ...@@ -21,6 +21,8 @@ from object_detection.core import standard_fields as fields
from object_detection.utils import ops from object_detection.utils import ops
from object_detection.utils import test_case from object_detection.utils import test_case
slim = tf.contrib.slim
class NormalizedToImageCoordinatesTest(tf.test.TestCase): class NormalizedToImageCoordinatesTest(tf.test.TestCase):
...@@ -1466,189 +1468,9 @@ class OpsTestCropAndResize(test_case.TestCase): ...@@ -1466,189 +1468,9 @@ class OpsTestCropAndResize(test_case.TestCase):
self.assertAllClose(crop_output, expected_output) self.assertAllClose(crop_output, expected_output)
class OpsTestExpectedClassificationLoss(test_case.TestCase):
  """Tests for ops.expected_classification_loss_under_sampling."""

  def testExpectedClassificationLossUnderSamplingWithHardLabels(self):
    """Hard one-hot targets: one positive and one negative anchor per image."""

    def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses,
                 negative_to_positive_ratio, min_num_negative_samples):
      return ops.expected_classification_loss_under_sampling(
          batch_cls_targets, cls_losses, unmatched_cls_losses,
          negative_to_positive_ratio, min_num_negative_samples)

    # Targets are one-hot over [background, class_1, class_2].
    batch_cls_targets = np.array(
        [[[1., 0, 0], [0, 1., 0]], [[1., 0, 0], [0, 1., 0]]], dtype=np.float32)
    cls_losses = np.array([[1, 2], [3, 4]], dtype=np.float32)
    unmatched_cls_losses = np.array([[10, 20], [30, 40]], dtype=np.float32)
    negative_to_positive_ratio = np.array([2], dtype=np.float32)
    min_num_negative_samples = np.array([1], dtype=np.float32)

    classification_loss = self.execute(graph_fn, [
        batch_cls_targets, cls_losses, unmatched_cls_losses,
        negative_to_positive_ratio, min_num_negative_samples
    ])

    # Intermediate values implied by the inputs above:
    # expected_foreground_sum = [1,1]
    # expected_expected_j = [[1, 0], [1, 0]]
    # expected_expected_negatives = [[1, 2], [1, 2]]
    # expected_desired_negatives = [[2, 0], [2, 0]]
    # expected_beta = [[1, 0], [1, 0]]
    # expected_foreground_weights = [[0, 1], [0, 1]]
    # expected_background_weights = [[1, 0], [1, 0]]
    # expected_weighted_foreground_losses = [[0, 2], [0, 4]]
    # expected_weighted_background_losses = [[10, 0], [30, 0]]
    # expected_classification_loss_under_sampling = [6, 40]
    expected_classification_loss_under_sampling = [2 + 10, 4 + 30]

    self.assertAllClose(expected_classification_loss_under_sampling,
                        classification_loss)

  def testExpectedClassificationLossUnderSamplingWithHardLabelsMoreNegatives(
      self):
    """Hard labels with more negatives than the desired sampling ratio."""

    def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses,
                 negative_to_positive_ratio, min_num_negative_samples):
      return ops.expected_classification_loss_under_sampling(
          batch_cls_targets, cls_losses, unmatched_cls_losses,
          negative_to_positive_ratio, min_num_negative_samples)

    # One positive anchor (index 1) and four negative anchors.
    batch_cls_targets = np.array(
        [[[1., 0, 0], [0, 1., 0], [1., 0, 0], [1., 0, 0], [1., 0, 0]]],
        dtype=np.float32)
    cls_losses = np.array([[1, 2, 3, 4, 5]], dtype=np.float32)
    unmatched_cls_losses = np.array([[10, 20, 30, 40, 50]], dtype=np.float32)
    negative_to_positive_ratio = np.array([2], dtype=np.float32)
    min_num_negative_samples = np.array([1], dtype=np.float32)

    classification_loss = self.execute(graph_fn, [
        batch_cls_targets, cls_losses, unmatched_cls_losses,
        negative_to_positive_ratio, min_num_negative_samples
    ])

    # expected_foreground_sum = [1]
    # expected_expected_j = [[1, 0, 1, 1, 1]]
    # expected_expected_negatives = [[4, 5, 4, 4, 4]]
    # expected_desired_negatives = [[2, 0, 2, 2, 2]]
    # expected_beta = [[.5, 0, .5, .5, .5]]
    # expected_foreground_weights = [[0, 1, 0, 0, 0]]
    # expected_background_weights = [[.5, 0, .5, .5, .5]]
    # expected_weighted_foreground_losses = [[0, 2, 0, 0, 0]]
    # expected_weighted_background_losses = [[10*.5, 0, 30*.5, 40*.5, 50*.5]]
    # expected_classification_loss_under_sampling = [5+2+15+20+25]
    expected_classification_loss_under_sampling = [5 + 2 + 15 + 20 + 25]

    self.assertAllClose(expected_classification_loss_under_sampling,
                        classification_loss)

  def testExpectedClassificationLossUnderSamplingWithAllNegative(self):
    """All anchors negative: minimum negative weight fallback is exercised."""

    # NOTE(review): negative_to_positive_ratio and min_num_negative_samples
    # are not parameters of graph_fn here; they are captured from the
    # enclosing scope (defined below, before execute() invokes graph_fn) and
    # therefore enter the graph as constants rather than fed inputs.
    def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses):
      return ops.expected_classification_loss_under_sampling(
          batch_cls_targets, cls_losses, unmatched_cls_losses,
          negative_to_positive_ratio, min_num_negative_samples)

    batch_cls_targets = np.array(
        [[[1, 0, 0], [1, 0, 0]], [[1, 0, 0], [1, 0, 0]]], dtype=np.float32)
    cls_losses = np.array([[1, 2], [3, 4]], dtype=np.float32)
    unmatched_cls_losses = np.array([[10, 20], [30, 40]], dtype=np.float32)
    negative_to_positive_ratio = np.array([2], dtype=np.float32)
    min_num_negative_samples = np.array([1], dtype=np.float32)

    classification_loss = self.execute(
        graph_fn, [batch_cls_targets, cls_losses, unmatched_cls_losses])

    # expected_foreground_sum = [0,0]
    # expected_expected_j = [[0, 0], [0, 0]]
    # expected_expected_negatives = [[2, 2], [2, 2]]
    # expected_desired_negatives = [[0, 0], [0, 0]]
    # expected_beta = [[0, 0],[0, 0]]
    # expected_foreground_weights = [[0, 0], [0, 0]]
    # expected_background_weights = [[.5, .5], [.5, .5]]
    # expected_weighted_foreground_losses = [[0, 0], [0, 0]]
    # expected_weighted_background_losses = [[5, 10], [15, 20]]
    # expected_classification_loss_under_sampling = [15, 35]
    expected_classification_loss_under_sampling = [
        10 * .5 + 20 * .5, 30 * .5 + 40 * .5
    ]

    self.assertAllClose(expected_classification_loss_under_sampling,
                        classification_loss)

  def testExpectedClassificationLossUnderSamplingWithAllPositive(self):
    """All anchors positive: background weights vanish entirely."""

    # NOTE(review): as in the all-negative test, the sampling-ratio constants
    # are closure-captured rather than fed as graph inputs.
    def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses):
      return ops.expected_classification_loss_under_sampling(
          batch_cls_targets, cls_losses, unmatched_cls_losses,
          negative_to_positive_ratio, min_num_negative_samples)

    batch_cls_targets = np.array(
        [[[0, 1., 0], [0, 1., 0]], [[0, 1, 0], [0, 0, 1]]], dtype=np.float32)
    cls_losses = np.array([[1, 2], [3, 4]], dtype=np.float32)
    unmatched_cls_losses = np.array([[10, 20], [30, 40]], dtype=np.float32)
    negative_to_positive_ratio = np.array([2], dtype=np.float32)
    min_num_negative_samples = np.array([1], dtype=np.float32)

    classification_loss = self.execute(
        graph_fn, [batch_cls_targets, cls_losses, unmatched_cls_losses])

    # expected_foreground_sum = [2,2]
    # expected_expected_j = [[1, 1], [1, 1]]
    # expected_expected_negatives = [[1, 1], [1, 1]]
    # expected_desired_negatives = [[1, 1], [1, 1]]
    # expected_beta = [[1, 1],[1, 1]]
    # expected_foreground_weights = [[1, 1], [1, 1]]
    # expected_background_weights = [[0, 0], [0, 0]]
    # expected_weighted_foreground_losses = [[1, 2], [3, 4]]
    # expected_weighted_background_losses = [[0, 0], [0, 0]]
    # expected_classification_loss_under_sampling = [15, 35]
    expected_classification_loss_under_sampling = [1 + 2, 3 + 4]

    self.assertAllClose(expected_classification_loss_under_sampling,
                        classification_loss)

  def testExpectedClassificationLossUnderSamplingWithSoftLabels(self):
    """Soft (fractional) foreground probabilities per anchor."""

    def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses,
                 negative_to_positive_ratio, min_num_negative_samples):
      return ops.expected_classification_loss_under_sampling(
          batch_cls_targets, cls_losses, unmatched_cls_losses,
          negative_to_positive_ratio, min_num_negative_samples)

    batch_cls_targets = np.array([[[.75, .25, 0], [0.25, .75, 0], [.75, .25, 0],
                                   [0.25, .75, 0], [1., 0, 0]]],
                                 dtype=np.float32)
    cls_losses = np.array([[1, 2, 3, 4, 5]], dtype=np.float32)
    unmatched_cls_losses = np.array([[10, 20, 30, 40, 50]], dtype=np.float32)
    negative_to_positive_ratio = np.array([2], dtype=np.float32)
    min_num_negative_samples = np.array([1], dtype=np.float32)

    classification_loss = self.execute(graph_fn, [
        batch_cls_targets, cls_losses, unmatched_cls_losses,
        negative_to_positive_ratio, min_num_negative_samples
    ])

    # expected_foreground_sum = [2]
    # expected_expected_j = [[1.75, 1.25, 1.75, 1.25, 2]]
    # expected_expected_negatives = [[3.25, 3.75, 3.25, 3.75, 3]]
    # expected_desired_negatives = [[3.25, 2.5, 3.25, 2.5, 3]]
    # expected_beta = [[1, 2/3, 1, 2/3, 1]]
    # expected_foreground_weights = [[0.25, .75, .25, .75, 0]]
    # expected_background_weights = [[[.75, 1/6., .75, 1/6., 1]]]
    # expected_weighted_foreground_losses = [[.25*1, .75*2, .25*3, .75*4, 0*5]]
    # expected_weighted_background_losses = [[
    #     .75*10, 1/6.*20, .75*30, 1/6.*40, 1*50]]
    # expected_classification_loss_under_sampling = sum([
    #     .25*1, .75*2, .25*3, .75*4, 0, .75*10, 1/6.*20, .75*30,
    #     1/6.*40, 1*50])
    expected_classification_loss_under_sampling = [
        sum([
            .25 * 1, .75 * 2, .25 * 3, .75 * 4, 0, .75 * 10, 1 / 6. * 20,
            .75 * 30, 1 / 6. * 40, 1 * 50
        ])
    ]

    self.assertAllClose(expected_classification_loss_under_sampling,
                        classification_loss)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -42,14 +42,25 @@ class MockBoxCoder(box_coder.BoxCoder): ...@@ -42,14 +42,25 @@ class MockBoxCoder(box_coder.BoxCoder):
return box_list.BoxList(rel_codes + anchors.get()) return box_list.BoxList(rel_codes + anchors.get())
class MockMaskHead(object):
  """Mask head stub whose predictions are all-zero mask tensors."""

  def __init__(self, num_classes):
    # Number of classes determines the third dimension of the predicted masks.
    self._num_classes = num_classes

  def predict(self, features):
    """Returns zero masks shaped [batch, 1, num_classes, size, size]."""
    num_images = tf.shape(features)[0]
    mask_shape = (num_images, 1, self._num_classes, DEFAULT_MASK_SIZE,
                  DEFAULT_MASK_SIZE)
    return tf.zeros(mask_shape, dtype=tf.float32)
class MockBoxPredictor(box_predictor.BoxPredictor): class MockBoxPredictor(box_predictor.BoxPredictor):
"""Simple box predictor that ignores inputs and outputs all zeros.""" """Simple box predictor that ignores inputs and outputs all zeros."""
def __init__(self, is_training, num_classes, add_background_class=True, def __init__(self, is_training, num_classes, add_background_class=True):
predict_mask=False):
super(MockBoxPredictor, self).__init__(is_training, num_classes) super(MockBoxPredictor, self).__init__(is_training, num_classes)
self._add_background_class = add_background_class self._add_background_class = add_background_class
self._predict_mask = predict_mask
def _predict(self, image_features, num_predictions_per_location): def _predict(self, image_features, num_predictions_per_location):
image_feature = image_features[0] image_feature = image_features[0]
...@@ -66,31 +77,22 @@ class MockBoxPredictor(box_predictor.BoxPredictor): ...@@ -66,31 +77,22 @@ class MockBoxPredictor(box_predictor.BoxPredictor):
(batch_size, num_anchors, 1, code_size), dtype=tf.float32) (batch_size, num_anchors, 1, code_size), dtype=tf.float32)
class_predictions_with_background = zero + tf.zeros( class_predictions_with_background = zero + tf.zeros(
(batch_size, num_anchors, num_class_slots), dtype=tf.float32) (batch_size, num_anchors, num_class_slots), dtype=tf.float32)
masks = zero + tf.zeros(
(batch_size, num_anchors, self.num_classes, DEFAULT_MASK_SIZE,
DEFAULT_MASK_SIZE),
dtype=tf.float32)
predictions_dict = { predictions_dict = {
box_predictor.BOX_ENCODINGS: box_predictor.BOX_ENCODINGS:
box_encodings, box_encodings,
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND: box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND:
class_predictions_with_background class_predictions_with_background
} }
if self._predict_mask:
predictions_dict[box_predictor.MASK_PREDICTIONS] = masks
return predictions_dict return predictions_dict
class MockKerasBoxPredictor(box_predictor.KerasBoxPredictor): class MockKerasBoxPredictor(box_predictor.KerasBoxPredictor):
"""Simple box predictor that ignores inputs and outputs all zeros.""" """Simple box predictor that ignores inputs and outputs all zeros."""
def __init__(self, is_training, num_classes, add_background_class=True, def __init__(self, is_training, num_classes, add_background_class=True):
predict_mask=False):
super(MockKerasBoxPredictor, self).__init__( super(MockKerasBoxPredictor, self).__init__(
is_training, num_classes, False, False) is_training, num_classes, False, False)
self._add_background_class = add_background_class self._add_background_class = add_background_class
self._predict_mask = predict_mask
def _predict(self, image_features, **kwargs): def _predict(self, image_features, **kwargs):
image_feature = image_features[0] image_feature = image_features[0]
...@@ -107,18 +109,12 @@ class MockKerasBoxPredictor(box_predictor.KerasBoxPredictor): ...@@ -107,18 +109,12 @@ class MockKerasBoxPredictor(box_predictor.KerasBoxPredictor):
(batch_size, num_anchors, 1, code_size), dtype=tf.float32) (batch_size, num_anchors, 1, code_size), dtype=tf.float32)
class_predictions_with_background = zero + tf.zeros( class_predictions_with_background = zero + tf.zeros(
(batch_size, num_anchors, num_class_slots), dtype=tf.float32) (batch_size, num_anchors, num_class_slots), dtype=tf.float32)
masks = zero + tf.zeros(
(batch_size, num_anchors, self.num_classes, DEFAULT_MASK_SIZE,
DEFAULT_MASK_SIZE),
dtype=tf.float32)
predictions_dict = { predictions_dict = {
box_predictor.BOX_ENCODINGS: box_predictor.BOX_ENCODINGS:
box_encodings, box_encodings,
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND: box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND:
class_predictions_with_background class_predictions_with_background
} }
if self._predict_mask:
predictions_dict[box_predictor.MASK_PREDICTIONS] = masks
return predictions_dict return predictions_dict
......
...@@ -19,8 +19,7 @@ These functions often receive an image, perform some visualization on the image. ...@@ -19,8 +19,7 @@ These functions often receive an image, perform some visualization on the image.
The functions do not return a value, instead they modify the image itself. The functions do not return a value, instead they modify the image itself.
""" """
from abc import ABCMeta import abc
from abc import abstractmethod
import collections import collections
import functools import functools
# Set headless-friendly backend. # Set headless-friendly backend.
...@@ -35,7 +34,7 @@ import six ...@@ -35,7 +34,7 @@ import six
import tensorflow as tf import tensorflow as tf
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
from object_detection.utils import shape_utils
_TITLE_LEFT_MARGIN = 10 _TITLE_LEFT_MARGIN = 10
_TITLE_TOP_MARGIN = 10 _TITLE_TOP_MARGIN = 10
...@@ -309,11 +308,23 @@ def _visualize_boxes_and_masks_and_keypoints( ...@@ -309,11 +308,23 @@ def _visualize_boxes_and_masks_and_keypoints(
**kwargs) **kwargs)
def _resize_original_image(image, image_shape):
  """Resizes a single image tensor to image_shape via nearest neighbor.

  The image is temporarily given a batch dimension because
  tf.image.resize_images operates on batched input; the result is squeezed
  back to 3-D and cast to uint8.
  """
  batched = tf.expand_dims(image, 0)
  resized = tf.image.resize_images(
      batched,
      image_shape,
      method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
      align_corners=True)
  return tf.cast(tf.squeeze(resized, 0), tf.uint8)
def draw_bounding_boxes_on_image_tensors(images, def draw_bounding_boxes_on_image_tensors(images,
boxes, boxes,
classes, classes,
scores, scores,
category_index, category_index,
original_image_spatial_shape=None,
true_image_shape=None,
instance_masks=None, instance_masks=None,
keypoints=None, keypoints=None,
max_boxes_to_draw=20, max_boxes_to_draw=20,
...@@ -323,13 +334,18 @@ def draw_bounding_boxes_on_image_tensors(images, ...@@ -323,13 +334,18 @@ def draw_bounding_boxes_on_image_tensors(images,
Args: Args:
images: A 4D uint8 image tensor of shape [N, H, W, C]. If C > 3, additional images: A 4D uint8 image tensor of shape [N, H, W, C]. If C > 3, additional
channels will be ignored. channels will be ignored. If C = 1, then we convert the images to RGB
images.
boxes: [N, max_detections, 4] float32 tensor of detection boxes. boxes: [N, max_detections, 4] float32 tensor of detection boxes.
classes: [N, max_detections] int tensor of detection classes. Note that classes: [N, max_detections] int tensor of detection classes. Note that
classes are 1-indexed. classes are 1-indexed.
scores: [N, max_detections] float32 tensor of detection scores. scores: [N, max_detections] float32 tensor of detection scores.
category_index: a dict that maps integer ids to category dicts. e.g. category_index: a dict that maps integer ids to category dicts. e.g.
{1: {1: 'dog'}, 2: {2: 'cat'}, ...} {1: {1: 'dog'}, 2: {2: 'cat'}, ...}
original_image_spatial_shape: [N, 2] tensor containing the spatial size of
the original image.
true_image_shape: [N, 3] tensor containing the spatial size of unpadded
original_image.
instance_masks: A 4D uint8 tensor of shape [N, max_detection, H, W] with instance_masks: A 4D uint8 tensor of shape [N, max_detection, H, W] with
instance masks. instance masks.
keypoints: A 4D float32 tensor of shape [N, max_detection, num_keypoints, 2] keypoints: A 4D float32 tensor of shape [N, max_detection, num_keypoints, 2]
...@@ -344,7 +360,10 @@ def draw_bounding_boxes_on_image_tensors(images, ...@@ -344,7 +360,10 @@ def draw_bounding_boxes_on_image_tensors(images,
4D image tensor of type uint8, with boxes drawn on top. 4D image tensor of type uint8, with boxes drawn on top.
""" """
# Additional channels are being ignored. # Additional channels are being ignored.
images = images[:, :, :, 0:3] if images.shape[3] > 3:
images = images[:, :, :, 0:3]
elif images.shape[3] == 1:
images = tf.image.grayscale_to_rgb(images)
visualization_keyword_args = { visualization_keyword_args = {
'use_normalized_coordinates': use_normalized_coordinates, 'use_normalized_coordinates': use_normalized_coordinates,
'max_boxes_to_draw': max_boxes_to_draw, 'max_boxes_to_draw': max_boxes_to_draw,
...@@ -352,35 +371,61 @@ def draw_bounding_boxes_on_image_tensors(images, ...@@ -352,35 +371,61 @@ def draw_bounding_boxes_on_image_tensors(images,
'agnostic_mode': False, 'agnostic_mode': False,
'line_thickness': 4 'line_thickness': 4
} }
if true_image_shape is None:
true_shapes = tf.constant(-1, shape=[images.shape.as_list()[0], 3])
else:
true_shapes = true_image_shape
if original_image_spatial_shape is None:
original_shapes = tf.constant(-1, shape=[images.shape.as_list()[0], 2])
else:
original_shapes = original_image_spatial_shape
if instance_masks is not None and keypoints is None: if instance_masks is not None and keypoints is None:
visualize_boxes_fn = functools.partial( visualize_boxes_fn = functools.partial(
_visualize_boxes_and_masks, _visualize_boxes_and_masks,
category_index=category_index, category_index=category_index,
**visualization_keyword_args) **visualization_keyword_args)
elems = [images, boxes, classes, scores, instance_masks] elems = [
true_shapes, original_shapes, images, boxes, classes, scores,
instance_masks
]
elif instance_masks is None and keypoints is not None: elif instance_masks is None and keypoints is not None:
visualize_boxes_fn = functools.partial( visualize_boxes_fn = functools.partial(
_visualize_boxes_and_keypoints, _visualize_boxes_and_keypoints,
category_index=category_index, category_index=category_index,
**visualization_keyword_args) **visualization_keyword_args)
elems = [images, boxes, classes, scores, keypoints] elems = [
true_shapes, original_shapes, images, boxes, classes, scores, keypoints
]
elif instance_masks is not None and keypoints is not None: elif instance_masks is not None and keypoints is not None:
visualize_boxes_fn = functools.partial( visualize_boxes_fn = functools.partial(
_visualize_boxes_and_masks_and_keypoints, _visualize_boxes_and_masks_and_keypoints,
category_index=category_index, category_index=category_index,
**visualization_keyword_args) **visualization_keyword_args)
elems = [images, boxes, classes, scores, instance_masks, keypoints] elems = [
true_shapes, original_shapes, images, boxes, classes, scores,
instance_masks, keypoints
]
else: else:
visualize_boxes_fn = functools.partial( visualize_boxes_fn = functools.partial(
_visualize_boxes, _visualize_boxes,
category_index=category_index, category_index=category_index,
**visualization_keyword_args) **visualization_keyword_args)
elems = [images, boxes, classes, scores] elems = [
true_shapes, original_shapes, images, boxes, classes, scores
]
def draw_boxes(image_and_detections): def draw_boxes(image_and_detections):
"""Draws boxes on image.""" """Draws boxes on image."""
image_with_boxes = tf.py_func(visualize_boxes_fn, image_and_detections, true_shape = image_and_detections[0]
original_shape = image_and_detections[1]
if true_image_shape is not None:
image = shape_utils.pad_or_clip_nd(image_and_detections[2],
[true_shape[0], true_shape[1], 3])
if original_image_spatial_shape is not None:
image_and_detections[2] = _resize_original_image(image, original_shape)
image_with_boxes = tf.py_func(visualize_boxes_fn, image_and_detections[2:],
tf.uint8) tf.uint8)
return image_with_boxes return image_with_boxes
...@@ -400,6 +445,7 @@ def draw_side_by_side_evaluation_image(eval_dict, ...@@ -400,6 +445,7 @@ def draw_side_by_side_evaluation_image(eval_dict,
Args: Args:
eval_dict: The evaluation dictionary returned by eval_dict: The evaluation dictionary returned by
eval_util.result_dict_for_batched_example() or
eval_util.result_dict_for_single_example(). eval_util.result_dict_for_single_example().
category_index: A category index (dictionary) produced from a labelmap. category_index: A category index (dictionary) produced from a labelmap.
max_boxes_to_draw: The maximum number of boxes to draw for detections. max_boxes_to_draw: The maximum number of boxes to draw for detections.
...@@ -409,53 +455,85 @@ def draw_side_by_side_evaluation_image(eval_dict, ...@@ -409,53 +455,85 @@ def draw_side_by_side_evaluation_image(eval_dict,
Default is True. Default is True.
Returns: Returns:
A [1, H, 2 * W, C] uint8 tensor. The subimage on the left corresponds to A list of [1, H, 2 * W, C] uint8 tensor. The subimage on the left
detections, while the subimage on the right corresponds to groundtruth. corresponds to detections, while the subimage on the right corresponds to
groundtruth.
""" """
detection_fields = fields.DetectionResultFields() detection_fields = fields.DetectionResultFields()
input_data_fields = fields.InputDataFields() input_data_fields = fields.InputDataFields()
instance_masks = None
if detection_fields.detection_masks in eval_dict: images_with_detections_list = []
instance_masks = tf.cast(
tf.expand_dims(eval_dict[detection_fields.detection_masks], axis=0), # Add the batch dimension if the eval_dict is for single example.
tf.uint8) if len(eval_dict[detection_fields.detection_classes].shape) == 1:
keypoints = None for key in eval_dict:
if detection_fields.detection_keypoints in eval_dict: if key != input_data_fields.original_image:
keypoints = tf.expand_dims( eval_dict[key] = tf.expand_dims(eval_dict[key], 0)
eval_dict[detection_fields.detection_keypoints], axis=0)
groundtruth_instance_masks = None for indx in range(eval_dict[input_data_fields.original_image].shape[0]):
if input_data_fields.groundtruth_instance_masks in eval_dict: instance_masks = None
groundtruth_instance_masks = tf.cast( if detection_fields.detection_masks in eval_dict:
instance_masks = tf.cast(
tf.expand_dims(
eval_dict[detection_fields.detection_masks][indx], axis=0),
tf.uint8)
keypoints = None
if detection_fields.detection_keypoints in eval_dict:
keypoints = tf.expand_dims(
eval_dict[detection_fields.detection_keypoints][indx], axis=0)
groundtruth_instance_masks = None
if input_data_fields.groundtruth_instance_masks in eval_dict:
groundtruth_instance_masks = tf.cast(
tf.expand_dims(
eval_dict[input_data_fields.groundtruth_instance_masks][indx],
axis=0), tf.uint8)
images_with_detections = draw_bounding_boxes_on_image_tensors(
tf.expand_dims( tf.expand_dims(
eval_dict[input_data_fields.groundtruth_instance_masks], axis=0), eval_dict[input_data_fields.original_image][indx], axis=0),
tf.uint8) tf.expand_dims(
images_with_detections = draw_bounding_boxes_on_image_tensors( eval_dict[detection_fields.detection_boxes][indx], axis=0),
eval_dict[input_data_fields.original_image], tf.expand_dims(
tf.expand_dims(eval_dict[detection_fields.detection_boxes], axis=0), eval_dict[detection_fields.detection_classes][indx], axis=0),
tf.expand_dims(eval_dict[detection_fields.detection_classes], axis=0), tf.expand_dims(
tf.expand_dims(eval_dict[detection_fields.detection_scores], axis=0), eval_dict[detection_fields.detection_scores][indx], axis=0),
category_index, category_index,
instance_masks=instance_masks, original_image_spatial_shape=tf.expand_dims(
keypoints=keypoints, eval_dict[input_data_fields.original_image_spatial_shape][indx],
max_boxes_to_draw=max_boxes_to_draw, axis=0),
min_score_thresh=min_score_thresh, true_image_shape=tf.expand_dims(
use_normalized_coordinates=use_normalized_coordinates) eval_dict[input_data_fields.true_image_shape][indx], axis=0),
images_with_groundtruth = draw_bounding_boxes_on_image_tensors( instance_masks=instance_masks,
eval_dict[input_data_fields.original_image], keypoints=keypoints,
tf.expand_dims(eval_dict[input_data_fields.groundtruth_boxes], axis=0), max_boxes_to_draw=max_boxes_to_draw,
tf.expand_dims(eval_dict[input_data_fields.groundtruth_classes], axis=0), min_score_thresh=min_score_thresh,
tf.expand_dims( use_normalized_coordinates=use_normalized_coordinates)
tf.ones_like( images_with_groundtruth = draw_bounding_boxes_on_image_tensors(
eval_dict[input_data_fields.groundtruth_classes], tf.expand_dims(
dtype=tf.float32), eval_dict[input_data_fields.original_image][indx], axis=0),
axis=0), tf.expand_dims(
category_index, eval_dict[input_data_fields.groundtruth_boxes][indx], axis=0),
instance_masks=groundtruth_instance_masks, tf.expand_dims(
keypoints=None, eval_dict[input_data_fields.groundtruth_classes][indx], axis=0),
max_boxes_to_draw=None, tf.expand_dims(
min_score_thresh=0.0, tf.ones_like(
use_normalized_coordinates=use_normalized_coordinates) eval_dict[input_data_fields.groundtruth_classes][indx],
return tf.concat([images_with_detections, images_with_groundtruth], axis=2) dtype=tf.float32),
axis=0),
category_index,
original_image_spatial_shape=tf.expand_dims(
eval_dict[input_data_fields.original_image_spatial_shape][indx],
axis=0),
true_image_shape=tf.expand_dims(
eval_dict[input_data_fields.true_image_shape][indx], axis=0),
instance_masks=groundtruth_instance_masks,
keypoints=None,
max_boxes_to_draw=None,
min_score_thresh=0.0,
use_normalized_coordinates=use_normalized_coordinates)
images_with_detections_list.append(
tf.concat([images_with_detections, images_with_groundtruth], axis=2))
return images_with_detections_list
def draw_keypoints_on_image_array(image, def draw_keypoints_on_image_array(image,
...@@ -744,7 +822,7 @@ class EvalMetricOpsVisualization(object): ...@@ -744,7 +822,7 @@ class EvalMetricOpsVisualization(object):
responsible for accruing images (with overlaid detections and groundtruth) responsible for accruing images (with overlaid detections and groundtruth)
and returning a dictionary that can be passed to `eval_metric_ops`. and returning a dictionary that can be passed to `eval_metric_ops`.
""" """
__metaclass__ = ABCMeta __metaclass__ = abc.ABCMeta
def __init__(self, def __init__(self,
category_index, category_index,
...@@ -792,26 +870,33 @@ class EvalMetricOpsVisualization(object): ...@@ -792,26 +870,33 @@ class EvalMetricOpsVisualization(object):
Args: Args:
eval_dict: A dictionary that holds an image, groundtruth, and detections eval_dict: A dictionary that holds an image, groundtruth, and detections
for a single example. See eval_util.result_dict_for_single_example() for for a batched example. Note that, we use only the first example for
a convenient method for constructing such a dictionary. The dictionary visualization. See eval_util.result_dict_for_batched_example() for a
convenient method for constructing such a dictionary. The dictionary
contains contains
fields.InputDataFields.original_image: [1, H, W, 3] image. fields.InputDataFields.original_image: [batch_size, H, W, 3] image.
fields.InputDataFields.groundtruth_boxes - [num_boxes, 4] float32 fields.InputDataFields.original_image_spatial_shape: [batch_size, 2]
tensor with groundtruth boxes in range [0.0, 1.0]. tensor containing the size of the original image.
fields.InputDataFields.groundtruth_classes - [num_boxes] int64 fields.InputDataFields.true_image_shape: [batch_size, 3]
tensor with 1-indexed groundtruth classes. tensor containing the spatial size of the upadded original image.
fields.InputDataFields.groundtruth_boxes - [batch_size, num_boxes, 4]
float32 tensor with groundtruth boxes in range [0.0, 1.0].
fields.InputDataFields.groundtruth_classes - [batch_size, num_boxes]
int64 tensor with 1-indexed groundtruth classes.
fields.InputDataFields.groundtruth_instance_masks - (optional) fields.InputDataFields.groundtruth_instance_masks - (optional)
[num_boxes, H, W] int64 tensor with instance masks. [batch_size, num_boxes, H, W] int64 tensor with instance masks.
fields.DetectionResultFields.detection_boxes - [max_num_boxes, 4] fields.DetectionResultFields.detection_boxes - [batch_size,
float32 tensor with detection boxes in range [0.0, 1.0]. max_num_boxes, 4] float32 tensor with detection boxes in range [0.0,
fields.DetectionResultFields.detection_classes - [max_num_boxes] 1.0].
int64 tensor with 1-indexed detection classes. fields.DetectionResultFields.detection_classes - [batch_size,
fields.DetectionResultFields.detection_scores - [max_num_boxes] max_num_boxes] int64 tensor with 1-indexed detection classes.
float32 tensor with detection scores. fields.DetectionResultFields.detection_scores - [batch_size,
fields.DetectionResultFields.detection_masks - (optional) max_num_boxes] float32 tensor with detection scores.
[max_num_boxes, H, W] float32 tensor of binarized masks. fields.DetectionResultFields.detection_masks - (optional) [batch_size,
max_num_boxes, H, W] float32 tensor of binarized masks.
fields.DetectionResultFields.detection_keypoints - (optional) fields.DetectionResultFields.detection_keypoints - (optional)
[max_num_boxes, num_keypoints, 2] float32 tensor with keypooints. [batch_size, max_num_boxes, num_keypoints, 2] float32 tensor with
keypoints.
Returns: Returns:
A dictionary of image summary names to tuple of (value_op, update_op). The A dictionary of image summary names to tuple of (value_op, update_op). The
...@@ -820,6 +905,8 @@ class EvalMetricOpsVisualization(object): ...@@ -820,6 +905,8 @@ class EvalMetricOpsVisualization(object):
groundtruth. Each `value_op` holds the tf.summary.image string for a given groundtruth. Each `value_op` holds the tf.summary.image string for a given
image. image.
""" """
if self._max_examples_to_draw == 0:
return {}
images = self.images_from_evaluation_dict(eval_dict) images = self.images_from_evaluation_dict(eval_dict)
def get_images(): def get_images():
...@@ -837,7 +924,7 @@ class EvalMetricOpsVisualization(object): ...@@ -837,7 +924,7 @@ class EvalMetricOpsVisualization(object):
lambda: tf.summary.image(summary_name, image), lambda: tf.summary.image(summary_name, image),
lambda: tf.constant('')) lambda: tf.constant(''))
update_op = tf.py_func(self.add_images, [images], []) update_op = tf.py_func(self.add_images, [[images[0]]], [])
image_tensors = tf.py_func( image_tensors = tf.py_func(
get_images, [], [tf.uint8] * self._max_examples_to_draw) get_images, [], [tf.uint8] * self._max_examples_to_draw)
eval_metric_ops = {} eval_metric_ops = {}
...@@ -847,7 +934,7 @@ class EvalMetricOpsVisualization(object): ...@@ -847,7 +934,7 @@ class EvalMetricOpsVisualization(object):
eval_metric_ops[summary_name] = (value_op, update_op) eval_metric_ops[summary_name] = (value_op, update_op)
return eval_metric_ops return eval_metric_ops
@abstractmethod @abc.abstractmethod
def images_from_evaluation_dict(self, eval_dict): def images_from_evaluation_dict(self, eval_dict):
"""Converts evaluation dictionary into a list of image tensors. """Converts evaluation dictionary into a list of image tensors.
...@@ -882,9 +969,6 @@ class VisualizeSingleFrameDetections(EvalMetricOpsVisualization): ...@@ -882,9 +969,6 @@ class VisualizeSingleFrameDetections(EvalMetricOpsVisualization):
summary_name_prefix=summary_name_prefix) summary_name_prefix=summary_name_prefix)
def images_from_evaluation_dict(self, eval_dict): def images_from_evaluation_dict(self, eval_dict):
return [draw_side_by_side_evaluation_image( return draw_side_by_side_evaluation_image(
eval_dict, eval_dict, self._category_index, self._max_boxes_to_draw,
self._category_index, self._min_score_thresh, self._use_normalized_coordinates)
self._max_boxes_to_draw,
self._min_score_thresh,
self._use_normalized_coordinates)]
...@@ -52,6 +52,9 @@ class VisualizationUtilsTest(tf.test.TestCase): ...@@ -52,6 +52,9 @@ class VisualizationUtilsTest(tf.test.TestCase):
def create_test_image_with_five_channels(self): def create_test_image_with_five_channels(self):
return np.full([100, 200, 5], 255, dtype=np.uint8) return np.full([100, 200, 5], 255, dtype=np.uint8)
def create_test_grayscale_image(self):
return np.full([100, 200, 1], 255, dtype=np.uint8)
def test_draw_bounding_box_on_image(self): def test_draw_bounding_box_on_image(self):
test_image = self.create_colorful_test_image() test_image = self.create_colorful_test_image()
test_image = Image.fromarray(test_image) test_image = Image.fromarray(test_image)
...@@ -119,9 +122,11 @@ class VisualizationUtilsTest(tf.test.TestCase): ...@@ -119,9 +122,11 @@ class VisualizationUtilsTest(tf.test.TestCase):
fname = os.path.join(_TESTDATA_PATH, 'image1.jpg') fname = os.path.join(_TESTDATA_PATH, 'image1.jpg')
image_np = np.array(Image.open(fname)) image_np = np.array(Image.open(fname))
images_np = np.stack((image_np, image_np), axis=0) images_np = np.stack((image_np, image_np), axis=0)
original_image_shape = [[636, 512], [636, 512]]
with tf.Graph().as_default(): with tf.Graph().as_default():
images_tensor = tf.constant(value=images_np, dtype=tf.uint8) images_tensor = tf.constant(value=images_np, dtype=tf.uint8)
image_shape = tf.constant(original_image_shape, dtype=tf.int32)
boxes = tf.constant([[[0.4, 0.25, 0.75, 0.75], [0.5, 0.3, 0.6, 0.9]], boxes = tf.constant([[[0.4, 0.25, 0.75, 0.75], [0.5, 0.3, 0.6, 0.9]],
[[0.25, 0.25, 0.75, 0.75], [0.1, 0.3, 0.6, 1.0]]]) [[0.25, 0.25, 0.75, 0.75], [0.1, 0.3, 0.6, 1.0]]])
classes = tf.constant([[1, 1], [1, 2]], dtype=tf.int64) classes = tf.constant([[1, 1], [1, 2]], dtype=tf.int64)
...@@ -133,6 +138,8 @@ class VisualizationUtilsTest(tf.test.TestCase): ...@@ -133,6 +138,8 @@ class VisualizationUtilsTest(tf.test.TestCase):
classes, classes,
scores, scores,
category_index, category_index,
original_image_spatial_shape=image_shape,
true_image_shape=image_shape,
min_score_thresh=0.2)) min_score_thresh=0.2))
with self.test_session() as sess: with self.test_session() as sess:
...@@ -140,7 +147,10 @@ class VisualizationUtilsTest(tf.test.TestCase): ...@@ -140,7 +147,10 @@ class VisualizationUtilsTest(tf.test.TestCase):
# Write output images for visualization. # Write output images for visualization.
images_with_boxes_np = sess.run(images_with_boxes) images_with_boxes_np = sess.run(images_with_boxes)
self.assertEqual(images_np.shape, images_with_boxes_np.shape) self.assertEqual(images_np.shape[0], images_with_boxes_np.shape[0])
self.assertEqual(images_np.shape[3], images_with_boxes_np.shape[3])
self.assertEqual(
tuple(original_image_shape[0]), images_with_boxes_np.shape[1:3])
for i in range(images_with_boxes_np.shape[0]): for i in range(images_with_boxes_np.shape[0]):
img_name = 'image_' + str(i) + '.png' img_name = 'image_' + str(i) + '.png'
output_file = os.path.join(self.get_temp_dir(), img_name) output_file = os.path.join(self.get_temp_dir(), img_name)
...@@ -174,6 +184,35 @@ class VisualizationUtilsTest(tf.test.TestCase): ...@@ -174,6 +184,35 @@ class VisualizationUtilsTest(tf.test.TestCase):
final_images_np = sess.run(images_with_boxes) final_images_np = sess.run(images_with_boxes)
self.assertEqual((2, 100, 200, 3), final_images_np.shape) self.assertEqual((2, 100, 200, 3), final_images_np.shape)
def test_draw_bounding_boxes_on_image_tensors_grayscale(self):
"""Tests the case where input image tensor has one channel."""
category_index = {1: {'id': 1, 'name': 'dog'}}
image_np = self.create_test_grayscale_image()
images_np = np.stack((image_np, image_np), axis=0)
with tf.Graph().as_default():
images_tensor = tf.constant(value=images_np, dtype=tf.uint8)
image_shape = tf.constant([[100, 200], [100, 200]], dtype=tf.int32)
boxes = tf.constant(0, dtype=tf.float32, shape=[2, 0, 4])
classes = tf.constant(0, dtype=tf.int64, shape=[2, 0])
scores = tf.constant(0, dtype=tf.float32, shape=[2, 0])
images_with_boxes = (
visualization_utils.draw_bounding_boxes_on_image_tensors(
images_tensor,
boxes,
classes,
scores,
category_index,
original_image_spatial_shape=image_shape,
true_image_shape=image_shape,
min_score_thresh=0.2))
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
final_images_np = sess.run(images_with_boxes)
self.assertEqual((2, 100, 200, 3), final_images_np.shape)
def test_draw_keypoints_on_image(self): def test_draw_keypoints_on_image(self):
test_image = self.create_colorful_test_image() test_image = self.create_colorful_test_image()
test_image = Image.fromarray(test_image) test_image = Image.fromarray(test_image)
...@@ -234,34 +273,46 @@ class VisualizationUtilsTest(tf.test.TestCase): ...@@ -234,34 +273,46 @@ class VisualizationUtilsTest(tf.test.TestCase):
category_index, category_index,
max_examples_to_draw=max_examples_to_draw, max_examples_to_draw=max_examples_to_draw,
summary_name_prefix=metric_op_base) summary_name_prefix=metric_op_base)
original_image = tf.placeholder(tf.uint8, [1, None, None, 3]) original_image = tf.placeholder(tf.uint8, [4, None, None, 3])
detection_boxes = tf.random_uniform([20, 4], original_image_spatial_shape = tf.placeholder(tf.int32, [4, 2])
true_image_shape = tf.placeholder(tf.int32, [4, 3])
detection_boxes = tf.random_uniform([4, 20, 4],
minval=0.0, minval=0.0,
maxval=1.0, maxval=1.0,
dtype=tf.float32) dtype=tf.float32)
detection_classes = tf.random_uniform([20], detection_classes = tf.random_uniform([4, 20],
minval=1, minval=1,
maxval=3, maxval=3,
dtype=tf.int64) dtype=tf.int64)
detection_scores = tf.random_uniform([20], detection_scores = tf.random_uniform([4, 20],
minval=0., minval=0.,
maxval=1., maxval=1.,
dtype=tf.float32) dtype=tf.float32)
groundtruth_boxes = tf.random_uniform([8, 4], groundtruth_boxes = tf.random_uniform([4, 8, 4],
minval=0.0, minval=0.0,
maxval=1.0, maxval=1.0,
dtype=tf.float32) dtype=tf.float32)
groundtruth_classes = tf.random_uniform([8], groundtruth_classes = tf.random_uniform([4, 8],
minval=1, minval=1,
maxval=3, maxval=3,
dtype=tf.int64) dtype=tf.int64)
eval_dict = { eval_dict = {
fields.DetectionResultFields.detection_boxes: detection_boxes, fields.DetectionResultFields.detection_boxes:
fields.DetectionResultFields.detection_classes: detection_classes, detection_boxes,
fields.DetectionResultFields.detection_scores: detection_scores, fields.DetectionResultFields.detection_classes:
fields.InputDataFields.original_image: original_image, detection_classes,
fields.InputDataFields.groundtruth_boxes: groundtruth_boxes, fields.DetectionResultFields.detection_scores:
fields.InputDataFields.groundtruth_classes: groundtruth_classes} detection_scores,
fields.InputDataFields.original_image:
original_image,
fields.InputDataFields.original_image_spatial_shape: (
original_image_spatial_shape),
fields.InputDataFields.true_image_shape: (true_image_shape),
fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes,
fields.InputDataFields.groundtruth_classes:
groundtruth_classes
}
metric_ops = eval_metric_ops.get_estimator_eval_metric_ops(eval_dict) metric_ops = eval_metric_ops.get_estimator_eval_metric_ops(eval_dict)
_, update_op = metric_ops[metric_ops.keys()[0]] _, update_op = metric_ops[metric_ops.keys()[0]]
...@@ -274,12 +325,20 @@ class VisualizationUtilsTest(tf.test.TestCase): ...@@ -274,12 +325,20 @@ class VisualizationUtilsTest(tf.test.TestCase):
# First run enough update steps to surpass `max_examples_to_draw`. # First run enough update steps to surpass `max_examples_to_draw`.
for i in range(max_examples_to_draw): for i in range(max_examples_to_draw):
# Use a unique image shape on each eval image. # Use a unique image shape on each eval image.
sess.run(update_op, feed_dict={ sess.run(
original_image: np.random.randint(low=0, update_op,
high=256, feed_dict={
size=(1, 6 + i, 7 + i, 3), original_image:
dtype=np.uint8) np.random.randint(
}) low=0,
high=256,
size=(4, 6 + i, 7 + i, 3),
dtype=np.uint8),
original_image_spatial_shape: [[6 + i, 7 + i], [6 + i, 7 + i],
[6 + i, 7 + i], [6 + i, 7 + i]],
true_image_shape: [[6 + i, 7 + i, 3], [6 + i, 7 + i, 3],
[6 + i, 7 + i, 3], [6 + i, 7 + i, 3]]
})
value_ops_out = sess.run(value_ops) value_ops_out = sess.run(value_ops)
for key, value_op in value_ops_out.iteritems(): for key, value_op in value_ops_out.iteritems():
self.assertNotEqual('', value_op) self.assertNotEqual('', value_op)
...@@ -289,12 +348,20 @@ class VisualizationUtilsTest(tf.test.TestCase): ...@@ -289,12 +348,20 @@ class VisualizationUtilsTest(tf.test.TestCase):
# produced. # produced.
for i in range(max_examples_to_draw - 1): for i in range(max_examples_to_draw - 1):
# Use a unique image shape on each eval image. # Use a unique image shape on each eval image.
sess.run(update_op, feed_dict={ sess.run(
original_image: np.random.randint(low=0, update_op,
high=256, feed_dict={
size=(1, 6 + i, 7 + i, 3), original_image:
dtype=np.uint8) np.random.randint(
}) low=0,
high=256,
size=(4, 6 + i, 7 + i, 3),
dtype=np.uint8),
original_image_spatial_shape: [[6 + i, 7 + i], [6 + i, 7 + i],
[6 + i, 7 + i], [6 + i, 7 + i]],
true_image_shape: [[6 + i, 7 + i, 3], [6 + i, 7 + i, 3],
[6 + i, 7 + i, 3], [6 + i, 7 + i, 3]]
})
value_ops_out = sess.run(value_ops) value_ops_out = sess.run(value_ops)
self.assertEqual( self.assertEqual(
'', '',
......
...@@ -63,7 +63,8 @@ def cyclegan_arg_scope(instance_norm_center=True, ...@@ -63,7 +63,8 @@ def cyclegan_arg_scope(instance_norm_center=True,
return sc return sc
def cyclegan_upsample(net, num_outputs, stride, method='conv2d_transpose'): def cyclegan_upsample(net, num_outputs, stride, method='conv2d_transpose',
pad_mode='REFLECT', align_corners=False):
"""Upsamples the given inputs. """Upsamples the given inputs.
Args: Args:
...@@ -75,6 +76,10 @@ def cyclegan_upsample(net, num_outputs, stride, method='conv2d_transpose'): ...@@ -75,6 +76,10 @@ def cyclegan_upsample(net, num_outputs, stride, method='conv2d_transpose'):
times the input size. times the input size.
method: The upsampling method: 'nn_upsample_conv', 'bilinear_upsample_conv', method: The upsampling method: 'nn_upsample_conv', 'bilinear_upsample_conv',
or 'conv2d_transpose'. or 'conv2d_transpose'.
pad_mode: mode for tf.pad, one of "CONSTANT", "REFLECT", or "SYMMETRIC".
align_corners: option for method, 'bilinear_upsample_conv'. If true, the
centers of the 4 corner pixels of the input and output tensors are
aligned, preserving the values at the corner pixels.
Returns: Returns:
A Tensor which was upsampled using the specified method. A Tensor which was upsampled using the specified method.
...@@ -95,12 +100,13 @@ def cyclegan_upsample(net, num_outputs, stride, method='conv2d_transpose'): ...@@ -95,12 +100,13 @@ def cyclegan_upsample(net, num_outputs, stride, method='conv2d_transpose'):
if method == 'nn_upsample_conv': if method == 'nn_upsample_conv':
net = tf.image.resize_nearest_neighbor( net = tf.image.resize_nearest_neighbor(
net, [stride[0] * height, stride[1] * width]) net, [stride[0] * height, stride[1] * width])
net = tf.pad(net, spatial_pad_1, 'REFLECT') net = tf.pad(net, spatial_pad_1, pad_mode)
net = layers.conv2d(net, num_outputs, kernel_size=[3, 3], padding='valid') net = layers.conv2d(net, num_outputs, kernel_size=[3, 3], padding='valid')
elif method == 'bilinear_upsample_conv': elif method == 'bilinear_upsample_conv':
net = tf.image.resize_bilinear( net = tf.image.resize_bilinear(
net, [stride[0] * height, stride[1] * width]) net, [stride[0] * height, stride[1] * width],
net = tf.pad(net, spatial_pad_1, 'REFLECT') align_corners=align_corners)
net = tf.pad(net, spatial_pad_1, pad_mode)
net = layers.conv2d(net, num_outputs, kernel_size=[3, 3], padding='valid') net = layers.conv2d(net, num_outputs, kernel_size=[3, 3], padding='valid')
elif method == 'conv2d_transpose': elif method == 'conv2d_transpose':
# This corrects 1 pixel offset for images with even width and height. # This corrects 1 pixel offset for images with even width and height.
...@@ -111,7 +117,7 @@ def cyclegan_upsample(net, num_outputs, stride, method='conv2d_transpose'): ...@@ -111,7 +117,7 @@ def cyclegan_upsample(net, num_outputs, stride, method='conv2d_transpose'):
net, num_outputs, kernel_size=[3, 3], stride=stride, padding='valid') net, num_outputs, kernel_size=[3, 3], stride=stride, padding='valid')
net = net[:, 1:, 1:, :] net = net[:, 1:, 1:, :]
else: else:
raise ValueError('Unknown method: [%s]', method) raise ValueError('Unknown method: [%s]' % method)
return net return net
......
...@@ -370,7 +370,8 @@ def inception_resnet_v2_arg_scope( ...@@ -370,7 +370,8 @@ def inception_resnet_v2_arg_scope(
batch_norm_decay=0.9997, batch_norm_decay=0.9997,
batch_norm_epsilon=0.001, batch_norm_epsilon=0.001,
activation_fn=tf.nn.relu, activation_fn=tf.nn.relu,
batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS): batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS,
batch_norm_scale=False):
"""Returns the scope with the default parameters for inception_resnet_v2. """Returns the scope with the default parameters for inception_resnet_v2.
Args: Args:
...@@ -380,6 +381,8 @@ def inception_resnet_v2_arg_scope( ...@@ -380,6 +381,8 @@ def inception_resnet_v2_arg_scope(
activation_fn: Activation function for conv2d. activation_fn: Activation function for conv2d.
batch_norm_updates_collections: Collection for the update ops for batch_norm_updates_collections: Collection for the update ops for
batch norm. batch norm.
batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
activations in the batch normalization layer.
Returns: Returns:
a arg_scope with the parameters needed for inception_resnet_v2. a arg_scope with the parameters needed for inception_resnet_v2.
...@@ -394,6 +397,7 @@ def inception_resnet_v2_arg_scope( ...@@ -394,6 +397,7 @@ def inception_resnet_v2_arg_scope(
'epsilon': batch_norm_epsilon, 'epsilon': batch_norm_epsilon,
'updates_collections': batch_norm_updates_collections, 'updates_collections': batch_norm_updates_collections,
'fused': None, # Use fused batch norm if possible. 'fused': None, # Use fused batch norm if possible.
'scale': batch_norm_scale,
} }
# Set activation_fn and parameters for batch_norm. # Set activation_fn and parameters for batch_norm.
with slim.arg_scope([slim.conv2d], activation_fn=activation_fn, with slim.arg_scope([slim.conv2d], activation_fn=activation_fn,
......
...@@ -306,6 +306,29 @@ class InceptionTest(tf.test.TestCase): ...@@ -306,6 +306,29 @@ class InceptionTest(tf.test.TestCase):
output = sess.run(predictions) output = sess.run(predictions)
self.assertEquals(output.shape, (eval_batch_size,)) self.assertEquals(output.shape, (eval_batch_size,))
def testNoBatchNormScaleByDefault(self):
height, width = 299, 299
num_classes = 1000
inputs = tf.placeholder(tf.float32, (1, height, width, 3))
with tf.contrib.slim.arg_scope(inception.inception_resnet_v2_arg_scope()):
inception.inception_resnet_v2(inputs, num_classes, is_training=False)
self.assertEqual(tf.global_variables('.*/BatchNorm/gamma:0$'), [])
def testBatchNormScale(self):
height, width = 299, 299
num_classes = 1000
inputs = tf.placeholder(tf.float32, (1, height, width, 3))
with tf.contrib.slim.arg_scope(
inception.inception_resnet_v2_arg_scope(batch_norm_scale=True)):
inception.inception_resnet_v2(inputs, num_classes, is_training=False)
gamma_names = set(
v.op.name for v in tf.global_variables('.*/BatchNorm/gamma:0$'))
self.assertGreater(len(gamma_names), 0)
for v in tf.global_variables('.*/BatchNorm/moving_mean:0$'):
self.assertIn(v.op.name[:-len('moving_mean')] + 'gamma', gamma_names)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -34,7 +34,8 @@ def inception_arg_scope(weight_decay=0.00004, ...@@ -34,7 +34,8 @@ def inception_arg_scope(weight_decay=0.00004,
batch_norm_decay=0.9997, batch_norm_decay=0.9997,
batch_norm_epsilon=0.001, batch_norm_epsilon=0.001,
activation_fn=tf.nn.relu, activation_fn=tf.nn.relu,
batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS): batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS,
batch_norm_scale=False):
"""Defines the default arg scope for inception models. """Defines the default arg scope for inception models.
Args: Args:
...@@ -46,6 +47,8 @@ def inception_arg_scope(weight_decay=0.00004, ...@@ -46,6 +47,8 @@ def inception_arg_scope(weight_decay=0.00004,
activation_fn: Activation function for conv2d. activation_fn: Activation function for conv2d.
batch_norm_updates_collections: Collection for the update ops for batch_norm_updates_collections: Collection for the update ops for
batch norm. batch norm.
batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
activations in the batch normalization layer.
Returns: Returns:
An `arg_scope` to use for the inception models. An `arg_scope` to use for the inception models.
...@@ -59,6 +62,7 @@ def inception_arg_scope(weight_decay=0.00004, ...@@ -59,6 +62,7 @@ def inception_arg_scope(weight_decay=0.00004,
'updates_collections': batch_norm_updates_collections, 'updates_collections': batch_norm_updates_collections,
# use fused batch norm if possible. # use fused batch norm if possible.
'fused': None, 'fused': None,
'scale': batch_norm_scale,
} }
if use_batch_norm: if use_batch_norm:
normalizer_fn = slim.batch_norm normalizer_fn = slim.batch_norm
......
...@@ -237,6 +237,29 @@ class InceptionV1Test(tf.test.TestCase): ...@@ -237,6 +237,29 @@ class InceptionV1Test(tf.test.TestCase):
logits_out = sess.run(logits) logits_out = sess.run(logits)
self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes]) self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])
def testNoBatchNormScaleByDefault(self):
height, width = 224, 224
num_classes = 1000
inputs = tf.placeholder(tf.float32, (1, height, width, 3))
with slim.arg_scope(inception.inception_v1_arg_scope()):
inception.inception_v1(inputs, num_classes, is_training=False)
self.assertEqual(tf.global_variables('.*/BatchNorm/gamma:0$'), [])
def testBatchNormScale(self):
height, width = 224, 224
num_classes = 1000
inputs = tf.placeholder(tf.float32, (1, height, width, 3))
with slim.arg_scope(
inception.inception_v1_arg_scope(batch_norm_scale=True)):
inception.inception_v1(inputs, num_classes, is_training=False)
gamma_names = set(
v.op.name for v in tf.global_variables('.*/BatchNorm/gamma:0$'))
self.assertGreater(len(gamma_names), 0)
for v in tf.global_variables('.*/BatchNorm/moving_mean:0$'):
self.assertIn(v.op.name[:-len('moving_mean')] + 'gamma', gamma_names)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -351,6 +351,29 @@ class InceptionV2Test(tf.test.TestCase): ...@@ -351,6 +351,29 @@ class InceptionV2Test(tf.test.TestCase):
logits_out = sess.run(logits) logits_out = sess.run(logits)
self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes]) self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])
def testNoBatchNormScaleByDefault(self):
height, width = 224, 224
num_classes = 1000
inputs = tf.placeholder(tf.float32, (1, height, width, 3))
with slim.arg_scope(inception.inception_v2_arg_scope()):
inception.inception_v2(inputs, num_classes, is_training=False)
self.assertEqual(tf.global_variables('.*/BatchNorm/gamma:0$'), [])
def testBatchNormScale(self):
height, width = 224, 224
num_classes = 1000
inputs = tf.placeholder(tf.float32, (1, height, width, 3))
with slim.arg_scope(
inception.inception_v2_arg_scope(batch_norm_scale=True)):
inception.inception_v2(inputs, num_classes, is_training=False)
gamma_names = set(
v.op.name for v in tf.global_variables('.*/BatchNorm/gamma:0$'))
self.assertGreater(len(gamma_names), 0)
for v in tf.global_variables('.*/BatchNorm/moving_mean:0$'):
self.assertIn(v.op.name[:-len('moving_mean')] + 'gamma', gamma_names)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -318,6 +318,29 @@ class InceptionV3Test(tf.test.TestCase): ...@@ -318,6 +318,29 @@ class InceptionV3Test(tf.test.TestCase):
logits_out = sess.run(logits) logits_out = sess.run(logits)
self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes]) self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])
def testNoBatchNormScaleByDefault(self):
height, width = 299, 299
num_classes = 1000
inputs = tf.placeholder(tf.float32, (1, height, width, 3))
with slim.arg_scope(inception.inception_v3_arg_scope()):
inception.inception_v3(inputs, num_classes, is_training=False)
self.assertEqual(tf.global_variables('.*/BatchNorm/gamma:0$'), [])
def testBatchNormScale(self):
height, width = 299, 299
num_classes = 1000
inputs = tf.placeholder(tf.float32, (1, height, width, 3))
with slim.arg_scope(
inception.inception_v3_arg_scope(batch_norm_scale=True)):
inception.inception_v3(inputs, num_classes, is_training=False)
gamma_names = set(
v.op.name for v in tf.global_variables('.*/BatchNorm/gamma:0$'))
self.assertGreater(len(gamma_names), 0)
for v in tf.global_variables('.*/BatchNorm/moving_mean:0$'):
self.assertIn(v.op.name[:-len('moving_mean')] + 'gamma', gamma_names)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -255,6 +255,29 @@ class InceptionTest(tf.test.TestCase): ...@@ -255,6 +255,29 @@ class InceptionTest(tf.test.TestCase):
output = sess.run(predictions) output = sess.run(predictions)
self.assertEquals(output.shape, (eval_batch_size,)) self.assertEquals(output.shape, (eval_batch_size,))
def testNoBatchNormScaleByDefault(self):
height, width = 299, 299
num_classes = 1000
inputs = tf.placeholder(tf.float32, (1, height, width, 3))
with tf.contrib.slim.arg_scope(inception.inception_v4_arg_scope()):
inception.inception_v4(inputs, num_classes, is_training=False)
self.assertEqual(tf.global_variables('.*/BatchNorm/gamma:0$'), [])
def testBatchNormScale(self):
height, width = 299, 299
num_classes = 1000
inputs = tf.placeholder(tf.float32, (1, height, width, 3))
with tf.contrib.slim.arg_scope(
inception.inception_v4_arg_scope(batch_norm_scale=True)):
inception.inception_v4(inputs, num_classes, is_training=False)
gamma_names = set(
v.op.name for v in tf.global_variables('.*/BatchNorm/gamma:0$'))
self.assertGreater(len(gamma_names), 0)
for v in tf.global_variables('.*/BatchNorm/moving_mean:0$'):
self.assertIn(v.op.name[:-len('moving_mean')] + 'gamma', gamma_names)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -263,7 +263,6 @@ def mobilenet_v1_base(inputs, ...@@ -263,7 +263,6 @@ def mobilenet_v1_base(inputs,
net = _fixed_padding(net, conv_def.kernel) net = _fixed_padding(net, conv_def.kernel)
net = slim.conv2d(net, depth(conv_def.depth), conv_def.kernel, net = slim.conv2d(net, depth(conv_def.depth), conv_def.kernel,
stride=conv_def.stride, stride=conv_def.stride,
normalizer_fn=slim.batch_norm,
scope=end_point) scope=end_point)
end_points[end_point] = net end_points[end_point] = net
if end_point == final_endpoint: if end_point == final_endpoint:
...@@ -280,7 +279,6 @@ def mobilenet_v1_base(inputs, ...@@ -280,7 +279,6 @@ def mobilenet_v1_base(inputs,
depth_multiplier=1, depth_multiplier=1,
stride=layer_stride, stride=layer_stride,
rate=layer_rate, rate=layer_rate,
normalizer_fn=slim.batch_norm,
scope=end_point) scope=end_point)
end_points[end_point] = net end_points[end_point] = net
...@@ -291,7 +289,6 @@ def mobilenet_v1_base(inputs, ...@@ -291,7 +289,6 @@ def mobilenet_v1_base(inputs,
net = slim.conv2d(net, depth(conv_def.depth), [1, 1], net = slim.conv2d(net, depth(conv_def.depth), [1, 1],
stride=1, stride=1,
normalizer_fn=slim.batch_norm,
scope=end_point) scope=end_point)
end_points[end_point] = net end_points[end_point] = net
...@@ -432,7 +429,8 @@ def mobilenet_v1_arg_scope( ...@@ -432,7 +429,8 @@ def mobilenet_v1_arg_scope(
regularize_depthwise=False, regularize_depthwise=False,
batch_norm_decay=0.9997, batch_norm_decay=0.9997,
batch_norm_epsilon=0.001, batch_norm_epsilon=0.001,
batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS): batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS,
normalizer_fn=slim.batch_norm):
"""Defines the default MobilenetV1 arg scope. """Defines the default MobilenetV1 arg scope.
Args: Args:
...@@ -446,6 +444,7 @@ def mobilenet_v1_arg_scope( ...@@ -446,6 +444,7 @@ def mobilenet_v1_arg_scope(
in batch norm. in batch norm.
batch_norm_updates_collections: Collection for the update ops for batch_norm_updates_collections: Collection for the update ops for
batch norm. batch norm.
normalizer_fn: Normalization function to apply after convolution.
Returns: Returns:
An `arg_scope` to use for the mobilenet v1 model. An `arg_scope` to use for the mobilenet v1 model.
...@@ -469,7 +468,7 @@ def mobilenet_v1_arg_scope( ...@@ -469,7 +468,7 @@ def mobilenet_v1_arg_scope(
depthwise_regularizer = None depthwise_regularizer = None
with slim.arg_scope([slim.conv2d, slim.separable_conv2d], with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
weights_initializer=weights_init, weights_initializer=weights_init,
activation_fn=tf.nn.relu6, normalizer_fn=slim.batch_norm): activation_fn=tf.nn.relu6, normalizer_fn=normalizer_fn):
with slim.arg_scope([slim.batch_norm], **batch_norm_params): with slim.arg_scope([slim.batch_norm], **batch_norm_params):
with slim.arg_scope([slim.conv2d], weights_regularizer=regularizer): with slim.arg_scope([slim.conv2d], weights_regularizer=regularizer):
with slim.arg_scope([slim.separable_conv2d], with slim.arg_scope([slim.separable_conv2d],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment