Commit eccae449 authored by Zhichao Lu's avatar Zhichao Lu Committed by pkulzc
Browse files

Modify the ssd meta arch to allow the option of not adding an implicit background class.

PiperOrigin-RevId: 192529600
parent a60dd985
...@@ -71,7 +71,8 @@ FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = { ...@@ -71,7 +71,8 @@ FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {
} }
def build(model_config, is_training, add_summaries=True): def build(model_config, is_training, add_summaries=True,
add_background_class=True):
"""Builds a DetectionModel based on the model config.

Args:
...@@ -79,7 +80,10 @@ def build(model_config, is_training, add_summaries=True): ...@@ -79,7 +80,10 @@ def build(model_config, is_training, add_summaries=True):
DetectionModel.
is_training: True if this model is being built for training purposes.
add_summaries: Whether to add tensorflow summaries in the model graph.
add_background_class: Whether to add an implicit background class to one-hot
encodings of groundtruth labels. Set to false if using groundtruth labels
with an explicit background class or using multiclass scores instead of
truth in the case of distillation. Ignored in the case of faster_rcnn.
Returns:
DetectionModel based on the config.
...@@ -90,7 +94,8 @@ def build(model_config, is_training, add_summaries=True): ...@@ -90,7 +94,8 @@ def build(model_config, is_training, add_summaries=True):
raise ValueError('model_config not of type model_pb2.DetectionModel.') raise ValueError('model_config not of type model_pb2.DetectionModel.')
meta_architecture = model_config.WhichOneof('model') meta_architecture = model_config.WhichOneof('model')
if meta_architecture == 'ssd': if meta_architecture == 'ssd':
return _build_ssd_model(model_config.ssd, is_training, add_summaries) return _build_ssd_model(model_config.ssd, is_training, add_summaries,
add_background_class)
if meta_architecture == 'faster_rcnn': if meta_architecture == 'faster_rcnn':
return _build_faster_rcnn_model(model_config.faster_rcnn, is_training, return _build_faster_rcnn_model(model_config.faster_rcnn, is_training,
add_summaries) add_summaries)
...@@ -133,7 +138,8 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training, ...@@ -133,7 +138,8 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training,
override_base_feature_extractor_hyperparams) override_base_feature_extractor_hyperparams)
def _build_ssd_model(ssd_config, is_training, add_summaries): def _build_ssd_model(ssd_config, is_training, add_summaries,
add_background_class=True):
"""Builds an SSD detection model based on the model config.

Args:
...@@ -141,7 +147,10 @@ def _build_ssd_model(ssd_config, is_training, add_summaries): ...@@ -141,7 +147,10 @@ def _build_ssd_model(ssd_config, is_training, add_summaries):
SSDMetaArch.
is_training: True if this model is being built for training purposes.
add_summaries: Whether to add tf summaries in the model.
add_background_class: Whether to add an implicit background class to one-hot
encodings of groundtruth labels. Set to false if using groundtruth labels
with an explicit background class or using multiclass scores instead of
truth in the case of distillation.
Returns:
SSDMetaArch based on the config.
...@@ -198,7 +207,8 @@ def _build_ssd_model(ssd_config, is_training, add_summaries): ...@@ -198,7 +207,8 @@ def _build_ssd_model(ssd_config, is_training, add_summaries):
add_summaries=add_summaries, add_summaries=add_summaries,
normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize, normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
freeze_batchnorm=ssd_config.freeze_batchnorm, freeze_batchnorm=ssd_config.freeze_batchnorm,
inplace_batchnorm_update=ssd_config.inplace_batchnorm_update) inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
add_background_class=add_background_class)
def _build_faster_rcnn_feature_extractor( def _build_faster_rcnn_feature_extractor(
......
...@@ -69,7 +69,7 @@ class DetectionModel(object): ...@@ -69,7 +69,7 @@ class DetectionModel(object):
Args:
num_classes: number of classes. Note that num_classes *does not* include
background categories that might be implicitly predicted in various
implementations.
"""
self._num_classes = num_classes self._num_classes = num_classes
......
...@@ -138,7 +138,8 @@ class SSDMetaArch(model.DetectionModel): ...@@ -138,7 +138,8 @@ class SSDMetaArch(model.DetectionModel):
add_summaries=True, add_summaries=True,
normalize_loc_loss_by_codesize=False, normalize_loc_loss_by_codesize=False,
freeze_batchnorm=False, freeze_batchnorm=False,
inplace_batchnorm_update=False): inplace_batchnorm_update=False,
add_background_class=True):
"""SSDMetaArch Constructor.

TODO(rathodv,jonathanhuang): group NMS parameters + score converter into
...@@ -193,6 +194,10 @@ class SSDMetaArch(model.DetectionModel): ...@@ -193,6 +194,10 @@ class SSDMetaArch(model.DetectionModel):
values inplace. When this is false train op must add a control
dependency on tf.graphkeys.UPDATE_OPS collection in order to update
batch norm statistics.
add_background_class: Whether to add an implicit background class to
one-hot encodings of groundtruth labels. Set to false if using
groundtruth labels with an explicit background class or using multiclass
scores instead of truth in the case of distillation.
""" """
super(SSDMetaArch, self).__init__(num_classes=box_predictor.num_classes) super(SSDMetaArch, self).__init__(num_classes=box_predictor.num_classes)
self._is_training = is_training self._is_training = is_training
...@@ -210,6 +215,7 @@ class SSDMetaArch(model.DetectionModel): ...@@ -210,6 +215,7 @@ class SSDMetaArch(model.DetectionModel):
self._feature_extractor = feature_extractor self._feature_extractor = feature_extractor
self._matcher = matcher self._matcher = matcher
self._region_similarity_calculator = region_similarity_calculator self._region_similarity_calculator = region_similarity_calculator
self._add_background_class = add_background_class
# TODO(jonathanhuang): handle agnostic mode # TODO(jonathanhuang): handle agnostic mode
# weights # weights
...@@ -636,10 +642,14 @@ class SSDMetaArch(model.DetectionModel): ...@@ -636,10 +642,14 @@ class SSDMetaArch(model.DetectionModel):
groundtruth_boxlists = [ groundtruth_boxlists = [
box_list.BoxList(boxes) for boxes in groundtruth_boxes_list box_list.BoxList(boxes) for boxes in groundtruth_boxes_list
] ]
groundtruth_classes_with_background_list = [ if self._add_background_class:
tf.pad(one_hot_encoding, [[0, 0], [1, 0]], mode='CONSTANT') groundtruth_classes_with_background_list = [
for one_hot_encoding in groundtruth_classes_list tf.pad(one_hot_encoding, [[0, 0], [1, 0]], mode='CONSTANT')
] for one_hot_encoding in groundtruth_classes_list
]
else:
groundtruth_classes_with_background_list = groundtruth_classes_list
if groundtruth_keypoints_list is not None: if groundtruth_keypoints_list is not None:
for boxlist, keypoints in zip( for boxlist, keypoints in zip(
groundtruth_boxlists, groundtruth_keypoints_list): groundtruth_boxlists, groundtruth_keypoints_list):
......
...@@ -80,8 +80,10 @@ def _get_value_for_matching_key(dictionary, suffix): ...@@ -80,8 +80,10 @@ def _get_value_for_matching_key(dictionary, suffix):
class SsdMetaArchTest(test_case.TestCase): class SsdMetaArchTest(test_case.TestCase):
def _create_model(self, apply_hard_mining=True, def _create_model(self,
normalize_loc_loss_by_codesize=False): apply_hard_mining=True,
normalize_loc_loss_by_codesize=False,
add_background_class=True):
is_training = False is_training = False
num_classes = 1 num_classes = 1
mock_anchor_generator = MockAnchorGenerator2x2() mock_anchor_generator = MockAnchorGenerator2x2()
...@@ -117,14 +119,29 @@ class SsdMetaArchTest(test_case.TestCase): ...@@ -117,14 +119,29 @@ class SsdMetaArchTest(test_case.TestCase):
code_size = 4 code_size = 4
model = ssd_meta_arch.SSDMetaArch( model = ssd_meta_arch.SSDMetaArch(
is_training, mock_anchor_generator, mock_box_predictor, mock_box_coder, is_training,
fake_feature_extractor, mock_matcher, region_similarity_calculator, mock_anchor_generator,
encode_background_as_zeros, negative_class_weight, image_resizer_fn, mock_box_predictor,
non_max_suppression_fn, tf.identity, classification_loss, mock_box_coder,
localization_loss, classification_loss_weight, localization_loss_weight, fake_feature_extractor,
normalize_loss_by_num_matches, hard_example_miner, add_summaries=False, mock_matcher,
region_similarity_calculator,
encode_background_as_zeros,
negative_class_weight,
image_resizer_fn,
non_max_suppression_fn,
tf.identity,
classification_loss,
localization_loss,
classification_loss_weight,
localization_loss_weight,
normalize_loss_by_num_matches,
hard_example_miner,
add_summaries=False,
normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize, normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
freeze_batchnorm=False, inplace_batchnorm_update=False) freeze_batchnorm=False,
inplace_batchnorm_update=False,
add_background_class=add_background_class)
return model, num_classes, mock_anchor_generator.num_anchors(), code_size return model, num_classes, mock_anchor_generator.num_anchors(), code_size
def test_preprocess_preserves_shapes_with_dynamic_input_image(self): def test_preprocess_preserves_shapes_with_dynamic_input_image(self):
...@@ -365,6 +382,43 @@ class SsdMetaArchTest(test_case.TestCase): ...@@ -365,6 +382,43 @@ class SsdMetaArchTest(test_case.TestCase):
self.assertAllClose(localization_loss, expected_localization_loss) self.assertAllClose(localization_loss, expected_localization_loss)
self.assertAllClose(classification_loss, expected_classification_loss) self.assertAllClose(classification_loss, expected_classification_loss)
def test_loss_results_are_correct_without_add_background_class(self):
with tf.Graph().as_default():
_, num_classes, num_anchors, _ = self._create_model(
add_background_class=False)
def graph_fn(preprocessed_tensor, groundtruth_boxes1, groundtruth_boxes2,
groundtruth_classes1, groundtruth_classes2):
groundtruth_boxes_list = [groundtruth_boxes1, groundtruth_boxes2]
groundtruth_classes_list = [groundtruth_classes1, groundtruth_classes2]
model, _, _, _ = self._create_model(
apply_hard_mining=False, add_background_class=False)
model.provide_groundtruth(groundtruth_boxes_list,
groundtruth_classes_list)
prediction_dict = model.predict(
preprocessed_tensor, true_image_shapes=None)
loss_dict = model.loss(prediction_dict, true_image_shapes=None)
return (loss_dict['Loss/localization_loss'],
loss_dict['Loss/classification_loss'])
batch_size = 2
preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
groundtruth_boxes1 = np.array([[0, 0, .5, .5]], dtype=np.float32)
groundtruth_boxes2 = np.array([[0, 0, .5, .5]], dtype=np.float32)
groundtruth_classes1 = np.array([[0, 1]], dtype=np.float32)
groundtruth_classes2 = np.array([[0, 1]], dtype=np.float32)
expected_localization_loss = 0.0
expected_classification_loss = (
batch_size * num_anchors * (num_classes + 1) * np.log(2.0))
(localization_loss, classification_loss) = self.execute(
graph_fn, [
preprocessed_input, groundtruth_boxes1, groundtruth_boxes2,
groundtruth_classes1, groundtruth_classes2
])
self.assertAllClose(localization_loss, expected_localization_loss)
self.assertAllClose(classification_loss, expected_classification_loss)
def test_restore_map_for_detection_ckpt(self): def test_restore_map_for_detection_ckpt(self):
model, _, _, _ = self._create_model() model, _, _, _ = self._create_model()
model.predict(tf.constant(np.array([[[0, 0], [1, 1]], [[1, 0], [0, 1]]], model.predict(tf.constant(np.array([[[0, 0], [1, 1]], [[1, 0], [0, 1]]],
......
...@@ -6,6 +6,7 @@ import "object_detection/protos/optimizer.proto"; ...@@ -6,6 +6,7 @@ import "object_detection/protos/optimizer.proto";
import "object_detection/protos/preprocessor.proto"; import "object_detection/protos/preprocessor.proto";
// Message for configuring DetectionModel training jobs (train.py).
// Next id: 25
message TrainConfig { message TrainConfig {
// Effective batch size to use for training. // Effective batch size to use for training.
// For TPU (or sync SGD jobs), the batch size per core (or GPU) is going to be // For TPU (or sync SGD jobs), the batch size per core (or GPU) is going to be
...@@ -80,6 +81,10 @@ message TrainConfig { ...@@ -80,6 +81,10 @@ message TrainConfig {
// Note that only Sigmoid classification losses should be used.
optional bool merge_multiple_label_boxes = 17 [default=false]; optional bool merge_multiple_label_boxes = 17 [default=false];
// If true, will use multiclass scores from object annotations as ground
// truth. Currently only compatible with annotated image inputs.
optional bool use_multiclass_scores = 24 [default = false];
// Whether to add regularization loss to `total_loss`. This is true by // Whether to add regularization loss to `total_loss`. This is true by
// default and adds all regularization losses defined in the model to // default and adds all regularization losses defined in the model to
// `total_loss`. // `total_loss`.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment