Unverified Commit b9ca525f authored by Jonathan Huang, committed by GitHub

Merge pull request #4232 from pkulzc/master

Release SSDLite MobileNet v2 COCO trained model, add quantized training and minor fixes.
parents 0270cac7 324d6dc3
...@@ -139,7 +139,8 @@ class SSDMetaArch(model.DetectionModel): ...@@ -139,7 +139,8 @@ class SSDMetaArch(model.DetectionModel):
normalize_loc_loss_by_codesize=False, normalize_loc_loss_by_codesize=False,
freeze_batchnorm=False, freeze_batchnorm=False,
inplace_batchnorm_update=False, inplace_batchnorm_update=False,
add_background_class=True): add_background_class=True,
random_example_sampler=None):
"""SSDMetaArch Constructor. """SSDMetaArch Constructor.
TODO(rathodv,jonathanhuang): group NMS parameters + score converter into TODO(rathodv,jonathanhuang): group NMS parameters + score converter into
...@@ -198,6 +199,12 @@ class SSDMetaArch(model.DetectionModel): ...@@ -198,6 +199,12 @@ class SSDMetaArch(model.DetectionModel):
one-hot encodings of groundtruth labels. Set to false if using one-hot encodings of groundtruth labels. Set to false if using
groundtruth labels with an explicit background class or using multiclass groundtruth labels with an explicit background class or using multiclass
scores instead of truth in the case of distillation. scores instead of truth in the case of distillation.
random_example_sampler: a BalancedPositiveNegativeSampler object that can
perform random example sampling when computing loss. If None, the random
sampling process is skipped. Note that the random example sampler and the
hard example miner can both be applied to the model; in that case, the
random sampler takes effect first and the hard example miner can only
process the randomly sampled examples.
""" """
super(SSDMetaArch, self).__init__(num_classes=box_predictor.num_classes) super(SSDMetaArch, self).__init__(num_classes=box_predictor.num_classes)
self._is_training = is_training self._is_training = is_training
...@@ -240,6 +247,8 @@ class SSDMetaArch(model.DetectionModel): ...@@ -240,6 +247,8 @@ class SSDMetaArch(model.DetectionModel):
self._normalize_loss_by_num_matches = normalize_loss_by_num_matches self._normalize_loss_by_num_matches = normalize_loss_by_num_matches
self._normalize_loc_loss_by_codesize = normalize_loc_loss_by_codesize self._normalize_loc_loss_by_codesize = normalize_loc_loss_by_codesize
self._hard_example_miner = hard_example_miner self._hard_example_miner = hard_example_miner
self._random_example_sampler = random_example_sampler
self._parallel_iterations = 16
self._image_resizer_fn = image_resizer_fn self._image_resizer_fn = image_resizer_fn
self._non_max_suppression_fn = non_max_suppression_fn self._non_max_suppression_fn = non_max_suppression_fn
...@@ -543,6 +552,20 @@ class SSDMetaArch(model.DetectionModel): ...@@ -543,6 +552,20 @@ class SSDMetaArch(model.DetectionModel):
if self._add_summaries: if self._add_summaries:
self._summarize_target_assignment( self._summarize_target_assignment(
self.groundtruth_lists(fields.BoxListFields.boxes), match_list) self.groundtruth_lists(fields.BoxListFields.boxes), match_list)
if self._random_example_sampler:
batch_sampled_indicator = tf.to_float(
shape_utils.static_or_dynamic_map_fn(
self._minibatch_subsample_fn,
[batch_cls_targets, batch_cls_weights],
dtype=tf.bool,
parallel_iterations=self._parallel_iterations,
back_prop=True))
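# Zero out loss weights for anchors that the random sampler did not select.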
batch_reg_weights = tf.multiply(batch_sampled_indicator,
batch_reg_weights)
batch_cls_weights = tf.multiply(batch_sampled_indicator,
batch_cls_weights)
location_losses = self._localization_loss( location_losses = self._localization_loss(
prediction_dict['box_encodings'], prediction_dict['box_encodings'],
batch_reg_targets, batch_reg_targets,
...@@ -593,6 +616,32 @@ class SSDMetaArch(model.DetectionModel): ...@@ -593,6 +616,32 @@ class SSDMetaArch(model.DetectionModel):
} }
return loss_dict return loss_dict
def _minibatch_subsample_fn(self, inputs):
"""Randomly samples anchors for one image.
Args:
inputs: a list of 2 inputs. First one is a tensor of shape [num_anchors,
num_classes] indicating targets assigned to each anchor. Second one
is a tensor of shape [num_anchors] indicating the class weight of each
anchor.
Returns:
batch_sampled_indicator: bool tensor of shape [num_anchors] indicating
whether the anchor should be selected for loss computation.
"""
cls_targets, cls_weights = inputs
if self._add_background_class:
# Set background_class bits to 0 so that the positives_indicator
# computation does not consider the background class.
background_class = tf.zeros_like(tf.slice(cls_targets, [0, 0], [-1, 1]))
regular_class = tf.slice(cls_targets, [0, 1], [-1, -1])
cls_targets = tf.concat([background_class, regular_class], 1)
positives_indicator = tf.reduce_sum(cls_targets, axis=1)
return self._random_example_sampler.subsample(
tf.cast(cls_weights, tf.bool),
batch_size=None,
labels=tf.cast(positives_indicator, tf.bool))
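For context, a minimal sketch of the sampler backing this method, using illustrative anchor counts (the import mirrors the test file below; TF1 graph mode, so a session is needed to materialize values):

import tensorflow as tf
from object_detection.core import balanced_positive_negative_sampler as sampler

# One image with 4 anchors: anchor 0 is positive, the other 3 are negative.
random_sampler = sampler.BalancedPositiveNegativeSampler(positive_fraction=0.5)
indicator = tf.constant([True, True, True, True])  # anchors eligible for sampling
labels = tf.constant([True, False, False, False])  # which anchors are positive
# batch_size=None keeps all positives and samples just enough negatives to
# reach the configured positive fraction, as in _minibatch_subsample_fn above.
sampled = random_sampler.subsample(indicator, batch_size=None, labels=labels)
with tf.Session() as sess:
  print(sess.run(sampled))  # e.g. [ True False  True False]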
def _summarize_anchor_classification_loss(self, class_ids, cls_losses): def _summarize_anchor_classification_loss(self, class_ids, cls_losses):
positive_indices = tf.where(tf.greater(class_ids, 0)) positive_indices = tf.where(tf.greater(class_ids, 0))
positive_anchor_cls_loss = tf.squeeze( positive_anchor_cls_loss = tf.squeeze(
...@@ -790,8 +839,8 @@ class SSDMetaArch(model.DetectionModel): ...@@ -790,8 +839,8 @@ class SSDMetaArch(model.DetectionModel):
classification checkpoint for initialization prior to training. classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'. Valid values: `detection`, `classification`. Default 'detection'.
load_all_detection_checkpoint_vars: whether to load all variables (when load_all_detection_checkpoint_vars: whether to load all variables (when
`from_detection_checkpoint` is True). If False, only variables within `fine_tune_checkpoint_type='detection'`). If False, only variables
the appropriate scopes are included. Default False. within the appropriate scopes are included. Default False.
Returns: Returns:
A dict mapping variable names (to load from a checkpoint) to variables in A dict mapping variable names (to load from a checkpoint) to variables in
......
...@@ -19,6 +19,7 @@ import numpy as np ...@@ -19,6 +19,7 @@ import numpy as np
import tensorflow as tf import tensorflow as tf
from object_detection.core import anchor_generator from object_detection.core import anchor_generator
from object_detection.core import balanced_positive_negative_sampler as sampler
from object_detection.core import box_list from object_detection.core import box_list
from object_detection.core import losses from object_detection.core import losses
from object_detection.core import post_processing from object_detection.core import post_processing
...@@ -83,7 +84,8 @@ class SsdMetaArchTest(test_case.TestCase): ...@@ -83,7 +84,8 @@ class SsdMetaArchTest(test_case.TestCase):
def _create_model(self, def _create_model(self,
apply_hard_mining=True, apply_hard_mining=True,
normalize_loc_loss_by_codesize=False, normalize_loc_loss_by_codesize=False,
add_background_class=True): add_background_class=True,
random_example_sampling=False):
is_training = False is_training = False
num_classes = 1 num_classes = 1
mock_anchor_generator = MockAnchorGenerator2x2() mock_anchor_generator = MockAnchorGenerator2x2()
...@@ -117,6 +119,11 @@ class SsdMetaArchTest(test_case.TestCase): ...@@ -117,6 +119,11 @@ class SsdMetaArchTest(test_case.TestCase):
num_hard_examples=None, num_hard_examples=None,
iou_threshold=1.0) iou_threshold=1.0)
random_example_sampler = None
if random_example_sampling:
random_example_sampler = sampler.BalancedPositiveNegativeSampler(
positive_fraction=0.5)
code_size = 4 code_size = 4
model = ssd_meta_arch.SSDMetaArch( model = ssd_meta_arch.SSDMetaArch(
is_training, is_training,
...@@ -141,7 +148,8 @@ class SsdMetaArchTest(test_case.TestCase): ...@@ -141,7 +148,8 @@ class SsdMetaArchTest(test_case.TestCase):
normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize, normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
freeze_batchnorm=False, freeze_batchnorm=False,
inplace_batchnorm_update=False, inplace_batchnorm_update=False,
add_background_class=add_background_class) add_background_class=add_background_class,
random_example_sampler=random_example_sampler)
return model, num_classes, mock_anchor_generator.num_anchors(), code_size return model, num_classes, mock_anchor_generator.num_anchors(), code_size
def test_preprocess_preserves_shapes_with_dynamic_input_image(self): def test_preprocess_preserves_shapes_with_dynamic_input_image(self):
...@@ -493,6 +501,47 @@ class SsdMetaArchTest(test_case.TestCase): ...@@ -493,6 +501,47 @@ class SsdMetaArchTest(test_case.TestCase):
self.assertIsInstance(var_map, dict) self.assertIsInstance(var_map, dict)
self.assertIn('another_variable', var_map) self.assertIn('another_variable', var_map)
def test_loss_results_are_correct_with_random_example_sampling(self):
with tf.Graph().as_default():
_, num_classes, num_anchors, _ = self._create_model(
random_example_sampling=True)
def graph_fn(preprocessed_tensor, groundtruth_boxes1, groundtruth_boxes2,
groundtruth_classes1, groundtruth_classes2):
groundtruth_boxes_list = [groundtruth_boxes1, groundtruth_boxes2]
groundtruth_classes_list = [groundtruth_classes1, groundtruth_classes2]
model, _, _, _ = self._create_model(random_example_sampling=True)
model.provide_groundtruth(groundtruth_boxes_list,
groundtruth_classes_list)
prediction_dict = model.predict(
preprocessed_tensor, true_image_shapes=None)
loss_dict = model.loss(prediction_dict, true_image_shapes=None)
return (_get_value_for_matching_key(loss_dict, 'Loss/localization_loss'),
_get_value_for_matching_key(loss_dict,
'Loss/classification_loss'))
batch_size = 2
preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
groundtruth_boxes1 = np.array([[0, 0, .5, .5]], dtype=np.float32)
groundtruth_boxes2 = np.array([[0, 0, .5, .5]], dtype=np.float32)
groundtruth_classes1 = np.array([[1]], dtype=np.float32)
groundtruth_classes2 = np.array([[1]], dtype=np.float32)
expected_localization_loss = 0.0
# Among the 4 anchors (1 positive, 3 negative) in this test, only 2 anchors
# are selected (1 positive, 1 negative) since the random sampler adjusts the
# number of negative examples to ensure the positive example fraction in the
# batch is 0.5.
expected_classification_loss = (
batch_size * 2 * (num_classes + 1) * np.log(2.0))
(localization_loss, classification_loss) = self.execute_cpu(
graph_fn, [
preprocessed_input, groundtruth_boxes1, groundtruth_boxes2,
groundtruth_classes1, groundtruth_classes2
])
self.assertAllClose(localization_loss, expected_localization_loss)
self.assertAllClose(classification_loss, expected_classification_loss)
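As a quick sanity check on the expected value above, assuming the mock box predictor in these tests emits all-zero logits so every sigmoid cross-entropy term contributes log(2):

import numpy as np

batch_size = 2       # two images in the test batch
sampled_anchors = 2  # 1 positive + 1 negative kept per image by the sampler
num_classes = 1      # plus one background column when add_background_class=True
expected = batch_size * sampled_anchors * (num_classes + 1) * np.log(2.0)
print(expected)  # 5.545..., i.e. 8 * log(2)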
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -202,8 +202,10 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -202,8 +202,10 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
return box_metrics return box_metrics
def get_estimator_eval_metric_ops(self, image_id, groundtruth_boxes, def get_estimator_eval_metric_ops(self, image_id, groundtruth_boxes,
groundtruth_classes, detection_boxes, groundtruth_classes,
detection_boxes,
detection_scores, detection_classes, detection_scores, detection_classes,
groundtruth_is_crowd=None,
num_gt_boxes_per_image=None, num_gt_boxes_per_image=None,
num_det_boxes_per_image=None): num_det_boxes_per_image=None):
"""Returns a dictionary of eval metric ops to use with `tf.EstimatorSpec`. """Returns a dictionary of eval metric ops to use with `tf.EstimatorSpec`.
...@@ -230,6 +232,9 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -230,6 +232,9 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
detection scores for the boxes. detection scores for the boxes.
detection_classes: int32 tensor of shape [batch, num_boxes] containing detection_classes: int32 tensor of shape [batch, num_boxes] containing
1-indexed detection classes for the boxes. 1-indexed detection classes for the boxes.
groundtruth_is_crowd: bool tensor of shape [batch, num_boxes] containing
is_crowd annotations. This field is optional, and if not passed, then
all boxes are treated as *not* is_crowd.
num_gt_boxes_per_image: int32 tensor of shape [batch] containing the num_gt_boxes_per_image: int32 tensor of shape [batch] containing the
number of groundtruth boxes per image. If None, will assume no padding number of groundtruth boxes per image. If None, will assume no padding
in groundtruth tensors. in groundtruth tensors.
...@@ -247,6 +252,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -247,6 +252,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
image_id_batched, image_id_batched,
groundtruth_boxes_batched, groundtruth_boxes_batched,
groundtruth_classes_batched, groundtruth_classes_batched,
groundtruth_is_crowd_batched,
num_gt_boxes_per_image, num_gt_boxes_per_image,
detection_boxes_batched, detection_boxes_batched,
detection_scores_batched, detection_scores_batched,
...@@ -254,27 +260,32 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -254,27 +260,32 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
num_det_boxes_per_image): num_det_boxes_per_image):
"""Update operation for adding batch of images to Coco evaluator.""" """Update operation for adding batch of images to Coco evaluator."""
for (image_id, gt_box, gt_class, num_gt_box, det_box, det_score, for (image_id, gt_box, gt_class, gt_is_crowd, num_gt_box, det_box,
det_class, num_det_box) in zip( det_score, det_class, num_det_box) in zip(
image_id_batched, groundtruth_boxes_batched, image_id_batched, groundtruth_boxes_batched,
groundtruth_classes_batched, num_gt_boxes_per_image, groundtruth_classes_batched, groundtruth_is_crowd_batched,
num_gt_boxes_per_image,
detection_boxes_batched, detection_scores_batched, detection_boxes_batched, detection_scores_batched,
detection_classes_batched, num_det_boxes_per_image): detection_classes_batched, num_det_boxes_per_image):
self.add_single_ground_truth_image_info( self.add_single_ground_truth_image_info(
image_id, image_id,
{'groundtruth_boxes': gt_box[:num_gt_box], {'groundtruth_boxes': gt_box[:num_gt_box],
'groundtruth_classes': gt_class[:num_gt_box]}) 'groundtruth_classes': gt_class[:num_gt_box],
'groundtruth_is_crowd': gt_is_crowd[:num_gt_box]})
self.add_single_detected_image_info( self.add_single_detected_image_info(
image_id, image_id,
{'detection_boxes': det_box[:num_det_box], {'detection_boxes': det_box[:num_det_box],
'detection_scores': det_score[:num_det_box], 'detection_scores': det_score[:num_det_box],
'detection_classes': det_class[:num_det_box]}) 'detection_classes': det_class[:num_det_box]})
if groundtruth_is_crowd is None:
groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)
if not image_id.shape.as_list(): if not image_id.shape.as_list():
# Apply a batch dimension to all tensors. # Apply a batch dimension to all tensors.
image_id = tf.expand_dims(image_id, 0) image_id = tf.expand_dims(image_id, 0)
groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0) groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0)
groundtruth_classes = tf.expand_dims(groundtruth_classes, 0) groundtruth_classes = tf.expand_dims(groundtruth_classes, 0)
groundtruth_is_crowd = tf.expand_dims(groundtruth_is_crowd, 0)
detection_boxes = tf.expand_dims(detection_boxes, 0) detection_boxes = tf.expand_dims(detection_boxes, 0)
detection_scores = tf.expand_dims(detection_scores, 0) detection_scores = tf.expand_dims(detection_scores, 0)
detection_classes = tf.expand_dims(detection_classes, 0) detection_classes = tf.expand_dims(detection_classes, 0)
...@@ -301,6 +312,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -301,6 +312,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
update_op = tf.py_func(update_op, [image_id, update_op = tf.py_func(update_op, [image_id,
groundtruth_boxes, groundtruth_boxes,
groundtruth_classes, groundtruth_classes,
groundtruth_is_crowd,
num_gt_boxes_per_image, num_gt_boxes_per_image,
detection_boxes, detection_boxes,
detection_scores, detection_scores,
...@@ -545,7 +557,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -545,7 +557,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
groundtruth_classes, groundtruth_classes,
groundtruth_instance_masks, groundtruth_instance_masks,
detection_scores, detection_classes, detection_scores, detection_classes,
detection_masks): detection_masks, groundtruth_is_crowd=None):
"""Returns a dictionary of eval metric ops to use with `tf.EstimatorSpec`. """Returns a dictionary of eval metric ops to use with `tf.EstimatorSpec`.
Note that once value_op is called, the detections and groundtruth added via Note that once value_op is called, the detections and groundtruth added via
...@@ -568,6 +580,9 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -568,6 +580,9 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
detection_masks: uint8 tensor array of shape detection_masks: uint8 tensor array of shape
[num_boxes, image_height, image_width] containing instance masks [num_boxes, image_height, image_width] containing instance masks
corresponding to the boxes. The elements of the array must be in {0, 1}. corresponding to the boxes. The elements of the array must be in {0, 1}.
groundtruth_is_crowd: bool tensor of shape [batch, num_boxes] containing
is_crowd annotations. This field is optional, and if not passed, then
all boxes are treated as *not* is_crowd.
Returns: Returns:
a dictionary of metric names to tuple of value_op and update_op that can a dictionary of metric names to tuple of value_op and update_op that can
...@@ -580,6 +595,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -580,6 +595,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
groundtruth_boxes, groundtruth_boxes,
groundtruth_classes, groundtruth_classes,
groundtruth_instance_masks, groundtruth_instance_masks,
groundtruth_is_crowd,
detection_scores, detection_scores,
detection_classes, detection_classes,
detection_masks): detection_masks):
...@@ -587,17 +603,21 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -587,17 +603,21 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
image_id, image_id,
{'groundtruth_boxes': groundtruth_boxes, {'groundtruth_boxes': groundtruth_boxes,
'groundtruth_classes': groundtruth_classes, 'groundtruth_classes': groundtruth_classes,
'groundtruth_instance_masks': groundtruth_instance_masks}) 'groundtruth_instance_masks': groundtruth_instance_masks,
'groundtruth_is_crowd': groundtruth_is_crowd})
self.add_single_detected_image_info( self.add_single_detected_image_info(
image_id, image_id,
{'detection_scores': detection_scores, {'detection_scores': detection_scores,
'detection_classes': detection_classes, 'detection_classes': detection_classes,
'detection_masks': detection_masks}) 'detection_masks': detection_masks})
if groundtruth_is_crowd is None:
groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)
update_op = tf.py_func(update_op, [image_id, update_op = tf.py_func(update_op, [image_id,
groundtruth_boxes, groundtruth_boxes,
groundtruth_classes, groundtruth_classes,
groundtruth_instance_masks, groundtruth_instance_masks,
groundtruth_is_crowd,
detection_scores, detection_scores,
detection_classes, detection_classes,
detection_masks], []) detection_masks], [])
......
...@@ -492,8 +492,8 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase): ...@@ -492,8 +492,8 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
detection_boxes, detection_boxes,
detection_scores, detection_scores,
detection_classes, detection_classes,
num_gt_boxes_per_image, num_gt_boxes_per_image=num_gt_boxes_per_image,
num_det_boxes_per_image) num_det_boxes_per_image=num_det_boxes_per_image)
_, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP'] _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP']
......
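A hedged sketch of the updated call with the new optional argument (category list and tensor values are illustrative, not taken from this diff); as the expand_dims logic above shows, unbatched single-image tensors are accepted and batched internally:

import tensorflow as tf
from object_detection.metrics import coco_evaluation

evaluator = coco_evaluation.CocoDetectionEvaluator(
    categories=[{'id': 1, 'name': 'object'}])
eval_metric_ops = evaluator.get_estimator_eval_metric_ops(
    image_id=tf.constant('image1'),
    groundtruth_boxes=tf.constant([[0., 0., .5, .5]]),
    groundtruth_classes=tf.constant([1]),
    detection_boxes=tf.constant([[0., 0., .5, .5]]),
    detection_scores=tf.constant([.9]),
    detection_classes=tf.constant([1]),
    # Optional; when omitted, every box is treated as *not* is_crowd.
    groundtruth_is_crowd=tf.constant([False]))
_, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP']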
...@@ -48,8 +48,8 @@ MODEL_BUILD_UTIL_MAP = { ...@@ -48,8 +48,8 @@ MODEL_BUILD_UTIL_MAP = {
} }
def _get_groundtruth_data(detection_model, class_agnostic): def _prepare_groundtruth_for_eval(detection_model, class_agnostic):
"""Extracts groundtruth data from detection_model. """Extracts groundtruth data from detection_model and prepares it for eval.
Args: Args:
detection_model: A `DetectionModel` object. detection_model: A `DetectionModel` object.
...@@ -63,6 +63,8 @@ def _get_groundtruth_data(detection_model, class_agnostic): ...@@ -63,6 +63,8 @@ def _get_groundtruth_data(detection_model, class_agnostic):
'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes. 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
'groundtruth_masks': 3D float32 tensor of instance masks (if provided in 'groundtruth_masks': 3D float32 tensor of instance masks (if provided in
groundtruth) groundtruth)
'groundtruth_is_crowd': [num_boxes] bool tensor indicating is_crowd
annotations (if provided in groundtruth).
class_agnostic: Boolean indicating whether detections are class agnostic. class_agnostic: Boolean indicating whether detections are class agnostic.
""" """
input_data_fields = fields.InputDataFields() input_data_fields = fields.InputDataFields()
...@@ -86,6 +88,9 @@ def _get_groundtruth_data(detection_model, class_agnostic): ...@@ -86,6 +88,9 @@ def _get_groundtruth_data(detection_model, class_agnostic):
if detection_model.groundtruth_has_field(fields.BoxListFields.masks): if detection_model.groundtruth_has_field(fields.BoxListFields.masks):
groundtruth[input_data_fields.groundtruth_instance_masks] = ( groundtruth[input_data_fields.groundtruth_instance_masks] = (
detection_model.groundtruth_lists(fields.BoxListFields.masks)[0]) detection_model.groundtruth_lists(fields.BoxListFields.masks)[0])
if detection_model.groundtruth_has_field(fields.BoxListFields.is_crowd):
groundtruth[input_data_fields.groundtruth_is_crowd] = (
detection_model.groundtruth_lists(fields.BoxListFields.is_crowd)[0])
return groundtruth return groundtruth
...@@ -224,13 +229,16 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): ...@@ -224,13 +229,16 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
gt_keypoints_list = None gt_keypoints_list = None
if fields.InputDataFields.groundtruth_keypoints in labels: if fields.InputDataFields.groundtruth_keypoints in labels:
gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints] gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints]
if fields.InputDataFields.groundtruth_is_crowd in labels:
gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd]
detection_model.provide_groundtruth( detection_model.provide_groundtruth(
groundtruth_boxes_list=gt_boxes_list, groundtruth_boxes_list=gt_boxes_list,
groundtruth_classes_list=gt_classes_list, groundtruth_classes_list=gt_classes_list,
groundtruth_masks_list=gt_masks_list, groundtruth_masks_list=gt_masks_list,
groundtruth_keypoints_list=gt_keypoints_list, groundtruth_keypoints_list=gt_keypoints_list,
groundtruth_weights_list=labels[ groundtruth_weights_list=labels[
fields.InputDataFields.groundtruth_weights]) fields.InputDataFields.groundtruth_weights],
groundtruth_is_crowd_list=gt_is_crowd_list)
preprocessed_images = features[fields.InputDataFields.image] preprocessed_images = features[fields.InputDataFields.image]
prediction_dict = detection_model.predict( prediction_dict = detection_model.predict(
...@@ -328,7 +336,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): ...@@ -328,7 +336,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
if mode == tf.estimator.ModeKeys.EVAL: if mode == tf.estimator.ModeKeys.EVAL:
class_agnostic = (fields.DetectionResultFields.detection_classes class_agnostic = (fields.DetectionResultFields.detection_classes
not in detections) not in detections)
groundtruth = _get_groundtruth_data(detection_model, class_agnostic) groundtruth = _prepare_groundtruth_for_eval(
detection_model, class_agnostic)
use_original_images = fields.InputDataFields.original_image in features use_original_images = fields.InputDataFields.original_image in features
eval_images = ( eval_images = (
features[fields.InputDataFields.original_image] if use_original_images features[fields.InputDataFields.original_image] if use_original_images
...@@ -339,7 +348,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): ...@@ -339,7 +348,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
detections, detections,
groundtruth, groundtruth,
class_agnostic=class_agnostic, class_agnostic=class_agnostic,
scale_to_absolute=False) scale_to_absolute=True)
if class_agnostic: if class_agnostic:
category_index = label_map_util.create_class_agnostic_category_index() category_index = label_map_util.create_class_agnostic_category_index()
...@@ -360,8 +369,10 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): ...@@ -360,8 +369,10 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
if not eval_metrics: if not eval_metrics:
eval_metrics = ['coco_detection_metrics'] eval_metrics = ['coco_detection_metrics']
eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators( eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
eval_metrics, category_index.values(), eval_dict, eval_metrics,
include_metrics_per_category=False) category_index.values(),
eval_dict,
include_metrics_per_category=eval_config.include_metrics_per_category)
for loss_key, loss_tensor in iter(losses_dict.items()): for loss_key, loss_tensor in iter(losses_dict.items()):
eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor) eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
for var in optimizer_summary_vars: for var in optimizer_summary_vars:
...@@ -528,6 +539,7 @@ def create_train_and_eval_specs(train_input_fn, ...@@ -528,6 +539,7 @@ def create_train_and_eval_specs(train_input_fn,
train_steps, train_steps,
eval_steps, eval_steps,
eval_on_train_data=False, eval_on_train_data=False,
eval_on_train_steps=None,
final_exporter_name='Servo', final_exporter_name='Servo',
eval_spec_name='eval'): eval_spec_name='eval'):
"""Creates a `TrainSpec` and `EvalSpec`s. """Creates a `TrainSpec` and `EvalSpec`s.
...@@ -542,6 +554,8 @@ def create_train_and_eval_specs(train_input_fn, ...@@ -542,6 +554,8 @@ def create_train_and_eval_specs(train_input_fn,
eval_steps: Number of eval steps. eval_steps: Number of eval steps.
eval_on_train_data: Whether to evaluate model on training data. Default is eval_on_train_data: Whether to evaluate model on training data. Default is
False. False.
eval_on_train_steps: Number of eval steps for training data. If not given,
uses eval_steps.
final_exporter_name: String name given to `FinalExporter`. final_exporter_name: String name given to `FinalExporter`.
eval_spec_name: String name given to main `EvalSpec`. eval_spec_name: String name given to main `EvalSpec`.
...@@ -569,7 +583,7 @@ def create_train_and_eval_specs(train_input_fn, ...@@ -569,7 +583,7 @@ def create_train_and_eval_specs(train_input_fn,
eval_specs.append( eval_specs.append(
tf.estimator.EvalSpec( tf.estimator.EvalSpec(
name='eval_on_train', input_fn=eval_on_train_input_fn, name='eval_on_train', input_fn=eval_on_train_input_fn,
steps=eval_steps)) steps=eval_on_train_steps or eval_steps))
return train_spec, eval_specs return train_spec, eval_specs
......
...@@ -253,6 +253,7 @@ class ModelLibTest(tf.test.TestCase): ...@@ -253,6 +253,7 @@ class ModelLibTest(tf.test.TestCase):
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST) pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
train_steps = 20 train_steps = 20
eval_steps = 10 eval_steps = 10
eval_on_train_steps = 15
train_and_eval_dict = model_lib.create_estimator_and_inputs( train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config, run_config,
hparams, hparams,
...@@ -274,6 +275,7 @@ class ModelLibTest(tf.test.TestCase): ...@@ -274,6 +275,7 @@ class ModelLibTest(tf.test.TestCase):
train_steps, train_steps,
eval_steps, eval_steps,
eval_on_train_data=True, eval_on_train_data=True,
eval_on_train_steps=eval_on_train_steps,
final_exporter_name='exporter', final_exporter_name='exporter',
eval_spec_name='holdout') eval_spec_name='holdout')
self.assertEqual(train_steps, train_spec.max_steps) self.assertEqual(train_steps, train_spec.max_steps)
...@@ -281,7 +283,7 @@ class ModelLibTest(tf.test.TestCase): ...@@ -281,7 +283,7 @@ class ModelLibTest(tf.test.TestCase):
self.assertEqual(eval_steps, eval_specs[0].steps) self.assertEqual(eval_steps, eval_specs[0].steps)
self.assertEqual('holdout', eval_specs[0].name) self.assertEqual('holdout', eval_specs[0].name)
self.assertEqual('exporter', eval_specs[0].exporters[0].name) self.assertEqual('exporter', eval_specs[0].exporters[0].name)
self.assertEqual(eval_steps, eval_specs[1].steps) self.assertEqual(eval_on_train_steps, eval_specs[1].steps)
self.assertEqual('eval_on_train', eval_specs[1].name) self.assertEqual('eval_on_train', eval_specs[1].name)
def test_experiment(self): def test_experiment(self):
......
...@@ -185,8 +185,9 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None): ...@@ -185,8 +185,9 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None):
See https://arxiv.org/abs/1612.03144 for details. See https://arxiv.org/abs/1612.03144 for details.
Args: Args:
image_features: list of image feature tensors. Spatial resolutions of image_features: list of tuples of (tensor_name, image_feature_tensor).
successive tensors must reduce exactly by a factor of 2. Spatial resolutions of successive tensors must reduce exactly by a factor
of 2.
depth: depth of output feature maps. depth: depth of output feature maps.
scope: A scope name to wrap this op under. scope: A scope name to wrap this op under.
...@@ -194,32 +195,31 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None): ...@@ -194,32 +195,31 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None):
feature_maps: an OrderedDict mapping keys (feature map names) to feature_maps: an OrderedDict mapping keys (feature map names) to
tensors where each tensor has shape [batch, height_i, width_i, depth_i]. tensors where each tensor has shape [batch, height_i, width_i, depth_i].
""" """
with tf.variable_scope( with tf.name_scope(scope, 'top_down'):
scope, 'top_down', image_features):
num_levels = len(image_features) num_levels = len(image_features)
output_feature_maps_list = [] output_feature_maps_list = []
output_feature_map_keys = [] output_feature_map_keys = []
with slim.arg_scope( with slim.arg_scope(
[slim.conv2d], [slim.conv2d], padding='SAME', stride=1):
activation_fn=None, normalizer_fn=None, padding='SAME', stride=1):
top_down = slim.conv2d( top_down = slim.conv2d(
image_features[-1], image_features[-1][1],
depth, [1, 1], scope='projection_%d' % num_levels) depth, [1, 1], activation_fn=None, normalizer_fn=None,
scope='projection_%d' % num_levels)
output_feature_maps_list.append(top_down) output_feature_maps_list.append(top_down)
output_feature_map_keys.append( output_feature_map_keys.append(
'top_down_feature_map_%d' % (num_levels - 1)) 'top_down_%s' % image_features[-1][0])
for level in reversed(range(num_levels - 1)): for level in reversed(range(num_levels - 1)):
top_down = ops.nearest_neighbor_upsampling(top_down, 2) top_down = ops.nearest_neighbor_upsampling(top_down, 2)
residual = slim.conv2d( residual = slim.conv2d(
image_features[level], depth, [1, 1], image_features[level][1], depth, [1, 1],
activation_fn=None, normalizer_fn=None,
scope='projection_%d' % (level + 1)) scope='projection_%d' % (level + 1))
top_down = 0.5 * top_down + 0.5 * residual top_down += residual
output_feature_maps_list.append(slim.conv2d( output_feature_maps_list.append(slim.conv2d(
top_down, top_down,
depth, [3, 3], depth, [3, 3],
activation_fn=None,
scope='smoothing_%d' % (level + 1))) scope='smoothing_%d' % (level + 1)))
output_feature_map_keys.append('top_down_feature_map_%d' % level) output_feature_map_keys.append('top_down_%s' % image_features[level][0])
return collections.OrderedDict( return collections.OrderedDict(
reversed(zip(output_feature_map_keys, output_feature_maps_list))) reversed(zip(output_feature_map_keys, output_feature_maps_list)))
...@@ -138,19 +138,19 @@ class FPNFeatureMapGeneratorTest(tf.test.TestCase): ...@@ -138,19 +138,19 @@ class FPNFeatureMapGeneratorTest(tf.test.TestCase):
def test_get_expected_feature_map_shapes(self): def test_get_expected_feature_map_shapes(self):
image_features = [ image_features = [
tf.random_uniform([4, 8, 8, 256], dtype=tf.float32), ('block2', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
tf.random_uniform([4, 4, 4, 256], dtype=tf.float32), ('block3', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)),
tf.random_uniform([4, 2, 2, 256], dtype=tf.float32), ('block4', tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)),
tf.random_uniform([4, 1, 1, 256], dtype=tf.float32), ('block5', tf.random_uniform([4, 1, 1, 256], dtype=tf.float32))
] ]
feature_maps = feature_map_generators.fpn_top_down_feature_maps( feature_maps = feature_map_generators.fpn_top_down_feature_maps(
image_features=image_features, depth=128) image_features=image_features, depth=128)
expected_feature_map_shapes = { expected_feature_map_shapes = {
'top_down_feature_map_0': (4, 8, 8, 128), 'top_down_block2': (4, 8, 8, 128),
'top_down_feature_map_1': (4, 4, 4, 128), 'top_down_block3': (4, 4, 4, 128),
'top_down_feature_map_2': (4, 2, 2, 128), 'top_down_block4': (4, 2, 2, 128),
'top_down_feature_map_3': (4, 1, 1, 128) 'top_down_block5': (4, 1, 1, 128)
} }
init_op = tf.global_variables_initializer() init_op = tf.global_variables_initializer()
......
...@@ -148,9 +148,15 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -148,9 +148,15 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
store_non_strided_activations=True, store_non_strided_activations=True,
scope=scope) scope=scope)
image_features = self._filter_features(image_features) image_features = self._filter_features(image_features)
last_feature_map = image_features['block4']
with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights):
with slim.arg_scope(self._conv_hyperparams_fn()): with slim.arg_scope(self._conv_hyperparams_fn()):
with tf.variable_scope(self._fpn_scope_name,
reuse=self._reuse_weights):
fpn_features = feature_map_generators.fpn_top_down_feature_maps(
[(key, image_features[key])
for key in ['block2', 'block3', 'block4']],
depth=256)
last_feature_map = fpn_features['top_down_block4']
coarse_features = {}
for i in range(5, 7): for i in range(5, 7):
last_feature_map = slim.conv2d( last_feature_map = slim.conv2d(
last_feature_map, last_feature_map,
...@@ -158,16 +164,13 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -158,16 +164,13 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
kernel_size=[3, 3], kernel_size=[3, 3],
stride=2, stride=2,
padding='SAME', padding='SAME',
scope='block{}'.format(i)) scope='bottom_up_block{}'.format(i))
image_features['bottomup_{}'.format(i)] = last_feature_map coarse_features['bottom_up_block{}'.format(i)] = last_feature_map
feature_maps = feature_map_generators.fpn_top_down_feature_maps( return [fpn_features['top_down_block2'],
[ fpn_features['top_down_block3'],
image_features[key] for key in fpn_features['top_down_block4'],
['block2', 'block3', 'block4', 'bottomup_5', 'bottomup_6'] coarse_features['bottom_up_block5'],
], coarse_features['bottom_up_block6']]
depth=256,
scope='top_down_features')
return feature_maps.values()
class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor): class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
......
...@@ -72,4 +72,7 @@ message EvalConfig { ...@@ -72,4 +72,7 @@ message EvalConfig {
// Whether to retain original images (i.e. not pre-processed) in the tensor // Whether to retain original images (i.e. not pre-processed) in the tensor
// dictionary, so that they can be displayed in Tensorboard. // dictionary, so that they can be displayed in Tensorboard.
optional bool retain_original_images = 23 [default=true]; optional bool retain_original_images = 23 [default=true];
// If True, additionally include per-category metrics.
optional bool include_metrics_per_category = 24 [default=false];
} }
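For illustration, a sketch of setting the new flag from Python via text proto parsing (the eval_pb2 module name follows the standard protoc convention and is assumed here):

from google.protobuf import text_format
from object_detection.protos import eval_pb2

# Request per-category COCO metrics in addition to the aggregate ones.
eval_config = text_format.Parse(
    """
    num_examples: 8000
    include_metrics_per_category: true
    """, eval_pb2.EvalConfig())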
syntax = "proto2";
package object_detection.protos;
// Message to configure graph rewriter for the tf graph.
message GraphRewriter {
optional Quantization quantization = 1;
}
// Message for quantization options. See
// tensorflow/contrib/quantize/python/quantize.py for details.
message Quantization {
// Number of steps to delay before quantization takes effect during training.
optional int32 delay = 1 [default = 500000];
// Number of bits to use for quantizing weights.
// Only 8 bit is supported for now.
optional int32 weight_bits = 2 [default = 8];
// Number of bits to use for quantizing activations.
// Only 8 bit is supported for now.
optional int32 activation_bits = 3 [default = 8];
}
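To show how this message composes, a sketch of parsing a quantization rewriter config from Python (module name per the usual protoc convention; the delay value is illustrative, not a recommendation):

from google.protobuf import text_format
from object_detection.protos import graph_rewriter_pb2

# Equivalent to adding a graph_rewriter { quantization { ... } } block to a
# TrainEvalPipelineConfig (see the pipeline.proto change below).
rewriter_config = text_format.Parse(
    """
    quantization {
      delay: 48000
      weight_bits: 8
      activation_bits: 8
    }
    """, graph_rewriter_pb2.GraphRewriter())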
...@@ -38,6 +38,10 @@ message Hyperparams { ...@@ -38,6 +38,10 @@ message Hyperparams {
// BatchNorm hyperparameters. If this parameter is NOT set then BatchNorm is // BatchNorm hyperparameters. If this parameter is NOT set then BatchNorm is
// not applied! // not applied!
optional BatchNorm batch_norm = 5; optional BatchNorm batch_norm = 5;
// Whether depthwise convolutions should be regularized. If this parameter is
// NOT set then the conv hyperparams will default to the parent scope.
optional bool regularize_depthwise = 6 [default = false];
} }
// Proto with one-of field for regularizers. // Proto with one-of field for regularizers.
......
...@@ -20,6 +20,9 @@ message Loss { ...@@ -20,6 +20,9 @@ message Loss {
// Localization loss weight. // Localization loss weight.
optional float localization_weight = 5 [default=1.0]; optional float localization_weight = 5 [default=1.0];
// If set, applies random example sampling when computing the loss.
optional RandomExampleSampler random_example_sampler = 6;
} }
// Configuration for bounding box localization loss function. // Configuration for bounding box localization loss function.
...@@ -121,7 +124,7 @@ message BootstrappedSigmoidClassificationLoss { ...@@ -121,7 +124,7 @@ message BootstrappedSigmoidClassificationLoss {
optional bool anchorwise_output = 3 [default=false]; optional bool anchorwise_output = 3 [default=false];
} }
// Configuation for hard example miner. // Configuration for hard example miner.
message HardExampleMiner { message HardExampleMiner {
// Maximum number of hard examples to be selected per image (prior to // Maximum number of hard examples to be selected per image (prior to
// enforcing max negative to positive ratio constraint). If set to 0, // enforcing max negative to positive ratio constraint). If set to 0,
...@@ -152,3 +155,10 @@ message HardExampleMiner { ...@@ -152,3 +155,10 @@ message HardExampleMiner {
// detection per image. // detection per image.
optional int32 min_negatives_per_image = 5 [default=0]; optional int32 min_negatives_per_image = 5 [default=0];
} }
// Configuration for random example sampler.
message RandomExampleSampler {
// The desired fraction of positive samples in the batch when applying
// random example sampling.
optional float positive_sample_fraction = 1 [default = 0.01];
}
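A sketch of a loss config that enables random example sampling at the 0.5 fraction used in the unit test above (losses_pb2 module name assumed per protoc convention; the component losses mirror the SSDLite configs in this PR):

from google.protobuf import text_format
from object_detection.protos import losses_pb2

loss_config = text_format.Parse(
    """
    classification_loss { weighted_sigmoid {} }
    localization_loss { weighted_smooth_l1 {} }
    random_example_sampler { positive_sample_fraction: 0.5 }
    """, losses_pb2.Loss())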
...@@ -5,4 +5,6 @@ package object_detection.protos; ...@@ -5,4 +5,6 @@ package object_detection.protos;
// Configuration proto for MeanStddevBoxCoder. See // Configuration proto for MeanStddevBoxCoder. See
// box_coders/mean_stddev_box_coder.py for details. // box_coders/mean_stddev_box_coder.py for details.
message MeanStddevBoxCoder { message MeanStddevBoxCoder {
// The standard deviation used to encode and decode boxes.
optional float stddev = 1 [default=0.01];
} }
...@@ -3,6 +3,7 @@ syntax = "proto2"; ...@@ -3,6 +3,7 @@ syntax = "proto2";
package object_detection.protos; package object_detection.protos;
import "object_detection/protos/eval.proto"; import "object_detection/protos/eval.proto";
import "object_detection/protos/graph_rewriter.proto";
import "object_detection/protos/input_reader.proto"; import "object_detection/protos/input_reader.proto";
import "object_detection/protos/model.proto"; import "object_detection/protos/model.proto";
import "object_detection/protos/train.proto"; import "object_detection/protos/train.proto";
...@@ -15,5 +16,6 @@ message TrainEvalPipelineConfig { ...@@ -15,5 +16,6 @@ message TrainEvalPipelineConfig {
optional InputReader train_input_reader = 3; optional InputReader train_input_reader = 3;
optional EvalConfig eval_config = 4; optional EvalConfig eval_config = 4;
optional InputReader eval_input_reader = 5; optional InputReader eval_input_reader = 5;
optional GraphRewriter graph_rewriter = 6;
extensions 1000 to max; extensions 1000 to max;
} }
...@@ -53,8 +53,7 @@ model { ...@@ -53,8 +53,7 @@ model {
num_layers_before_predictor: 0 num_layers_before_predictor: 0
use_dropout: false use_dropout: false
dropout_keep_probability: 0.8 dropout_keep_probability: 0.8
kernel_size: 3 kernel_size: 1
use_depthwise: true
box_code_size: 4 box_code_size: 4
apply_sigmoid_to_scores: false apply_sigmoid_to_scores: false
conv_hyperparams { conv_hyperparams {
...@@ -84,7 +83,6 @@ model { ...@@ -84,7 +83,6 @@ model {
type: 'ssd_mobilenet_v2' type: 'ssd_mobilenet_v2'
min_depth: 16 min_depth: 16
depth_multiplier: 1.0 depth_multiplier: 1.0
use_depthwise: true
conv_hyperparams { conv_hyperparams {
activation: RELU_6, activation: RELU_6,
regularizer { regularizer {
......
# SSDLite with Mobilenet v1 configuration for MSCOCO Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
ssd {
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
}
}
image_resizer {
fixed_shape_resizer {
height: 300
width: 300
}
}
box_predictor {
convolutional_box_predictor {
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: false
dropout_keep_probability: 0.8
kernel_size: 3
use_depthwise: true
box_code_size: 4
apply_sigmoid_to_scores: false
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.9997,
epsilon: 0.001,
}
}
}
}
feature_extractor {
type: 'ssd_mobilenet_v1'
min_depth: 16
depth_multiplier: 1.0
use_depthwise: true
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.9997,
epsilon: 0.001,
}
}
}
loss {
classification_loss {
weighted_sigmoid {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
hard_example_miner {
num_hard_examples: 3000
iou_threshold: 0.99
loss_type: CLASSIFICATION
max_negatives_per_positive: 3
min_negatives_per_image: 0
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
batch_size: 24
optimizer {
rms_prop_optimizer: {
learning_rate: {
exponential_decay_learning_rate {
initial_learning_rate: 0.004
decay_steps: 800720
decay_factor: 0.95
}
}
momentum_optimizer_value: 0.9
decay: 0.9
epsilon: 1.0
}
}
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
from_detection_checkpoint: true
# Note: The below line limits the training process to 200K steps, which we
# empirically found to be sufficient to train on the COCO dataset. This
# effectively bypasses the learning rate schedule (the learning rate will
# never decay). Remove the below line to train indefinitely.
num_steps: 200000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
ssd_random_crop {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
}
eval_config: {
num_examples: 8000
# Note: The below line limits the evaluation process to 10 evaluations.
# Remove the below line to evaluate indefinitely.
max_evals: 10
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
shuffle: false
num_readers: 1
}
# SSDLite with Mobilenet v2 configuration for MSCOCO Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
ssd {
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
}
}
image_resizer {
fixed_shape_resizer {
height: 300
width: 300
}
}
box_predictor {
convolutional_box_predictor {
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: false
dropout_keep_probability: 0.8
kernel_size: 3
use_depthwise: true
box_code_size: 4
apply_sigmoid_to_scores: false
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.9997,
epsilon: 0.001,
}
}
}
}
feature_extractor {
type: 'ssd_mobilenet_v2'
min_depth: 16
depth_multiplier: 1.0
use_depthwise: true
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.9997,
epsilon: 0.001,
}
}
}
loss {
classification_loss {
weighted_sigmoid {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
hard_example_miner {
num_hard_examples: 3000
iou_threshold: 0.99
loss_type: CLASSIFICATION
max_negatives_per_positive: 3
min_negatives_per_image: 3
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
batch_size: 24
optimizer {
rms_prop_optimizer: {
learning_rate: {
exponential_decay_learning_rate {
initial_learning_rate: 0.004
decay_steps: 800720
decay_factor: 0.95
}
}
momentum_optimizer_value: 0.9
decay: 0.9
epsilon: 1.0
}
}
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
fine_tune_checkpoint_type: "detection"
# Note: The below line limits the training process to 200K steps, which we
# empirically found to be sufficient to train on the COCO dataset. This
# effectively bypasses the learning rate schedule (the learning rate will
# never decay). Remove the below line to train indefinitely.
num_steps: 200000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
ssd_random_crop {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
}
eval_config: {
num_examples: 8000
# Note: The below line limits the evaluation process to 10 evaluations.
# Remove the below line to evaluate indefinitely.
max_evals: 10
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
shuffle: false
num_readers: 1
}
\ No newline at end of file
...@@ -48,6 +48,7 @@ import tensorflow as tf ...@@ -48,6 +48,7 @@ import tensorflow as tf
from object_detection import trainer from object_detection import trainer
from object_detection.builders import dataset_builder from object_detection.builders import dataset_builder
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import model_builder from object_detection.builders import model_builder
from object_detection.utils import config_util from object_detection.utils import config_util
from object_detection.utils import dataset_util from object_detection.utils import dataset_util
...@@ -158,9 +159,25 @@ def main(_): ...@@ -158,9 +159,25 @@ def main(_):
is_chief = (task_info.type == 'master') is_chief = (task_info.type == 'master')
master = server.target master = server.target
trainer.train(create_input_dict_fn, model_fn, train_config, master, task, graph_rewriter_fn = None
FLAGS.num_clones, worker_replicas, FLAGS.clone_on_cpu, ps_tasks, if 'graph_rewriter_config' in configs:
worker_job_name, is_chief, FLAGS.train_dir) graph_rewriter_fn = graph_rewriter_builder.build(
configs['graph_rewriter_config'], is_training=True)
trainer.train(
create_input_dict_fn,
model_fn,
train_config,
master,
task,
FLAGS.num_clones,
worker_replicas,
FLAGS.clone_on_cpu,
ps_tasks,
worker_job_name,
is_chief,
FLAGS.train_dir,
graph_hook_fn=graph_rewriter_fn)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -231,10 +231,10 @@ def train(create_tensor_dict_fn, ...@@ -231,10 +231,10 @@ def train(create_tensor_dict_fn,
worker_job_name: Name of the worker job. worker_job_name: Name of the worker job.
is_chief: Whether this replica is the chief replica. is_chief: Whether this replica is the chief replica.
train_dir: Directory to write checkpoints and training summaries to. train_dir: Directory to write checkpoints and training summaries to.
graph_hook_fn: Optional function that is called after the training graph is graph_hook_fn: Optional function that is called after the inference graph is
completely built. This is helpful to perform additional changes to the built (before optimization). This is helpful to perform additional changes
training graph such as optimizing batchnorm. The function should modify to the training graph such as adding FakeQuant ops. The function should
the default graph. modify the default graph.
""" """
detection_model = create_model_fn() detection_model = create_model_fn()
...@@ -275,6 +275,10 @@ def train(create_tensor_dict_fn, ...@@ -275,6 +275,10 @@ def train(create_tensor_dict_fn,
clones = model_deploy.create_clones(deploy_config, model_fn, [input_queue]) clones = model_deploy.create_clones(deploy_config, model_fn, [input_queue])
first_clone_scope = clones[0].scope first_clone_scope = clones[0].scope
if graph_hook_fn:
with tf.device(deploy_config.variables_device()):
graph_hook_fn()
# Gather update_ops from the first clone. These contain, for example, # Gather update_ops from the first clone. These contain, for example,
# the updates for the batch_norm variables created by model_fn. # the updates for the batch_norm variables created by model_fn.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)
...@@ -328,10 +332,6 @@ def train(create_tensor_dict_fn, ...@@ -328,10 +332,6 @@ def train(create_tensor_dict_fn,
with tf.control_dependencies([update_op]): with tf.control_dependencies([update_op]):
train_tensor = tf.identity(total_loss, name='train_op') train_tensor = tf.identity(total_loss, name='train_op')
if graph_hook_fn:
with tf.device(deploy_config.variables_device()):
graph_hook_fn()
# Add summaries. # Add summaries.
for model_var in slim.get_model_variables(): for model_var in slim.get_model_variables():
global_summaries.add(tf.summary.histogram('ModelVars/' + global_summaries.add(tf.summary.histogram('ModelVars/' +
......