Commit 5a2cf36f authored by Kaushik Shivakumar

Merge remote-tracking branch 'upstream/master' into newavarecords

parents 258ddfc3 a829e648
@@ -20,8 +20,8 @@ from __future__ import print_function
 import functools
 import unittest
+from unittest import mock  # pylint: disable=g-importing-member
 from absl.testing import parameterized
-import mock
 import tensorflow.compat.v1 as tf
 import tf_slim as slim
@@ -41,7 +41,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
 from object_detection.protos import box_predictor_pb2
 from object_detection.protos import hyperparams_pb2
 from object_detection.protos import post_processing_pb2
-from object_detection.utils import ops
+from object_detection.utils import spatial_transform_ops as spatial_ops
 from object_detection.utils import test_case
 from object_detection.utils import test_utils
 from object_detection.utils import tf_version
@@ -109,7 +109,6 @@ class FakeFasterRCNNKerasFeatureExtractor(
     ])
-@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
 class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):

   def _get_model(self, box_predictor, **common_kwargs):
@@ -363,8 +362,9 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
         max_negatives_per_positive=None)
     crop_and_resize_fn = (
-        ops.matmul_crop_and_resize
-        if use_matmul_crop_and_resize else ops.native_crop_and_resize)
+        spatial_ops.multilevel_matmul_crop_and_resize
+        if use_matmul_crop_and_resize
+        else spatial_ops.multilevel_native_crop_and_resize)
     common_kwargs = {
         'is_training':
             is_training,
@@ -439,15 +439,16 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
             masks_are_class_agnostic=masks_are_class_agnostic,
             share_box_across_classes=share_box_across_classes), **common_kwargs)

+  @unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
   @mock.patch.object(context_rcnn_meta_arch, 'context_rcnn_lib')
-  def test_prediction_mock(self, mock_context_rcnn_lib):
-    """Mocks the context_rcnn_lib module to test the prediction.
+  def test_prediction_mock_tf1(self, mock_context_rcnn_lib_v1):
+    """Mocks the context_rcnn_lib_v1 module to test the prediction.

     Using mock object so that we can ensure compute_box_context_attention is
     called inside the prediction function.

     Args:
-      mock_context_rcnn_lib: mock module for the context_rcnn_lib.
+      mock_context_rcnn_lib_v1: mock module for the context_rcnn_lib_v1.
     """
     model = self._build_model(
         is_training=False,
@@ -456,7 +457,7 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
         num_classes=42)
     mock_tensor = tf.ones([2, 8, 3, 3, 3], tf.float32)
-    mock_context_rcnn_lib.compute_box_context_attention.return_value = mock_tensor
+    mock_context_rcnn_lib_v1.compute_box_context_attention.return_value = mock_tensor
     inputs_shape = (2, 20, 20, 3)
     inputs = tf.cast(
         tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32),
@@ -478,7 +479,7 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
     side_inputs = model.get_side_inputs(features)
     _ = model.predict(preprocessed_inputs, true_image_shapes, **side_inputs)
-    mock_context_rcnn_lib.compute_box_context_attention.assert_called_once()
+    mock_context_rcnn_lib_v1.compute_box_context_attention.assert_called_once()

   @parameterized.named_parameters(
       {'testcase_name': 'static_shapes', 'static_shapes': True},
@@ -517,7 +518,6 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
       }
       side_inputs = model.get_side_inputs(features)
       prediction_dict = model.predict(preprocessed_inputs, true_image_shapes,
                                       **side_inputs)
       return (prediction_dict['rpn_box_predictor_features'],
...
@@ -1948,9 +1948,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Returns:
       A float32 tensor with shape [K, new_height, new_width, depth].
     """
+    features_to_crop = [features_to_crop]
+    num_levels = len(features_to_crop)
+    box_levels = None
+    if num_levels != 1:
+      # If there are multiple levels to select, get the box levels
+      box_levels = ops.fpn_feature_levels(num_levels, num_levels - 1,
+                                          1.0/224, proposal_boxes_normalized)
     cropped_regions = self._flatten_first_two_dimensions(
         self._crop_and_resize_fn(
-            features_to_crop, proposal_boxes_normalized,
+            features_to_crop, proposal_boxes_normalized, box_levels,
             [self._initial_crop_size, self._initial_crop_size]))
     return self._maxpool_layer(cropped_regions)
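The new `box_levels` argument routes each proposal to a feature level when `features_to_crop` is a multi-level list. A minimal NumPy sketch of the standard FPN level-assignment heuristic (Lin et al. 2017, Eq. 1), which `ops.fpn_feature_levels` is assumed to implement in some equivalent form on normalized boxes; `fpn_level` below is a hypothetical illustration, not the library function:

import numpy as np

def fpn_level(box_height, box_width, k0=4, canonical_size=224,
              k_min=2, k_max=5):
  # FPN paper Eq. (1): k = floor(k0 + log2(sqrt(w * h) / 224)),
  # clipped to the available pyramid levels.
  k = int(np.floor(k0 + np.log2(np.sqrt(box_height * box_width) /
                                canonical_size)))
  return max(k_min, min(k_max, k))

# A 224x224 box maps to the canonical level; halving its side drops a level.
assert fpn_level(224, 224) == 4
assert fpn_level(112, 112) == 3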
@@ -2517,8 +2524,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
         image_shape[1], image_shape[2], check_range=False).get()
     flat_cropped_gt_mask = self._crop_and_resize_fn(
-        tf.expand_dims(flat_gt_masks, -1),
-        tf.expand_dims(flat_normalized_proposals, axis=1),
+        [tf.expand_dims(flat_gt_masks, -1)],
+        tf.expand_dims(flat_normalized_proposals, axis=1), None,
         [mask_height, mask_width])
     # Without stopping gradients into cropped groundtruth masks the
     # performance with 100-padded groundtruth masks when batch size > 1 is
@@ -2547,7 +2554,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
     if second_stage_mask_loss is not None:
       mask_loss = tf.multiply(self._second_stage_mask_loss_weight,
                               second_stage_mask_loss, name='mask_loss')
-      loss_dict[mask_loss.op.name] = mask_loss
+      loss_dict['Loss/BoxClassifierLoss/mask_loss'] = mask_loss
     return loss_dict

   def _get_mask_proposal_boxes_and_classes(
...
@@ -34,7 +34,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
 from object_detection.protos import box_predictor_pb2
 from object_detection.protos import hyperparams_pb2
 from object_detection.protos import post_processing_pb2
-from object_detection.utils import ops
+from object_detection.utils import spatial_transform_ops as spatial_ops
 from object_detection.utils import test_case
 from object_detection.utils import test_utils
 from object_detection.utils import tf_version
@@ -377,8 +377,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
         max_negatives_per_positive=None)
     crop_and_resize_fn = (
-        ops.matmul_crop_and_resize
-        if use_matmul_crop_and_resize else ops.native_crop_and_resize)
+        spatial_ops.multilevel_matmul_crop_and_resize
+        if use_matmul_crop_and_resize
+        else spatial_ops.multilevel_native_crop_and_resize)
     common_kwargs = {
         'is_training':
             is_training,
...
@@ -432,14 +432,9 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
     return eval_metric_ops


-def _check_mask_type_and_value(array_name, masks):
-  """Checks whether mask dtype is uint8 and the values are either 0 or 1."""
-  if masks.dtype != np.uint8:
-    raise ValueError('{} must be of type np.uint8. Found {}.'.format(
-        array_name, masks.dtype))
-  if np.any(np.logical_and(masks != 0, masks != 1)):
-    raise ValueError('{} elements can only be either 0 or 1.'.format(
-        array_name))
+def convert_masks_to_binary(masks):
+  """Converts masks to 0 or 1 and uint8 type."""
+  return (masks > 0).astype(np.uint8)


 class CocoKeypointEvaluator(CocoDetectionEvaluator):
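The replacement drops the strict dtype/value check in favor of silent binarization: any nonzero pixel now counts as foreground. A quick NumPy illustration of the same rule used by `convert_masks_to_binary` above, which also explains the `5 * np.ones` change in the test further down:

import numpy as np

masks = np.array([[0, 1], [5, 255]], dtype=np.uint8)
binary = (masks > 0).astype(np.uint8)  # same rule as convert_masks_to_binary
print(binary)  # -> [[0 1]
               #     [1 1]]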
@@ -952,9 +947,8 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
     groundtruth_instance_masks = groundtruth_dict[
         standard_fields.InputDataFields.groundtruth_instance_masks]
-    _check_mask_type_and_value(standard_fields.InputDataFields.
-                               groundtruth_instance_masks,
-                               groundtruth_instance_masks)
+    groundtruth_instance_masks = convert_masks_to_binary(
+        groundtruth_instance_masks)
     self._groundtruth_list.extend(
         coco_tools.
         ExportSingleImageGroundtruthToCoco(
@@ -1013,9 +1007,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
                        'are incompatible: {} vs {}'.format(
                            groundtruth_masks_shape,
                            detection_masks.shape))
-    _check_mask_type_and_value(standard_fields.DetectionResultFields.
-                               detection_masks,
-                               detection_masks)
+    detection_masks = convert_masks_to_binary(detection_masks)
     self._detection_masks_list.extend(
         coco_tools.ExportSingleImageDetectionMasksToCoco(
             image_id=image_id,
...
@@ -1424,14 +1424,16 @@ class CocoMaskEvaluationTest(tf.test.TestCase):
         image_id='image3',
         detections_dict={
             standard_fields.DetectionResultFields.detection_boxes:
                 np.array([[25., 25., 50., 50.]]),
             standard_fields.DetectionResultFields.detection_scores:
                 np.array([.8]),
             standard_fields.DetectionResultFields.detection_classes:
                 np.array([1]),
             standard_fields.DetectionResultFields.detection_masks:
-                np.pad(np.ones([1, 25, 25], dtype=np.uint8),
-                       ((0, 0), (10, 10), (10, 10)), mode='constant')
+                # The value of 5 is equivalent to 1, since masks will be
+                # thresholded and binarized before evaluation.
+                np.pad(5 * np.ones([1, 25, 25], dtype=np.uint8),
+                       ((0, 0), (10, 10), (10, 10)), mode='constant')
         })
     metrics = coco_evaluator.evaluate()
     self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0)
...
@@ -136,15 +136,15 @@ def build_groundtruth_dictionary(data, class_label_map):
   dictionary = {
       standard_fields.InputDataFields.groundtruth_boxes:
-          data_location[['YMin', 'XMin', 'YMax', 'XMax']].as_matrix(),
+          data_location[['YMin', 'XMin', 'YMax', 'XMax']].to_numpy(),
       standard_fields.InputDataFields.groundtruth_classes:
           data_location['LabelName'].map(lambda x: class_label_map[x]
-                                        ).as_matrix(),
+                                        ).to_numpy(),
       standard_fields.InputDataFields.groundtruth_group_of:
-          data_location['IsGroupOf'].as_matrix().astype(int),
+          data_location['IsGroupOf'].to_numpy().astype(int),
       standard_fields.InputDataFields.groundtruth_image_classes:
           data_labels['LabelName'].map(lambda x: class_label_map[x]
-                                      ).as_matrix(),
+                                      ).to_numpy(),
   }

   if 'Mask' in data_location:
@@ -179,9 +179,9 @@ def build_predictions_dictionary(data, class_label_map):
   """
   dictionary = {
       standard_fields.DetectionResultFields.detection_classes:
-          data['LabelName'].map(lambda x: class_label_map[x]).as_matrix(),
+          data['LabelName'].map(lambda x: class_label_map[x]).to_numpy(),
       standard_fields.DetectionResultFields.detection_scores:
-          data['Score'].as_matrix()
+          data['Score'].to_numpy()
   }

   if 'Mask' in data:
@@ -192,6 +192,6 @@ def build_predictions_dictionary(data, class_label_map):
   else:
     dictionary[standard_fields.DetectionResultFields.detection_boxes] = data[[
         'YMin', 'XMin', 'YMax', 'XMax'
-    ]].as_matrix()
+    ]].to_numpy()

   return dictionary
@@ -53,16 +53,16 @@ def build_groundtruth_vrd_dictionary(data, class_label_map,
   boxes = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.vrd_box_data_type)
   boxes['subject'] = data_boxes[['YMin1', 'XMin1', 'YMax1',
-                                 'XMax1']].as_matrix()
-  boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].as_matrix()
+                                 'XMax1']].to_numpy()
+  boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].to_numpy()

   labels = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.label_data_type)
   labels['subject'] = data_boxes['LabelName1'].map(
-      lambda x: class_label_map[x]).as_matrix()
+      lambda x: class_label_map[x]).to_numpy()
   labels['object'] = data_boxes['LabelName2'].map(
-      lambda x: class_label_map[x]).as_matrix()
+      lambda x: class_label_map[x]).to_numpy()
   labels['relation'] = data_boxes['RelationshipLabel'].map(
-      lambda x: relationship_label_map[x]).as_matrix()
+      lambda x: relationship_label_map[x]).to_numpy()

   return {
       standard_fields.InputDataFields.groundtruth_boxes:
@@ -71,7 +71,7 @@ def build_groundtruth_vrd_dictionary(data, class_label_map,
           labels,
       standard_fields.InputDataFields.groundtruth_image_classes:
           data_labels['LabelName'].map(lambda x: class_label_map[x])
-          .as_matrix(),
+          .to_numpy(),
   }
@@ -104,16 +104,16 @@ def build_predictions_vrd_dictionary(data, class_label_map,
   boxes = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.vrd_box_data_type)
   boxes['subject'] = data_boxes[['YMin1', 'XMin1', 'YMax1',
-                                 'XMax1']].as_matrix()
-  boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].as_matrix()
+                                 'XMax1']].to_numpy()
+  boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].to_numpy()

   labels = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.label_data_type)
   labels['subject'] = data_boxes['LabelName1'].map(
-      lambda x: class_label_map[x]).as_matrix()
+      lambda x: class_label_map[x]).to_numpy()
   labels['object'] = data_boxes['LabelName2'].map(
-      lambda x: class_label_map[x]).as_matrix()
+      lambda x: class_label_map[x]).to_numpy()
   labels['relation'] = data_boxes['RelationshipLabel'].map(
-      lambda x: relationship_label_map[x]).as_matrix()
+      lambda x: relationship_label_map[x]).to_numpy()

   return {
       standard_fields.DetectionResultFields.detection_boxes:
@@ -121,5 +121,5 @@ def build_predictions_vrd_dictionary(data, class_label_map,
       standard_fields.DetectionResultFields.detection_classes:
           labels,
       standard_fields.DetectionResultFields.detection_scores:
-          data_boxes['Score'].as_matrix()
+          data_boxes['Score'].to_numpy()
   }
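The `as_matrix()` to `to_numpy()` changes track the pandas API: `DataFrame.as_matrix()` was deprecated in pandas 0.23 and removed in 1.0, while `to_numpy()` (added in 0.24) is the documented replacement and returns the same ndarray at these call sites. For example:

import pandas as pd

df = pd.DataFrame({'YMin': [0.1], 'XMin': [0.2], 'YMax': [0.3], 'XMax': [0.4]})
boxes = df[['YMin', 'XMin', 'YMax', 'XMax']].to_numpy()  # was .as_matrix()
print(boxes.shape)  # -> (1, 4)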
@@ -43,7 +43,6 @@ from object_detection.utils import visualization_utils as vis_utils
 # pylint: disable=g-import-not-at-top
 try:
   from tensorflow.contrib import learn as contrib_learn
-  from tensorflow.contrib import tpu as contrib_tpu
 except ImportError:
   # TF 2.0 doesn't ship with contrib.
   pass
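With the contrib import gone, later hunks in this file swap each use for the graduated equivalent (the module imports `tensorflow.compat.v1 as tf`). The mapping applied in this diff:

# contrib_tpu.bfloat16_scope       -> tf.tpu.bfloat16_scope
# contrib_tpu.outside_compilation  -> tf.tpu.outside_compilation
# contrib_tpu.CrossShardOptimizer  -> tf.tpu.CrossShardOptimizer
# contrib_tpu.TPUEstimatorSpec     -> tf.estimator.tpu.TPUEstimatorSpec
# contrib_tpu.TPUEstimator         -> tf.estimator.tpu.TPUEstimator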
@@ -94,6 +93,15 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
       of groundtruth boxes per image.
     'groundtruth_keypoints': [batch_size, num_boxes, num_keypoints, 2] float32
       tensor of keypoints (if provided in groundtruth).
+    'groundtruth_dp_num_points_list': [batch_size, num_boxes] int32 tensor
+      with the number of DensePose points for each instance (if provided in
+      groundtruth).
+    'groundtruth_dp_part_ids_list': [batch_size, num_boxes,
+      max_sampled_points] int32 tensor with the part ids for each DensePose
+      sampled point (if provided in groundtruth).
+    'groundtruth_dp_surface_coords_list': [batch_size, num_boxes,
+      max_sampled_points, 4] containing the DensePose surface coordinates for
+      each sampled point (if provided in groundtruth).
     'groundtruth_group_of': [batch_size, num_boxes] bool tensor indicating
       group_of annotations (if provided in groundtruth).
     'groundtruth_labeled_classes': [batch_size, num_classes] int64
@@ -164,6 +172,21 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
     groundtruth[input_data_fields.groundtruth_labeled_classes] = tf.stack(
         labeled_classes)

+  if detection_model.groundtruth_has_field(
+      fields.BoxListFields.densepose_num_points):
+    groundtruth[input_data_fields.groundtruth_dp_num_points] = tf.stack(
+        detection_model.groundtruth_lists(
+            fields.BoxListFields.densepose_num_points))
+  if detection_model.groundtruth_has_field(
+      fields.BoxListFields.densepose_part_ids):
+    groundtruth[input_data_fields.groundtruth_dp_part_ids] = tf.stack(
+        detection_model.groundtruth_lists(
+            fields.BoxListFields.densepose_part_ids))
+  if detection_model.groundtruth_has_field(
+      fields.BoxListFields.densepose_surface_coords):
+    groundtruth[input_data_fields.groundtruth_dp_surface_coords] = tf.stack(
+        detection_model.groundtruth_lists(
+            fields.BoxListFields.densepose_surface_coords))
   groundtruth[input_data_fields.num_groundtruth_boxes] = (
       tf.tile([max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]]))
   return groundtruth
@@ -219,6 +242,9 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
         fields.InputDataFields.groundtruth_boxes,
         fields.InputDataFields.groundtruth_keypoints,
         fields.InputDataFields.groundtruth_keypoint_visibilities,
+        fields.InputDataFields.groundtruth_dp_num_points,
+        fields.InputDataFields.groundtruth_dp_part_ids,
+        fields.InputDataFields.groundtruth_dp_surface_coords,
         fields.InputDataFields.groundtruth_group_of,
         fields.InputDataFields.groundtruth_difficult,
         fields.InputDataFields.groundtruth_is_crowd,
@@ -269,6 +295,18 @@ def provide_groundtruth(model, labels):
   if fields.InputDataFields.groundtruth_keypoint_visibilities in labels:
     gt_keypoint_visibilities_list = labels[
         fields.InputDataFields.groundtruth_keypoint_visibilities]
+  gt_dp_num_points_list = None
+  if fields.InputDataFields.groundtruth_dp_num_points in labels:
+    gt_dp_num_points_list = labels[
+        fields.InputDataFields.groundtruth_dp_num_points]
+  gt_dp_part_ids_list = None
+  if fields.InputDataFields.groundtruth_dp_part_ids in labels:
+    gt_dp_part_ids_list = labels[
+        fields.InputDataFields.groundtruth_dp_part_ids]
+  gt_dp_surface_coords_list = None
+  if fields.InputDataFields.groundtruth_dp_surface_coords in labels:
+    gt_dp_surface_coords_list = labels[
+        fields.InputDataFields.groundtruth_dp_surface_coords]
   gt_weights_list = None
   if fields.InputDataFields.groundtruth_weights in labels:
     gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
@@ -297,13 +335,16 @@ def provide_groundtruth(model, labels):
       groundtruth_masks_list=gt_masks_list,
       groundtruth_keypoints_list=gt_keypoints_list,
       groundtruth_keypoint_visibilities_list=gt_keypoint_visibilities_list,
+      groundtruth_dp_num_points_list=gt_dp_num_points_list,
+      groundtruth_dp_part_ids_list=gt_dp_part_ids_list,
+      groundtruth_dp_surface_coords_list=gt_dp_surface_coords_list,
       groundtruth_weights_list=gt_weights_list,
       groundtruth_is_crowd_list=gt_is_crowd_list,
       groundtruth_group_of_list=gt_group_of_list,
       groundtruth_area_list=gt_area_list)


-def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
+def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False,
                     postprocess_on_cpu=False):
   """Creates a model function for `Estimator`.
@@ -377,7 +418,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
     side_inputs = detection_model.get_side_inputs(features)
     if use_tpu and train_config.use_bfloat16:
-      with contrib_tpu.bfloat16_scope():
+      with tf.tpu.bfloat16_scope():
         prediction_dict = detection_model.predict(
             preprocessed_images,
             features[fields.InputDataFields.true_image_shape], **side_inputs)
@@ -392,7 +433,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
     if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
       if use_tpu and postprocess_on_cpu:
-        detections = contrib_tpu.outside_compilation(
+        detections = tf.tpu.outside_compilation(
             postprocess_wrapper,
             (prediction_dict,
              features[fields.InputDataFields.true_image_shape]))
@@ -468,7 +509,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
     if mode == tf.estimator.ModeKeys.TRAIN:
       if use_tpu:
-        training_optimizer = contrib_tpu.CrossShardOptimizer(training_optimizer)
+        training_optimizer = tf.tpu.CrossShardOptimizer(training_optimizer)

       # Optionally freeze some layers by setting their gradients to be zero.
       trainable_variables = None
@@ -588,7 +629,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
     # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
     if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
-      return contrib_tpu.TPUEstimatorSpec(
+      return tf.estimator.tpu.TPUEstimatorSpec(
           mode=mode,
          scaffold_fn=scaffold_fn,
          predictions=detections,
@@ -619,8 +660,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
 def create_estimator_and_inputs(run_config,
-                                hparams,
-                                pipeline_config_path,
+                                hparams=None,
+                                pipeline_config_path=None,
                                 config_override=None,
                                 train_steps=None,
                                 sample_1_of_n_eval_examples=1,
@@ -639,7 +680,7 @@ def create_estimator_and_inputs(run_config,
   Args:
     run_config: A `RunConfig`.
-    hparams: A `HParams`.
+    hparams: (optional) A `HParams`.
     pipeline_config_path: A path to a pipeline config file.
     config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
       override the config from `pipeline_config_path`.
@@ -762,14 +803,14 @@ def create_estimator_and_inputs(run_config,
       model_config=model_config, predict_input_config=eval_input_configs[0])

   # Read export_to_tpu from hparams if not passed.
-  if export_to_tpu is None:
+  if export_to_tpu is None and hparams is not None:
     export_to_tpu = hparams.get('export_to_tpu', False)
   tf.logging.info('create_estimator_and_inputs: use_tpu %s, export_to_tpu %s',
                   use_tpu, export_to_tpu)
   model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu,
                               postprocess_on_cpu)
   if use_tpu_estimator:
-    estimator = contrib_tpu.TPUEstimator(
+    estimator = tf.estimator.tpu.TPUEstimator(
         model_fn=model_fn,
         train_batch_size=train_config.batch_size,
         # For each core, only batch size 1 is supported for eval.
...
@@ -23,6 +23,7 @@ import os
 import time

 import tensorflow.compat.v1 as tf
+import tensorflow.compat.v2 as tf2

 from object_detection import eval_util
 from object_detection import inputs
@@ -93,6 +94,12 @@ def _compute_losses_and_predictions_dicts(
         instance masks for objects.
       labels[fields.InputDataFields.groundtruth_keypoints] is a
         float32 tensor containing keypoints for each box.
+      labels[fields.InputDataFields.groundtruth_dp_num_points] is an int32
+        tensor with the number of sampled DensePose points per object.
+      labels[fields.InputDataFields.groundtruth_dp_part_ids] is an int32
+        tensor with the DensePose part ids (0-indexed) per object.
+      labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
+        float32 tensor with the DensePose surface coordinates.
       labels[fields.InputDataFields.groundtruth_group_of] is a tf.bool tensor
         containing group_of annotations.
       labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
@@ -111,7 +118,8 @@ def _compute_losses_and_predictions_dicts(
   prediction_dict = model.predict(
       preprocessed_images,
-      features[fields.InputDataFields.true_image_shape])
+      features[fields.InputDataFields.true_image_shape],
+      **model.get_side_inputs(features))
   prediction_dict = ops.bfloat16_to_float32_nested(prediction_dict)

   losses_dict = model.loss(
@@ -195,6 +203,17 @@ def eager_train_step(detection_model,
       labels[fields.InputDataFields.groundtruth_keypoints] is a
         [batch_size, num_boxes, num_keypoints, 2] float32 tensor containing
         keypoints for each box.
+      labels[fields.InputDataFields.groundtruth_dp_num_points] is a
+        [batch_size, num_boxes] int32 tensor with the number of DensePose
+        sampled points per instance.
+      labels[fields.InputDataFields.groundtruth_dp_part_ids] is a
+        [batch_size, num_boxes, max_sampled_points] int32 tensor with the
+        part ids (0-indexed) for each instance.
+      labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
+        [batch_size, num_boxes, max_sampled_points, 4] float32 tensor with the
+        surface coordinates for each point. Each surface coordinate is of the
+        form (y, x, v, u) where (y, x) are normalized image locations and
+        (v, u) are part-relative normalized surface coordinates.
       labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
         k-hot tensor of classes.
     unpad_groundtruth_tensors: A parameter passed to unstack_batch.
@@ -336,11 +355,18 @@ def load_fine_tune_checkpoint(
       labels)

   strategy = tf.compat.v2.distribute.get_strategy()
-  strategy.experimental_run_v2(
-      _dummy_computation_fn, args=(
-          features,
-          labels,
-      ))
+  if hasattr(tf.distribute.Strategy, 'run'):
+    strategy.run(
+        _dummy_computation_fn, args=(
+            features,
+            labels,
+        ))
+  else:
+    strategy.experimental_run_v2(
+        _dummy_computation_fn, args=(
+            features,
+            labels,
+        ))

   restore_from_objects_dict = model.restore_from_objects(
       fine_tune_checkpoint_type=checkpoint_type)
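`Strategy.experimental_run_v2` was renamed to `Strategy.run` in TF 2.2, which is what the `hasattr` probe above detects. The same dispatch, extracted as a standalone sketch (a hypothetical helper for illustration, not part of this change):

import tensorflow.compat.v2 as tf

def run_on_strategy(strategy, fn, args):
  """Runs fn under the strategy, preferring the TF >= 2.2 `run` name."""
  if hasattr(tf.distribute.Strategy, 'run'):
    return strategy.run(fn, args=args)
  return strategy.experimental_run_v2(fn, args=args)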
@@ -391,6 +417,7 @@ def train_loop(
     save_final_config=False,
     checkpoint_every_n=1000,
     checkpoint_max_to_keep=7,
+    record_summaries=True,
     **kwargs):
   """Trains a model using eager + functions.
@@ -420,6 +447,7 @@ def train_loop(
       Checkpoint every n training steps.
     checkpoint_max_to_keep:
       int, the number of most recent checkpoints to keep in the model directory.
+    record_summaries: Boolean, whether or not to record summaries.
     **kwargs: Additional keyword arguments for configuration override.
   """
   ## Parse the configs
@@ -506,8 +534,11 @@ def train_loop(
   # is the chief.
   summary_writer_filepath = get_filepath(strategy,
                                          os.path.join(model_dir, 'train'))
-  summary_writer = tf.compat.v2.summary.create_file_writer(
-      summary_writer_filepath)
+  if record_summaries:
+    summary_writer = tf.compat.v2.summary.create_file_writer(
+        summary_writer_filepath)
+  else:
+    summary_writer = tf2.summary.create_noop_writer()

   if use_tpu:
     num_steps_per_iteration = 100
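With `record_summaries=False`, the no-op writer keeps downstream `tf.summary` calls valid without writing event files, so the training loop needs no further branching. A small sketch of the behavior:

import tensorflow.compat.v2 as tf2

writer = tf2.summary.create_noop_writer()
with writer.as_default():
  tf2.summary.scalar('loss', 0.5, step=1)  # silently discarded, no event file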
@@ -562,8 +593,12 @@ def train_loop(
     def _sample_and_train(strategy, train_step_fn, data_iterator):
       features, labels = data_iterator.next()
-      per_replica_losses = strategy.experimental_run_v2(
-          train_step_fn, args=(features, labels))
+      if hasattr(tf.distribute.Strategy, 'run'):
+        per_replica_losses = strategy.run(
+            train_step_fn, args=(features, labels))
+      else:
+        per_replica_losses = strategy.experimental_run_v2(
+            train_step_fn, args=(features, labels))
       # TODO(anjalisridhar): explore if it is safe to remove the
       ## num_replicas scaling of the loss and switch this to a ReduceOp.Mean
       return strategy.reduce(tf.distribute.ReduceOp.SUM,
@@ -575,7 +610,9 @@ def train_loop(
       if num_steps_per_iteration > 1:
         for _ in tf.range(num_steps_per_iteration - 1):
-          _sample_and_train(strategy, train_step_fn, data_iterator)
+          # Following suggestion on yaqs/5402607292645376
+          with tf.name_scope(''):
+            _sample_and_train(strategy, train_step_fn, data_iterator)

       return _sample_and_train(strategy, train_step_fn, data_iterator)
@@ -767,7 +804,16 @@ def eager_eval_loop(
             name='eval_side_by_side_' + str(i),
             step=global_step,
             data=sbys_images,
-            max_outputs=1)
+            max_outputs=eval_config.num_visualizations)
+        if eval_util.has_densepose(eval_dict):
+          dp_image_list = vutils.draw_densepose_visualizations(
+              eval_dict)
+          dp_images = tf.concat(dp_image_list, axis=0)
+          tf.compat.v2.summary.image(
+              name='densepose_detections_' + str(i),
+              step=global_step,
+              data=dp_images,
+              max_outputs=eval_config.num_visualizations)

   if evaluators is None:
     if class_agnostic:
@@ -817,6 +863,7 @@ def eval_continuously(
     checkpoint_dir=None,
     wait_interval=180,
     timeout=3600,
+    eval_index=None,
     **kwargs):
   """Run continuous evaluation of a detection model eagerly.
@@ -846,6 +893,8 @@ def eval_continuously(
       new checkpoint.
     timeout: The maximum number of seconds to wait for a checkpoint. Execution
       will terminate if no new checkpoints are found after these many seconds.
+    eval_index: int, optional. If given, only evaluate the dataset at the
+      given index.
     **kwargs: Additional keyword arguments for configuration override.
   """
@@ -899,6 +948,11 @@ def eval_continuously(
             model=detection_model)
     eval_inputs.append((eval_input_config.name, next_eval_input))

+  if eval_index is not None:
+    eval_inputs = [eval_inputs[eval_index]]
+    tf.logging.info('eval_index selected - {}'.format(
+        eval_inputs))
+
   global_step = tf.compat.v2.Variable(
       0, trainable=False, dtype=tf.compat.v2.dtypes.int64)
...
@@ -22,7 +22,6 @@ from absl import flags
 import tensorflow.compat.v1 as tf

-from object_detection import model_hparams
 from object_detection import model_lib

 flags.DEFINE_string(
@@ -41,10 +40,6 @@ flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
                      'one of every n train input examples for evaluation, '
                      'where n is provided. This is only used if '
                      '`eval_training_data` is True.')
-flags.DEFINE_string(
-    'hparams_overrides', None, 'Hyperparameter overrides, '
-    'represented as a string containing comma-separated '
-    'hparam_name=value pairs.')
 flags.DEFINE_string(
     'checkpoint_dir', None, 'Path to directory holding a checkpoint. If '
     '`checkpoint_dir` is provided, this binary operates in eval-only mode, '
@@ -68,7 +63,6 @@ def main(unused_argv):
   train_and_eval_dict = model_lib.create_estimator_and_inputs(
       run_config=config,
-      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
       pipeline_config_path=FLAGS.pipeline_config_path,
       train_steps=FLAGS.num_train_steps,
       sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
...
@@ -54,10 +54,19 @@ flags.DEFINE_integer('eval_timeout', 3600, 'Number of seconds to wait for an'
                      'evaluation checkpoint before exiting.')

 flags.DEFINE_bool('use_tpu', False, 'Whether the job is executing on a TPU.')
+flags.DEFINE_string(
+    'tpu_name',
+    default=None,
+    help='Name of the Cloud TPU for Cluster Resolvers.')
 flags.DEFINE_integer(
     'num_workers', 1, 'When num_workers > 1, training uses '
     'MultiWorkerMirroredStrategy. When num_workers = 1 it uses '
     'MirroredStrategy.')
+flags.DEFINE_integer(
+    'checkpoint_every_n', 1000, 'Integer defining how often we checkpoint.')
+flags.DEFINE_boolean('record_summaries', True,
+                     ('Whether or not to record summaries during'
+                      ' training.'))

 FLAGS = flags.FLAGS
@@ -79,7 +88,10 @@ def main(unused_argv):
         wait_interval=300, timeout=FLAGS.eval_timeout)
   else:
     if FLAGS.use_tpu:
-      resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
+      # TPU is automatically inferred if tpu_name is None and
+      # we are running under cloud ai-platform.
+      resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
+          FLAGS.tpu_name)
       tf.config.experimental_connect_to_cluster(resolver)
       tf.tpu.experimental.initialize_tpu_system(resolver)
       strategy = tf.distribute.experimental.TPUStrategy(resolver)
@@ -93,7 +105,9 @@ def main(unused_argv):
         pipeline_config_path=FLAGS.pipeline_config_path,
         model_dir=FLAGS.model_dir,
         train_steps=FLAGS.num_train_steps,
-        use_tpu=FLAGS.use_tpu)
+        use_tpu=FLAGS.use_tpu,
+        checkpoint_every_n=FLAGS.checkpoint_every_n,
+        record_summaries=FLAGS.record_summaries)

 if __name__ == '__main__':
   tf.compat.v1.app.run()
@@ -26,18 +26,8 @@ from absl import flags
 import tensorflow.compat.v1 as tf

-from object_detection import model_hparams
 from object_detection import model_lib

-# pylint: disable=g-import-not-at-top
-try:
-  from tensorflow.contrib import cluster_resolver as contrib_cluster_resolver
-  from tensorflow.contrib import tpu as contrib_tpu
-except ImportError:
-  # TF 2.0 doesn't ship with contrib.
-  pass
-# pylint: enable=g-import-not-at-top
-
 tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs')

 # Cloud TPU Cluster Resolvers
@@ -67,10 +57,6 @@ flags.DEFINE_string('mode', 'train',
 flags.DEFINE_integer('train_batch_size', None, 'Batch size for training. If '
                      'this is not provided, batch size is read from training '
                      'config.')
-flags.DEFINE_string(
-    'hparams_overrides', None, 'Comma-separated list of '
-    'hyperparameters to override defaults.')
 flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
 flags.DEFINE_boolean('eval_training_data', False,
                      'If training data should be evaluated for this job.')
@@ -99,15 +85,15 @@ def main(unused_argv):
   flags.mark_flag_as_required('pipeline_config_path')

   tpu_cluster_resolver = (
-      contrib_cluster_resolver.TPUClusterResolver(
+      tf.distribute.cluster_resolver.TPUClusterResolver(
           tpu=[FLAGS.tpu_name], zone=FLAGS.tpu_zone, project=FLAGS.gcp_project))
   tpu_grpc_url = tpu_cluster_resolver.get_master()

-  config = contrib_tpu.RunConfig(
+  config = tf.estimator.tpu.RunConfig(
       master=tpu_grpc_url,
       evaluation_master=tpu_grpc_url,
       model_dir=FLAGS.model_dir,
-      tpu_config=contrib_tpu.TPUConfig(
+      tpu_config=tf.estimator.tpu.TPUConfig(
           iterations_per_loop=FLAGS.iterations_per_loop,
           num_shards=FLAGS.num_shards))
@@ -117,7 +103,6 @@ def main(unused_argv):
   train_and_eval_dict = model_lib.create_estimator_and_inputs(
       run_config=config,
-      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
       pipeline_config_path=FLAGS.pipeline_config_path,
       train_steps=FLAGS.num_train_steps,
       sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
...
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""MobileNet V2[1] feature extractor for CenterNet[2] meta architecture.
[1]: https://arxiv.org/abs/1801.04381
[2]: https://arxiv.org/abs/1904.07850
"""
import tensorflow.compat.v1 as tf
from object_detection.meta_architectures import center_net_meta_arch
from object_detection.models.keras_models import mobilenet_v2 as mobilenetv2
class CenterNetMobileNetV2FeatureExtractor(
    center_net_meta_arch.CenterNetFeatureExtractor):
  """The MobileNet V2 feature extractor for CenterNet."""

  def __init__(self,
               mobilenet_v2_net,
               channel_means=(0., 0., 0.),
               channel_stds=(1., 1., 1.),
               bgr_ordering=False):
    """Initializes the feature extractor.

    Args:
      mobilenet_v2_net: The underlying mobilenet_v2 network to use.
      channel_means: A tuple of floats, denoting the mean of each channel
        which will be subtracted from it.
      channel_stds: A tuple of floats, denoting the standard deviation of each
        channel. Each channel will be divided by its standard deviation value.
      bgr_ordering: bool, if set will change the channel ordering to be in the
        [blue, green, red] order.
    """

    super(CenterNetMobileNetV2FeatureExtractor, self).__init__(
        channel_means=channel_means,
        channel_stds=channel_stds,
        bgr_ordering=bgr_ordering)
    self._network = mobilenet_v2_net

    output = self._network(self._network.input)

    # TODO(nkhadke): Try out MobileNet+FPN next (skip connections are cheap and
    # should help with performance).
    # MobileNet by itself transforms a 224x224x3 volume into a 7x7x1280, which
    # leads to a stride of 32. We perform upsampling to get it to a target
    # stride of 4.
    for num_filters in [256, 128, 64]:
      # 1. We use a simple convolution instead of a deformable convolution
      conv = tf.keras.layers.Conv2D(
          filters=num_filters, kernel_size=1, strides=1, padding='same')
      output = conv(output)
      output = tf.keras.layers.BatchNormalization()(output)
      output = tf.keras.layers.ReLU()(output)

      # 2. We use the default initialization for the convolution layers
      # instead of initializing it to do bilinear upsampling.
      conv_transpose = tf.keras.layers.Conv2DTranspose(
          filters=num_filters, kernel_size=3, strides=2, padding='same')
      output = conv_transpose(output)
      output = tf.keras.layers.BatchNormalization()(output)
      output = tf.keras.layers.ReLU()(output)

    self._network = tf.keras.models.Model(
        inputs=self._network.input, outputs=output)

  def preprocess(self, resized_inputs):
    resized_inputs = super(CenterNetMobileNetV2FeatureExtractor,
                           self).preprocess(resized_inputs)
    return tf.keras.applications.mobilenet_v2.preprocess_input(resized_inputs)

  def load_feature_extractor_weights(self, path):
    self._network.load_weights(path)

  def get_base_model(self):
    return self._network

  def call(self, inputs):
    return [self._network(inputs)]

  @property
  def out_stride(self):
    """The stride in the output image of the network."""
    return 4

  @property
  def num_feature_outputs(self):
    """The number of feature outputs returned by the feature extractor."""
    return 1

  def get_model(self):
    return self._network


def mobilenet_v2(channel_means, channel_stds, bgr_ordering):
  """The MobileNetV2 backbone for CenterNet."""
  # We set 'is_training' to True for now.
  network = mobilenetv2.mobilenet_v2(True, include_top=False)
  return CenterNetMobileNetV2FeatureExtractor(
      network,
      channel_means=channel_means,
      channel_stds=channel_stds,
      bgr_ordering=bgr_ordering)
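The stride arithmetic behind the upsampling head above: MobileNetV2's final feature map has stride 32, and each of the three stride-2 transpose convolutions halves it, 32 -> 16 -> 8 -> 4, matching `out_stride`. For a 224x224 input that is a 7x7x1280 backbone map upsampled to 56x56x64, which is exactly what the test below asserts:

# Sanity check of the stride bookkeeping (assumes a 224x224 input):
stride = 32                 # MobileNetV2 backbone output stride
for _ in range(3):          # three Conv2DTranspose layers with strides=2
  stride //= 2
assert stride == 4
assert 224 // stride == 56  # spatial size asserted in the test below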
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Testing mobilenet_v2 feature extractor for CenterNet."""
import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import center_net_mobilenet_v2_feature_extractor
from object_detection.models.keras_models import mobilenet_v2
from object_detection.utils import test_case
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class CenterNetMobileNetV2FeatureExtractorTest(test_case.TestCase):

  def test_center_net_mobilenet_v2_feature_extractor(self):
    net = mobilenet_v2.mobilenet_v2(True, include_top=False)
    model = center_net_mobilenet_v2_feature_extractor.CenterNetMobileNetV2FeatureExtractor(
        net)

    def graph_fn():
      img = np.zeros((8, 224, 224, 3), dtype=np.float32)
      processed_img = model.preprocess(img)
      return model(processed_img)

    outputs = self.execute(graph_fn, [])
    self.assertEqual(outputs.shape, (8, 56, 56, 64))


if __name__ == '__main__':
  tf.test.main()
@@ -21,9 +21,14 @@
 import tensorflow.compat.v1 as tf

 from object_detection.meta_architectures.center_net_meta_arch import CenterNetFeatureExtractor
+from object_detection.models.keras_models import resnet_v1


 _RESNET_MODEL_OUTPUT_LAYERS = {
+    'resnet_v1_18': ['conv2_block2_out', 'conv3_block2_out',
+                     'conv4_block2_out', 'conv5_block2_out'],
+    'resnet_v1_34': ['conv2_block3_out', 'conv3_block4_out',
+                     'conv4_block6_out', 'conv5_block3_out'],
     'resnet_v1_50': ['conv2_block3_out', 'conv3_block4_out',
                      'conv4_block6_out', 'conv5_block3_out'],
     'resnet_v1_101': ['conv2_block3_out', 'conv3_block4_out',
@@ -69,6 +74,10 @@ class CenterNetResnetV1FpnFeatureExtractor(CenterNetFeatureExtractor):
       self._base_model = tf.keras.applications.ResNet50(weights=None)
     elif resnet_type == 'resnet_v1_101':
       self._base_model = tf.keras.applications.ResNet101(weights=None)
+    elif resnet_type == 'resnet_v1_18':
+      self._base_model = resnet_v1.resnet_v1_18(weights=None)
+    elif resnet_type == 'resnet_v1_34':
+      self._base_model = resnet_v1.resnet_v1_34(weights=None)
     else:
       raise ValueError('Unknown Resnet Model {}'.format(resnet_type))
     output_layers = _RESNET_MODEL_OUTPUT_LAYERS[resnet_type]
...@@ -174,3 +183,24 @@ def resnet_v1_50_fpn(channel_means, channel_stds, bgr_ordering): ...@@ -174,3 +183,24 @@ def resnet_v1_50_fpn(channel_means, channel_stds, bgr_ordering):
channel_means=channel_means, channel_means=channel_means,
channel_stds=channel_stds, channel_stds=channel_stds,
bgr_ordering=bgr_ordering) bgr_ordering=bgr_ordering)


def resnet_v1_34_fpn(channel_means, channel_stds, bgr_ordering):
  """The ResNet v1 34 FPN feature extractor."""
  return CenterNetResnetV1FpnFeatureExtractor(
      resnet_type='resnet_v1_34',
      channel_means=channel_means,
      channel_stds=channel_stds,
      bgr_ordering=bgr_ordering)


def resnet_v1_18_fpn(channel_means, channel_stds, bgr_ordering):
  """The ResNet v1 18 FPN feature extractor."""
  return CenterNetResnetV1FpnFeatureExtractor(
      resnet_type='resnet_v1_18',
      channel_means=channel_means,
      channel_stds=channel_stds,
      bgr_ordering=bgr_ordering)
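A hypothetical usage sketch of the new factories; the module path matches where this file lives in the Object Detection API, and the channel statistics below are the usual ImageNet RGB means rather than values taken from this change:

from object_detection.models import center_net_resnet_v1_fpn_feature_extractor as fe

# Builds the ResNet-18 FPN backbone wrapper added above; weights are
# randomly initialized (the extractor constructs the base model with
# weights=None).
extractor = fe.resnet_v1_18_fpn(
    channel_means=[123.68, 116.779, 103.939],  # assumed ImageNet means
    channel_stds=[1., 1., 1.],
    bgr_ordering=False)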
...@@ -31,6 +31,8 @@ class CenterNetResnetV1FpnFeatureExtractorTest(test_case.TestCase,

  @parameterized.parameters(
      {'resnet_type': 'resnet_v1_50'},
      {'resnet_type': 'resnet_v1_101'},
      {'resnet_type': 'resnet_v1_18'},
      {'resnet_type': 'resnet_v1_34'},
  )
  def test_correct_output_size(self, resnet_type):
    """Verify that shape of features returned by the backbone is correct."""
...
...@@ -56,7 +56,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor(
        the resnet_v1.resnet_v1_{50,101,152} models.
      resnet_v1_base_model_name: model name under which to construct resnet v1.
      first_stage_features_stride: See base class.
      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
        containing convolution hyperparameters for the layers added on top of
        the base feature extractor.
      batch_norm_trainable: See base class.
...@@ -143,19 +143,21 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor(
    with tf.name_scope('ResnetV1FPN'):
      full_resnet_v1_model = self._resnet_v1_base_model(
          batchnorm_training=self._train_batch_norm,
          conv_hyperparams=(self._conv_hyperparams if
                            self._override_base_feature_extractor_hyperparams
                            else None),
          classes=None,
          weights=None,
          include_top=False)
      output_layers = _RESNET_MODEL_OUTPUT_LAYERS[
          self._resnet_v1_base_model_name]
      outputs = [full_resnet_v1_model.get_layer(output_layer_name).output
                 for output_layer_name in output_layers]
      self.classification_backbone = tf.keras.Model(
          inputs=full_resnet_v1_model.inputs,
          outputs=outputs)
      backbone_outputs = self.classification_backbone(
          full_resnet_v1_model.inputs)

      # construct FPN feature generator
      self._base_fpn_max_level = min(self._fpn_max_level, 5)
...@@ -236,7 +238,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor(
    """
    with tf.name_scope(name):
      with tf.name_scope('ResnetV1FPN'):
        # TODO(yiming): Add a batchnorm layer between two fc layers.
        feature_extractor_model = tf.keras.models.Sequential([
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(units=1024, activation='relu'),
...@@ -283,12 +285,15 @@ class FasterRCNNResnet50FpnKerasFeatureExtractor(
        fpn_min_level=fpn_min_level,
        fpn_max_level=fpn_max_level,
        additional_layer_depth=additional_layer_depth,
        override_base_feature_extractor_hyperparams=
        override_base_feature_extractor_hyperparams
    )


class FasterRCNNResnet101FpnKerasFeatureExtractor(
    FasterRCNNResnetV1FpnKerasFeatureExtractor):
  """Faster RCNN with Resnet101 FPN feature extractor."""

  def __init__(self,
               is_training,
               first_stage_features_stride=16,
...@@ -323,7 +328,8 @@ class FasterRCNNResnet101FpnKerasFeatureExtractor(
        fpn_min_level=fpn_min_level,
        fpn_max_level=fpn_max_level,
        additional_layer_depth=additional_layer_depth,
        override_base_feature_extractor_hyperparams=
        override_base_feature_extractor_hyperparams)


class FasterRCNNResnet152FpnKerasFeatureExtractor(
...@@ -364,4 +370,5 @@ class FasterRCNNResnet152FpnKerasFeatureExtractor(
        fpn_min_level=fpn_min_level,
        fpn_max_level=fpn_max_level,
        additional_layer_depth=additional_layer_depth,
        override_base_feature_extractor_hyperparams=
        override_base_feature_extractor_hyperparams)
...@@ -21,8 +21,8 @@ from google.protobuf import text_format

from object_detection.builders import hyperparams_builder
from object_detection.models import faster_rcnn_resnet_v1_fpn_keras_feature_extractor as frcnn_res_fpn
from object_detection.protos import hyperparams_pb2
from object_detection.utils import tf_version


@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
...@@ -40,7 +40,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractorTest(tf.test.TestCase):
        }
      }
    """
    text_format.Parse(conv_hyperparams_text_proto, conv_hyperparams)
    return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)

  def _build_feature_extractor(self):
...
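The Merge-to-Parse switch above is a strictness fix: text_format.Merge lets a later assignment to a singular field silently overwrite an earlier one, while text_format.Parse rejects the duplicate. A minimal sketch of the difference, assuming the `activation` enum field with RELU_6/NONE values exists on Hyperparams as in the released proto:

from google.protobuf import text_format
from object_detection.protos import hyperparams_pb2

duplicated = 'activation: RELU_6 activation: NONE'  # singular field set twice
msg = hyperparams_pb2.Hyperparams()
text_format.Merge(duplicated, msg)  # silently keeps the last value (NONE)
try:
  text_format.Parse(duplicated, hyperparams_pb2.Hyperparams())
except text_format.ParseError:
  pass  # Parse surfaces the duplicate instead of masking a likely typo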
...@@ -21,6 +21,7 @@ from __future__ import print_function

import tensorflow.compat.v1 as tf

from tensorflow.python.keras.applications import resnet

from object_detection.core import freezable_batch_norm
from object_detection.models.keras_models import model_utils
...@@ -95,11 +96,11 @@ class _LayersOverride(object):
    self.regularizer = tf.keras.regularizers.l2(weight_decay)
    self.initializer = tf.variance_scaling_initializer()

  def _FixedPaddingLayer(self, kernel_size, rate=1):  # pylint: disable=invalid-name
    return tf.keras.layers.Lambda(
        lambda x: _fixed_padding(x, kernel_size, rate))

  def Conv2D(self, filters, kernel_size, **kwargs):  # pylint: disable=invalid-name
    """Builds a Conv2D layer according to the current Object Detection config.

    Overrides the Keras Resnet application's convolutions with ones that
...@@ -141,7 +142,7 @@ class _LayersOverride(object):
    else:
      return tf.keras.layers.Conv2D(filters, kernel_size, **kwargs)

  def Activation(self, *args, **kwargs):  # pylint: disable=unused-argument,invalid-name
    """Builds an activation layer.

    Overrides the Keras application Activation layer specified by the
...@@ -163,7 +164,7 @@ class _LayersOverride(object):
    else:
      return tf.keras.layers.Lambda(tf.nn.relu, name=name)

  def BatchNormalization(self, **kwargs):  # pylint: disable=invalid-name
    """Builds a normalization layer.

    Overrides the Keras application batch norm with the norm specified by the
...@@ -191,7 +192,7 @@ class _LayersOverride(object):
        momentum=self._default_batchnorm_momentum,
        **kwargs)

  def Input(self, shape):  # pylint: disable=invalid-name
    """Builds an Input layer.

    Overrides the Keras application Input layer with one that uses a
...@@ -219,7 +220,7 @@ class _LayersOverride(object):
        input=input_tensor, shape=[None] + shape)
    return model_utils.input_layer(shape, placeholder_with_default)

  def MaxPooling2D(self, pool_size, **kwargs):  # pylint: disable=invalid-name
    """Builds a MaxPooling2D layer with default padding as 'SAME'.

    This is specified by the default resnet arg_scope in slim.
...@@ -237,7 +238,7 @@ class _LayersOverride(object):
  # Add alias as Keras also has it.
  MaxPool2D = MaxPooling2D  # pylint: disable=invalid-name

  def ZeroPadding2D(self, padding, **kwargs):  # pylint: disable=unused-argument,invalid-name
    """Replaces explicit padding in the Keras application with a no-op.

    Args:
...@@ -395,3 +396,146 @@ def resnet_v1_152(batchnorm_training,
  return tf.keras.applications.resnet.ResNet152(
      layers=layers_override, **kwargs)
# pylint: enable=invalid-name
# The following code is based on the existing Keras ResNet model pattern:
# google3/third_party/tensorflow/python/keras/applications/resnet.py


def block_basic(x,
                filters,
                kernel_size=3,
                stride=1,
                conv_shortcut=False,
                name=None):
  """A residual block for ResNet18/34.

  Args:
    x: input tensor.
    filters: integer, filters of the block's convolution layers.
    kernel_size: default 3, kernel size of the block's convolution layers.
    stride: default 1, stride of the block's second (strided) convolution.
    conv_shortcut: default False, use convolution shortcut if True, otherwise
      identity shortcut.
    name: string, block label.

  Returns:
    Output tensor for the residual block.
  """
  layers = tf.keras.layers
  bn_axis = 3 if tf.keras.backend.image_data_format() == 'channels_last' else 1

  # Pre-activation: batch norm + ReLU run before the convolutions. The
  # projection shortcut branches off the pre-activated tensor; the identity
  # shortcut branches off the raw input.
  preact = layers.BatchNormalization(
      axis=bn_axis, epsilon=1.001e-5, name=name + '_preact_bn')(x)
  preact = layers.Activation('relu', name=name + '_preact_relu')(preact)

  if conv_shortcut:
    shortcut = layers.Conv2D(
        filters, 1, strides=1, name=name + '_0_conv')(preact)
  else:
    shortcut = layers.MaxPooling2D(1, strides=stride)(x) if stride > 1 else x

  x = layers.ZeroPadding2D(
      padding=((1, 1), (1, 1)), name=name + '_1_pad')(preact)
  x = layers.Conv2D(
      filters, kernel_size, strides=1, use_bias=False,
      name=name + '_1_conv')(x)
  x = layers.BatchNormalization(
      axis=bn_axis, epsilon=1.001e-5, name=name + '_1_bn')(x)
  x = layers.Activation('relu', name=name + '_1_relu')(x)

  x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x)
  x = layers.Conv2D(
      filters, kernel_size, strides=stride, use_bias=False,
      name=name + '_2_conv')(x)
  x = layers.BatchNormalization(
      axis=bn_axis, epsilon=1.001e-5, name=name + '_2_bn')(x)
  x = layers.Activation('relu', name=name + '_2_relu')(x)

  x = layers.Add(name=name + '_out')([shortcut, x])
  return x
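As a quick plumbing check of block_basic (a sketch, assuming the module imports as below): with an identity shortcut and stride 2, the 1x1 max pool downsamples the shortcut so the final Add sees matching shapes:

import tensorflow.compat.v1 as tf
from object_detection.models.keras_models import resnet_v1

inputs = tf.keras.Input(shape=(32, 32, 8))
# stride=2 without conv_shortcut: the strided second conv halves H and W,
# and MaxPooling2D(1, strides=2) downsamples the identity shortcut to match.
outputs = resnet_v1.block_basic(inputs, filters=8, stride=2, name='demo')
print(tf.keras.Model(inputs, outputs).output_shape)  # (None, 16, 16, 8)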

def stack_basic(x, filters, blocks, stride1=2, name=None):
  """A set of stacked residual blocks for ResNet18/34.

  Args:
    x: input tensor.
    filters: integer, filters of the convolution layers in each block.
    blocks: integer, number of blocks in the stack.
    stride1: default 2, stride used to downsample in the last block.
    name: string, stack label.

  Returns:
    Output tensor for the stacked blocks.
  """
  x = block_basic(x, filters, conv_shortcut=True, name=name + '_block1')
  for i in range(2, blocks):
    x = block_basic(x, filters, name=name + '_block' + str(i))
  x = block_basic(
      x, filters, stride=stride1, name=name + '_block' + str(blocks))
  return x
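And at the stack level (same import assumption): for blocks=2 the calls are block1 with a projection shortcut at stride 1, then block2 at stride1=2, so each stack downsamples in its last block:

import tensorflow.compat.v1 as tf
from object_detection.models.keras_models import resnet_v1

inputs = tf.keras.Input(shape=(16, 16, 64))
outputs = resnet_v1.stack_basic(inputs, filters=128, blocks=2, name='conv3')
print(tf.keras.Model(inputs, outputs).output_shape)  # (None, 8, 8, 128)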

def resnet_v1_18(include_top=True,
                 weights='imagenet',
                 input_tensor=None,
                 input_shape=None,
                 pooling=None,
                 classes=1000,
                 classifier_activation='softmax'):
  """Instantiates the ResNet18 architecture."""

  def stack_fn(x):
    x = stack_basic(x, 64, 2, stride1=1, name='conv2')
    x = stack_basic(x, 128, 2, name='conv3')
    x = stack_basic(x, 256, 2, name='conv4')
    return stack_basic(x, 512, 2, name='conv5')

  return resnet.ResNet(
      stack_fn,
      True,  # preact
      True,  # use_bias
      'resnet18',
      include_top,
      weights,
      input_tensor,
      input_shape,
      pooling,
      classes,
      classifier_activation=classifier_activation)

def resnet_v1_34(include_top=True,
                 weights='imagenet',
                 input_tensor=None,
                 input_shape=None,
                 pooling=None,
                 classes=1000,
                 classifier_activation='softmax'):
  """Instantiates the ResNet34 architecture."""

  def stack_fn(x):
    x = stack_basic(x, 64, 3, stride1=1, name='conv2')
    x = stack_basic(x, 128, 4, name='conv3')
    x = stack_basic(x, 256, 6, name='conv4')
    return stack_basic(x, 512, 3, name='conv5')

  return resnet.ResNet(
      stack_fn,
      True,  # preact
      True,  # use_bias
      'resnet34',
      include_top,
      weights,
      input_tensor,
      input_shape,
      pooling,
      classes,
      classifier_activation=classifier_activation)
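As a sanity note on the naming: counting two 3x3 convolutions per basic block plus the stem convolution and the classifier head reproduces the nominal depths of the two variants:

# Depth bookkeeping for the stack configurations above.
assert 2 * (2 + 2 + 2 + 2) + 2 == 18  # resnet_v1_18: stacks of 2/2/2/2
assert 2 * (3 + 4 + 6 + 3) + 2 == 34  # resnet_v1_34: stacks of 3/4/6/3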
...@@ -20,12 +20,13 @@ object detection. To verify the consistency of the two models, we compare:
2. Number of global variables.
"""
import unittest

from absl.testing import parameterized
import numpy as np
from six.moves import zip
import tensorflow.compat.v1 as tf

from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models.keras_models import resnet_v1
from object_detection.protos import hyperparams_pb2
...@@ -180,5 +181,46 @@ class ResnetV1Test(test_case.TestCase):
    self.assertEqual(len(variables), var_num)


class ResnetShapeTest(test_case.TestCase, parameterized.TestCase):

  @unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
  @parameterized.parameters(
      {
          'resnet_type': 'resnet_v1_34',
          'output_layer_names': [
              'conv2_block3_out', 'conv3_block4_out', 'conv4_block6_out',
              'conv5_block3_out'
          ]
      },
      {
          'resnet_type': 'resnet_v1_18',
          'output_layer_names': [
              'conv2_block2_out', 'conv3_block2_out', 'conv4_block2_out',
              'conv5_block2_out'
          ]
      })
  def test_output_shapes(self, resnet_type, output_layer_names):
    if resnet_type == 'resnet_v1_34':
      model = resnet_v1.resnet_v1_34(weights=None)
    else:
      model = resnet_v1.resnet_v1_18(weights=None)
    outputs = [
        model.get_layer(output_layer_name).output
        for output_layer_name in output_layer_names
    ]
    resnet_model = tf.keras.models.Model(inputs=model.input, outputs=outputs)
    outputs = resnet_model(np.zeros((2, 64, 64, 3), dtype=np.float32))

    # Check the conv2-level output (stride 4, 64 channels):
    self.assertEqual(outputs[0].shape, [2, 16, 16, 64])
    # Check the conv3-level output (stride 8, 128 channels):
    self.assertEqual(outputs[1].shape, [2, 8, 8, 128])
    # Check the conv4-level output (stride 16, 256 channels):
    self.assertEqual(outputs[2].shape, [2, 4, 4, 256])
    # Check the conv5-level output (stride 32, 512 channels):
    self.assertEqual(outputs[3].shape, [2, 2, 2, 512])


if __name__ == '__main__':
  tf.test.main()
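The asserted shapes simply follow the standard ResNet stride schedule (conv2 through conv5 at strides 4, 8, 16, 32) with the basic-block widths used above; a sketch of that arithmetic:

# Expected feature shapes for a 64x64 input, batch of 2.
batch, input_size = 2, 64
strides = [4, 8, 16, 32]      # conv2_out .. conv5_out
widths = [64, 128, 256, 512]  # basic-block filter counts for ResNet-18/34
expected = [(batch, input_size // s, input_size // s, w)
            for s, w in zip(strides, widths)]
assert expected == [(2, 16, 16, 64), (2, 8, 8, 128),
                    (2, 4, 4, 256), (2, 2, 2, 512)]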