"...text-generation-inference.git" did not exist on "46aeb0860dae0c5a1e5990dff50f8d381fddce61"
Unverified Commit 0ad4922f authored by vivek rathod's avatar vivek rathod Committed by GitHub
Browse files

Merged commit includes the following changes: (#8809)



320335495  by rathodv:

    Remove hparams support from TF1 main binaries as it's not available in the TF1.15 runtime on Cloud AI Platform.

--
320278161  by ronnyvotel:

    Exposing DensePose fields to model libraries.

--
320277319  by rathodv:

    Remove the TPU name check since the TPU is automatically inferred under Cloud AI Platform.

--
320258215  by rathodv:

    Internal Change.

--
320245458  by yuhuic:

    Updated the CenterNet restore_from_objects function to be compatible with
    existing configs that load converted checkpoints.

--
320225405  by jonathanhuang:

    Small change to Keras box predictor and box heads to fix export errors for SSD and Faster R-CNN.

--
320145077  by aom:

    Implements EfficientDet feature extractor.

--

PiperOrigin-RevId: 320335495
Co-authored-by: default avatarTF Object Detection Team <no-reply@google.com>
parent 571369aa
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
"""A function to build a DetectionModel from configuration.""" """A function to build a DetectionModel from configuration."""
import functools import functools
import sys
from object_detection.builders import anchor_generator_builder from object_detection.builders import anchor_generator_builder
from object_detection.builders import box_coder_builder from object_detection.builders import box_coder_builder
from object_detection.builders import box_predictor_builder from object_detection.builders import box_predictor_builder
...@@ -58,6 +59,8 @@ if tf_version.is_tf2(): ...@@ -58,6 +59,8 @@ if tf_version.is_tf2():
from object_detection.models.ssd_mobilenet_v2_fpn_keras_feature_extractor import SSDMobileNetV2FpnKerasFeatureExtractor from object_detection.models.ssd_mobilenet_v2_fpn_keras_feature_extractor import SSDMobileNetV2FpnKerasFeatureExtractor
from object_detection.models.ssd_mobilenet_v2_keras_feature_extractor import SSDMobileNetV2KerasFeatureExtractor from object_detection.models.ssd_mobilenet_v2_keras_feature_extractor import SSDMobileNetV2KerasFeatureExtractor
from object_detection.predictors import rfcn_keras_box_predictor from object_detection.predictors import rfcn_keras_box_predictor
if sys.version_info[0] >= 3:
from object_detection.models import ssd_efficientnet_bifpn_feature_extractor as ssd_efficientnet_bifpn
if tf_version.is_tf1(): if tf_version.is_tf1():
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
...@@ -99,6 +102,22 @@ if tf_version.is_tf2(): ...@@ -99,6 +102,22 @@ if tf_version.is_tf2():
ssd_resnet_v1_fpn_keras.SSDResNet101V1FpnKerasFeatureExtractor, ssd_resnet_v1_fpn_keras.SSDResNet101V1FpnKerasFeatureExtractor,
'ssd_resnet152_v1_fpn_keras': 'ssd_resnet152_v1_fpn_keras':
ssd_resnet_v1_fpn_keras.SSDResNet152V1FpnKerasFeatureExtractor, ssd_resnet_v1_fpn_keras.SSDResNet152V1FpnKerasFeatureExtractor,
'ssd_efficientnet-b0_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB0BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b1_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB1BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b2_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB2BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b3_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB3BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b4_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB4BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b5_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB5BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b6_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB6BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b7_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB7BiFPNKerasFeatureExtractor,
} }
FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP = { FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP = {
...@@ -310,6 +329,14 @@ def _build_ssd_feature_extractor(feature_extractor_config, ...@@ -310,6 +329,14 @@ def _build_ssd_feature_extractor(feature_extractor_config,
feature_extractor_config.fpn.additional_layer_depth, feature_extractor_config.fpn.additional_layer_depth,
}) })
if feature_extractor_config.HasField('bifpn'):
kwargs.update({
'bifpn_min_level': feature_extractor_config.bifpn.min_level,
'bifpn_max_level': feature_extractor_config.bifpn.max_level,
'bifpn_num_iterations': feature_extractor_config.bifpn.num_iterations,
'bifpn_num_filters': feature_extractor_config.bifpn.num_filters,
'bifpn_combine_method': feature_extractor_config.bifpn.combine_method,
})
return feature_extractor_class(**kwargs) return feature_extractor_class(**kwargs)
......
...@@ -39,6 +39,9 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase): ...@@ -39,6 +39,9 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase):
def ssd_feature_extractors(self): def ssd_feature_extractors(self):
raise NotImplementedError raise NotImplementedError
  def get_override_base_feature_extractor_hyperparams(self, extractor_type):
    """Whether `extractor_type` needs override_base_feature_extractor_hyperparams.

    Abstract hook; TF1/TF2 subclasses return True for the feature extractor
    types whose configs must set the override flag to build successfully.

    Args:
      extractor_type: String key of an SSD feature extractor class map entry.

    Raises:
      NotImplementedError: always, in this base class.
    """
    raise NotImplementedError
def faster_rcnn_feature_extractors(self): def faster_rcnn_feature_extractors(self):
raise NotImplementedError raise NotImplementedError
...@@ -70,7 +73,6 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase): ...@@ -70,7 +73,6 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase):
} }
} }
} }
override_base_feature_extractor_hyperparams: true
} }
box_coder { box_coder {
faster_rcnn_box_coder { faster_rcnn_box_coder {
...@@ -205,6 +207,8 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase): ...@@ -205,6 +207,8 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase):
for extractor_type, extractor_class in self.ssd_feature_extractors().items( for extractor_type, extractor_class in self.ssd_feature_extractors().items(
): ):
model_proto.ssd.feature_extractor.type = extractor_type model_proto.ssd.feature_extractor.type = extractor_type
model_proto.ssd.feature_extractor.override_base_feature_extractor_hyperparams = (
self.get_override_base_feature_extractor_hyperparams(extractor_type))
model = model_builder.build(model_proto, is_training=True) model = model_builder.build(model_proto, is_training=True)
self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch) self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
self.assertIsInstance(model._feature_extractor, extractor_class) self.assertIsInstance(model._feature_extractor, extractor_class)
......
...@@ -38,6 +38,9 @@ class ModelBuilderTF1Test(model_builder_test.ModelBuilderTest): ...@@ -38,6 +38,9 @@ class ModelBuilderTF1Test(model_builder_test.ModelBuilderTest):
def ssd_feature_extractors(self): def ssd_feature_extractors(self):
return model_builder.SSD_FEATURE_EXTRACTOR_CLASS_MAP return model_builder.SSD_FEATURE_EXTRACTOR_CLASS_MAP
def get_override_base_feature_extractor_hyperparams(self, extractor_type):
return extractor_type in {'ssd_inception_v2', 'ssd_inception_v3'}
def faster_rcnn_feature_extractors(self): def faster_rcnn_feature_extractors(self):
return model_builder.FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP return model_builder.FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP
......
...@@ -42,6 +42,9 @@ class ModelBuilderTF2Test(model_builder_test.ModelBuilderTest): ...@@ -42,6 +42,9 @@ class ModelBuilderTF2Test(model_builder_test.ModelBuilderTest):
def ssd_feature_extractors(self): def ssd_feature_extractors(self):
return model_builder.SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP return model_builder.SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP
def get_override_base_feature_extractor_hyperparams(self, extractor_type):
return extractor_type in {}
def faster_rcnn_feature_extractors(self): def faster_rcnn_feature_extractors(self):
return model_builder.FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP return model_builder.FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP
......
...@@ -134,7 +134,7 @@ class BoxPredictor(object): ...@@ -134,7 +134,7 @@ class BoxPredictor(object):
pass pass
class KerasBoxPredictor(tf.keras.Model): class KerasBoxPredictor(tf.keras.layers.Layer):
"""Keras-based BoxPredictor.""" """Keras-based BoxPredictor."""
def __init__(self, is_training, num_classes, freeze_batchnorm, def __init__(self, is_training, num_classes, freeze_batchnorm,
......
...@@ -251,9 +251,14 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)): ...@@ -251,9 +251,14 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
detection_classes: [batch, max_detections] detection_classes: [batch, max_detections]
(If a model is producing class-agnostic detections, this field may be (If a model is producing class-agnostic detections, this field may be
missing) missing)
instance_masks: [batch, max_detections, image_height, image_width] detection_masks: [batch, max_detections, mask_height, mask_width]
(optional) (optional)
keypoints: [batch, max_detections, num_keypoints, 2] (optional) detection_keypoints: [batch, max_detections, num_keypoints, 2]
(optional)
detection_keypoint_scores: [batch, max_detections, num_keypoints]
(optional)
detection_surface_coords: [batch, max_detections, mask_height,
mask_width, 2] (optional)
num_detections: [batch] num_detections: [batch]
In addition to the above fields this stage also outputs the following In addition to the above fields this stage also outputs the following
......
...@@ -2749,6 +2749,14 @@ class CenterNetMetaArch(model.DetectionModel): ...@@ -2749,6 +2749,14 @@ class CenterNetMetaArch(model.DetectionModel):
checkpoint (with compatible variable names) or to restore from a checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training. classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'. Valid values: `detection`, `classification`. Default 'detection'.
'detection': used when loading in the Hourglass model pre-trained on
other detection task.
'classification': used when loading in the ResNet model pre-trained on
image classification task. Note that only the image feature encoding
part is loaded but not those upsampling layers.
'fine_tune': used when loading the entire CenterNet feature extractor
pre-trained on other tasks. The checkpoints saved during CenterNet
model training can be directly loaded using this mode.
Returns: Returns:
A dict mapping keys to Trackable objects (tf.Module or Checkpoint). A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
...@@ -2757,10 +2765,13 @@ class CenterNetMetaArch(model.DetectionModel): ...@@ -2757,10 +2765,13 @@ class CenterNetMetaArch(model.DetectionModel):
if fine_tune_checkpoint_type == 'classification': if fine_tune_checkpoint_type == 'classification':
return {'feature_extractor': self._feature_extractor.get_base_model()} return {'feature_extractor': self._feature_extractor.get_base_model()}
if fine_tune_checkpoint_type == 'detection': elif fine_tune_checkpoint_type == 'detection':
fake_model = tf.train.Checkpoint( return {'feature_extractor': self._feature_extractor.get_model()}
elif fine_tune_checkpoint_type == 'fine_tune':
feature_extractor_model = tf.train.Checkpoint(
_feature_extractor=self._feature_extractor) _feature_extractor=self._feature_extractor)
return {'model': fake_model} return {'model': feature_extractor_model}
else: else:
raise ValueError('Not supported fine tune checkpoint type - {}'.format( raise ValueError('Not supported fine tune checkpoint type - {}'.format(
......
...@@ -432,14 +432,9 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -432,14 +432,9 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
return eval_metric_ops return eval_metric_ops
def _check_mask_type_and_value(array_name, masks): def convert_masks_to_binary(masks):
"""Checks whether mask dtype is uint8 and the values are either 0 or 1.""" """Converts masks to 0 or 1 and uint8 type."""
if masks.dtype != np.uint8: return (masks > 0).astype(np.uint8)
raise ValueError('{} must be of type np.uint8. Found {}.'.format(
array_name, masks.dtype))
if np.any(np.logical_and(masks != 0, masks != 1)):
raise ValueError('{} elements can only be either 0 or 1.'.format(
array_name))
class CocoKeypointEvaluator(CocoDetectionEvaluator): class CocoKeypointEvaluator(CocoDetectionEvaluator):
...@@ -952,9 +947,8 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -952,9 +947,8 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
groundtruth_instance_masks = groundtruth_dict[ groundtruth_instance_masks = groundtruth_dict[
standard_fields.InputDataFields.groundtruth_instance_masks] standard_fields.InputDataFields.groundtruth_instance_masks]
_check_mask_type_and_value(standard_fields.InputDataFields. groundtruth_instance_masks = convert_masks_to_binary(
groundtruth_instance_masks, groundtruth_instance_masks)
groundtruth_instance_masks)
self._groundtruth_list.extend( self._groundtruth_list.extend(
coco_tools. coco_tools.
ExportSingleImageGroundtruthToCoco( ExportSingleImageGroundtruthToCoco(
...@@ -1013,9 +1007,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -1013,9 +1007,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
'are incompatible: {} vs {}'.format( 'are incompatible: {} vs {}'.format(
groundtruth_masks_shape, groundtruth_masks_shape,
detection_masks.shape)) detection_masks.shape))
_check_mask_type_and_value(standard_fields.DetectionResultFields. detection_masks = convert_masks_to_binary(detection_masks)
detection_masks,
detection_masks)
self._detection_masks_list.extend( self._detection_masks_list.extend(
coco_tools.ExportSingleImageDetectionMasksToCoco( coco_tools.ExportSingleImageDetectionMasksToCoco(
image_id=image_id, image_id=image_id,
......
...@@ -1424,14 +1424,16 @@ class CocoMaskEvaluationTest(tf.test.TestCase): ...@@ -1424,14 +1424,16 @@ class CocoMaskEvaluationTest(tf.test.TestCase):
image_id='image3', image_id='image3',
detections_dict={ detections_dict={
standard_fields.DetectionResultFields.detection_boxes: standard_fields.DetectionResultFields.detection_boxes:
np.array([[25., 25., 50., 50.]]), np.array([[25., 25., 50., 50.]]),
standard_fields.DetectionResultFields.detection_scores: standard_fields.DetectionResultFields.detection_scores:
np.array([.8]), np.array([.8]),
standard_fields.DetectionResultFields.detection_classes: standard_fields.DetectionResultFields.detection_classes:
np.array([1]), np.array([1]),
standard_fields.DetectionResultFields.detection_masks: standard_fields.DetectionResultFields.detection_masks:
np.pad(np.ones([1, 25, 25], dtype=np.uint8), # The value of 5 is equivalent to 1, since masks will be
((0, 0), (10, 10), (10, 10)), mode='constant') # thresholded and binarized before evaluation.
np.pad(5 * np.ones([1, 25, 25], dtype=np.uint8),
((0, 0), (10, 10), (10, 10)), mode='constant')
}) })
metrics = coco_evaluator.evaluate() metrics = coco_evaluator.evaluate()
self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0) self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0)
......
...@@ -43,7 +43,6 @@ from object_detection.utils import visualization_utils as vis_utils ...@@ -43,7 +43,6 @@ from object_detection.utils import visualization_utils as vis_utils
# pylint: disable=g-import-not-at-top # pylint: disable=g-import-not-at-top
try: try:
from tensorflow.contrib import learn as contrib_learn from tensorflow.contrib import learn as contrib_learn
from tensorflow.contrib import tpu as contrib_tpu
except ImportError: except ImportError:
# TF 2.0 doesn't ship with contrib. # TF 2.0 doesn't ship with contrib.
pass pass
...@@ -94,6 +93,15 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic, ...@@ -94,6 +93,15 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
of groundtruth boxes per image.. of groundtruth boxes per image..
'groundtruth_keypoints': [batch_size, num_boxes, num_keypoints, 2] float32 'groundtruth_keypoints': [batch_size, num_boxes, num_keypoints, 2] float32
tensor of keypoints (if provided in groundtruth). tensor of keypoints (if provided in groundtruth).
'groundtruth_dp_num_points_list': [batch_size, num_boxes] int32 tensor
with the number of DensePose points for each instance (if provided in
groundtruth).
'groundtruth_dp_part_ids_list': [batch_size, num_boxes,
max_sampled_points] int32 tensor with the part ids for each DensePose
sampled point (if provided in groundtruth).
'groundtruth_dp_surface_coords_list': [batch_size, num_boxes,
max_sampled_points, 4] containing the DensePose surface coordinates for
each sampled point (if provided in groundtruth).
'groundtruth_group_of': [batch_size, num_boxes] bool tensor indicating 'groundtruth_group_of': [batch_size, num_boxes] bool tensor indicating
group_of annotations (if provided in groundtruth). group_of annotations (if provided in groundtruth).
'groundtruth_labeled_classes': [batch_size, num_classes] int64 'groundtruth_labeled_classes': [batch_size, num_classes] int64
...@@ -164,6 +172,21 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic, ...@@ -164,6 +172,21 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
groundtruth[input_data_fields.groundtruth_labeled_classes] = tf.stack( groundtruth[input_data_fields.groundtruth_labeled_classes] = tf.stack(
labeled_classes) labeled_classes)
if detection_model.groundtruth_has_field(
fields.BoxListFields.densepose_num_points):
groundtruth[input_data_fields.groundtruth_dp_num_points] = tf.stack(
detection_model.groundtruth_lists(
fields.BoxListFields.densepose_num_points))
if detection_model.groundtruth_has_field(
fields.BoxListFields.densepose_part_ids):
groundtruth[input_data_fields.groundtruth_dp_part_ids] = tf.stack(
detection_model.groundtruth_lists(
fields.BoxListFields.densepose_part_ids))
if detection_model.groundtruth_has_field(
fields.BoxListFields.densepose_surface_coords):
groundtruth[input_data_fields.groundtruth_dp_surface_coords] = tf.stack(
detection_model.groundtruth_lists(
fields.BoxListFields.densepose_surface_coords))
groundtruth[input_data_fields.num_groundtruth_boxes] = ( groundtruth[input_data_fields.num_groundtruth_boxes] = (
tf.tile([max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]])) tf.tile([max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]]))
return groundtruth return groundtruth
...@@ -219,6 +242,9 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True): ...@@ -219,6 +242,9 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
fields.InputDataFields.groundtruth_boxes, fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_keypoints, fields.InputDataFields.groundtruth_keypoints,
fields.InputDataFields.groundtruth_keypoint_visibilities, fields.InputDataFields.groundtruth_keypoint_visibilities,
fields.InputDataFields.groundtruth_dp_num_points,
fields.InputDataFields.groundtruth_dp_part_ids,
fields.InputDataFields.groundtruth_dp_surface_coords,
fields.InputDataFields.groundtruth_group_of, fields.InputDataFields.groundtruth_group_of,
fields.InputDataFields.groundtruth_difficult, fields.InputDataFields.groundtruth_difficult,
fields.InputDataFields.groundtruth_is_crowd, fields.InputDataFields.groundtruth_is_crowd,
...@@ -269,6 +295,18 @@ def provide_groundtruth(model, labels): ...@@ -269,6 +295,18 @@ def provide_groundtruth(model, labels):
if fields.InputDataFields.groundtruth_keypoint_visibilities in labels: if fields.InputDataFields.groundtruth_keypoint_visibilities in labels:
gt_keypoint_visibilities_list = labels[ gt_keypoint_visibilities_list = labels[
fields.InputDataFields.groundtruth_keypoint_visibilities] fields.InputDataFields.groundtruth_keypoint_visibilities]
gt_dp_num_points_list = None
if fields.InputDataFields.groundtruth_dp_num_points in labels:
gt_dp_num_points_list = labels[
fields.InputDataFields.groundtruth_dp_num_points]
gt_dp_part_ids_list = None
if fields.InputDataFields.groundtruth_dp_part_ids in labels:
gt_dp_part_ids_list = labels[
fields.InputDataFields.groundtruth_dp_part_ids]
gt_dp_surface_coords_list = None
if fields.InputDataFields.groundtruth_dp_surface_coords in labels:
gt_dp_surface_coords_list = labels[
fields.InputDataFields.groundtruth_dp_surface_coords]
gt_weights_list = None gt_weights_list = None
if fields.InputDataFields.groundtruth_weights in labels: if fields.InputDataFields.groundtruth_weights in labels:
gt_weights_list = labels[fields.InputDataFields.groundtruth_weights] gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
...@@ -297,13 +335,16 @@ def provide_groundtruth(model, labels): ...@@ -297,13 +335,16 @@ def provide_groundtruth(model, labels):
groundtruth_masks_list=gt_masks_list, groundtruth_masks_list=gt_masks_list,
groundtruth_keypoints_list=gt_keypoints_list, groundtruth_keypoints_list=gt_keypoints_list,
groundtruth_keypoint_visibilities_list=gt_keypoint_visibilities_list, groundtruth_keypoint_visibilities_list=gt_keypoint_visibilities_list,
groundtruth_dp_num_points_list=gt_dp_num_points_list,
groundtruth_dp_part_ids_list=gt_dp_part_ids_list,
groundtruth_dp_surface_coords_list=gt_dp_surface_coords_list,
groundtruth_weights_list=gt_weights_list, groundtruth_weights_list=gt_weights_list,
groundtruth_is_crowd_list=gt_is_crowd_list, groundtruth_is_crowd_list=gt_is_crowd_list,
groundtruth_group_of_list=gt_group_of_list, groundtruth_group_of_list=gt_group_of_list,
groundtruth_area_list=gt_area_list) groundtruth_area_list=gt_area_list)
def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False, def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False,
postprocess_on_cpu=False): postprocess_on_cpu=False):
"""Creates a model function for `Estimator`. """Creates a model function for `Estimator`.
...@@ -377,7 +418,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False, ...@@ -377,7 +418,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
side_inputs = detection_model.get_side_inputs(features) side_inputs = detection_model.get_side_inputs(features)
if use_tpu and train_config.use_bfloat16: if use_tpu and train_config.use_bfloat16:
with contrib_tpu.bfloat16_scope(): with tf.tpu.bfloat16_scope():
prediction_dict = detection_model.predict( prediction_dict = detection_model.predict(
preprocessed_images, preprocessed_images,
features[fields.InputDataFields.true_image_shape], **side_inputs) features[fields.InputDataFields.true_image_shape], **side_inputs)
...@@ -392,7 +433,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False, ...@@ -392,7 +433,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT): if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
if use_tpu and postprocess_on_cpu: if use_tpu and postprocess_on_cpu:
detections = contrib_tpu.outside_compilation( detections = tf.tpu.outside_compilation(
postprocess_wrapper, postprocess_wrapper,
(prediction_dict, (prediction_dict,
features[fields.InputDataFields.true_image_shape])) features[fields.InputDataFields.true_image_shape]))
...@@ -468,7 +509,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False, ...@@ -468,7 +509,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
if mode == tf.estimator.ModeKeys.TRAIN: if mode == tf.estimator.ModeKeys.TRAIN:
if use_tpu: if use_tpu:
training_optimizer = contrib_tpu.CrossShardOptimizer(training_optimizer) training_optimizer = tf.tpu.CrossShardOptimizer(training_optimizer)
# Optionally freeze some layers by setting their gradients to be zero. # Optionally freeze some layers by setting their gradients to be zero.
trainable_variables = None trainable_variables = None
...@@ -588,7 +629,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False, ...@@ -588,7 +629,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
# EVAL executes on CPU, so use regular non-TPU EstimatorSpec. # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
if use_tpu and mode != tf.estimator.ModeKeys.EVAL: if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
return contrib_tpu.TPUEstimatorSpec( return tf.estimator.tpu.TPUEstimatorSpec(
mode=mode, mode=mode,
scaffold_fn=scaffold_fn, scaffold_fn=scaffold_fn,
predictions=detections, predictions=detections,
...@@ -619,8 +660,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False, ...@@ -619,8 +660,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
def create_estimator_and_inputs(run_config, def create_estimator_and_inputs(run_config,
hparams, hparams=None,
pipeline_config_path, pipeline_config_path=None,
config_override=None, config_override=None,
train_steps=None, train_steps=None,
sample_1_of_n_eval_examples=1, sample_1_of_n_eval_examples=1,
...@@ -639,7 +680,7 @@ def create_estimator_and_inputs(run_config, ...@@ -639,7 +680,7 @@ def create_estimator_and_inputs(run_config,
Args: Args:
run_config: A `RunConfig`. run_config: A `RunConfig`.
hparams: A `HParams`. hparams: (optional) A `HParams`.
pipeline_config_path: A path to a pipeline config file. pipeline_config_path: A path to a pipeline config file.
config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
override the config from `pipeline_config_path`. override the config from `pipeline_config_path`.
...@@ -762,14 +803,14 @@ def create_estimator_and_inputs(run_config, ...@@ -762,14 +803,14 @@ def create_estimator_and_inputs(run_config,
model_config=model_config, predict_input_config=eval_input_configs[0]) model_config=model_config, predict_input_config=eval_input_configs[0])
# Read export_to_tpu from hparams if not passed. # Read export_to_tpu from hparams if not passed.
if export_to_tpu is None: if export_to_tpu is None and hparams is not None:
export_to_tpu = hparams.get('export_to_tpu', False) export_to_tpu = hparams.get('export_to_tpu', False)
tf.logging.info('create_estimator_and_inputs: use_tpu %s, export_to_tpu %s', tf.logging.info('create_estimator_and_inputs: use_tpu %s, export_to_tpu %s',
use_tpu, export_to_tpu) use_tpu, export_to_tpu)
model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu, model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu,
postprocess_on_cpu) postprocess_on_cpu)
if use_tpu_estimator: if use_tpu_estimator:
estimator = contrib_tpu.TPUEstimator( estimator = tf.estimator.tpu.TPUEstimator(
model_fn=model_fn, model_fn=model_fn,
train_batch_size=train_config.batch_size, train_batch_size=train_config.batch_size,
# For each core, only batch size 1 is supported for eval. # For each core, only batch size 1 is supported for eval.
......
...@@ -93,6 +93,12 @@ def _compute_losses_and_predictions_dicts( ...@@ -93,6 +93,12 @@ def _compute_losses_and_predictions_dicts(
instance masks for objects. instance masks for objects.
labels[fields.InputDataFields.groundtruth_keypoints] is a labels[fields.InputDataFields.groundtruth_keypoints] is a
float32 tensor containing keypoints for each box. float32 tensor containing keypoints for each box.
labels[fields.InputDataFields.groundtruth_dp_num_points] is an int32
tensor with the number of sampled DensePose points per object.
labels[fields.InputDataFields.groundtruth_dp_part_ids] is an int32
tensor with the DensePose part ids (0-indexed) per object.
labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
float32 tensor with the DensePose surface coordinates.
labels[fields.InputDataFields.groundtruth_group_of] is a tf.bool tensor labels[fields.InputDataFields.groundtruth_group_of] is a tf.bool tensor
containing group_of annotations. containing group_of annotations.
labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32 labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
...@@ -195,6 +201,17 @@ def eager_train_step(detection_model, ...@@ -195,6 +201,17 @@ def eager_train_step(detection_model,
labels[fields.InputDataFields.groundtruth_keypoints] is a labels[fields.InputDataFields.groundtruth_keypoints] is a
[batch_size, num_boxes, num_keypoints, 2] float32 tensor containing [batch_size, num_boxes, num_keypoints, 2] float32 tensor containing
keypoints for each box. keypoints for each box.
labels[fields.InputDataFields.groundtruth_dp_num_points] is a
[batch_size, num_boxes] int32 tensor with the number of DensePose
sampled points per instance.
labels[fields.InputDataFields.groundtruth_dp_part_ids] is a
[batch_size, num_boxes, max_sampled_points] int32 tensor with the
part ids (0-indexed) for each instance.
labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
[batch_size, num_boxes, max_sampled_points, 4] float32 tensor with the
surface coordinates for each point. Each surface coordinate is of the
form (y, x, v, u) where (y, x) are normalized image locations and
(v, u) are part-relative normalized surface coordinates.
labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32 labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
k-hot tensor of classes. k-hot tensor of classes.
unpad_groundtruth_tensors: A parameter passed to unstack_batch. unpad_groundtruth_tensors: A parameter passed to unstack_batch.
......
...@@ -22,7 +22,6 @@ from absl import flags ...@@ -22,7 +22,6 @@ from absl import flags
import tensorflow.compat.v1 as tf import tensorflow.compat.v1 as tf
from object_detection import model_hparams
from object_detection import model_lib from object_detection import model_lib
flags.DEFINE_string( flags.DEFINE_string(
...@@ -41,10 +40,6 @@ flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample ' ...@@ -41,10 +40,6 @@ flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
'one of every n train input examples for evaluation, ' 'one of every n train input examples for evaluation, '
'where n is provided. This is only used if ' 'where n is provided. This is only used if '
'`eval_training_data` is True.') '`eval_training_data` is True.')
flags.DEFINE_string(
'hparams_overrides', None, 'Hyperparameter overrides, '
'represented as a string containing comma-separated '
'hparam_name=value pairs.')
flags.DEFINE_string( flags.DEFINE_string(
'checkpoint_dir', None, 'Path to directory holding a checkpoint. If ' 'checkpoint_dir', None, 'Path to directory holding a checkpoint. If '
'`checkpoint_dir` is provided, this binary operates in eval-only mode, ' '`checkpoint_dir` is provided, this binary operates in eval-only mode, '
...@@ -68,7 +63,6 @@ def main(unused_argv): ...@@ -68,7 +63,6 @@ def main(unused_argv):
train_and_eval_dict = model_lib.create_estimator_and_inputs( train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config=config, run_config=config,
hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
pipeline_config_path=FLAGS.pipeline_config_path, pipeline_config_path=FLAGS.pipeline_config_path,
train_steps=FLAGS.num_train_steps, train_steps=FLAGS.num_train_steps,
sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples, sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
......
...@@ -83,9 +83,8 @@ def main(unused_argv): ...@@ -83,9 +83,8 @@ def main(unused_argv):
wait_interval=300, timeout=FLAGS.eval_timeout) wait_interval=300, timeout=FLAGS.eval_timeout)
else: else:
if FLAGS.use_tpu: if FLAGS.use_tpu:
if FLAGS.tpu_name is None: # TPU is automatically inferred if tpu_name is None and
raise ValueError('--tpu_name needs to be specified when use_tpu' # we are running under cloud ai-platform.
' is set.')
resolver = tf.distribute.cluster_resolver.TPUClusterResolver( resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
FLAGS.tpu_name) FLAGS.tpu_name)
tf.config.experimental_connect_to_cluster(resolver) tf.config.experimental_connect_to_cluster(resolver)
......
...@@ -26,18 +26,8 @@ from absl import flags ...@@ -26,18 +26,8 @@ from absl import flags
import tensorflow.compat.v1 as tf import tensorflow.compat.v1 as tf
from object_detection import model_hparams
from object_detection import model_lib from object_detection import model_lib
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import cluster_resolver as contrib_cluster_resolver
from tensorflow.contrib import tpu as contrib_tpu
except ImportError:
# TF 2.0 doesn't ship with contrib.
pass
# pylint: enable=g-import-not-at-top
tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs') tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs')
# Cloud TPU Cluster Resolvers # Cloud TPU Cluster Resolvers
...@@ -67,10 +57,6 @@ flags.DEFINE_string('mode', 'train', ...@@ -67,10 +57,6 @@ flags.DEFINE_string('mode', 'train',
flags.DEFINE_integer('train_batch_size', None, 'Batch size for training. If ' flags.DEFINE_integer('train_batch_size', None, 'Batch size for training. If '
'this is not provided, batch size is read from training ' 'this is not provided, batch size is read from training '
'config.') 'config.')
flags.DEFINE_string(
'hparams_overrides', None, 'Comma-separated list of '
'hyperparameters to override defaults.')
flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.') flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
flags.DEFINE_boolean('eval_training_data', False, flags.DEFINE_boolean('eval_training_data', False,
'If training data should be evaluated for this job.') 'If training data should be evaluated for this job.')
...@@ -99,15 +85,15 @@ def main(unused_argv): ...@@ -99,15 +85,15 @@ def main(unused_argv):
flags.mark_flag_as_required('pipeline_config_path') flags.mark_flag_as_required('pipeline_config_path')
tpu_cluster_resolver = ( tpu_cluster_resolver = (
contrib_cluster_resolver.TPUClusterResolver( tf.distribute.cluster_resolver.TPUClusterResolver(
tpu=[FLAGS.tpu_name], zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)) tpu=[FLAGS.tpu_name], zone=FLAGS.tpu_zone, project=FLAGS.gcp_project))
tpu_grpc_url = tpu_cluster_resolver.get_master() tpu_grpc_url = tpu_cluster_resolver.get_master()
config = contrib_tpu.RunConfig( config = tf.estimator.tpu.RunConfig(
master=tpu_grpc_url, master=tpu_grpc_url,
evaluation_master=tpu_grpc_url, evaluation_master=tpu_grpc_url,
model_dir=FLAGS.model_dir, model_dir=FLAGS.model_dir,
tpu_config=contrib_tpu.TPUConfig( tpu_config=tf.estimator.tpu.TPUConfig(
iterations_per_loop=FLAGS.iterations_per_loop, iterations_per_loop=FLAGS.iterations_per_loop,
num_shards=FLAGS.num_shards)) num_shards=FLAGS.num_shards))
...@@ -117,7 +103,6 @@ def main(unused_argv): ...@@ -117,7 +103,6 @@ def main(unused_argv):
train_and_eval_dict = model_lib.create_estimator_and_inputs( train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config=config, run_config=config,
hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
pipeline_config_path=FLAGS.pipeline_config_path, pipeline_config_path=FLAGS.pipeline_config_path,
train_steps=FLAGS.num_train_steps, train_steps=FLAGS.num_train_steps,
sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples, sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
......
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the ssd_efficientnet_bifpn_feature_extractor."""
import unittest
from absl.testing import parameterized
import numpy as np
import tensorflow.compat.v2 as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models import ssd_efficientnet_bifpn_feature_extractor
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
from object_detection.utils import tf_version
def _count_params(model, trainable_only=True):
"""Returns the count of all model parameters, or just trainable ones."""
if not trainable_only:
return model.count_params()
else:
return int(np.sum([
tf.keras.backend.count_params(p) for p in model.trainable_weights]))
# One parameter dict per EfficientDet scale (d0-d7). Each pairs an
# EfficientNet backbone with the BiFPN depth (num_iterations), width
# (num_filters), and node combine method used at that scale.
@parameterized.parameters(
    {'efficientdet_version': 'efficientdet-d0',
     'efficientnet_version': 'efficientnet-b0',
     'bifpn_num_iterations': 3,
     'bifpn_num_filters': 64,
     'bifpn_combine_method': 'fast_attention'},
    {'efficientdet_version': 'efficientdet-d1',
     'efficientnet_version': 'efficientnet-b1',
     'bifpn_num_iterations': 4,
     'bifpn_num_filters': 88,
     'bifpn_combine_method': 'fast_attention'},
    {'efficientdet_version': 'efficientdet-d2',
     'efficientnet_version': 'efficientnet-b2',
     'bifpn_num_iterations': 5,
     'bifpn_num_filters': 112,
     'bifpn_combine_method': 'fast_attention'},
    {'efficientdet_version': 'efficientdet-d3',
     'efficientnet_version': 'efficientnet-b3',
     'bifpn_num_iterations': 6,
     'bifpn_num_filters': 160,
     'bifpn_combine_method': 'fast_attention'},
    {'efficientdet_version': 'efficientdet-d4',
     'efficientnet_version': 'efficientnet-b4',
     'bifpn_num_iterations': 7,
     'bifpn_num_filters': 224,
     'bifpn_combine_method': 'fast_attention'},
    {'efficientdet_version': 'efficientdet-d5',
     'efficientnet_version': 'efficientnet-b5',
     'bifpn_num_iterations': 7,
     'bifpn_num_filters': 288,
     'bifpn_combine_method': 'fast_attention'},
    # efficientdet-d6 and efficientdet-d7 only differ in input size.
    {'efficientdet_version': 'efficientdet-d6-d7',
     'efficientnet_version': 'efficientnet-b6',
     'bifpn_num_iterations': 8,
     'bifpn_num_filters': 384,
     'bifpn_combine_method': 'sum'})
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class SSDEfficientNetBiFPNFeatureExtractorTest(
    test_case.TestCase, parameterized.TestCase):
  """Shape and parameter-count tests for the EfficientNet-BiFPN extractor."""

  def _build_conv_hyperparams(self, add_batch_norm=True):
    """Builds KerasLayerHyperparams from an inline hyperparams text proto.

    Args:
      add_batch_norm: Whether to append a batch_norm block to the proto
        before parsing it.

    Returns:
      A `hyperparams_builder.KerasLayerHyperparams` built from the proto.
    """
    conv_hyperparams = hyperparams_pb2.Hyperparams()
    conv_hyperparams_text_proto = """
      force_use_bias: true
      activation: SWISH
      regularizer {
        l2_regularizer {
          weight: 0.0004
        }
      }
      initializer {
        truncated_normal_initializer {
          stddev: 0.03
          mean: 0.0
        }
      }
    """
    if add_batch_norm:
      batch_norm_proto = """
        batch_norm {
          scale: true,
          decay: 0.99,
          epsilon: 0.001,
        }
      """
      conv_hyperparams_text_proto += batch_norm_proto
    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
    return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)

  def _create_feature_extractor(self,
                                efficientnet_version='efficientnet-b0',
                                bifpn_num_iterations=3,
                                bifpn_num_filters=64,
                                bifpn_combine_method='fast_attention'):
    """Constructs a new EfficientNetBiFPN feature extractor."""
    depth_multiplier = 1.0
    pad_to_multiple = 1
    min_depth = 16
    # Pyramid levels are fixed to 3..7 for all scales; only the backbone
    # version and the BiFPN depth/width/combine method vary per test case.
    return (ssd_efficientnet_bifpn_feature_extractor
            .SSDEfficientNetBiFPNKerasFeatureExtractor(
                is_training=True,
                depth_multiplier=depth_multiplier,
                min_depth=min_depth,
                pad_to_multiple=pad_to_multiple,
                conv_hyperparams=self._build_conv_hyperparams(),
                freeze_batchnorm=False,
                inplace_batchnorm_update=False,
                bifpn_min_level=3,
                bifpn_max_level=7,
                bifpn_num_iterations=bifpn_num_iterations,
                bifpn_num_filters=bifpn_num_filters,
                bifpn_combine_method=bifpn_combine_method,
                efficientnet_version=efficientnet_version))

  # NOTE: `parameterized.parameters` passes every key of each dict, so the
  # test methods must accept `efficientdet_version` even when unused.
  def test_efficientdet_feature_extractor_shapes(self,
                                                 efficientdet_version,
                                                 efficientnet_version,
                                                 bifpn_num_iterations,
                                                 bifpn_num_filters,
                                                 bifpn_combine_method):
    """Checks output feature-map shapes for a batch of 256x256 inputs."""
    feature_extractor = self._create_feature_extractor(
        efficientnet_version=efficientnet_version,
        bifpn_num_iterations=bifpn_num_iterations,
        bifpn_num_filters=bifpn_num_filters,
        bifpn_combine_method=bifpn_combine_method)
    outputs = feature_extractor(np.zeros((2, 256, 256, 3), dtype=np.float32))
    # Levels 3..7: spatial extent halves per level (256 / 2**level) and every
    # level carries `bifpn_num_filters` channels.
    self.assertEqual(outputs[0].shape, (2, 32, 32, bifpn_num_filters))
    self.assertEqual(outputs[1].shape, (2, 16, 16, bifpn_num_filters))
    self.assertEqual(outputs[2].shape, (2, 8, 8, bifpn_num_filters))
    self.assertEqual(outputs[3].shape, (2, 4, 4, bifpn_num_filters))
    self.assertEqual(outputs[4].shape, (2, 2, 2, bifpn_num_filters))

  def test_efficientdet_feature_extractor_params(self,
                                                 efficientdet_version,
                                                 efficientnet_version,
                                                 bifpn_num_iterations,
                                                 bifpn_num_filters,
                                                 bifpn_combine_method):
    """Checks the trainable parameter count for each EfficientDet scale."""
    feature_extractor = self._create_feature_extractor(
        efficientnet_version=efficientnet_version,
        bifpn_num_iterations=bifpn_num_iterations,
        bifpn_num_filters=bifpn_num_filters,
        bifpn_combine_method=bifpn_combine_method)
    # Call once so the Keras layers are built and their weights created.
    _ = feature_extractor(np.zeros((2, 256, 256, 3), dtype=np.float32))
    # Golden trainable-parameter counts per scale; a mismatch indicates an
    # unintended architecture change.
    expected_params = {
        'efficientdet-d0': 5484829,
        'efficientdet-d1': 8185156,
        'efficientdet-d2': 9818153,
        'efficientdet-d3': 13792706,
        'efficientdet-d4': 22691445,
        'efficientdet-d5': 35795677,
        'efficientdet-d6-d7': 53624512,
    }
    num_params = _count_params(feature_extractor)
    self.assertEqual(expected_params[efficientdet_version], num_params)
# Run all test cases in this module when executed as a script.
if __name__ == '__main__':
  tf.test.main()
...@@ -61,7 +61,7 @@ class Head(object): ...@@ -61,7 +61,7 @@ class Head(object):
pass pass
class KerasHead(tf.keras.Model): class KerasHead(tf.keras.layers.Layer):
"""Keras head base class.""" """Keras head base class."""
def call(self, features): def call(self, features):
......
...@@ -145,7 +145,7 @@ message Ssd { ...@@ -145,7 +145,7 @@ message Ssd {
optional MaskHead mask_head_config = 25; optional MaskHead mask_head_config = 25;
} }
// Next id: 19. // Next id: 20.
message SsdFeatureExtractor { message SsdFeatureExtractor {
reserved 6; reserved 6;
...@@ -185,8 +185,13 @@ message SsdFeatureExtractor { ...@@ -185,8 +185,13 @@ message SsdFeatureExtractor {
// feature maps added by SSD. // feature maps added by SSD.
optional bool use_depthwise = 8 [default = false]; optional bool use_depthwise = 8 [default = false];
// Feature Pyramid Networks config. oneof feature_pyramid_oneof {
optional FeaturePyramidNetworks fpn = 10; // Feature Pyramid Networks config.
FeaturePyramidNetworks fpn = 10;
// Bidirectional Feature Pyramid Networks config.
BidirectionalFeaturePyramidNetworks bifpn = 19;
}
// If true, replace preprocess function of feature extractor with a // If true, replace preprocess function of feature extractor with a
// placeholder. This should only be used if all the image preprocessing steps // placeholder. This should only be used if all the image preprocessing steps
...@@ -225,3 +230,23 @@ message FeaturePyramidNetworks { ...@@ -225,3 +230,23 @@ message FeaturePyramidNetworks {
} }
// Configuration for Bidirectional Feature Pyramid Networks (BiFPN), as used
// by BiFPN-based SSD feature extractors (e.g. EfficientDet).
message BidirectionalFeaturePyramidNetworks {
  // Minimum level in the feature pyramid.
  optional int32 min_level = 1 [default = 3];

  // Maximum level in the feature pyramid.
  optional int32 max_level = 2 [default = 7];

  // The number of repeated top-down bottom-up iterations for BiFPN-based
  // feature extractors (bidirectional feature pyramid networks).
  optional int32 num_iterations = 3;

  // The number of filters (channels) to use in feature pyramid layers for
  // BiFPN-based feature extractors (bidirectional feature pyramid networks).
  optional int32 num_filters = 4;

  // Method used to combine inputs to BiFPN nodes; values exercised elsewhere
  // in the codebase include 'fast_attention' and 'sum'.
  optional string combine_method = 5 [default = 'fast_attention'];
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment