Unverified Commit 0ad4922f authored by vivek rathod's avatar vivek rathod Committed by GitHub
Browse files

Merged commit includes the following changes: (#8809)



320335495  by rathodv:

    Remove hparams support from TF1 main binaries, as it is not available in the TF1.15 runtime on Cloud AI Platform.

--
320278161  by ronnyvotel:

    Exposing DensePose fields to model libraries.

--
320277319  by rathodv:

    Remove TPU Name check since TPU is automatically inferred under cloud AI platform.

--
320258215  by rathodv:

    Internal Change.

--
320245458  by yuhuic:

    Updated the CenterNet restore_from_objects function to be compatible with
    existing configs that load converted checkpoints.

--
320225405  by jonathanhuang:

    Small change to Keras box predictor and box heads to fix export errors for SSD and Faster R-CNN.

--
320145077  by aom:

    Implements EfficientDet feature extractor.

--

PiperOrigin-RevId: 320335495
Co-authored-by: TF Object Detection Team <no-reply@google.com>
parent 571369aa
......@@ -16,6 +16,7 @@
"""A function to build a DetectionModel from configuration."""
import functools
import sys
from object_detection.builders import anchor_generator_builder
from object_detection.builders import box_coder_builder
from object_detection.builders import box_predictor_builder
......@@ -58,6 +59,8 @@ if tf_version.is_tf2():
from object_detection.models.ssd_mobilenet_v2_fpn_keras_feature_extractor import SSDMobileNetV2FpnKerasFeatureExtractor
from object_detection.models.ssd_mobilenet_v2_keras_feature_extractor import SSDMobileNetV2KerasFeatureExtractor
from object_detection.predictors import rfcn_keras_box_predictor
if sys.version_info[0] >= 3:
from object_detection.models import ssd_efficientnet_bifpn_feature_extractor as ssd_efficientnet_bifpn
if tf_version.is_tf1():
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
......@@ -99,6 +102,22 @@ if tf_version.is_tf2():
ssd_resnet_v1_fpn_keras.SSDResNet101V1FpnKerasFeatureExtractor,
'ssd_resnet152_v1_fpn_keras':
ssd_resnet_v1_fpn_keras.SSDResNet152V1FpnKerasFeatureExtractor,
'ssd_efficientnet-b0_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB0BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b1_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB1BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b2_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB2BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b3_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB3BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b4_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB4BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b5_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB5BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b6_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB6BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b7_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB7BiFPNKerasFeatureExtractor,
}
FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP = {
......@@ -310,6 +329,14 @@ def _build_ssd_feature_extractor(feature_extractor_config,
feature_extractor_config.fpn.additional_layer_depth,
})
if feature_extractor_config.HasField('bifpn'):
kwargs.update({
'bifpn_min_level': feature_extractor_config.bifpn.min_level,
'bifpn_max_level': feature_extractor_config.bifpn.max_level,
'bifpn_num_iterations': feature_extractor_config.bifpn.num_iterations,
'bifpn_num_filters': feature_extractor_config.bifpn.num_filters,
'bifpn_combine_method': feature_extractor_config.bifpn.combine_method,
})
return feature_extractor_class(**kwargs)
......
......@@ -39,6 +39,9 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase):
def ssd_feature_extractors(self):
raise NotImplementedError
def get_override_base_feature_extractor_hyperparams(self, extractor_type):
raise NotImplementedError
def faster_rcnn_feature_extractors(self):
raise NotImplementedError
......@@ -70,7 +73,6 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase):
}
}
}
override_base_feature_extractor_hyperparams: true
}
box_coder {
faster_rcnn_box_coder {
......@@ -205,6 +207,8 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase):
for extractor_type, extractor_class in self.ssd_feature_extractors().items(
):
model_proto.ssd.feature_extractor.type = extractor_type
model_proto.ssd.feature_extractor.override_base_feature_extractor_hyperparams = (
self.get_override_base_feature_extractor_hyperparams(extractor_type))
model = model_builder.build(model_proto, is_training=True)
self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
self.assertIsInstance(model._feature_extractor, extractor_class)
......
......@@ -38,6 +38,9 @@ class ModelBuilderTF1Test(model_builder_test.ModelBuilderTest):
def ssd_feature_extractors(self):
return model_builder.SSD_FEATURE_EXTRACTOR_CLASS_MAP
def get_override_base_feature_extractor_hyperparams(self, extractor_type):
  """Tells whether `extractor_type` is one of the Inception SSD extractors.

  Args:
    extractor_type: feature extractor type name to look up.

  Returns:
    True for 'ssd_inception_v2' and 'ssd_inception_v3', False otherwise.
  """
  inception_extractor_types = {'ssd_inception_v2', 'ssd_inception_v3'}
  return extractor_type in inception_extractor_types
def faster_rcnn_feature_extractors(self):
return model_builder.FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP
......
......@@ -42,6 +42,9 @@ class ModelBuilderTF2Test(model_builder_test.ModelBuilderTest):
def ssd_feature_extractors(self):
return model_builder.SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP
def get_override_base_feature_extractor_hyperparams(self, extractor_type):
  """Tells whether `extractor_type` belongs to the (empty) override set.

  Args:
    extractor_type: feature extractor type name to look up.

  Returns:
    Always False, because the set of extractor types is empty.
  """
  # `{}` is an empty dict literal, not a set; spell out the empty set so the
  # membership test reads the same way as a non-empty set of type names.
  return extractor_type in frozenset()
def faster_rcnn_feature_extractors(self):
return model_builder.FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP
......
......@@ -134,7 +134,7 @@ class BoxPredictor(object):
pass
class KerasBoxPredictor(tf.keras.Model):
class KerasBoxPredictor(tf.keras.layers.Layer):
"""Keras-based BoxPredictor."""
def __init__(self, is_training, num_classes, freeze_batchnorm,
......
......@@ -251,9 +251,14 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
detection_classes: [batch, max_detections]
(If a model is producing class-agnostic detections, this field may be
missing)
instance_masks: [batch, max_detections, image_height, image_width]
detection_masks: [batch, max_detections, mask_height, mask_width]
(optional)
keypoints: [batch, max_detections, num_keypoints, 2] (optional)
detection_keypoints: [batch, max_detections, num_keypoints, 2]
(optional)
detection_keypoint_scores: [batch, max_detections, num_keypoints]
(optional)
detection_surface_coords: [batch, max_detections, mask_height,
mask_width, 2] (optional)
num_detections: [batch]
In addition to the above fields this stage also outputs the following
......
......@@ -2749,6 +2749,14 @@ class CenterNetMetaArch(model.DetectionModel):
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'.
'detection': used when loading in the Hourglass model pre-trained on
other detection task.
'classification': used when loading in the ResNet model pre-trained on
image classification task. Note that only the image feature encoding
part is loaded but not those upsampling layers.
'fine_tune': used when loading the entire CenterNet feature extractor
pre-trained on other tasks. The checkpoints saved during CenterNet
model training can be directly loaded using this mode.
Returns:
A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
......@@ -2757,10 +2765,13 @@ class CenterNetMetaArch(model.DetectionModel):
if fine_tune_checkpoint_type == 'classification':
return {'feature_extractor': self._feature_extractor.get_base_model()}
if fine_tune_checkpoint_type == 'detection':
fake_model = tf.train.Checkpoint(
elif fine_tune_checkpoint_type == 'detection':
return {'feature_extractor': self._feature_extractor.get_model()}
elif fine_tune_checkpoint_type == 'fine_tune':
feature_extractor_model = tf.train.Checkpoint(
_feature_extractor=self._feature_extractor)
return {'model': fake_model}
return {'model': feature_extractor_model}
else:
raise ValueError('Not supported fine tune checkpoint type - {}'.format(
......
......@@ -432,14 +432,9 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
return eval_metric_ops
def _check_mask_type_and_value(array_name, masks):
"""Checks whether mask dtype is uint8 and the values are either 0 or 1."""
if masks.dtype != np.uint8:
raise ValueError('{} must be of type np.uint8. Found {}.'.format(
array_name, masks.dtype))
if np.any(np.logical_and(masks != 0, masks != 1)):
raise ValueError('{} elements can only be either 0 or 1.'.format(
array_name))
def convert_masks_to_binary(masks):
  """Converts masks to 0 or 1 and uint8 type.

  Args:
    masks: numeric mask values, either a numpy array or any array-like that
      `np.asanyarray` accepts (e.g. a nested list).

  Returns:
    A np.uint8 array with the same shape as `masks`, holding 1 where the input
    is strictly greater than 0 and 0 everywhere else.
  """
  # asanyarray passes existing arrays through untouched and lets plain
  # sequences be thresholded too (a bare `list > 0` would raise TypeError).
  return (np.asanyarray(masks) > 0).astype(np.uint8)
class CocoKeypointEvaluator(CocoDetectionEvaluator):
......@@ -952,9 +947,8 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
groundtruth_instance_masks = groundtruth_dict[
standard_fields.InputDataFields.groundtruth_instance_masks]
_check_mask_type_and_value(standard_fields.InputDataFields.
groundtruth_instance_masks,
groundtruth_instance_masks)
groundtruth_instance_masks = convert_masks_to_binary(
groundtruth_instance_masks)
self._groundtruth_list.extend(
coco_tools.
ExportSingleImageGroundtruthToCoco(
......@@ -1013,9 +1007,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
'are incompatible: {} vs {}'.format(
groundtruth_masks_shape,
detection_masks.shape))
_check_mask_type_and_value(standard_fields.DetectionResultFields.
detection_masks,
detection_masks)
detection_masks = convert_masks_to_binary(detection_masks)
self._detection_masks_list.extend(
coco_tools.ExportSingleImageDetectionMasksToCoco(
image_id=image_id,
......
......@@ -1424,14 +1424,16 @@ class CocoMaskEvaluationTest(tf.test.TestCase):
image_id='image3',
detections_dict={
standard_fields.DetectionResultFields.detection_boxes:
np.array([[25., 25., 50., 50.]]),
np.array([[25., 25., 50., 50.]]),
standard_fields.DetectionResultFields.detection_scores:
np.array([.8]),
np.array([.8]),
standard_fields.DetectionResultFields.detection_classes:
np.array([1]),
np.array([1]),
standard_fields.DetectionResultFields.detection_masks:
np.pad(np.ones([1, 25, 25], dtype=np.uint8),
((0, 0), (10, 10), (10, 10)), mode='constant')
# The value of 5 is equivalent to 1, since masks will be
# thresholded and binarized before evaluation.
np.pad(5 * np.ones([1, 25, 25], dtype=np.uint8),
((0, 0), (10, 10), (10, 10)), mode='constant')
})
metrics = coco_evaluator.evaluate()
self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0)
......
......@@ -43,7 +43,6 @@ from object_detection.utils import visualization_utils as vis_utils
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import learn as contrib_learn
from tensorflow.contrib import tpu as contrib_tpu
except ImportError:
# TF 2.0 doesn't ship with contrib.
pass
......@@ -94,6 +93,15 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
of groundtruth boxes per image..
'groundtruth_keypoints': [batch_size, num_boxes, num_keypoints, 2] float32
tensor of keypoints (if provided in groundtruth).
'groundtruth_dp_num_points_list': [batch_size, num_boxes] int32 tensor
with the number of DensePose points for each instance (if provided in
groundtruth).
'groundtruth_dp_part_ids_list': [batch_size, num_boxes,
max_sampled_points] int32 tensor with the part ids for each DensePose
sampled point (if provided in groundtruth).
'groundtruth_dp_surface_coords_list': [batch_size, num_boxes,
max_sampled_points, 4] containing the DensePose surface coordinates for
each sampled point (if provided in groundtruth).
'groundtruth_group_of': [batch_size, num_boxes] bool tensor indicating
group_of annotations (if provided in groundtruth).
'groundtruth_labeled_classes': [batch_size, num_classes] int64
......@@ -164,6 +172,21 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
groundtruth[input_data_fields.groundtruth_labeled_classes] = tf.stack(
labeled_classes)
if detection_model.groundtruth_has_field(
fields.BoxListFields.densepose_num_points):
groundtruth[input_data_fields.groundtruth_dp_num_points] = tf.stack(
detection_model.groundtruth_lists(
fields.BoxListFields.densepose_num_points))
if detection_model.groundtruth_has_field(
fields.BoxListFields.densepose_part_ids):
groundtruth[input_data_fields.groundtruth_dp_part_ids] = tf.stack(
detection_model.groundtruth_lists(
fields.BoxListFields.densepose_part_ids))
if detection_model.groundtruth_has_field(
fields.BoxListFields.densepose_surface_coords):
groundtruth[input_data_fields.groundtruth_dp_surface_coords] = tf.stack(
detection_model.groundtruth_lists(
fields.BoxListFields.densepose_surface_coords))
groundtruth[input_data_fields.num_groundtruth_boxes] = (
tf.tile([max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]]))
return groundtruth
......@@ -219,6 +242,9 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_keypoints,
fields.InputDataFields.groundtruth_keypoint_visibilities,
fields.InputDataFields.groundtruth_dp_num_points,
fields.InputDataFields.groundtruth_dp_part_ids,
fields.InputDataFields.groundtruth_dp_surface_coords,
fields.InputDataFields.groundtruth_group_of,
fields.InputDataFields.groundtruth_difficult,
fields.InputDataFields.groundtruth_is_crowd,
......@@ -269,6 +295,18 @@ def provide_groundtruth(model, labels):
if fields.InputDataFields.groundtruth_keypoint_visibilities in labels:
gt_keypoint_visibilities_list = labels[
fields.InputDataFields.groundtruth_keypoint_visibilities]
gt_dp_num_points_list = None
if fields.InputDataFields.groundtruth_dp_num_points in labels:
gt_dp_num_points_list = labels[
fields.InputDataFields.groundtruth_dp_num_points]
gt_dp_part_ids_list = None
if fields.InputDataFields.groundtruth_dp_part_ids in labels:
gt_dp_part_ids_list = labels[
fields.InputDataFields.groundtruth_dp_part_ids]
gt_dp_surface_coords_list = None
if fields.InputDataFields.groundtruth_dp_surface_coords in labels:
gt_dp_surface_coords_list = labels[
fields.InputDataFields.groundtruth_dp_surface_coords]
gt_weights_list = None
if fields.InputDataFields.groundtruth_weights in labels:
gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
......@@ -297,13 +335,16 @@ def provide_groundtruth(model, labels):
groundtruth_masks_list=gt_masks_list,
groundtruth_keypoints_list=gt_keypoints_list,
groundtruth_keypoint_visibilities_list=gt_keypoint_visibilities_list,
groundtruth_dp_num_points_list=gt_dp_num_points_list,
groundtruth_dp_part_ids_list=gt_dp_part_ids_list,
groundtruth_dp_surface_coords_list=gt_dp_surface_coords_list,
groundtruth_weights_list=gt_weights_list,
groundtruth_is_crowd_list=gt_is_crowd_list,
groundtruth_group_of_list=gt_group_of_list,
groundtruth_area_list=gt_area_list)
def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False,
postprocess_on_cpu=False):
"""Creates a model function for `Estimator`.
......@@ -377,7 +418,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
side_inputs = detection_model.get_side_inputs(features)
if use_tpu and train_config.use_bfloat16:
with contrib_tpu.bfloat16_scope():
with tf.tpu.bfloat16_scope():
prediction_dict = detection_model.predict(
preprocessed_images,
features[fields.InputDataFields.true_image_shape], **side_inputs)
......@@ -392,7 +433,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
if use_tpu and postprocess_on_cpu:
detections = contrib_tpu.outside_compilation(
detections = tf.tpu.outside_compilation(
postprocess_wrapper,
(prediction_dict,
features[fields.InputDataFields.true_image_shape]))
......@@ -468,7 +509,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
if mode == tf.estimator.ModeKeys.TRAIN:
if use_tpu:
training_optimizer = contrib_tpu.CrossShardOptimizer(training_optimizer)
training_optimizer = tf.tpu.CrossShardOptimizer(training_optimizer)
# Optionally freeze some layers by setting their gradients to be zero.
trainable_variables = None
......@@ -588,7 +629,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
# EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
return contrib_tpu.TPUEstimatorSpec(
return tf.estimator.tpu.TPUEstimatorSpec(
mode=mode,
scaffold_fn=scaffold_fn,
predictions=detections,
......@@ -619,8 +660,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
def create_estimator_and_inputs(run_config,
hparams,
pipeline_config_path,
hparams=None,
pipeline_config_path=None,
config_override=None,
train_steps=None,
sample_1_of_n_eval_examples=1,
......@@ -639,7 +680,7 @@ def create_estimator_and_inputs(run_config,
Args:
run_config: A `RunConfig`.
hparams: A `HParams`.
hparams: (optional) A `HParams`.
pipeline_config_path: A path to a pipeline config file.
config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
override the config from `pipeline_config_path`.
......@@ -762,14 +803,14 @@ def create_estimator_and_inputs(run_config,
model_config=model_config, predict_input_config=eval_input_configs[0])
# Read export_to_tpu from hparams if not passed.
if export_to_tpu is None:
if export_to_tpu is None and hparams is not None:
export_to_tpu = hparams.get('export_to_tpu', False)
tf.logging.info('create_estimator_and_inputs: use_tpu %s, export_to_tpu %s',
use_tpu, export_to_tpu)
model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu,
postprocess_on_cpu)
if use_tpu_estimator:
estimator = contrib_tpu.TPUEstimator(
estimator = tf.estimator.tpu.TPUEstimator(
model_fn=model_fn,
train_batch_size=train_config.batch_size,
# For each core, only batch size 1 is supported for eval.
......
......@@ -93,6 +93,12 @@ def _compute_losses_and_predictions_dicts(
instance masks for objects.
labels[fields.InputDataFields.groundtruth_keypoints] is a
float32 tensor containing keypoints for each box.
labels[fields.InputDataFields.groundtruth_dp_num_points] is an int32
tensor with the number of sampled DensePose points per object.
labels[fields.InputDataFields.groundtruth_dp_part_ids] is an int32
tensor with the DensePose part ids (0-indexed) per object.
labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
float32 tensor with the DensePose surface coordinates.
labels[fields.InputDataFields.groundtruth_group_of] is a tf.bool tensor
containing group_of annotations.
labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
......@@ -195,6 +201,17 @@ def eager_train_step(detection_model,
labels[fields.InputDataFields.groundtruth_keypoints] is a
[batch_size, num_boxes, num_keypoints, 2] float32 tensor containing
keypoints for each box.
labels[fields.InputDataFields.groundtruth_dp_num_points] is a
[batch_size, num_boxes] int32 tensor with the number of DensePose
sampled points per instance.
labels[fields.InputDataFields.groundtruth_dp_part_ids] is a
[batch_size, num_boxes, max_sampled_points] int32 tensor with the
part ids (0-indexed) for each instance.
labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
[batch_size, num_boxes, max_sampled_points, 4] float32 tensor with the
surface coordinates for each point. Each surface coordinate is of the
form (y, x, v, u) where (y, x) are normalized image locations and
(v, u) are part-relative normalized surface coordinates.
labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
k-hot tensor of classes.
unpad_groundtruth_tensors: A parameter passed to unstack_batch.
......
......@@ -22,7 +22,6 @@ from absl import flags
import tensorflow.compat.v1 as tf
from object_detection import model_hparams
from object_detection import model_lib
flags.DEFINE_string(
......@@ -41,10 +40,6 @@ flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
'one of every n train input examples for evaluation, '
'where n is provided. This is only used if '
'`eval_training_data` is True.')
flags.DEFINE_string(
'hparams_overrides', None, 'Hyperparameter overrides, '
'represented as a string containing comma-separated '
'hparam_name=value pairs.')
flags.DEFINE_string(
'checkpoint_dir', None, 'Path to directory holding a checkpoint. If '
'`checkpoint_dir` is provided, this binary operates in eval-only mode, '
......@@ -68,7 +63,6 @@ def main(unused_argv):
train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config=config,
hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
pipeline_config_path=FLAGS.pipeline_config_path,
train_steps=FLAGS.num_train_steps,
sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
......
......@@ -83,9 +83,8 @@ def main(unused_argv):
wait_interval=300, timeout=FLAGS.eval_timeout)
else:
if FLAGS.use_tpu:
if FLAGS.tpu_name is None:
raise ValueError('--tpu_name needs to be specified when use_tpu'
' is set.')
# TPU is automatically inferred if tpu_name is None and
# we are running under cloud ai-platform.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
FLAGS.tpu_name)
tf.config.experimental_connect_to_cluster(resolver)
......
......@@ -26,18 +26,8 @@ from absl import flags
import tensorflow.compat.v1 as tf
from object_detection import model_hparams
from object_detection import model_lib
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import cluster_resolver as contrib_cluster_resolver
from tensorflow.contrib import tpu as contrib_tpu
except ImportError:
# TF 2.0 doesn't ship with contrib.
pass
# pylint: enable=g-import-not-at-top
tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs')
# Cloud TPU Cluster Resolvers
......@@ -67,10 +57,6 @@ flags.DEFINE_string('mode', 'train',
flags.DEFINE_integer('train_batch_size', None, 'Batch size for training. If '
'this is not provided, batch size is read from training '
'config.')
flags.DEFINE_string(
'hparams_overrides', None, 'Comma-separated list of '
'hyperparameters to override defaults.')
flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
flags.DEFINE_boolean('eval_training_data', False,
'If training data should be evaluated for this job.')
......@@ -99,15 +85,15 @@ def main(unused_argv):
flags.mark_flag_as_required('pipeline_config_path')
tpu_cluster_resolver = (
contrib_cluster_resolver.TPUClusterResolver(
tf.distribute.cluster_resolver.TPUClusterResolver(
tpu=[FLAGS.tpu_name], zone=FLAGS.tpu_zone, project=FLAGS.gcp_project))
tpu_grpc_url = tpu_cluster_resolver.get_master()
config = contrib_tpu.RunConfig(
config = tf.estimator.tpu.RunConfig(
master=tpu_grpc_url,
evaluation_master=tpu_grpc_url,
model_dir=FLAGS.model_dir,
tpu_config=contrib_tpu.TPUConfig(
tpu_config=tf.estimator.tpu.TPUConfig(
iterations_per_loop=FLAGS.iterations_per_loop,
num_shards=FLAGS.num_shards))
......@@ -117,7 +103,6 @@ def main(unused_argv):
train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config=config,
hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
pipeline_config_path=FLAGS.pipeline_config_path,
train_steps=FLAGS.num_train_steps,
sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
......
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSD Keras-based EfficientNet + BiFPN (EfficientDet) Feature Extractor."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl import logging
from six.moves import range
from six.moves import zip
import tensorflow.compat.v2 as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import bidirectional_feature_pyramid_generators as bifpn_generators
from object_detection.utils import ops
from object_detection.utils import shape_utils
from object_detection.utils import tf_version
# pylint: disable=g-import-not-at-top
if tf_version.is_tf2():
from official.vision.image_classification.efficientnet import efficientnet_model
# Maps a feature level (integer key) to the name of the EfficientNet layer
# whose output is taken as the backbone feature map for that level. The names
# are resolved with `get_layer(...)` on the EfficientNet model.
_EFFICIENTNET_LEVEL_ENDPOINTS = {
1: 'stack_0/block_0/project_bn',
2: 'stack_1/block_1/add',
3: 'stack_2/block_1/add',
4: 'stack_4/block_2/add',
5: 'stack_6/block_0/project_bn',
}
class SSDEfficientNetBiFPNKerasFeatureExtractor(
    ssd_meta_arch.SSDKerasFeatureExtractor):
  """SSD Keras-based EfficientNetBiFPN (EfficientDet) Feature Extractor."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               freeze_batchnorm,
               inplace_batchnorm_update,
               bifpn_min_level,
               bifpn_max_level,
               bifpn_num_iterations,
               bifpn_num_filters,
               bifpn_combine_method,
               efficientnet_version,
               use_explicit_padding=None,
               use_depthwise=None,
               override_base_feature_extractor_hyperparams=None,
               name=None):
    """SSD Keras-based EfficientNetBiFPN (EfficientDet) feature extractor.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
        multiplier for the feature extractor. Must be 1.0.
      min_depth: minimum feature extractor depth. Also acts as a lower bound
        on the number of BiFPN filters.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
        containing convolution hyperparameters for the layers added on top of
        the base feature extractor.
      freeze_batchnorm: whether to freeze batch norm parameters during training
        or not. When training with a small batch size (e.g. 1), it is desirable
        to freeze batch norm update and use pretrained batch norm params.
      inplace_batchnorm_update: whether to update batch norm moving average
        values inplace. When this is false train op must add a control
        dependency on tf.graphkeys.UPDATE_OPS collection in order to update
        batch norm statistics.
      bifpn_min_level: the highest resolution feature map to use in BiFPN.
        Backbone endpoints are defined in `_EFFICIENTNET_LEVEL_ENDPOINTS` for
        levels 1 through 5.
      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
        BiFPN construction uses feature maps starting from bifpn_min_level
        up to the bifpn_max_level. In the case that there are not enough
        feature maps in the backbone network, additional feature maps are
        created by applying stride 2 convolutions until we get the desired
        number of BiFPN levels.
      bifpn_num_iterations: number of BiFPN iterations.
      bifpn_num_filters: number of filters (channels) in all BiFPN layers. The
        value actually used is max(bifpn_num_filters, min_depth).
      bifpn_combine_method: the method used to combine BiFPN nodes.
      efficientnet_version: the EfficientNet version to use for this feature
        extractor's backbone.
      use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
        explicit padding when extracting features.
      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
        convolutions when inputs to a node have a differing number of channels,
        and use separable convolutions after combine operations.
      override_base_feature_extractor_hyperparams: unsupported. Whether to
        override hyperparameters of the base feature extractor with the one from
        `conv_hyperparams`.
      name: a string name scope to assign to the model. If 'None', Keras will
        auto-generate one from the class name.

    Raises:
      ValueError: if `depth_multiplier` is not 1.0, or if any of
        `use_explicit_padding`, `use_depthwise` or
        `override_base_feature_extractor_hyperparams` is truthy.
    """
    # The unsupported padding/depthwise options are always forwarded as None;
    # truthy values passed by the caller are rejected just below.
    super(SSDEfficientNetBiFPNKerasFeatureExtractor, self).__init__(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams=conv_hyperparams,
        freeze_batchnorm=freeze_batchnorm,
        inplace_batchnorm_update=inplace_batchnorm_update,
        use_explicit_padding=None,
        use_depthwise=None,
        override_base_feature_extractor_hyperparams=
        override_base_feature_extractor_hyperparams,
        name=name)
    if depth_multiplier != 1.0:
      raise ValueError('EfficientNetBiFPN does not support a non-default '
                       'depth_multiplier.')
    if use_explicit_padding:
      raise ValueError('EfficientNetBiFPN does not support explicit padding.')
    if use_depthwise:
      raise ValueError('EfficientNetBiFPN does not support use_depthwise.')
    if override_base_feature_extractor_hyperparams:
      raise ValueError('EfficientNetBiFPN does not support '
                       'override_base_feature_extractor_hyperparams.')

    self._bifpn_min_level = bifpn_min_level
    self._bifpn_max_level = bifpn_max_level
    self._bifpn_num_iterations = bifpn_num_iterations
    self._bifpn_num_filters = max(bifpn_num_filters, min_depth)
    self._bifpn_node_params = {'combine_method': bifpn_combine_method}
    self._efficientnet_version = efficientnet_version

    logging.info('EfficientDet EfficientNet backbone version: %s',
                 self._efficientnet_version)
    logging.info('EfficientDet BiFPN num filters: %d', self._bifpn_num_filters)
    logging.info('EfficientDet BiFPN num iterations: %d',
                 self._bifpn_num_iterations)

    # The backbone only provides the levels listed in
    # _EFFICIENTNET_LEVEL_ENDPOINTS, so cap the requested max level there.
    self._backbone_max_level = min(
        max(_EFFICIENTNET_LEVEL_ENDPOINTS), bifpn_max_level)
    backbone_levels = range(bifpn_min_level, self._backbone_max_level + 1)
    self._output_layer_names = [
        _EFFICIENTNET_LEVEL_ENDPOINTS[i] for i in backbone_levels]
    self._output_layer_alias = [
        'level_{}'.format(i) for i in backbone_levels]

    # Initialize the EfficientNet backbone.
    # Note, this is currently done in the init method rather than in the build
    # method, since doing so introduces an error which is not well understood.
    efficientnet_base = efficientnet_model.EfficientNet.from_name(
        model_name=self._efficientnet_version,
        overrides={'rescale_input': False})
    outputs = [efficientnet_base.get_layer(output_layer_name).output
               for output_layer_name in self._output_layer_names]
    self._efficientnet = tf.keras.Model(
        inputs=efficientnet_base.inputs, outputs=outputs)
    self.classification_backbone = efficientnet_base
    self._bifpn_stage = None

  def build(self, input_shape):
    """Creates the BiFPN stage from the settings stored by `__init__`.

    Args:
      input_shape: unused.
    """
    self._bifpn_stage = bifpn_generators.KerasBiFpnFeatureMaps(
        bifpn_num_iterations=self._bifpn_num_iterations,
        bifpn_num_filters=self._bifpn_num_filters,
        fpn_min_level=self._bifpn_min_level,
        fpn_max_level=self._bifpn_max_level,
        input_max_level=self._backbone_max_level,
        is_training=self._is_training,
        conv_hyperparams=self._conv_hyperparams,
        freeze_batchnorm=self._freeze_batchnorm,
        bifpn_node_params=self._bifpn_node_params,
        name='bifpn')
    self.built = True

  def preprocess(self, inputs):
    """SSD preprocessing.

    Channel-wise mean subtraction and scaling. Inputs whose last dimension is
    not 3 are returned unchanged.

    Args:
      inputs: a [batch, height, width, channels] float tensor representing a
        batch of images.

    Returns:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.
    """
    if inputs.shape.as_list()[3] == 3:
      # Input images are expected to be in the range [0, 255].
      channel_offset = [0.485, 0.456, 0.406]
      channel_scale = [0.229, 0.224, 0.225]
      return ((inputs / 255.0) - [[channel_offset]]) / [[channel_scale]]
    else:
      return inputs

  def _extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    base_feature_maps = self._efficientnet(
        ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple))

    # Materialize the (alias, feature map) pairs. A bare zip object is a
    # one-shot iterator: it yields nothing on a second traversal, and it is
    # not a list/tuple structure of tensors that a Keras layer call can
    # inspect as its inputs.
    output_feature_map_dict = self._bifpn_stage(
        list(zip(self._output_layer_alias, base_feature_maps)))

    return list(output_feature_map_dict.values())
class SSDEfficientNetB0BiFPNKerasFeatureExtractor(
    SSDEfficientNetBiFPNKerasFeatureExtractor):
  """SSD Keras EfficientNet-b0 BiFPN (EfficientDet-d0) Feature Extractor."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               freeze_batchnorm,
               inplace_batchnorm_update,
               bifpn_min_level=3,
               bifpn_max_level=7,
               bifpn_num_iterations=3,
               bifpn_num_filters=64,
               bifpn_combine_method='fast_attention',
               use_explicit_padding=None,
               use_depthwise=None,
               override_base_feature_extractor_hyperparams=None,
               name='EfficientDet-D0'):
    """Configures the extractor around a fixed EfficientNet-b0 backbone.

    All arguments are passed through unchanged to the parent constructor,
    which is additionally given `efficientnet_version='efficientnet-b0'`.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for the feature extractor.
        Unsupported by EfficientNetBiFPN.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
        holding the convolution hyperparameters for the layers added on top
        of the base feature extractor.
      freeze_batchnorm: whether to freeze batch norm parameters during
        training. Freezing (and relying on pretrained batch norm params) is
        desirable when training with a small batch size (e.g. 1).
      inplace_batchnorm_update: whether to update batch norm moving average
        values inplace. When false, the train op must add a control
        dependency on the tf.graphkeys.UPDATE_OPS collection in order to
        update batch norm statistics.
      bifpn_min_level: the highest resolution feature map level to use in
        the BiFPN.
        NOTE(review): earlier documentation listed {2, 3, 4, 5} as valid
        values with a ResNet block mapping; confirm the range that applies
        to EfficientNet backbones.
      bifpn_max_level: the smallest resolution feature map level to use in
        the BiFPN. The BiFPN uses feature maps from bifpn_min_level up to
        bifpn_max_level; when the backbone does not provide enough feature
        maps, extra ones are created with stride 2 convolutions until the
        desired number of BiFPN levels is reached.
      bifpn_num_iterations: number of BiFPN iterations.
      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
      bifpn_combine_method: the method used to combine BiFPN nodes.
      use_explicit_padding: whether to use explicit padding when extracting
        features. Unsupported by EfficientNetBiFPN.
      use_depthwise: unsupported by EfficientNetBiFPN, since the BiFPN uses
        regular convolutions when inputs to a node have a differing number
        of channels, and separable convolutions after combine operations.
      override_base_feature_extractor_hyperparams: unsupported. Whether to
        override hyperparameters of the base feature extractor with the
        ones from `conv_hyperparams`.
      name: a string name scope to assign to the model. If 'None', Keras
        will auto-generate one from the class name.
    """
    passthrough = dict(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams=conv_hyperparams,
        freeze_batchnorm=freeze_batchnorm,
        inplace_batchnorm_update=inplace_batchnorm_update,
        bifpn_min_level=bifpn_min_level,
        bifpn_max_level=bifpn_max_level,
        bifpn_num_iterations=bifpn_num_iterations,
        bifpn_num_filters=bifpn_num_filters,
        bifpn_combine_method=bifpn_combine_method,
        use_explicit_padding=use_explicit_padding,
        use_depthwise=use_depthwise,
        override_base_feature_extractor_hyperparams=(
            override_base_feature_extractor_hyperparams),
        name=name)
    # The backbone variant is the only value this subclass pins.
    super(SSDEfficientNetB0BiFPNKerasFeatureExtractor, self).__init__(
        efficientnet_version='efficientnet-b0', **passthrough)
class SSDEfficientNetB1BiFPNKerasFeatureExtractor(
    SSDEfficientNetBiFPNKerasFeatureExtractor):
  """SSD Keras EfficientNet-b1 BiFPN (EfficientDet-d1) Feature Extractor."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               freeze_batchnorm,
               inplace_batchnorm_update,
               bifpn_min_level=3,
               bifpn_max_level=7,
               bifpn_num_iterations=4,
               bifpn_num_filters=88,
               bifpn_combine_method='fast_attention',
               use_explicit_padding=None,
               use_depthwise=None,
               override_base_feature_extractor_hyperparams=None,
               name='EfficientDet-D1'):
    """Configures the extractor around a fixed EfficientNet-b1 backbone.

    All arguments are passed through unchanged to the parent constructor,
    which is additionally given `efficientnet_version='efficientnet-b1'`.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for the feature extractor.
        Unsupported by EfficientNetBiFPN.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
        holding the convolution hyperparameters for the layers added on top
        of the base feature extractor.
      freeze_batchnorm: whether to freeze batch norm parameters during
        training. Freezing (and relying on pretrained batch norm params) is
        desirable when training with a small batch size (e.g. 1).
      inplace_batchnorm_update: whether to update batch norm moving average
        values inplace. When false, the train op must add a control
        dependency on the tf.graphkeys.UPDATE_OPS collection in order to
        update batch norm statistics.
      bifpn_min_level: the highest resolution feature map level to use in
        the BiFPN.
        NOTE(review): earlier documentation listed {2, 3, 4, 5} as valid
        values with a ResNet block mapping; confirm the range that applies
        to EfficientNet backbones.
      bifpn_max_level: the smallest resolution feature map level to use in
        the BiFPN. The BiFPN uses feature maps from bifpn_min_level up to
        bifpn_max_level; when the backbone does not provide enough feature
        maps, extra ones are created with stride 2 convolutions until the
        desired number of BiFPN levels is reached.
      bifpn_num_iterations: number of BiFPN iterations.
      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
      bifpn_combine_method: the method used to combine BiFPN nodes.
      use_explicit_padding: whether to use explicit padding when extracting
        features. Unsupported by EfficientNetBiFPN.
      use_depthwise: unsupported by EfficientNetBiFPN, since the BiFPN uses
        regular convolutions when inputs to a node have a differing number
        of channels, and separable convolutions after combine operations.
      override_base_feature_extractor_hyperparams: unsupported. Whether to
        override hyperparameters of the base feature extractor with the
        ones from `conv_hyperparams`.
      name: a string name scope to assign to the model. If 'None', Keras
        will auto-generate one from the class name.
    """
    passthrough = dict(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams=conv_hyperparams,
        freeze_batchnorm=freeze_batchnorm,
        inplace_batchnorm_update=inplace_batchnorm_update,
        bifpn_min_level=bifpn_min_level,
        bifpn_max_level=bifpn_max_level,
        bifpn_num_iterations=bifpn_num_iterations,
        bifpn_num_filters=bifpn_num_filters,
        bifpn_combine_method=bifpn_combine_method,
        use_explicit_padding=use_explicit_padding,
        use_depthwise=use_depthwise,
        override_base_feature_extractor_hyperparams=(
            override_base_feature_extractor_hyperparams),
        name=name)
    # The backbone variant is the only value this subclass pins.
    super(SSDEfficientNetB1BiFPNKerasFeatureExtractor, self).__init__(
        efficientnet_version='efficientnet-b1', **passthrough)
class SSDEfficientNetB2BiFPNKerasFeatureExtractor(
    SSDEfficientNetBiFPNKerasFeatureExtractor):
  """SSD Keras EfficientNet-b2 BiFPN (EfficientDet-d2) Feature Extractor."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               freeze_batchnorm,
               inplace_batchnorm_update,
               bifpn_min_level=3,
               bifpn_max_level=7,
               bifpn_num_iterations=5,
               bifpn_num_filters=112,
               bifpn_combine_method='fast_attention',
               use_explicit_padding=None,
               use_depthwise=None,
               override_base_feature_extractor_hyperparams=None,
               name='EfficientDet-D2'):
    """Configures the extractor around a fixed EfficientNet-b2 backbone.

    All arguments are passed through unchanged to the parent constructor,
    which is additionally given `efficientnet_version='efficientnet-b2'`.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for the feature extractor.
        Unsupported by EfficientNetBiFPN.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
        holding the convolution hyperparameters for the layers added on top
        of the base feature extractor.
      freeze_batchnorm: whether to freeze batch norm parameters during
        training. Freezing (and relying on pretrained batch norm params) is
        desirable when training with a small batch size (e.g. 1).
      inplace_batchnorm_update: whether to update batch norm moving average
        values inplace. When false, the train op must add a control
        dependency on the tf.graphkeys.UPDATE_OPS collection in order to
        update batch norm statistics.
      bifpn_min_level: the highest resolution feature map level to use in
        the BiFPN.
        NOTE(review): earlier documentation listed {2, 3, 4, 5} as valid
        values with a ResNet block mapping; confirm the range that applies
        to EfficientNet backbones.
      bifpn_max_level: the smallest resolution feature map level to use in
        the BiFPN. The BiFPN uses feature maps from bifpn_min_level up to
        bifpn_max_level; when the backbone does not provide enough feature
        maps, extra ones are created with stride 2 convolutions until the
        desired number of BiFPN levels is reached.
      bifpn_num_iterations: number of BiFPN iterations.
      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
      bifpn_combine_method: the method used to combine BiFPN nodes.
      use_explicit_padding: whether to use explicit padding when extracting
        features. Unsupported by EfficientNetBiFPN.
      use_depthwise: unsupported by EfficientNetBiFPN, since the BiFPN uses
        regular convolutions when inputs to a node have a differing number
        of channels, and separable convolutions after combine operations.
      override_base_feature_extractor_hyperparams: unsupported. Whether to
        override hyperparameters of the base feature extractor with the
        ones from `conv_hyperparams`.
      name: a string name scope to assign to the model. If 'None', Keras
        will auto-generate one from the class name.
    """
    passthrough = dict(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams=conv_hyperparams,
        freeze_batchnorm=freeze_batchnorm,
        inplace_batchnorm_update=inplace_batchnorm_update,
        bifpn_min_level=bifpn_min_level,
        bifpn_max_level=bifpn_max_level,
        bifpn_num_iterations=bifpn_num_iterations,
        bifpn_num_filters=bifpn_num_filters,
        bifpn_combine_method=bifpn_combine_method,
        use_explicit_padding=use_explicit_padding,
        use_depthwise=use_depthwise,
        override_base_feature_extractor_hyperparams=(
            override_base_feature_extractor_hyperparams),
        name=name)
    # The backbone variant is the only value this subclass pins.
    super(SSDEfficientNetB2BiFPNKerasFeatureExtractor, self).__init__(
        efficientnet_version='efficientnet-b2', **passthrough)
class SSDEfficientNetB3BiFPNKerasFeatureExtractor(
    SSDEfficientNetBiFPNKerasFeatureExtractor):
  """SSD Keras EfficientNet-b3 BiFPN (EfficientDet-d3) Feature Extractor."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               freeze_batchnorm,
               inplace_batchnorm_update,
               bifpn_min_level=3,
               bifpn_max_level=7,
               bifpn_num_iterations=6,
               bifpn_num_filters=160,
               bifpn_combine_method='fast_attention',
               use_explicit_padding=None,
               use_depthwise=None,
               override_base_feature_extractor_hyperparams=None,
               name='EfficientDet-D3'):
    """Configures the extractor around a fixed EfficientNet-b3 backbone.

    All arguments are passed through unchanged to the parent constructor,
    which is additionally given `efficientnet_version='efficientnet-b3'`.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for the feature extractor.
        Unsupported by EfficientNetBiFPN.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
        holding the convolution hyperparameters for the layers added on top
        of the base feature extractor.
      freeze_batchnorm: whether to freeze batch norm parameters during
        training. Freezing (and relying on pretrained batch norm params) is
        desirable when training with a small batch size (e.g. 1).
      inplace_batchnorm_update: whether to update batch norm moving average
        values inplace. When false, the train op must add a control
        dependency on the tf.graphkeys.UPDATE_OPS collection in order to
        update batch norm statistics.
      bifpn_min_level: the highest resolution feature map level to use in
        the BiFPN.
        NOTE(review): earlier documentation listed {2, 3, 4, 5} as valid
        values with a ResNet block mapping; confirm the range that applies
        to EfficientNet backbones.
      bifpn_max_level: the smallest resolution feature map level to use in
        the BiFPN. The BiFPN uses feature maps from bifpn_min_level up to
        bifpn_max_level; when the backbone does not provide enough feature
        maps, extra ones are created with stride 2 convolutions until the
        desired number of BiFPN levels is reached.
      bifpn_num_iterations: number of BiFPN iterations.
      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
      bifpn_combine_method: the method used to combine BiFPN nodes.
      use_explicit_padding: whether to use explicit padding when extracting
        features. Unsupported by EfficientNetBiFPN.
      use_depthwise: unsupported by EfficientNetBiFPN, since the BiFPN uses
        regular convolutions when inputs to a node have a differing number
        of channels, and separable convolutions after combine operations.
      override_base_feature_extractor_hyperparams: unsupported. Whether to
        override hyperparameters of the base feature extractor with the
        ones from `conv_hyperparams`.
      name: a string name scope to assign to the model. If 'None', Keras
        will auto-generate one from the class name.
    """
    passthrough = dict(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams=conv_hyperparams,
        freeze_batchnorm=freeze_batchnorm,
        inplace_batchnorm_update=inplace_batchnorm_update,
        bifpn_min_level=bifpn_min_level,
        bifpn_max_level=bifpn_max_level,
        bifpn_num_iterations=bifpn_num_iterations,
        bifpn_num_filters=bifpn_num_filters,
        bifpn_combine_method=bifpn_combine_method,
        use_explicit_padding=use_explicit_padding,
        use_depthwise=use_depthwise,
        override_base_feature_extractor_hyperparams=(
            override_base_feature_extractor_hyperparams),
        name=name)
    # The backbone variant is the only value this subclass pins.
    super(SSDEfficientNetB3BiFPNKerasFeatureExtractor, self).__init__(
        efficientnet_version='efficientnet-b3', **passthrough)
class SSDEfficientNetB4BiFPNKerasFeatureExtractor(
    SSDEfficientNetBiFPNKerasFeatureExtractor):
  """SSD Keras EfficientNet-b4 BiFPN (EfficientDet-d4) Feature Extractor."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               freeze_batchnorm,
               inplace_batchnorm_update,
               bifpn_min_level=3,
               bifpn_max_level=7,
               bifpn_num_iterations=7,
               bifpn_num_filters=224,
               bifpn_combine_method='fast_attention',
               use_explicit_padding=None,
               use_depthwise=None,
               override_base_feature_extractor_hyperparams=None,
               name='EfficientDet-D4'):
    """Configures the extractor around a fixed EfficientNet-b4 backbone.

    All arguments are passed through unchanged to the parent constructor,
    which is additionally given `efficientnet_version='efficientnet-b4'`.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for the feature extractor.
        Unsupported by EfficientNetBiFPN.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
        holding the convolution hyperparameters for the layers added on top
        of the base feature extractor.
      freeze_batchnorm: whether to freeze batch norm parameters during
        training. Freezing (and relying on pretrained batch norm params) is
        desirable when training with a small batch size (e.g. 1).
      inplace_batchnorm_update: whether to update batch norm moving average
        values inplace. When false, the train op must add a control
        dependency on the tf.graphkeys.UPDATE_OPS collection in order to
        update batch norm statistics.
      bifpn_min_level: the highest resolution feature map level to use in
        the BiFPN.
        NOTE(review): earlier documentation listed {2, 3, 4, 5} as valid
        values with a ResNet block mapping; confirm the range that applies
        to EfficientNet backbones.
      bifpn_max_level: the smallest resolution feature map level to use in
        the BiFPN. The BiFPN uses feature maps from bifpn_min_level up to
        bifpn_max_level; when the backbone does not provide enough feature
        maps, extra ones are created with stride 2 convolutions until the
        desired number of BiFPN levels is reached.
      bifpn_num_iterations: number of BiFPN iterations.
      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
      bifpn_combine_method: the method used to combine BiFPN nodes.
      use_explicit_padding: whether to use explicit padding when extracting
        features. Unsupported by EfficientNetBiFPN.
      use_depthwise: unsupported by EfficientNetBiFPN, since the BiFPN uses
        regular convolutions when inputs to a node have a differing number
        of channels, and separable convolutions after combine operations.
      override_base_feature_extractor_hyperparams: unsupported. Whether to
        override hyperparameters of the base feature extractor with the
        ones from `conv_hyperparams`.
      name: a string name scope to assign to the model. If 'None', Keras
        will auto-generate one from the class name.
    """
    passthrough = dict(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams=conv_hyperparams,
        freeze_batchnorm=freeze_batchnorm,
        inplace_batchnorm_update=inplace_batchnorm_update,
        bifpn_min_level=bifpn_min_level,
        bifpn_max_level=bifpn_max_level,
        bifpn_num_iterations=bifpn_num_iterations,
        bifpn_num_filters=bifpn_num_filters,
        bifpn_combine_method=bifpn_combine_method,
        use_explicit_padding=use_explicit_padding,
        use_depthwise=use_depthwise,
        override_base_feature_extractor_hyperparams=(
            override_base_feature_extractor_hyperparams),
        name=name)
    # The backbone variant is the only value this subclass pins.
    super(SSDEfficientNetB4BiFPNKerasFeatureExtractor, self).__init__(
        efficientnet_version='efficientnet-b4', **passthrough)
class SSDEfficientNetB5BiFPNKerasFeatureExtractor(
    SSDEfficientNetBiFPNKerasFeatureExtractor):
  """SSD Keras EfficientNet-b5 BiFPN (EfficientDet-d5) Feature Extractor."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               freeze_batchnorm,
               inplace_batchnorm_update,
               bifpn_min_level=3,
               bifpn_max_level=7,
               bifpn_num_iterations=7,
               bifpn_num_filters=288,
               bifpn_combine_method='fast_attention',
               use_explicit_padding=None,
               use_depthwise=None,
               override_base_feature_extractor_hyperparams=None,
               name='EfficientDet-D5'):
    """Configures the extractor around a fixed EfficientNet-b5 backbone.

    All arguments are passed through unchanged to the parent constructor,
    which is additionally given `efficientnet_version='efficientnet-b5'`.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for the feature extractor.
        Unsupported by EfficientNetBiFPN.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
        holding the convolution hyperparameters for the layers added on top
        of the base feature extractor.
      freeze_batchnorm: whether to freeze batch norm parameters during
        training. Freezing (and relying on pretrained batch norm params) is
        desirable when training with a small batch size (e.g. 1).
      inplace_batchnorm_update: whether to update batch norm moving average
        values inplace. When false, the train op must add a control
        dependency on the tf.graphkeys.UPDATE_OPS collection in order to
        update batch norm statistics.
      bifpn_min_level: the highest resolution feature map level to use in
        the BiFPN.
        NOTE(review): earlier documentation listed {2, 3, 4, 5} as valid
        values with a ResNet block mapping; confirm the range that applies
        to EfficientNet backbones.
      bifpn_max_level: the smallest resolution feature map level to use in
        the BiFPN. The BiFPN uses feature maps from bifpn_min_level up to
        bifpn_max_level; when the backbone does not provide enough feature
        maps, extra ones are created with stride 2 convolutions until the
        desired number of BiFPN levels is reached.
      bifpn_num_iterations: number of BiFPN iterations.
      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
      bifpn_combine_method: the method used to combine BiFPN nodes.
      use_explicit_padding: whether to use explicit padding when extracting
        features. Unsupported by EfficientNetBiFPN.
      use_depthwise: unsupported by EfficientNetBiFPN, since the BiFPN uses
        regular convolutions when inputs to a node have a differing number
        of channels, and separable convolutions after combine operations.
      override_base_feature_extractor_hyperparams: unsupported. Whether to
        override hyperparameters of the base feature extractor with the
        ones from `conv_hyperparams`.
      name: a string name scope to assign to the model. If 'None', Keras
        will auto-generate one from the class name.
    """
    passthrough = dict(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams=conv_hyperparams,
        freeze_batchnorm=freeze_batchnorm,
        inplace_batchnorm_update=inplace_batchnorm_update,
        bifpn_min_level=bifpn_min_level,
        bifpn_max_level=bifpn_max_level,
        bifpn_num_iterations=bifpn_num_iterations,
        bifpn_num_filters=bifpn_num_filters,
        bifpn_combine_method=bifpn_combine_method,
        use_explicit_padding=use_explicit_padding,
        use_depthwise=use_depthwise,
        override_base_feature_extractor_hyperparams=(
            override_base_feature_extractor_hyperparams),
        name=name)
    # The backbone variant is the only value this subclass pins.
    super(SSDEfficientNetB5BiFPNKerasFeatureExtractor, self).__init__(
        efficientnet_version='efficientnet-b5', **passthrough)
class SSDEfficientNetB6BiFPNKerasFeatureExtractor(
    SSDEfficientNetBiFPNKerasFeatureExtractor):
  """SSD Keras EfficientNet-b6 BiFPN (EfficientDet-d[6,7]) Feature Extractor."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               freeze_batchnorm,
               inplace_batchnorm_update,
               bifpn_min_level=3,
               bifpn_max_level=7,
               bifpn_num_iterations=8,
               bifpn_num_filters=384,
               bifpn_combine_method='sum',
               use_explicit_padding=None,
               use_depthwise=None,
               override_base_feature_extractor_hyperparams=None,
               name='EfficientDet-D6-D7'):
    """Configures the extractor around a fixed EfficientNet-b6 backbone.

    This extractor is also known as EfficientDet-d6 and EfficientDet-d7. The
    EfficientDet-d[6,7] models use the same EfficientNet-b6 backbone and the
    same BiFPN architecture, and therefore have the same number of
    parameters. They only differ in their input resolutions.

    All arguments are passed through unchanged to the parent constructor,
    which is additionally given `efficientnet_version='efficientnet-b6'`.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for the feature extractor.
        Unsupported by EfficientNetBiFPN.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
        holding the convolution hyperparameters for the layers added on top
        of the base feature extractor.
      freeze_batchnorm: whether to freeze batch norm parameters during
        training. Freezing (and relying on pretrained batch norm params) is
        desirable when training with a small batch size (e.g. 1).
      inplace_batchnorm_update: whether to update batch norm moving average
        values inplace. When false, the train op must add a control
        dependency on the tf.graphkeys.UPDATE_OPS collection in order to
        update batch norm statistics.
      bifpn_min_level: the highest resolution feature map level to use in
        the BiFPN.
        NOTE(review): earlier documentation listed {2, 3, 4, 5} as valid
        values with a ResNet block mapping; confirm the range that applies
        to EfficientNet backbones.
      bifpn_max_level: the smallest resolution feature map level to use in
        the BiFPN. The BiFPN uses feature maps from bifpn_min_level up to
        bifpn_max_level; when the backbone does not provide enough feature
        maps, extra ones are created with stride 2 convolutions until the
        desired number of BiFPN levels is reached.
      bifpn_num_iterations: number of BiFPN iterations.
      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
      bifpn_combine_method: the method used to combine BiFPN nodes.
      use_explicit_padding: whether to use explicit padding when extracting
        features. Unsupported by EfficientNetBiFPN.
      use_depthwise: unsupported by EfficientNetBiFPN, since the BiFPN uses
        regular convolutions when inputs to a node have a differing number
        of channels, and separable convolutions after combine operations.
      override_base_feature_extractor_hyperparams: unsupported. Whether to
        override hyperparameters of the base feature extractor with the
        ones from `conv_hyperparams`.
      name: a string name scope to assign to the model. If 'None', Keras
        will auto-generate one from the class name.
    """
    passthrough = dict(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams=conv_hyperparams,
        freeze_batchnorm=freeze_batchnorm,
        inplace_batchnorm_update=inplace_batchnorm_update,
        bifpn_min_level=bifpn_min_level,
        bifpn_max_level=bifpn_max_level,
        bifpn_num_iterations=bifpn_num_iterations,
        bifpn_num_filters=bifpn_num_filters,
        bifpn_combine_method=bifpn_combine_method,
        use_explicit_padding=use_explicit_padding,
        use_depthwise=use_depthwise,
        override_base_feature_extractor_hyperparams=(
            override_base_feature_extractor_hyperparams),
        name=name)
    # The backbone variant is the only value this subclass pins.
    super(SSDEfficientNetB6BiFPNKerasFeatureExtractor, self).__init__(
        efficientnet_version='efficientnet-b6', **passthrough)
class SSDEfficientNetB7BiFPNKerasFeatureExtractor(
    SSDEfficientNetBiFPNKerasFeatureExtractor):
  """SSD Keras EfficientNet-b7 BiFPN Feature Extractor."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               freeze_batchnorm,
               inplace_batchnorm_update,
               bifpn_min_level=3,
               bifpn_max_level=7,
               bifpn_num_iterations=8,
               bifpn_num_filters=384,
               bifpn_combine_method='sum',
               use_explicit_padding=None,
               use_depthwise=None,
               override_base_feature_extractor_hyperparams=None,
               name='EfficientNet-B7_BiFPN'):
    """Configures the extractor around a fixed EfficientNet-b7 backbone.

    All arguments are passed through unchanged to the parent constructor,
    which is additionally given `efficientnet_version='efficientnet-b7'`.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for the feature extractor.
        Unsupported by EfficientNetBiFPN.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
        holding the convolution hyperparameters for the layers added on top
        of the base feature extractor.
      freeze_batchnorm: whether to freeze batch norm parameters during
        training. Freezing (and relying on pretrained batch norm params) is
        desirable when training with a small batch size (e.g. 1).
      inplace_batchnorm_update: whether to update batch norm moving average
        values inplace. When false, the train op must add a control
        dependency on the tf.graphkeys.UPDATE_OPS collection in order to
        update batch norm statistics.
      bifpn_min_level: the highest resolution feature map level to use in
        the BiFPN.
        NOTE(review): earlier documentation listed {2, 3, 4, 5} as valid
        values with a ResNet block mapping; confirm the range that applies
        to EfficientNet backbones.
      bifpn_max_level: the smallest resolution feature map level to use in
        the BiFPN. The BiFPN uses feature maps from bifpn_min_level up to
        bifpn_max_level; when the backbone does not provide enough feature
        maps, extra ones are created with stride 2 convolutions until the
        desired number of BiFPN levels is reached.
      bifpn_num_iterations: number of BiFPN iterations.
      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
      bifpn_combine_method: the method used to combine BiFPN nodes.
      use_explicit_padding: whether to use explicit padding when extracting
        features. Unsupported by EfficientNetBiFPN.
      use_depthwise: unsupported by EfficientNetBiFPN, since the BiFPN uses
        regular convolutions when inputs to a node have a differing number
        of channels, and separable convolutions after combine operations.
      override_base_feature_extractor_hyperparams: unsupported. Whether to
        override hyperparameters of the base feature extractor with the
        ones from `conv_hyperparams`.
      name: a string name scope to assign to the model. If 'None', Keras
        will auto-generate one from the class name.
    """
    passthrough = dict(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams=conv_hyperparams,
        freeze_batchnorm=freeze_batchnorm,
        inplace_batchnorm_update=inplace_batchnorm_update,
        bifpn_min_level=bifpn_min_level,
        bifpn_max_level=bifpn_max_level,
        bifpn_num_iterations=bifpn_num_iterations,
        bifpn_num_filters=bifpn_num_filters,
        bifpn_combine_method=bifpn_combine_method,
        use_explicit_padding=use_explicit_padding,
        use_depthwise=use_depthwise,
        override_base_feature_extractor_hyperparams=(
            override_base_feature_extractor_hyperparams),
        name=name)
    # The backbone variant is the only value this subclass pins.
    super(SSDEfficientNetB7BiFPNKerasFeatureExtractor, self).__init__(
        efficientnet_version='efficientnet-b7', **passthrough)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the ssd_efficientnet_bifpn_feature_extractor."""
import unittest
from absl.testing import parameterized
import numpy as np
import tensorflow.compat.v2 as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models import ssd_efficientnet_bifpn_feature_extractor
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
from object_detection.utils import tf_version
def _count_params(model, trainable_only=True):
"""Returns the count of all model parameters, or just trainable ones."""
if not trainable_only:
return model.count_params()
else:
return int(np.sum([
tf.keras.backend.count_params(p) for p in model.trainable_weights]))
@parameterized.parameters(
    {
        'efficientdet_version': 'efficientdet-d0',
        'efficientnet_version': 'efficientnet-b0',
        'bifpn_num_iterations': 3,
        'bifpn_num_filters': 64,
        'bifpn_combine_method': 'fast_attention',
    },
    {
        'efficientdet_version': 'efficientdet-d1',
        'efficientnet_version': 'efficientnet-b1',
        'bifpn_num_iterations': 4,
        'bifpn_num_filters': 88,
        'bifpn_combine_method': 'fast_attention',
    },
    {
        'efficientdet_version': 'efficientdet-d2',
        'efficientnet_version': 'efficientnet-b2',
        'bifpn_num_iterations': 5,
        'bifpn_num_filters': 112,
        'bifpn_combine_method': 'fast_attention',
    },
    {
        'efficientdet_version': 'efficientdet-d3',
        'efficientnet_version': 'efficientnet-b3',
        'bifpn_num_iterations': 6,
        'bifpn_num_filters': 160,
        'bifpn_combine_method': 'fast_attention',
    },
    {
        'efficientdet_version': 'efficientdet-d4',
        'efficientnet_version': 'efficientnet-b4',
        'bifpn_num_iterations': 7,
        'bifpn_num_filters': 224,
        'bifpn_combine_method': 'fast_attention',
    },
    {
        'efficientdet_version': 'efficientdet-d5',
        'efficientnet_version': 'efficientnet-b5',
        'bifpn_num_iterations': 7,
        'bifpn_num_filters': 288,
        'bifpn_combine_method': 'fast_attention',
    },
    # efficientdet-d6 and efficientdet-d7 only differ in input size.
    {
        'efficientdet_version': 'efficientdet-d6-d7',
        'efficientnet_version': 'efficientnet-b6',
        'bifpn_num_iterations': 8,
        'bifpn_num_filters': 384,
        'bifpn_combine_method': 'sum',
    })
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class SSDEfficientNetBiFPNFeatureExtractorTest(
    test_case.TestCase, parameterized.TestCase):
  """Tests for the SSD EfficientNet-BiFPN Keras feature extractor.

  The class-level parameter sets are handed to every test method as keyword
  arguments, one set per EfficientDet variant listed above. The whole class
  is skipped when running under TF1.
  """

  def _build_conv_hyperparams(self, add_batch_norm=True):
    """Builds the Keras conv hyperparams shared by all tests.

    Args:
      add_batch_norm: Whether to append a `batch_norm` section to the
        hyperparams text proto before it is parsed.

    Returns:
      A `hyperparams_builder.KerasLayerHyperparams` wrapping the parsed proto.
    """
    # Text-format proto, kept verbatim.
    text_proto = """
force_use_bias: true
activation: SWISH
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
"""
    if add_batch_norm:
      text_proto += """
batch_norm {
scale: true,
decay: 0.99,
epsilon: 0.001,
}
"""
    hyperparams_proto = hyperparams_pb2.Hyperparams()
    text_format.Merge(text_proto, hyperparams_proto)
    return hyperparams_builder.KerasLayerHyperparams(hyperparams_proto)

  def _create_feature_extractor(self,
                                efficientnet_version='efficientnet-b0',
                                bifpn_num_iterations=3,
                                bifpn_num_filters=64,
                                bifpn_combine_method='fast_attention'):
    """Instantiates the feature extractor under test in training mode.

    Args:
      efficientnet_version: Forwarded as `efficientnet_version`.
      bifpn_num_iterations: Forwarded as `bifpn_num_iterations`.
      bifpn_num_filters: Forwarded as `bifpn_num_filters`.
      bifpn_combine_method: Forwarded as `bifpn_combine_method`.

    Returns:
      A new `SSDEfficientNetBiFPNKerasFeatureExtractor` using BiFPN levels 3
      through 7 and the hyperparams from `_build_conv_hyperparams`.
    """
    extractor_cls = (ssd_efficientnet_bifpn_feature_extractor
                     .SSDEfficientNetBiFPNKerasFeatureExtractor)
    return extractor_cls(
        is_training=True,
        depth_multiplier=1.0,
        min_depth=16,
        pad_to_multiple=1,
        conv_hyperparams=self._build_conv_hyperparams(),
        freeze_batchnorm=False,
        inplace_batchnorm_update=False,
        bifpn_min_level=3,
        bifpn_max_level=7,
        bifpn_num_iterations=bifpn_num_iterations,
        bifpn_num_filters=bifpn_num_filters,
        bifpn_combine_method=bifpn_combine_method,
        efficientnet_version=efficientnet_version)

  def test_efficientdet_feature_extractor_shapes(self,
                                                 efficientdet_version,
                                                 efficientnet_version,
                                                 bifpn_num_iterations,
                                                 bifpn_num_filters,
                                                 bifpn_combine_method):
    """Checks the shapes of the first five output feature maps.

    A zero-filled batch of two 256x256 RGB images must produce maps with
    spatial sizes 32, 16, 8, 4 and 2, each with `bifpn_num_filters` channels.
    `efficientdet_version` is accepted only because the class-level
    parameters supply it; it is not used here.
    """
    extractor = self._create_feature_extractor(
        efficientnet_version=efficientnet_version,
        bifpn_num_iterations=bifpn_num_iterations,
        bifpn_num_filters=bifpn_num_filters,
        bifpn_combine_method=bifpn_combine_method)
    batch_size = 2
    images = np.zeros((batch_size, 256, 256, 3), dtype=np.float32)
    outputs = extractor(images)
    # Expected spatial size per output, in output order.
    expected_sizes = (32, 16, 8, 4, 2)
    for index, size in enumerate(expected_sizes):
      self.assertEqual(outputs[index].shape,
                       (batch_size, size, size, bifpn_num_filters))

  def test_efficientdet_feature_extractor_params(self,
                                                 efficientdet_version,
                                                 efficientnet_version,
                                                 bifpn_num_iterations,
                                                 bifpn_num_filters,
                                                 bifpn_combine_method):
    """Checks the trainable parameter count against a per-variant table."""
    extractor = self._create_feature_extractor(
        efficientnet_version=efficientnet_version,
        bifpn_num_iterations=bifpn_num_iterations,
        bifpn_num_filters=bifpn_num_filters,
        bifpn_combine_method=bifpn_combine_method)
    # Call the extractor once on a dummy batch before counting parameters.
    _ = extractor(np.zeros((2, 256, 256, 3), dtype=np.float32))
    expected_params = {
        'efficientdet-d0': 5484829,
        'efficientdet-d1': 8185156,
        'efficientdet-d2': 9818153,
        'efficientdet-d3': 13792706,
        'efficientdet-d4': 22691445,
        'efficientdet-d5': 35795677,
        'efficientdet-d6-d7': 53624512,
    }
    actual_count = _count_params(extractor)
    self.assertEqual(expected_params[efficientdet_version], actual_count)
# Run the tests through TensorFlow's test runner when executed as a script.
if __name__ == '__main__':
  tf.test.main()
......@@ -61,7 +61,7 @@ class Head(object):
pass
class KerasHead(tf.keras.Model):
class KerasHead(tf.keras.layers.Layer):
"""Keras head base class."""
def call(self, features):
......
......@@ -145,7 +145,7 @@ message Ssd {
optional MaskHead mask_head_config = 25;
}
// Next id: 19.
// Next id: 20.
message SsdFeatureExtractor {
reserved 6;
......@@ -185,8 +185,13 @@ message SsdFeatureExtractor {
// feature maps added by SSD.
optional bool use_depthwise = 8 [default = false];
// Feature Pyramid Networks config.
optional FeaturePyramidNetworks fpn = 10;
oneof feature_pyramid_oneof {
// Feature Pyramid Networks config.
FeaturePyramidNetworks fpn = 10;
// Bidirectional Feature Pyramid Networks config.
BidirectionalFeaturePyramidNetworks bifpn = 19;
}
// If true, replace preprocess function of feature extractor with a
// placeholder. This should only be used if all the image preprocessing steps
......@@ -225,3 +230,23 @@ message FeaturePyramidNetworks {
}
// Configuration for Bidirectional Feature Pyramid Networks (BiFPN).
message BidirectionalFeaturePyramidNetworks {
  // Minimum level in the feature pyramid.
  optional int32 min_level = 1 [default = 3];

  // Maximum level in the feature pyramid.
  optional int32 max_level = 2 [default = 7];

  // The number of repeated top-down bottom-up iterations for BiFPN-based
  // feature extractors (bidirectional feature pyramid networks).
  // No explicit default is declared for this field.
  optional int32 num_iterations = 3;

  // The number of filters (channels) to use in feature pyramid layers for
  // BiFPN-based feature extractors (bidirectional feature pyramid networks).
  // No explicit default is declared for this field.
  optional int32 num_filters = 4;

  // Method used to combine inputs to BiFPN nodes, e.g. 'fast_attention' or
  // 'sum'.
  optional string combine_method = 5 [default = 'fast_attention'];
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment