Unverified Commit 965cc3ee authored by Ayushman Kumar, committed by GitHub

Merge pull request #7 from tensorflow/master

updated
parents 1f3247f4 1f685c54
@@ -14,84 +14,18 @@
 # ==============================================================================
 """Config template to train Retinanet."""
-# pylint: disable=line-too-long
-
-# For ResNet-50, this freezes the variables of the first conv1 and conv2_x
-# layers [1], which leads to higher training speed and slightly better testing
-# accuracy. The intuition is that the low-level architecture (e.g., ResNet-50)
-# is able to capture low-level features such as edges; therefore, it does not
-# need to be fine-tuned for the detection task.
-# Note that we need to trailing `/` to avoid the incorrect match.
-# [1]: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/config.py#L198
-RESNET_FROZEN_VAR_PREFIX = r'(resnet\d+)\/(conv2d(|_([1-9]|10))|batch_normalization(|_([1-9]|10)))\/'
-REGULARIZATION_VAR_REGEX = r'.*(kernel|weight):0$'
+from official.modeling.hyperparams import params_dict
+from official.vision.detection.configs import base_config

 # pylint: disable=line-too-long
-RETINANET_CFG = {
+RETINANET_CFG = params_dict.ParamsDict(base_config.BASE_CFG)
+RETINANET_CFG.override({
     'type': 'retinanet',
-    'model_dir': '',
-    'use_tpu': True,
-    'strategy_type': 'tpu',
-    'train': {
-        'batch_size': 64,
-        'iterations_per_loop': 500,
-        'total_steps': 22500,
-        'optimizer': {
-            'type': 'momentum',
-            'momentum': 0.9,
-            'nesterov': True,  # `False` is better for TPU v3-128.
-        },
-        'learning_rate': {
-            'type': 'step',
-            'warmup_learning_rate': 0.0067,
-            'warmup_steps': 500,
-            'init_learning_rate': 0.08,
-            'learning_rate_levels': [0.008, 0.0008],
-            'learning_rate_steps': [15000, 20000],
-        },
-        'checkpoint': {
-            'path': '',
-            'prefix': '',
-        },
-        'frozen_variable_prefix': RESNET_FROZEN_VAR_PREFIX,
-        'train_file_pattern': '',
-        # TODO(b/142174042): Support transpose_input option.
-        'transpose_input': False,
-        'regularization_variable_regex': REGULARIZATION_VAR_REGEX,
-        'l2_weight_decay': 0.0001,
-        'input_sharding': False,
-    },
-    'eval': {
-        'batch_size': 8,
-        'min_eval_interval': 180,
-        'eval_timeout': None,
-        'eval_samples': 5000,
-        'type': 'box',
-        'val_json_file': '',
-        'eval_file_pattern': '',
-        'input_sharding': True,
-        # When visualizing images, set evaluation batch size to 40 to avoid
-        # potential OOM.
-        'num_images_to_visualize': 0,
-    },
-    'predict': {
-        'predict_batch_size': 8,
-    },
     'architecture': {
         'parser': 'retinanet_parser',
-        'backbone': 'resnet',
-        'multilevel_features': 'fpn',
-        'use_bfloat16': False,
-    },
-    'anchor': {
-        'min_level': 3,
-        'max_level': 7,
-        'num_scales': 3,
-        'aspect_ratios': [1.0, 2.0, 0.5],
-        'anchor_size': 4.0,
     },
     'retinanet_parser': {
-        'use_bfloat16': False,
         'output_size': [640, 640],
         'num_channels': 3,
         'match_threshold': 0.5,
@@ -104,68 +38,22 @@ RETINANET_CFG = {
         'skip_crowd_during_training': True,
         'max_num_instances': 100,
     },
-    'resnet': {
-        'resnet_depth': 50,
-        'batch_norm': {
-            'batch_norm_momentum': 0.997,
-            'batch_norm_epsilon': 1e-4,
-            'batch_norm_trainable': True,
-        },
-    },
-    'fpn': {
-        'min_level': 3,
-        'max_level': 7,
-        'fpn_feat_dims': 256,
-        'use_separable_conv': False,
-        'use_batch_norm': True,
-        'batch_norm': {
-            'batch_norm_momentum': 0.997,
-            'batch_norm_epsilon': 1e-4,
-            'batch_norm_trainable': True,
-        },
-    },
     'retinanet_head': {
-        'min_level': 3,
-        'max_level': 7,
-        # Note that `num_classes` is the total number of classes including
-        # one background classes whose index is 0.
-        'num_classes': 91,
         'anchors_per_location': 9,
-        'retinanet_head_num_convs': 4,
-        'retinanet_head_num_filters': 256,
+        'num_convs': 4,
+        'num_filters': 256,
         'use_separable_conv': False,
-        'batch_norm': {
-            'batch_norm_momentum': 0.997,
-            'batch_norm_epsilon': 1e-4,
-            'batch_norm_trainable': True,
-        },
     },
     'retinanet_loss': {
-        'num_classes': 91,
         'focal_loss_alpha': 0.25,
         'focal_loss_gamma': 1.5,
         'huber_loss_delta': 0.1,
         'box_loss_weight': 50,
     },
-    'postprocess': {
-        'use_batched_nms': False,
-        'min_level': 3,
-        'max_level': 7,
-        'max_total_size': 100,
-        'nms_iou_threshold': 0.5,
-        'score_threshold': 0.05,
-        'pre_nms_num_boxes': 5000,
-    },
-    'enable_summary': False,
-}
+    'enable_summary': True,
+}, is_strict=False)

 RETINANET_RESTRICTIONS = [
-    'architecture.use_bfloat16 == retinanet_parser.use_bfloat16',
-    'anchor.min_level == retinanet_head.min_level',
-    'anchor.max_level == retinanet_head.max_level',
-    'anchor.min_level == postprocess.min_level',
-    'anchor.max_level == postprocess.max_level',
-    'retinanet_head.num_classes == retinanet_loss.num_classes',
 ]

 # pylint: enable=line-too-long
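The config file shrinks because shared defaults now live in `base_config.BASE_CFG` and the model file only overrides what differs. A minimal sketch of that composition, assuming the `ParamsDict`/`override` API used in this diff (the toy base config and keys below are made up for illustration):

    from official.modeling.hyperparams import params_dict

    # A toy base config standing in for base_config.BASE_CFG.
    BASE_CFG = params_dict.ParamsDict({
        'type': '',
        'architecture': {'parser': ''},
    })

    CFG = params_dict.ParamsDict(BASE_CFG)  # deep copy of the shared defaults
    CFG.override({
        'type': 'retinanet',
        # `is_strict=False` lets the override introduce keys that the base
        # config does not define, e.g. a model-specific loss section.
        'retinanet_loss': {'box_loss_weight': 50},
    }, is_strict=False)

    print(CFG.type)                            # -> retinanet
    print(CFG.retinanet_loss.box_loss_weight)  # -> 50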
@@ -29,8 +29,8 @@ def parser_generator(params, mode):
     parser_params = params.retinanet_parser
     parser_fn = retinanet_parser.Parser(
         output_size=parser_params.output_size,
-        min_level=anchor_params.min_level,
-        max_level=anchor_params.max_level,
+        min_level=params.architecture.min_level,
+        max_level=params.architecture.max_level,
         num_scales=anchor_params.num_scales,
         aspect_ratios=anchor_params.aspect_ratios,
         anchor_size=anchor_params.anchor_size,
@@ -43,15 +43,15 @@ def parser_generator(params, mode):
         autoaugment_policy_name=parser_params.autoaugment_policy_name,
         skip_crowd_during_training=parser_params.skip_crowd_during_training,
         max_num_instances=parser_params.max_num_instances,
-        use_bfloat16=parser_params.use_bfloat16,
+        use_bfloat16=params.architecture.use_bfloat16,
         mode=mode)
   elif params.architecture.parser == 'maskrcnn_parser':
     anchor_params = params.anchor
     parser_params = params.maskrcnn_parser
     parser_fn = maskrcnn_parser.Parser(
         output_size=parser_params.output_size,
-        min_level=anchor_params.min_level,
-        max_level=anchor_params.max_level,
+        min_level=params.architecture.min_level,
+        max_level=params.architecture.max_level,
         num_scales=anchor_params.num_scales,
         aspect_ratios=anchor_params.aspect_ratios,
         anchor_size=anchor_params.anchor_size,
@@ -64,17 +64,17 @@ def parser_generator(params, mode):
         aug_scale_max=parser_params.aug_scale_max,
         skip_crowd_during_training=parser_params.skip_crowd_during_training,
         max_num_instances=parser_params.max_num_instances,
-        include_mask=parser_params.include_mask,
+        include_mask=params.architecture.include_mask,
         mask_crop_size=parser_params.mask_crop_size,
-        use_bfloat16=parser_params.use_bfloat16,
+        use_bfloat16=params.architecture.use_bfloat16,
         mode=mode)
   elif params.architecture.parser == 'shapemask_parser':
     anchor_params = params.anchor
     parser_params = params.shapemask_parser
     parser_fn = shapemask_parser.Parser(
         output_size=parser_params.output_size,
-        min_level=anchor_params.min_level,
-        max_level=anchor_params.max_level,
+        min_level=params.architecture.min_level,
+        max_level=params.architecture.max_level,
         num_scales=anchor_params.num_scales,
         aspect_ratios=anchor_params.aspect_ratios,
         anchor_size=anchor_params.anchor_size,
@@ -93,7 +93,7 @@ def parser_generator(params, mode):
         aug_scale_max=parser_params.aug_scale_max,
         skip_crowd_during_training=parser_params.skip_crowd_during_training,
         max_num_instances=parser_params.max_num_instances,
-        use_bfloat16=parser_params.use_bfloat16,
+        use_bfloat16=params.architecture.use_bfloat16,
         mask_train_class=parser_params.mask_train_class,
         mode=mode)
   else:
...
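The repeated `anchor_params.*` to `params.architecture.*` moves above all serve one goal: level ranges and precision flags used to be duplicated in each parser's section and kept consistent only via RETINANET_RESTRICTIONS, and now they are defined once. A rough sketch of the new layout (field values are illustrative):

    from official.modeling.hyperparams import params_dict

    params = params_dict.ParamsDict({
        # Shared across backbone, FPN, heads and parsers: defined once.
        'architecture': {
            'parser': 'retinanet_parser',
            'min_level': 3,
            'max_level': 7,
            'use_bfloat16': False,
        },
        # Anchor shape parameters stay in their own section.
        'anchor': {
            'num_scales': 3,
            'aspect_ratios': [1.0, 2.0, 0.5],
            'anchor_size': 4.0,
        },
    })

    assert params.architecture.min_level == 3  # one source of truth for levels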
@@ -85,13 +85,14 @@ class InputFn(object):
     if self._input_sharding and ctx and ctx.num_input_pipelines > 1:
       dataset = dataset.shard(ctx.num_input_pipelines, ctx.input_pipeline_id)
-    dataset = dataset.cache()

     if self._is_training:
       dataset = dataset.repeat()

     dataset = dataset.interleave(
         map_func=lambda file_name: self._dataset_fn(file_name), cycle_length=32,
         num_parallel_calls=tf.data.experimental.AUTOTUNE)
+    dataset = dataset.cache()

     if self._is_training:
       # Large shuffle size is critical for 2vm input pipeline. Can use small
...
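The relocated `cache()` changes what gets memoized: before `interleave` it would only cache the dataset of file names, after `interleave` it keeps the decoded records, so repeated passes can skip file I/O. A standalone sketch of that distinction (the file pattern is hypothetical):

    import tensorflow as tf

    files = tf.data.Dataset.list_files('/tmp/train-*.tfrecord')  # hypothetical
    records = files.interleave(
        tf.data.TFRecordDataset, cycle_length=32,
        num_parallel_calls=tf.data.experimental.AUTOTUNE)
    # Caching here stores the records produced by interleave; calling
    # files.cache() instead would only have cached the file-name strings.
    records = records.cache()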
@@ -35,10 +35,12 @@ from official.vision.detection.dataloader import input_reader
 from official.vision.detection.dataloader import mode_keys as ModeKeys
 from official.vision.detection.executor.detection_executor import DetectionDistributedExecutor
 from official.vision.detection.modeling import factory as model_factory
+from official.utils.flags import core as flags_core
 from official.utils.misc import distribution_utils
 from official.utils.misc import keras_utils

 hyperparams_flags.initialize_common_flags()
+flags_core.define_log_steps()

 flags.DEFINE_bool(
     'enable_xla',
@@ -67,10 +69,12 @@ FLAGS = flags.FLAGS

 def run_executor(params,
+                 mode,
+                 checkpoint_path=None,
                  train_input_fn=None,
                  eval_input_fn=None,
                  callbacks=None,
-                 strategy=None):
+                 prebuilt_strategy=None):
   """Runs Retinanet model on distribution strategy defined by the user."""

   if params.architecture.use_bfloat16:
@@ -80,7 +84,9 @@ def run_executor(params,

   model_builder = model_factory.model_generator(params)

-  if strategy is None:
+  if prebuilt_strategy is not None:
+    strategy = prebuilt_strategy
+  else:
     strategy_config = params.strategy_config
     distribution_utils.configure_cluster(strategy_config.worker_hosts,
                                          strategy_config.task_index)
@@ -94,7 +100,7 @@ def run_executor(params,
   num_workers = int(strategy.num_replicas_in_sync + 7) // 8
   is_multi_host = (int(num_workers) >= 2)

-  if FLAGS.mode == 'train':
+  if mode == 'train':

     def _model_fn(params):
       return model_builder.build_model(params, mode=ModeKeys.TRAIN)
@@ -126,8 +132,7 @@ def run_executor(params,
         init_checkpoint=model_builder.make_restore_checkpoint_fn(),
         custom_callbacks=callbacks,
         save_config=True)
-
-  elif FLAGS.mode == 'eval' or FLAGS.mode == 'eval_once':
+  elif mode == 'eval' or mode == 'eval_once':

     def _model_fn(params):
       return model_builder.build_model(params, mode=ModeKeys.PREDICT_WITH_GT)
@@ -150,7 +155,7 @@ def run_executor(params,
         trainable_variables_filter=model_builder
         .make_filter_trainable_variables_fn())

-    if FLAGS.mode == 'eval':
+    if mode == 'eval':
       results = dist_executor.evaluate_from_model_dir(
           model_dir=params.model_dir,
           eval_input_fn=eval_input_fn,
@@ -160,9 +165,8 @@ def run_executor(params,
           total_steps=params.train.total_steps)
     else:
       # Run evaluation once for a single checkpoint.
-      if not FLAGS.checkpoint_path:
-        raise ValueError('FLAGS.checkpoint_path cannot be empty.')
-      checkpoint_path = FLAGS.checkpoint_path
+      if not checkpoint_path:
+        raise ValueError('checkpoint_path cannot be empty.')
       if tf.io.gfile.isdir(checkpoint_path):
         checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
       summary_writer = executor.SummaryWriter(params.model_dir, 'eval')
@@ -175,7 +179,7 @@ def run_executor(params,
         logging.info('Final eval metric %s: %f', k, v)
       return results
   else:
-    raise ValueError('Mode not found: %s.' % FLAGS.mode)
+    raise ValueError('Mode not found: %s.' % mode)


 def run(callbacks=None):
@@ -224,8 +228,21 @@ def run(callbacks=None):
         mode=input_reader.ModeKeys.PREDICT_WITH_GT,
         batch_size=params.eval.batch_size,
         num_examples=params.eval.eval_samples)

+  if callbacks is None:
+    callbacks = []
+
+  if FLAGS.log_steps:
+    callbacks.append(
+        keras_utils.TimeHistory(
+            batch_size=params.train.batch_size,
+            log_steps=FLAGS.log_steps,
+        ))
+
   return run_executor(
       params,
+      FLAGS.mode,
+      checkpoint_path=FLAGS.checkpoint_path,
       train_input_fn=train_input_fn,
       eval_input_fn=eval_input_fn,
       callbacks=callbacks)
@@ -238,6 +255,5 @@ def main(argv):

 if __name__ == '__main__':
-  assert tf.version.VERSION.startswith('2.')
   tf.config.set_soft_device_placement(True)
   app.run(main)
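With `mode` and `checkpoint_path` promoted to arguments, `run_executor` no longer reads `FLAGS` internally and can be driven programmatically. A hedged sketch; the `params` object and the path are assumptions, not part of this diff:

    # `params` is assumed to be the ParamsDict that run() normally builds,
    # and eval_input_fn an InputFn built as in run().
    results = run_executor(
        params,
        mode='eval_once',
        checkpoint_path='/tmp/model_dir',  # hypothetical; a directory is
                                           # resolved to its latest checkpoint
        eval_input_fn=eval_input_fn)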
@@ -25,16 +25,12 @@ from official.vision.detection.modeling.architecture import nn_ops
 from official.vision.detection.modeling.architecture import resnet


-def batch_norm_relu_generator(params):
-
-  def _batch_norm_op(**kwargs):
-    return nn_ops.BatchNormRelu(
-        momentum=params.batch_norm_momentum,
-        epsilon=params.batch_norm_epsilon,
-        trainable=params.batch_norm_trainable,
-        **kwargs)
-
-  return _batch_norm_op
+def norm_activation_generator(params):
+  return nn_ops.norm_activation_builder(
+      momentum=params.batch_norm_momentum,
+      epsilon=params.batch_norm_epsilon,
+      trainable=params.batch_norm_trainable,
+      activation=params.activation)


 def backbone_generator(params):
@@ -43,10 +39,12 @@ def backbone_generator(params):
     resnet_params = params.resnet
     backbone_fn = resnet.Resnet(
         resnet_depth=resnet_params.resnet_depth,
-        batch_norm_relu=batch_norm_relu_generator(resnet_params.batch_norm))
+        activation=params.norm_activation.activation,
+        norm_activation=norm_activation_generator(
+            params.norm_activation))
   else:
-    raise ValueError('Backbone model %s is not supported.' %
-                     params.architecture.backbone)
+    raise ValueError('Backbone model `{}` is not supported.'
+                     .format(params.architecture.backbone))

   return backbone_fn
@@ -56,81 +54,75 @@ def multilevel_features_generator(params):
   if params.architecture.multilevel_features == 'fpn':
     fpn_params = params.fpn
     fpn_fn = fpn.Fpn(
-        min_level=fpn_params.min_level,
-        max_level=fpn_params.max_level,
+        min_level=params.architecture.min_level,
+        max_level=params.architecture.max_level,
         fpn_feat_dims=fpn_params.fpn_feat_dims,
         use_separable_conv=fpn_params.use_separable_conv,
+        activation=params.norm_activation.activation,
         use_batch_norm=fpn_params.use_batch_norm,
-        batch_norm_relu=batch_norm_relu_generator(fpn_params.batch_norm))
+        norm_activation=norm_activation_generator(
+            params.norm_activation))
   elif params.architecture.multilevel_features == 'identity':
     fpn_fn = identity.Identity()
   else:
-    raise ValueError('The multi-level feature model %s is not supported.'
-                     % params.architecture.multilevel_features)
+    raise ValueError('The multi-level feature model `{}` is not supported.'
+                     .format(params.architecture.multilevel_features))
   return fpn_fn


 def retinanet_head_generator(params):
   """Generator function for RetinaNet head architecture."""
+  head_params = params.retinanet_head
   return heads.RetinanetHead(
-      params.min_level,
-      params.max_level,
-      params.num_classes,
-      params.anchors_per_location,
-      params.retinanet_head_num_convs,
-      params.retinanet_head_num_filters,
-      params.use_separable_conv,
-      batch_norm_relu=batch_norm_relu_generator(params.batch_norm))
+      params.architecture.min_level,
+      params.architecture.max_level,
+      params.architecture.num_classes,
+      head_params.anchors_per_location,
+      head_params.num_convs,
+      head_params.num_filters,
+      head_params.use_separable_conv,
+      norm_activation=norm_activation_generator(params.norm_activation))


 def rpn_head_generator(params):
   """Generator function for RPN head architecture."""
-  return heads.RpnHead(params.min_level,
-                       params.max_level,
-                       params.anchors_per_location,
-                       params.num_convs,
-                       params.num_filters,
-                       params.use_separable_conv,
-                       params.use_batch_norm,
-                       batch_norm_relu=batch_norm_relu_generator(
-                           params.batch_norm))
+  head_params = params.rpn_head
+  return heads.RpnHead(
+      params.architecture.min_level,
+      params.architecture.max_level,
+      head_params.anchors_per_location,
+      head_params.num_convs,
+      head_params.num_filters,
+      head_params.use_separable_conv,
+      params.norm_activation.activation,
+      head_params.use_batch_norm,
+      norm_activation=norm_activation_generator(params.norm_activation))


 def fast_rcnn_head_generator(params):
   """Generator function for Fast R-CNN head architecture."""
-  return heads.FastrcnnHead(params.num_classes,
-                            params.num_convs,
-                            params.num_filters,
-                            params.use_separable_conv,
-                            params.num_fcs,
-                            params.fc_dims,
-                            params.use_batch_norm,
-                            batch_norm_relu=batch_norm_relu_generator(
-                                params.batch_norm))
+  head_params = params.frcnn_head
+  return heads.FastrcnnHead(
+      params.architecture.num_classes,
+      head_params.num_convs,
+      head_params.num_filters,
+      head_params.use_separable_conv,
+      head_params.num_fcs,
+      head_params.fc_dims,
+      params.norm_activation.activation,
+      head_params.use_batch_norm,
+      norm_activation=norm_activation_generator(params.norm_activation))


 def mask_rcnn_head_generator(params):
   """Generator function for Mask R-CNN head architecture."""
-  return heads.MaskrcnnHead(params.num_classes,
-                            params.mask_target_size,
-                            params.num_convs,
-                            params.num_filters,
-                            params.use_separable_conv,
-                            params.use_batch_norm,
-                            batch_norm_relu=batch_norm_relu_generator(
-                                params.batch_norm))
-
-
-def shapeprior_head_generator(params):
-  """Generator function for Shapemask head architecture."""
-  raise NotImplementedError('Unimplemented')
-
-
-def coarsemask_head_generator(params):
-  """Generator function for Shapemask head architecture."""
-  raise NotImplementedError('Unimplemented')
-
-
-def finemask_head_generator(params):
-  """Generator function for Shapemask head architecture."""
-  raise NotImplementedError('Unimplemented')
+  head_params = params.mrcnn_head
+  return heads.MaskrcnnHead(
+      params.architecture.num_classes,
+      params.architecture.mask_target_size,
+      head_params.num_convs,
+      head_params.num_filters,
+      head_params.use_separable_conv,
+      params.norm_activation.activation,
+      head_params.use_batch_norm,
+      norm_activation=norm_activation_generator(params.norm_activation))
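A small usage sketch of the generator pattern above: `norm_activation_generator` returns a builder (per nn_ops.py later in this diff, a `functools.partial` over `NormActivation`), and call sites instantiate one layer per use. The params fragment is illustrative:

    from official.modeling.hyperparams import params_dict
    from official.vision.detection.modeling.architecture import nn_ops

    norm_params = params_dict.ParamsDict({
        'batch_norm_momentum': 0.997,
        'batch_norm_epsilon': 1e-4,
        'batch_norm_trainable': True,
        'activation': 'relu',
    })

    builder = norm_activation_generator(norm_params)
    bn_relu = builder()                      # BatchNorm followed by relu
    bn_only = builder(use_activation=False)  # per-call overrides pass through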
@@ -41,8 +41,10 @@ class Fpn(object):
                max_level=7,
                fpn_feat_dims=256,
                use_separable_conv=False,
+               activation='relu',
                use_batch_norm=True,
-               batch_norm_relu=nn_ops.BatchNormRelu):
+               norm_activation=nn_ops.norm_activation_builder(
+                   activation='relu')):
     """FPN initialization function.

     Args:
@@ -52,8 +54,8 @@ class Fpn(object):
       use_separable_conv: `bool`, if True use separable convolution for
         convolution in FPN layers.
       use_batch_norm: 'bool', indicating whether batchnorm layers are added.
-      batch_norm_relu: an operation that includes a batch normalization layer
-        followed by a relu layer(optional).
+      norm_activation: an operation that includes a normalization layer
+        followed by an optional activation layer.
     """
     self._min_level = min_level
     self._max_level = max_level
@@ -63,17 +65,23 @@ class Fpn(object):
           tf.keras.layers.SeparableConv2D, depth_multiplier=1)
     else:
       self._conv2d_op = tf.keras.layers.Conv2D
+    if activation == 'relu':
+      self._activation_op = tf.nn.relu
+    elif activation == 'swish':
+      self._activation_op = tf.nn.swish
+    else:
+      raise ValueError('Unsupported activation `{}`.'.format(activation))
     self._use_batch_norm = use_batch_norm
-    self._batch_norm_relu = batch_norm_relu
+    self._norm_activation = norm_activation

-    self._batch_norm_relus = {}
+    self._norm_activations = {}
     self._lateral_conv2d_op = {}
     self._post_hoc_conv2d_op = {}
     self._coarse_conv2d_op = {}
     for level in range(self._min_level, self._max_level + 1):
       if self._use_batch_norm:
-        self._batch_norm_relus[level] = batch_norm_relu(
-            relu=False, name='p%d-bn' % level)
+        self._norm_activations[level] = norm_activation(
+            use_activation=False, name='p%d-bn' % level)
       self._lateral_conv2d_op[level] = self._conv2d_op(
           filters=self._fpn_feat_dims,
           kernel_size=(1, 1),
@@ -133,11 +141,11 @@ class Fpn(object):
       for level in range(backbone_max_level + 1, self._max_level + 1):
         feats_in = feats[level - 1]
         if level > backbone_max_level + 1:
-          feats_in = tf.nn.relu(feats_in)
+          feats_in = self._activation_op(feats_in)
         feats[level] = self._coarse_conv2d_op[level](feats_in)
     if self._use_batch_norm:
       # Adds batch_norm layer.
       for level in range(self._min_level, self._max_level + 1):
-        feats[level] = self._batch_norm_relus[level](
+        feats[level] = self._norm_activations[level](
             feats[level], is_training=is_training)
     return feats
@@ -39,8 +39,10 @@ class RpnHead(tf.keras.layers.Layer):
                num_convs=2,
                num_filters=256,
                use_separable_conv=False,
+               activation='relu',
                use_batch_norm=True,
-               batch_norm_relu=nn_ops.BatchNormRelu):
+               norm_activation=nn_ops.norm_activation_builder(
+                   activation='relu')):
     """Initialize params to build Region Proposal Network head.

     Args:
@@ -55,12 +57,18 @@ class RpnHead(tf.keras.layers.Layer):
       use_separable_conv: `bool`, indicating whether the separable conv layers
         is used.
       use_batch_norm: 'bool', indicating whether batchnorm layers are added.
-      batch_norm_relu: an operation that includes a batch normalization layer
-        followed by a relu layer(optional).
+      norm_activation: an operation that includes a normalization layer
+        followed by an optional activation layer.
     """
     self._min_level = min_level
     self._max_level = max_level
     self._anchors_per_location = anchors_per_location
+    if activation == 'relu':
+      self._activation_op = tf.nn.relu
+    elif activation == 'swish':
+      self._activation_op = tf.nn.swish
+    else:
+      raise ValueError('Unsupported activation `{}`.'.format(activation))
     self._use_batch_norm = use_batch_norm

     if use_separable_conv:
@@ -78,7 +86,7 @@ class RpnHead(tf.keras.layers.Layer):
         num_filters,
         kernel_size=(3, 3),
         strides=(1, 1),
-        activation=(None if self._use_batch_norm else tf.nn.relu),
+        activation=(None if self._use_batch_norm else self._activation_op),
         padding='same',
         name='rpn')
     self._rpn_class_conv = self._conv2d_op(
@@ -94,10 +102,10 @@ class RpnHead(tf.keras.layers.Layer):
         padding='valid',
         name='rpn-box')

-    self._batch_norm_relus = {}
+    self._norm_activations = {}
     if self._use_batch_norm:
       for level in range(self._min_level, self._max_level + 1):
-        self._batch_norm_relus[level] = batch_norm_relu(name='rpn-l%d-bn' %
-                                                        level)
+        self._norm_activations[level] = norm_activation(name='rpn-l%d-bn' %
+                                                        level)

   def _shared_rpn_heads(self, features, anchors_per_location, level,
@@ -106,7 +114,7 @@ class RpnHead(tf.keras.layers.Layer):
     features = self._rpn_conv(features)
     if self._use_batch_norm:
       # The batch normalization layers are not shared between levels.
-      features = self._batch_norm_relus[level](
+      features = self._norm_activations[level](
           features, is_training=is_training)
     # Proposal classification scores
     scores = self._rpn_class_conv(features)
@@ -139,8 +147,10 @@ class FastrcnnHead(tf.keras.layers.Layer):
                use_separable_conv=False,
                num_fcs=2,
                fc_dims=1024,
+               activation='relu',
                use_batch_norm=True,
-               batch_norm_relu=nn_ops.BatchNormRelu):
+               norm_activation=nn_ops.norm_activation_builder(
+                   activation='relu')):
     """Initialize params to build Fast R-CNN box head.

     Args:
@@ -156,8 +166,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
       fc_dims: `int` number that represents the number of dimension of the FC
         layers.
       use_batch_norm: 'bool', indicating whether batchnorm layers are added.
-      batch_norm_relu: an operation that includes a batch normalization layer
-        followed by a relu layer(optional).
+      norm_activation: an operation that includes a normalization layer
+        followed by an optional activation layer.
     """
     self._num_classes = num_classes
@@ -177,9 +187,14 @@ class FastrcnnHead(tf.keras.layers.Layer):
     self._num_fcs = num_fcs
     self._fc_dims = fc_dims
+    if activation == 'relu':
+      self._activation_op = tf.nn.relu
+    elif activation == 'swish':
+      self._activation_op = tf.nn.swish
+    else:
+      raise ValueError('Unsupported activation `{}`.'.format(activation))
     self._use_batch_norm = use_batch_norm
-    self._batch_norm_relu = batch_norm_relu
+    self._norm_activation = norm_activation

     self._conv_ops = []
     self._conv_bn_ops = []
@@ -191,10 +206,10 @@ class FastrcnnHead(tf.keras.layers.Layer):
             strides=(1, 1),
             padding='same',
             dilation_rate=(1, 1),
-            activation=(None if self._use_batch_norm else tf.nn.relu),
+            activation=(None if self._use_batch_norm else self._activation_op),
             name='conv_{}'.format(i)))
       if self._use_batch_norm:
-        self._conv_bn_ops.append(self._batch_norm_relu())
+        self._conv_bn_ops.append(self._norm_activation())

     self._fc_ops = []
     self._fc_bn_ops = []
@@ -202,10 +217,10 @@ class FastrcnnHead(tf.keras.layers.Layer):
       self._fc_ops.append(
           tf.keras.layers.Dense(
               units=self._fc_dims,
-              activation=(None if self._use_batch_norm else tf.nn.relu),
+              activation=(None if self._use_batch_norm else self._activation_op),
              name='fc{}'.format(i)))
       if self._use_batch_norm:
-        self._fc_bn_ops.append(self._batch_norm_relu(fused=False))
+        self._fc_bn_ops.append(self._norm_activation(fused=False))

     self._class_predict = tf.keras.layers.Dense(
         self._num_classes,
@@ -266,8 +281,10 @@ class MaskrcnnHead(tf.keras.layers.Layer):
                num_convs=4,
                num_filters=256,
                use_separable_conv=False,
+               activation='relu',
                use_batch_norm=True,
-               batch_norm_relu=nn_ops.BatchNormRelu):
+               norm_activation=nn_ops.norm_activation_builder(
+                   activation='relu')):
     """Initialize params to build Fast R-CNN head.

     Args:
@@ -280,8 +297,8 @@ class MaskrcnnHead(tf.keras.layers.Layer):
       use_separable_conv: `bool`, indicating whether the separable conv layers
         is used.
       use_batch_norm: 'bool', indicating whether batchnorm layers are added.
-      batch_norm_relu: an operation that includes a batch normalization layer
-        followed by a relu layer(optional).
+      norm_activation: an operation that includes a normalization layer
+        followed by an optional activation layer.
     """
     self._num_classes = num_classes
     self._mask_target_size = mask_target_size
@@ -299,9 +316,14 @@ class MaskrcnnHead(tf.keras.layers.Layer):
         kernel_initializer=tf.keras.initializers.VarianceScaling(
             scale=2, mode='fan_out', distribution='untruncated_normal'),
         bias_initializer=tf.zeros_initializer())
+    if activation == 'relu':
+      self._activation_op = tf.nn.relu
+    elif activation == 'swish':
+      self._activation_op = tf.nn.swish
+    else:
+      raise ValueError('Unsupported activation `{}`.'.format(activation))
     self._use_batch_norm = use_batch_norm
-    self._batch_norm_relu = batch_norm_relu
+    self._norm_activation = norm_activation
     self._conv2d_ops = []
     for i in range(self._num_convs):
       self._conv2d_ops.append(
@@ -311,14 +333,14 @@ class MaskrcnnHead(tf.keras.layers.Layer):
             strides=(1, 1),
             padding='same',
             dilation_rate=(1, 1),
-            activation=(None if self._use_batch_norm else tf.nn.relu),
+            activation=(None if self._use_batch_norm else self._activation_op),
             name='mask-conv-l%d' % i))
     self._mask_conv_transpose = tf.keras.layers.Conv2DTranspose(
         self._num_filters,
         kernel_size=(2, 2),
         strides=(2, 2),
         padding='valid',
-        activation=(None if self._use_batch_norm else tf.nn.relu),
+        activation=(None if self._use_batch_norm else self._activation_op),
         kernel_initializer=tf.keras.initializers.VarianceScaling(
             scale=2, mode='fan_out', distribution='untruncated_normal'),
         bias_initializer=tf.zeros_initializer(),
@@ -353,11 +375,11 @@ class MaskrcnnHead(tf.keras.layers.Layer):
       for i in range(self._num_convs):
         net = self._conv2d_ops[i](net)
         if self._use_batch_norm:
-          net = self._batch_norm_relu()(net, is_training=is_training)
+          net = self._norm_activation()(net, is_training=is_training)

       net = self._mask_conv_transpose(net)
       if self._use_batch_norm:
-        net = self._batch_norm_relu()(net, is_training=is_training)
+        net = self._norm_activation()(net, is_training=is_training)

       mask_outputs = self._conv2d_op(
           self._num_classes,
@@ -398,7 +420,8 @@ class RetinanetHead(object):
                num_convs=4,
                num_filters=256,
                use_separable_conv=False,
-               batch_norm_relu=nn_ops.BatchNormRelu):
+               norm_activation=nn_ops.norm_activation_builder(
+                   activation='relu')):
     """Initialize params to build RetinaNet head.

     Args:
@@ -411,8 +434,8 @@ class RetinanetHead(object):
       num_filters: `int` number of filters used in the head architecture.
       use_separable_conv: `bool` to indicate whether to use separable
         convolution.
-      batch_norm_relu: an operation that includes a batch normalization layer
-        followed by a relu layer(optional).
+      norm_activation: an operation that includes a normalization layer
+        followed by an optional activation layer.
     """
     self._min_level = min_level
     self._max_level = max_level
@@ -423,13 +446,12 @@ class RetinanetHead(object):
     self._num_convs = num_convs
     self._num_filters = num_filters
     self._use_separable_conv = use_separable_conv

     with tf.name_scope('class_net') as scope_name:
       self._class_name_scope = tf.name_scope(scope_name)
     with tf.name_scope('box_net') as scope_name:
       self._box_name_scope = tf.name_scope(scope_name)
-    self._build_class_net_layers(batch_norm_relu)
-    self._build_box_net_layers(batch_norm_relu)
+    self._build_class_net_layers(norm_activation)
+    self._build_box_net_layers(norm_activation)

   def _class_net_batch_norm_name(self, i, level):
     return 'class-%d-%d' % (i, level)
@@ -437,7 +459,7 @@ class RetinanetHead(object):
   def _box_net_batch_norm_name(self, i, level):
     return 'box-%d-%d' % (i, level)

-  def _build_class_net_layers(self, batch_norm_relu):
+  def _build_class_net_layers(self, norm_activation):
     """Build re-usable layers for class prediction network."""
     if self._use_separable_conv:
       self._class_predict = tf.keras.layers.SeparableConv2D(
@@ -455,7 +477,7 @@ class RetinanetHead(object):
         padding='same',
         name='class-predict')
     self._class_conv = []
-    self._class_batch_norm_relu = {}
+    self._class_norm_activation = {}
     for i in range(self._num_convs):
       if self._use_separable_conv:
         self._class_conv.append(
@@ -479,9 +501,9 @@ class RetinanetHead(object):
               name='class-' + str(i)))
       for level in range(self._min_level, self._max_level + 1):
         name = self._class_net_batch_norm_name(i, level)
-        self._class_batch_norm_relu[name] = batch_norm_relu(name=name)
+        self._class_norm_activation[name] = norm_activation(name=name)

-  def _build_box_net_layers(self, batch_norm_relu):
+  def _build_box_net_layers(self, norm_activation):
     """Build re-usable layers for box prediction network."""
     if self._use_separable_conv:
       self._box_predict = tf.keras.layers.SeparableConv2D(
@@ -499,7 +521,7 @@ class RetinanetHead(object):
         padding='same',
         name='box-predict')
     self._box_conv = []
-    self._box_batch_norm_relu = {}
+    self._box_norm_activation = {}
     for i in range(self._num_convs):
       if self._use_separable_conv:
         self._box_conv.append(
@@ -523,13 +545,13 @@ class RetinanetHead(object):
               name='box-' + str(i)))
       for level in range(self._min_level, self._max_level + 1):
         name = self._box_net_batch_norm_name(i, level)
-        self._box_batch_norm_relu[name] = batch_norm_relu(name=name)
+        self._box_norm_activation[name] = norm_activation(name=name)

   def __call__(self, fpn_features, is_training=None):
     """Returns outputs of RetinaNet head."""
     class_outputs = {}
     box_outputs = {}
-    with backend.get_graph().as_default(), tf.name_scope('retinanet'):
+    with backend.get_graph().as_default(), tf.name_scope('retinanet_head'):
       for level in range(self._min_level, self._max_level + 1):
         features = fpn_features[level]
@@ -548,7 +570,7 @@ class RetinanetHead(object):
           # each level has its batch normalization to capture the statistical
           # difference among different levels.
           name = self._class_net_batch_norm_name(i, level)
-          features = self._class_batch_norm_relu[name](
+          features = self._class_norm_activation[name](
              features, is_training=is_training)

         classes = self._class_predict(features)
@@ -563,7 +585,7 @@ class RetinanetHead(object):
           # each level has its batch normalization to capture the statistical
          # difference among different levels.
           name = self._box_net_batch_norm_name(i, level)
-          features = self._box_batch_norm_relu[name](
+          features = self._box_norm_activation[name](
              features, is_training=is_training)

         boxes = self._box_predict(features)
@@ -953,13 +975,13 @@ class ShapemaskCoarsemaskHead(object):
   def coarsemask_decoder_net(self,
                              images,
                              is_training=None,
-                             batch_norm_relu=nn_ops.BatchNormRelu):
+                             norm_activation=nn_ops.norm_activation_builder()):
     """Coarse mask decoder network architecture.

     Args:
       images: A tensor of size [batch, height_in, width_in, channels_in].
       is_training: Whether batch_norm layers are in training mode.
-      batch_norm_relu: an operation that includes a batch normalization layer
+      norm_activation: an operation that includes a batch normalization layer
         followed by a relu layer(optional).

     Returns:
       images: A feature tensor of size [batch, output_size, output_size,
@@ -975,7 +997,7 @@ class ShapemaskCoarsemaskHead(object):
           padding='same',
           name='coarse-class-%d' % i)(
               images)
-      images = batch_norm_relu(name='coarse-class-%d-bn' % i)(
+      images = norm_activation(name='coarse-class-%d-bn' % i)(
           images, is_training=is_training)

     return images
@@ -991,7 +1013,7 @@ class ShapemaskFinemaskHead(object):
                num_convs,
                coarse_mask_thr,
                gt_upsample_scale,
-               batch_norm_relu=nn_ops.BatchNormRelu):
+               norm_activation=nn_ops.norm_activation_builder()):
     """Initialize params to build ShapeMask coarse and fine prediction head.

     Args:
@@ -1002,7 +1024,7 @@ class ShapemaskFinemaskHead(object):
         layer.
       coarse_mask_thr: the threshold for suppressing noisy coarse prediction.
       gt_upsample_scale: scale for upsampling groundtruths.
-      batch_norm_relu: an operation that includes a batch normalization layer
+      norm_activation: an operation that includes a batch normalization layer
         followed by a relu layer(optional).
     """
     self._mask_num_classes = num_classes
@@ -1038,7 +1060,7 @@ class ShapemaskFinemaskHead(object):
             activation=None,
             padding='same',
             name='fine-class-%d' % i))
-      self._fine_class_bn.append(batch_norm_relu(name='fine-class-%d-bn' % i))
+      self._fine_class_bn.append(norm_activation(name='fine-class-%d-bn' % i))

   def __call__(self, prior_conditioned_features, class_probs, is_training=None):
     """Generate instance masks from FPN features and detection priors.
...
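Throughout these heads, the convolution weights are shared across pyramid levels while each level keeps its own normalization layer, since feature statistics differ per level (the comments in RetinanetHead state this explicitly). A self-contained sketch of that pattern, with made-up names:

    import tensorflow.compat.v2 as tf

    min_level, max_level = 3, 7
    shared_conv = tf.keras.layers.Conv2D(256, 3, padding='same',
                                         name='head-conv')
    norms = {level: tf.keras.layers.BatchNormalization(name='l%d-bn' % level)
             for level in range(min_level, max_level + 1)}

    def head(fpn_features, training=False):
      outputs = {}
      for level, feats in fpn_features.items():
        x = shared_conv(feats)                  # weights shared across levels
        x = norms[level](x, training=training)  # statistics kept per level
        outputs[level] = tf.nn.relu(x)
      return outputs

    outs = head({l: tf.ones([1, 32, 32, 16]) for l in range(3, 8)})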
@@ -18,20 +18,21 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import functools
+
 from absl import logging
 import tensorflow.compat.v2 as tf
-from tensorflow.python.keras import backend


-class BatchNormRelu(tf.keras.layers.Layer):
-  """Combined Batch Normalization and ReLU layers."""
+class NormActivation(tf.keras.layers.Layer):
+  """Combined Normalization and Activation layers."""

   def __init__(self,
                momentum=0.997,
                epsilon=1e-4,
                trainable=True,
-               relu=True,
                init_zero=False,
+               use_activation=True,
+               activation='relu',
                fused=True,
                name=None):
     """A class to construct layers for a batch normalization followed by a ReLU.
@@ -39,22 +40,24 @@ class NormActivation(tf.keras.layers.Layer):
     Args:
       momentum: momentum for the moving average.
       epsilon: small float added to variance to avoid dividing by zero.
-      trainable: `boolean`, if True also add variables to the graph collection
+      trainable: `bool`, if True also add variables to the graph collection
         GraphKeys.TRAINABLE_VARIABLES. If False, freeze batch normalization
         layer.
-      relu: `bool` if False, omits the ReLU operation.
       init_zero: `bool` if True, initializes scale parameter of batch
         normalization with 0. If False, initialize it with 1.
       fused: `bool` fused option in batch normalization.
+      use_activation: `bool`, whether to add the optional activation layer
+        after the batch normalization layer.
+      activation: 'string', the type of the activation layer. Currently support
+        `relu` and `swish`.
       name: `str` name for the operation.
     """
-    super(BatchNormRelu, self).__init__(trainable=trainable)
-    self._use_relu = relu
+    super(NormActivation, self).__init__(trainable=trainable)
     if init_zero:
       gamma_initializer = tf.keras.initializers.Zeros()
     else:
       gamma_initializer = tf.keras.initializers.Ones()
-    self._batch_norm_op = tf.keras.layers.BatchNormalization(
+    self._normalization_op = tf.keras.layers.BatchNormalization(
         momentum=momentum,
         epsilon=epsilon,
         center=True,
@@ -63,9 +66,16 @@ class NormActivation(tf.keras.layers.Layer):
         fused=fused,
         gamma_initializer=gamma_initializer,
         name=name)
+    self._use_activation = use_activation
+    if activation == 'relu':
+      self._activation_op = tf.nn.relu
+    elif activation == 'swish':
+      self._activation_op = tf.nn.swish
+    else:
+      raise ValueError('Unsupported activation `{}`.'.format(activation))

   def __call__(self, inputs, is_training=None):
-    """Builds layers for a batch normalization followed by a ReLU.
+    """Builds the normalization layer followed by an optional activation layer.

     Args:
       inputs: `Tensor` of shape `[batch, channels, ...]`.
@@ -78,9 +88,22 @@ class NormActivation(tf.keras.layers.Layer):
     # from keras.Model.training
     if is_training and self.trainable:
       is_training = True
-    inputs = self._batch_norm_op(inputs, training=is_training)
-    if self._use_relu:
-      inputs = tf.nn.relu(inputs)
+    inputs = self._normalization_op(inputs, training=is_training)
+    if self._use_activation:
+      inputs = self._activation_op(inputs)
     return inputs
+
+
+def norm_activation_builder(momentum=0.997,
+                            epsilon=1e-4,
+                            trainable=True,
+                            activation='relu',
+                            **kwargs):
+  return functools.partial(
+      NormActivation,
+      momentum=momentum,
+      epsilon=epsilon,
+      trainable=trainable,
+      activation=activation,
+      **kwargs)
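Usage sketch for the refactored layer: the builder pins the normalization hyperparameters once, each call of the returned partial creates a fresh `NormActivation`, and `use_activation=False` plays the role of the old `relu=False`:

    import tensorflow.compat.v2 as tf

    builder = norm_activation_builder(momentum=0.9, epsilon=1e-5,
                                      activation='swish')
    norm_swish = builder(name='p3-bn')               # BN followed by swish
    norm_only = builder(use_activation=False, name='p3-bn-linear')

    outputs = norm_swish(tf.ones([2, 8, 8, 16]), is_training=True)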
...@@ -34,21 +34,27 @@ class Resnet(object): ...@@ -34,21 +34,27 @@ class Resnet(object):
def __init__(self, def __init__(self,
resnet_depth, resnet_depth,
batch_norm_relu=nn_ops.BatchNormRelu, activation='relu',
norm_activation=nn_ops.norm_activation_builder(
activation='relu'),
data_format='channels_last'): data_format='channels_last'):
"""ResNet initialization function. """ResNet initialization function.
Args: Args:
resnet_depth: `int` depth of ResNet backbone model. resnet_depth: `int` depth of ResNet backbone model.
batch_norm_relu: an operation that includes a batch normalization layer norm_activation: an operation that includes a normalization layer
followed by a relu layer(optional). followed by an optional activation layer.
data_format: `str` either "channels_first" for `[batch, channels, height, data_format: `str` either "channels_first" for `[batch, channels, height,
width]` or "channels_last for `[batch, height, width, channels]`. width]` or "channels_last for `[batch, height, width, channels]`.
""" """
self._resnet_depth = resnet_depth self._resnet_depth = resnet_depth
if activation == 'relu':
self._batch_norm_relu = batch_norm_relu self._activation_op = tf.nn.relu
elif activation == 'swish':
self._activation_op = tf.nn.swish
else:
raise ValueError('Unsupported activation `{}`.'.format(activation))
self._norm_activation = norm_activation
self._data_format = data_format self._data_format = data_format
model_params = { model_params = {
...@@ -170,19 +176,19 @@ class Resnet(object): ...@@ -170,19 +176,19 @@ class Resnet(object):
# Projection shortcut in first layer to match filters and strides # Projection shortcut in first layer to match filters and strides
shortcut = self.conv2d_fixed_padding( shortcut = self.conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=1, strides=strides) inputs=inputs, filters=filters, kernel_size=1, strides=strides)
shortcut = self._batch_norm_relu(relu=False)( shortcut = self._norm_activation(use_activation=False)(
shortcut, is_training=is_training) shortcut, is_training=is_training)
inputs = self.conv2d_fixed_padding( inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=3, strides=strides) inputs=inputs, filters=filters, kernel_size=3, strides=strides)
inputs = self._batch_norm_relu()(inputs, is_training=is_training) inputs = self._norm_activation()(inputs, is_training=is_training)
inputs = self.conv2d_fixed_padding( inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=3, strides=1) inputs=inputs, filters=filters, kernel_size=3, strides=1)
inputs = self._batch_norm_relu()( inputs = self._norm_activation(use_activation=False, init_zero=True)(
inputs, relu=False, init_zero=True, is_training=is_training) inputs, is_training=is_training)
return tf.nn.relu(inputs + shortcut) return self._activation_op(inputs + shortcut)
def bottleneck_block(self, def bottleneck_block(self,
inputs, inputs,
...@@ -214,24 +220,23 @@ class Resnet(object): ...@@ -214,24 +220,23 @@ class Resnet(object):
filters_out = 4 * filters filters_out = 4 * filters
shortcut = self.conv2d_fixed_padding( shortcut = self.conv2d_fixed_padding(
inputs=inputs, filters=filters_out, kernel_size=1, strides=strides) inputs=inputs, filters=filters_out, kernel_size=1, strides=strides)
shortcut = self._batch_norm_relu(relu=False)( shortcut = self._norm_activation(use_activation=False)(
shortcut, is_training=is_training) shortcut, is_training=is_training)
inputs = self.conv2d_fixed_padding( inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=1, strides=1) inputs=inputs, filters=filters, kernel_size=1, strides=1)
inputs = self._batch_norm_relu()(inputs, is_training=is_training) inputs = self._norm_activation()(inputs, is_training=is_training)
inputs = self.conv2d_fixed_padding( inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=3, strides=strides) inputs=inputs, filters=filters, kernel_size=3, strides=strides)
inputs = self._batch_norm_relu()(inputs, is_training=is_training) inputs = self._norm_activation()(inputs, is_training=is_training)
inputs = self.conv2d_fixed_padding( inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=4 * filters, kernel_size=1, strides=1) inputs=inputs, filters=4 * filters, kernel_size=1, strides=1)
inputs = self._batch_norm_relu( inputs = self._norm_activation(use_activation=False, init_zero=True)(
relu=False, init_zero=True)( inputs, is_training=is_training)
inputs, is_training=is_training)
return tf.nn.relu(inputs + shortcut) return self._activation_op(inputs + shortcut)
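The refactor above replaces the `batch_norm_relu` callable with a `norm_activation` factory whose `use_activation` and `init_zero` options move from call time to construction time. A minimal sketch of the pattern, assuming a Keras `BatchNormalization` backend (the builder name and internals here are illustrative, not the repo's actual helper):

```python
import tensorflow.compat.v2 as tf

def norm_activation_builder(momentum=0.997, epsilon=1e-4,
                            activation=tf.nn.relu):
  """Returns a factory matching the `self._norm_activation(...)` call sites."""
  def factory(use_activation=True, init_zero=False):
    # `init_zero` zeros the BN scale so a residual branch starts as identity.
    gamma_init = 'zeros' if init_zero else 'ones'
    bn = tf.keras.layers.BatchNormalization(
        momentum=momentum, epsilon=epsilon, gamma_initializer=gamma_init)
    def layer(inputs, is_training=None):
      outputs = bn(inputs, training=is_training)
      return activation(outputs) if use_activation else outputs
    return layer
  return factory
```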
def block_group(self, inputs, filters, block_fn, blocks, strides, name, def block_group(self, inputs, filters, block_fn, blocks, strides, name,
is_training): is_training):
...@@ -279,7 +284,7 @@ class Resnet(object): ...@@ -279,7 +284,7 @@ class Resnet(object):
inputs = self.conv2d_fixed_padding( inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=64, kernel_size=7, strides=2) inputs=inputs, filters=64, kernel_size=7, strides=2)
inputs = tf.identity(inputs, 'initial_conv') inputs = tf.identity(inputs, 'initial_conv')
inputs = self._batch_norm_relu()(inputs, is_training=is_training) inputs = self._norm_activation()(inputs, is_training=is_training)
inputs = tf.keras.layers.MaxPool2D( inputs = tf.keras.layers.MaxPool2D(
pool_size=3, strides=2, padding='SAME', pool_size=3, strides=2, padding='SAME',
......
...@@ -24,37 +24,7 @@ import re ...@@ -24,37 +24,7 @@ import re
import tensorflow.compat.v2 as tf import tensorflow.compat.v2 as tf
from official.vision.detection.modeling import checkpoint_utils from official.vision.detection.modeling import checkpoint_utils
from official.vision.detection.modeling import learning_rates from official.vision.detection.modeling import learning_rates
from official.vision.detection.modeling import optimizers
class OptimizerFactory(object):
"""Class to generate optimizer function."""
def __init__(self, params):
"""Creates optimized based on the specified flags."""
if params.type == 'momentum':
nesterov = False
try:
nesterov = params.nesterov
except AttributeError:
pass
self._optimizer = functools.partial(
tf.keras.optimizers.SGD,
momentum=params.momentum,
nesterov=nesterov)
elif params.type == 'adam':
self._optimizer = tf.keras.optimizers.Adam
elif params.type == 'adadelta':
self._optimizer = tf.keras.optimizers.Adadelta
elif params.type == 'adagrad':
self._optimizer = tf.keras.optimizers.Adagrad
elif params.type == 'rmsprop':
self._optimizer = functools.partial(
tf.keras.optimizers.RMSprop, momentum=params.momentum)
else:
raise ValueError('Unsupported optimizer type %s.' % self._optimizer)
def __call__(self, learning_rate):
return self._optimizer(learning_rate=learning_rate)
def _make_filter_trainable_variables_fn(frozen_variable_prefix): def _make_filter_trainable_variables_fn(frozen_variable_prefix):
...@@ -73,7 +43,8 @@ def _make_filter_trainable_variables_fn(frozen_variable_prefix): ...@@ -73,7 +43,8 @@ def _make_filter_trainable_variables_fn(frozen_variable_prefix):
# the frozen variables' names. # the frozen variables' names.
filtered_variables = [ filtered_variables = [
v for v in variables v for v in variables
if not re.match(frozen_variable_prefix, v.name) if not frozen_variable_prefix or
not re.match(frozen_variable_prefix, v.name)
] ]
return filtered_variables return filtered_variables
...@@ -94,9 +65,9 @@ class Model(object): ...@@ -94,9 +65,9 @@ class Model(object):
tf.compat.v2.keras.mixed_precision.experimental.set_policy(policy) tf.compat.v2.keras.mixed_precision.experimental.set_policy(policy)
# Optimization. # Optimization.
self._optimizer_fn = OptimizerFactory(params.train.optimizer) self._optimizer_fn = optimizers.OptimizerFactory(params.train.optimizer)
self._learning_rate = learning_rates.learning_rate_generator( self._learning_rate = learning_rates.learning_rate_generator(
params.train.learning_rate) params.train.total_steps, params.train.learning_rate)
self._frozen_variable_prefix = params.train.frozen_variable_prefix self._frozen_variable_prefix = params.train.frozen_variable_prefix
self._regularization_var_regex = params.train.regularization_variable_regex self._regularization_var_regex = params.train.regularization_variable_regex
......
...@@ -28,9 +28,10 @@ from official.modeling.hyperparams import params_dict ...@@ -28,9 +28,10 @@ from official.modeling.hyperparams import params_dict
class StepLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule): class StepLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
"""Class to generate learning rate tensor.""" """Class to generate learning rate tensor."""
def __init__(self, params): def __init__(self, total_steps, params):
"""Creates the step learning rate tensor with linear warmup.""" """Creates the step learning rate tensor with linear warmup."""
super(StepLearningRateWithLinearWarmup, self).__init__() super(StepLearningRateWithLinearWarmup, self).__init__()
self._total_steps = total_steps
assert isinstance(params, (dict, params_dict.ParamsDict)) assert isinstance(params, (dict, params_dict.ParamsDict))
if isinstance(params, dict): if isinstance(params, dict):
params = params_dict.ParamsDict(params) params = params_dict.ParamsDict(params)
...@@ -59,9 +60,10 @@ class StepLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRat ...@@ -59,9 +60,10 @@ class StepLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRat
class CosineLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule): class CosineLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
"""Class to generate learning rate tensor.""" """Class to generate learning rate tensor."""
def __init__(self, params): def __init__(self, total_steps, params):
"""Creates the consine learning rate tensor with linear warmup.""" """Creates the consine learning rate tensor with linear warmup."""
super(CosineLearningRateWithLinearWarmup, self).__init__() super(CosineLearningRateWithLinearWarmup, self).__init__()
self._total_steps = total_steps
assert isinstance(params, (dict, params_dict.ParamsDict)) assert isinstance(params, (dict, params_dict.ParamsDict))
if isinstance(params, dict): if isinstance(params, dict):
params = params_dict.ParamsDict(params) params = params_dict.ParamsDict(params)
...@@ -72,7 +74,7 @@ class CosineLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningR ...@@ -72,7 +74,7 @@ class CosineLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningR
warmup_lr = self._params.warmup_learning_rate warmup_lr = self._params.warmup_learning_rate
warmup_steps = self._params.warmup_steps warmup_steps = self._params.warmup_steps
init_lr = self._params.init_learning_rate init_lr = self._params.init_learning_rate
total_steps = self._params.total_steps total_steps = self._total_steps
linear_warmup = ( linear_warmup = (
warmup_lr + global_step / warmup_steps * (init_lr - warmup_lr)) warmup_lr + global_step / warmup_steps * (init_lr - warmup_lr))
cosine_learning_rate = ( cosine_learning_rate = (
...@@ -86,11 +88,11 @@ class CosineLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningR ...@@ -86,11 +88,11 @@ class CosineLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningR
return {'_params': self._params.as_dict()} return {'_params': self._params.as_dict()}
def learning_rate_generator(params): def learning_rate_generator(total_steps, params):
"""The learning rate function generator.""" """The learning rate function generator."""
if params.type == 'step': if params.type == 'step':
return StepLearningRateWithLinearWarmup(params) return StepLearningRateWithLinearWarmup(total_steps, params)
elif params.type == 'cosine': elif params.type == 'cosine':
return CosineLearningRateWithLinearWarmup(params) return CosineLearningRateWithLinearWarmup(total_steps, params)
else: else:
raise ValueError('Unsupported learning rate type: {}.'.format(params.type)) raise ValueError('Unsupported learning rate type: {}.'.format(params.type))
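Under the new signature, `total_steps` comes from the training config rather than from the learning-rate params themselves. A hedged usage sketch (the numeric values are placeholders, not the repo's defaults):

```python
import tensorflow.compat.v2 as tf
from official.modeling.hyperparams import params_dict

lr_params = params_dict.ParamsDict({
    'type': 'cosine',
    'warmup_learning_rate': 0.001,
    'warmup_steps': 100,
    'init_learning_rate': 0.01,
})
lr_schedule = learning_rate_generator(total_steps=1000, params=lr_params)
print(lr_schedule(tf.constant(500.0)))  # evaluate the schedule mid-training
```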
...@@ -371,8 +371,8 @@ class MaskrcnnLoss(object): ...@@ -371,8 +371,8 @@ class MaskrcnnLoss(object):
class RetinanetClassLoss(object): class RetinanetClassLoss(object):
"""RetinaNet class loss.""" """RetinaNet class loss."""
def __init__(self, params): def __init__(self, params, num_classes):
self._num_classes = params.num_classes self._num_classes = num_classes
self._focal_loss_alpha = params.focal_loss_alpha self._focal_loss_alpha = params.focal_loss_alpha
self._focal_loss_gamma = params.focal_loss_gamma self._focal_loss_gamma = params.focal_loss_gamma
......
...@@ -49,14 +49,16 @@ class MaskrcnnModel(base_model.Model): ...@@ -49,14 +49,16 @@ class MaskrcnnModel(base_model.Model):
# Architecture generators. # Architecture generators.
self._backbone_fn = factory.backbone_generator(params) self._backbone_fn = factory.backbone_generator(params)
self._fpn_fn = factory.multilevel_features_generator(params) self._fpn_fn = factory.multilevel_features_generator(params)
self._rpn_head_fn = factory.rpn_head_generator(params.rpn_head) self._rpn_head_fn = factory.rpn_head_generator(params)
self._generate_rois_fn = roi_ops.ROIGenerator(params.roi_proposal) self._generate_rois_fn = roi_ops.ROIGenerator(params.roi_proposal)
self._sample_rois_fn = sampling_ops.ROISampler(params.roi_sampling) self._sample_rois_fn = sampling_ops.ROISampler(params.roi_sampling)
self._sample_masks_fn = sampling_ops.MaskSampler(params.mask_sampling) self._sample_masks_fn = sampling_ops.MaskSampler(
params.architecture.mask_target_size,
params.mask_sampling.num_mask_samples_per_image)
self._frcnn_head_fn = factory.fast_rcnn_head_generator(params.frcnn_head) self._frcnn_head_fn = factory.fast_rcnn_head_generator(params)
if self._include_mask: if self._include_mask:
self._mrcnn_head_fn = factory.mask_rcnn_head_generator(params.mrcnn_head) self._mrcnn_head_fn = factory.mask_rcnn_head_generator(params)
# Loss function. # Loss function.
self._rpn_score_loss_fn = losses.RpnScoreLoss(params.rpn_score_loss) self._rpn_score_loss_fn = losses.RpnScoreLoss(params.rpn_score_loss)
...@@ -91,8 +93,8 @@ class MaskrcnnModel(base_model.Model): ...@@ -91,8 +93,8 @@ class MaskrcnnModel(base_model.Model):
tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
rpn_box_outputs), rpn_box_outputs),
}) })
input_anchor = anchor.Anchor(self._params.anchor.min_level, input_anchor = anchor.Anchor(self._params.architecture.min_level,
self._params.anchor.max_level, self._params.architecture.max_level,
self._params.anchor.num_scales, self._params.anchor.num_scales,
self._params.anchor.aspect_ratios, self._params.anchor.aspect_ratios,
self._params.anchor.anchor_size, self._params.anchor.anchor_size,
......
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Optimizers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import numpy as np
import tensorflow.compat.v2 as tf
class OptimizerFactory(object):
"""Class to generate optimizer function."""
def __init__(self, params):
"""Creates optimized based on the specified flags."""
if params.type == 'momentum':
self._optimizer = functools.partial(
tf.keras.optimizers.SGD,
momentum=params.momentum,
nesterov=params.nesterov)
elif params.type == 'adam':
self._optimizer = tf.keras.optimizers.Adam
elif params.type == 'adadelta':
self._optimizer = tf.keras.optimizers.Adadelta
elif params.type == 'adagrad':
self._optimizer = tf.keras.optimizers.Adagrad
elif params.type == 'rmsprop':
self._optimizer = functools.partial(
tf.keras.optimizers.RMSprop, momentum=params.momentum)
else:
raise ValueError('Unsupported optimizer type `{}`.'.format(params.type))
def __call__(self, learning_rate):
return self._optimizer(learning_rate=learning_rate)
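A hedged usage sketch of the relocated factory; the params fields mirror exactly what the constructor above reads, and the values are generic examples:

```python
from official.modeling.hyperparams import params_dict

opt_params = params_dict.ParamsDict(
    {'type': 'momentum', 'momentum': 0.9, 'nesterov': True})
optimizer = OptimizerFactory(opt_params)(learning_rate=0.1)
```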
...@@ -44,16 +44,19 @@ class RetinanetModel(base_model.Model): ...@@ -44,16 +44,19 @@ class RetinanetModel(base_model.Model):
# Architecture generators. # Architecture generators.
self._backbone_fn = factory.backbone_generator(params) self._backbone_fn = factory.backbone_generator(params)
self._fpn_fn = factory.multilevel_features_generator(params) self._fpn_fn = factory.multilevel_features_generator(params)
self._head_fn = factory.retinanet_head_generator(params.retinanet_head) self._head_fn = factory.retinanet_head_generator(params)
# Loss function. # Loss function.
self._cls_loss_fn = losses.RetinanetClassLoss(params.retinanet_loss) self._cls_loss_fn = losses.RetinanetClassLoss(
params.retinanet_loss, params.architecture.num_classes)
self._box_loss_fn = losses.RetinanetBoxLoss(params.retinanet_loss) self._box_loss_fn = losses.RetinanetBoxLoss(params.retinanet_loss)
self._box_loss_weight = params.retinanet_loss.box_loss_weight self._box_loss_weight = params.retinanet_loss.box_loss_weight
self._keras_model = None self._keras_model = None
# Predict function. # Predict function.
self._generate_detections_fn = postprocess_ops.MultilevelDetectionGenerator( self._generate_detections_fn = postprocess_ops.MultilevelDetectionGenerator(
params.architecture.min_level,
params.architecture.max_level,
params.postprocess) params.postprocess)
self._transpose_input = params.train.transpose_input self._transpose_input = params.train.transpose_input
......
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
...@@ -294,10 +294,10 @@ def _generate_detections_batched(boxes, ...@@ -294,10 +294,10 @@ def _generate_detections_batched(boxes,
class MultilevelDetectionGenerator(object): class MultilevelDetectionGenerator(object):
"""Generates detected boxes with scores and classes for one-stage detector.""" """Generates detected boxes with scores and classes for one-stage detector."""
def __init__(self, params): def __init__(self, min_level, max_level, params):
self._min_level = min_level
self._max_level = max_level
self._generate_detections = generate_detections_factory(params) self._generate_detections = generate_detections_factory(params)
self._min_level = params.min_level
self._max_level = params.max_level
def __call__(self, box_outputs, class_outputs, anchor_boxes, image_shape): def __call__(self, box_outputs, class_outputs, anchor_boxes, image_shape):
# Collects outputs from all levels into a list. # Collects outputs from all levels into a list.
......
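With this change, the pyramid range is passed to the detection generator explicitly instead of being read from `params`. A hypothetical construction (the level values are just typical FPN settings, and `postprocess_params` stands in for the `params.postprocess` sub-config):

```python
generator = MultilevelDetectionGenerator(
    min_level=3, max_level=7, params=postprocess_params)
```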
...@@ -346,9 +346,9 @@ class ROISampler(object): ...@@ -346,9 +346,9 @@ class ROISampler(object):
class MaskSampler(object): class MaskSampler(object):
"""Samples and creates mask training targets.""" """Samples and creates mask training targets."""
def __init__(self, params): def __init__(self, mask_target_size, num_mask_samples_per_image):
self._num_mask_samples_per_image = params.num_mask_samples_per_image self._mask_target_size = mask_target_size
self._mask_target_size = params.mask_target_size self._num_mask_samples_per_image = num_mask_samples_per_image
def __call__(self, def __call__(self,
candidate_rois, candidate_rois,
......
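Likewise, `MaskSampler` now takes its two scalar settings directly. A hedged usage sketch with common Mask R-CNN values (not values taken from this diff):

```python
mask_sampler = MaskSampler(mask_target_size=28, num_mask_samples_per_image=64)
```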
...@@ -48,12 +48,143 @@ def nearest_upsampling(data, scale): ...@@ -48,12 +48,143 @@ def nearest_upsampling(data, scale):
return tf.reshape(data, [bs, h * scale, w * scale, c]) return tf.reshape(data, [bs, h * scale, w * scale, c])
def feature_bilinear_interpolation(features, kernel_y, kernel_x):
"""Feature bilinear interpolation.
The RoIAlign feature f can be computed by bilinear interpolation
of four neighboring feature points f0, f1, f2, and f3.
f(y, x) = [hy, ly] * [[f00, f01], [f10, f11]] * [hx, lx]^T
f(y, x) = (hy*hx)f00 + (hy*lx)f01 + (ly*hx)f10 + (lx*ly)f11
f(y, x) = w00*f00 + w01*f01 + w10*f10 + w11*f11
kernel_y = [hy, ly]
kernel_x = [hx, lx]
Args:
features: The features are in shape of [batch_size, num_boxes, output_size *
2, output_size * 2, num_filters].
kernel_y: Tensor of size [batch_size, boxes, output_size, 2, 1].
kernel_x: Tensor of size [batch_size, boxes, output_size, 2, 1].
Returns:
A 5-D tensor representing feature crop of shape
[batch_size, num_boxes, output_size, output_size, num_filters].
"""
(batch_size, num_boxes, output_size, _,
num_filters) = features.get_shape().as_list()
output_size = output_size // 2
kernel_y = tf.reshape(kernel_y, [batch_size, num_boxes, output_size * 2, 1])
kernel_x = tf.reshape(kernel_x, [batch_size, num_boxes, 1, output_size * 2])
# Use implicit broadcast to generate the interpolation kernel. The
# multiplier `4` is for avg pooling.
interpolation_kernel = kernel_y * kernel_x * 4
# Interpolate the gathered features with computed interpolation kernels.
features *= tf.cast(
tf.expand_dims(interpolation_kernel, axis=-1), dtype=features.dtype)
features = tf.reshape(
features,
[batch_size * num_boxes, output_size * 2, output_size * 2, num_filters])
features = tf.nn.avg_pool(features, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID')
features = tf.reshape(
features, [batch_size, num_boxes, output_size, output_size, num_filters])
return features
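As a quick sanity check on the kernel algebra in the docstring: the four bilinear weights are the outer products of [hy, ly] and [hx, lx] and always sum to one; the extra factor of 4 applied above merely cancels the subsequent 2x2 average pooling.

```python
ly, lx = 0.25, 0.25
hy, hx = 1.0 - ly, 1.0 - lx
w00, w01, w10, w11 = hy * hx, hy * lx, ly * hx, ly * lx
assert abs((w00 + w01 + w10 + w11) - 1.0) < 1e-9  # weights are a partition of 1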
def compute_grid_positions(boxes, boundaries, output_size, sample_offset):
"""Compute the grid position w.r.t.
the corresponding feature map.
Args:
boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] encoding the
information of each box w.r.t. the corresponding feature map.
boxes[:, :, 0:2] are the grid position in (y, x) (float) of the top-left
corner of each box. boxes[:, :, 2:4] are the box sizes in (h, w) (float)
in terms of the number of pixels of the corresponding feature map size.
boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2] representing
the boundary (in (y, x)) of the corresponding feature map for each box.
Any resampled grid points that go beyond the boundary will be clipped.
output_size: a scalar indicating the output crop size.
sample_offset: a float number in [0, 1] indicates the subpixel sample offset
from grid point.
Returns:
kernel_y: Tensor of size [batch_size, boxes, output_size, 2, 1].
kernel_x: Tensor of size [batch_size, boxes, output_size, 2, 1].
box_grid_y0y1: Tensor of size [batch_size, boxes, output_size, 2]
box_grid_x0x1: Tensor of size [batch_size, boxes, output_size, 2]
"""
batch_size, num_boxes, _ = boxes.get_shape().as_list()
box_grid_x = []
box_grid_y = []
for i in range(output_size):
box_grid_x.append(boxes[:, :, 1] +
(i + sample_offset) * boxes[:, :, 3] / output_size)
box_grid_y.append(boxes[:, :, 0] +
(i + sample_offset) * boxes[:, :, 2] / output_size)
box_grid_x = tf.stack(box_grid_x, axis=2)
box_grid_y = tf.stack(box_grid_y, axis=2)
box_grid_y0 = tf.floor(box_grid_y)
box_grid_x0 = tf.floor(box_grid_x)
box_grid_x0 = tf.maximum(0., box_grid_x0)
box_grid_y0 = tf.maximum(0., box_grid_y0)
box_grid_x0 = tf.minimum(box_grid_x0, tf.expand_dims(boundaries[:, :, 1], -1))
box_grid_x1 = tf.minimum(box_grid_x0 + 1,
tf.expand_dims(boundaries[:, :, 1], -1))
box_grid_y0 = tf.minimum(box_grid_y0, tf.expand_dims(boundaries[:, :, 0], -1))
box_grid_y1 = tf.minimum(box_grid_y0 + 1,
tf.expand_dims(boundaries[:, :, 0], -1))
box_gridx0x1 = tf.stack([box_grid_x0, box_grid_x1], axis=-1)
box_gridy0y1 = tf.stack([box_grid_y0, box_grid_y1], axis=-1)
# The RoIAlign feature f can be computed by bilinear interpolation of four
# neighboring feature points f0, f1, f2, and f3.
# f(y, x) = [hy, ly] * [[f00, f01], [f10, f11]] * [hx, lx]^T
# f(y, x) = (hy*hx)f00 + (hy*lx)f01 + (ly*hx)f10 + (lx*ly)f11
# f(y, x) = w00*f00 + w01*f01 + w10*f10 + w11*f11
ly = box_grid_y - box_grid_y0
lx = box_grid_x - box_grid_x0
hy = 1.0 - ly
hx = 1.0 - lx
kernel_y = tf.reshape(
tf.stack([hy, ly], axis=3), [batch_size, num_boxes, output_size, 2, 1])
kernel_x = tf.reshape(
tf.stack([hx, lx], axis=3), [batch_size, num_boxes, output_size, 2, 1])
return kernel_y, kernel_x, box_gridy0y1, box_gridx0x1
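A toy shape check for `compute_grid_positions`, run eagerly (the input values are arbitrary):

```python
import tensorflow.compat.v2 as tf

boxes = tf.constant([[[0.0, 0.0, 4.0, 4.0]]])   # [batch=1, boxes=1, 4]
boundaries = tf.constant([[[7.0, 7.0]]])        # [batch=1, boxes=1, 2]
ky, kx, gy0y1, gx0x1 = compute_grid_positions(
    boxes, boundaries, output_size=2, sample_offset=0.5)
print(ky.shape, gx0x1.shape)  # (1, 1, 2, 2, 1) (1, 1, 2, 2)
```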
def get_grid_one_hot(box_gridy0y1, box_gridx0x1, feature_height, feature_width):
"""Get grid_one_hot from indices and feature_size."""
(batch_size, num_boxes, output_size, _) = box_gridx0x1.get_shape().as_list()
y_indices = tf.cast(
tf.reshape(box_gridy0y1, [batch_size, num_boxes, output_size, 2]),
dtype=tf.int32)
x_indices = tf.cast(
tf.reshape(box_gridx0x1, [batch_size, num_boxes, output_size, 2]),
dtype=tf.int32)
# shape is [batch_size, num_boxes, output_size, 2, height]
grid_y_one_hot = tf.one_hot(tf.cast(y_indices, tf.int32), feature_height)
# shape is [batch_size, num_boxes, output_size, 2, width]
grid_x_one_hot = tf.one_hot(tf.cast(x_indices, tf.int32), feature_width)
return grid_y_one_hot, grid_x_one_hot
def selective_crop_and_resize(features, def selective_crop_and_resize(features,
boxes, boxes,
box_levels, box_levels,
boundaries, boundaries,
output_size=7, output_size=7,
sample_offset=0.5): sample_offset=0.5,
use_einsum_gather=False):
"""Crop and resize boxes on a set of feature maps. """Crop and resize boxes on a set of feature maps.
Given multiple features maps indexed by different levels, and a set of boxes Given multiple features maps indexed by different levels, and a set of boxes
...@@ -67,7 +198,7 @@ def selective_crop_and_resize(features, ...@@ -67,7 +198,7 @@ def selective_crop_and_resize(features,
pixel. pixel.
For performance, we perform the gather and interpolation on all layers as a For performance, we perform the gather and interpolation on all layers as a
single operation. This is op the multi-level features are first stacked and single operation. In this op the multi-level features are first stacked and
gathered into [2*output_size, 2*output_size] feature points. Then bilinear gathered into [2*output_size, 2*output_size] feature points. Then bilinear
interpolation is performed on the gathered feature points to generate interpolation is performed on the gathered feature points to generate
[output_size, output_size] RoIAlign feature map. [output_size, output_size] RoIAlign feature map.
...@@ -86,14 +217,13 @@ def selective_crop_and_resize(features, ...@@ -86,14 +217,13 @@ def selective_crop_and_resize(features,
output_size. output_size.
Args: Args:
features: a 5-D tensor of shape features: a 5-D tensor of shape [batch_size, num_levels, max_height,
[batch_size, num_levels, max_height, max_width, num_filters] where max_width, num_filters] where cropping and resizing are based.
cropping and resizing are based.
boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] encoding the boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] encoding the
information of each box w.r.t. the corresponding feature map. information of each box w.r.t. the corresponding feature map.
boxes[:, :, 0:2] are the grid position in (y, x) (float) of the top-left boxes[:, :, 0:2] are the grid position in (y, x) (float) of the top-left
corner of each box. boxes[:, :, 2:4] are the box sizes in (h, w) (float) corner of each box. boxes[:, :, 2:4] are the box sizes in (h, w) (float)
in terms of the number of pixels of the corresponding feature map size. in terms of the number of pixels of the corresponding feature map size.
box_levels: a 3-D tensor of shape [batch_size, num_boxes, 1] representing box_levels: a 3-D tensor of shape [batch_size, num_boxes, 1] representing
the 0-based corresponding feature level index of each box. the 0-based corresponding feature level index of each box.
boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2] representing boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2] representing
...@@ -102,6 +232,10 @@ def selective_crop_and_resize(features, ...@@ -102,6 +232,10 @@ def selective_crop_and_resize(features,
output_size: a scalar indicating the output crop size. output_size: a scalar indicating the output crop size.
sample_offset: a float number in [0, 1] indicates the subpixel sample offset sample_offset: a float number in [0, 1] indicates the subpixel sample offset
from grid point. from grid point.
use_einsum_gather: whether to use einsum instead of gather. Replacing gather
with einsum can improve performance when the feature size is not large;
einsum is also friendlier to model partitioning. Gather performs better
when the feature size is very large and there are multiple box levels.
Returns: Returns:
features_per_box: a 5-D tensor of shape features_per_box: a 5-D tensor of shape
...@@ -112,93 +246,77 @@ def selective_crop_and_resize(features, ...@@ -112,93 +246,77 @@ def selective_crop_and_resize(features,
num_filters) = features.get_shape().as_list() num_filters) = features.get_shape().as_list()
_, num_boxes, _ = boxes.get_shape().as_list() _, num_boxes, _ = boxes.get_shape().as_list()
# Compute the grid position w.r.t. the corresponding feature map. kernel_y, kernel_x, box_gridy0y1, box_gridx0x1 = compute_grid_positions(
box_grid_x = [] boxes, boundaries, output_size, sample_offset)
box_grid_y = []
for i in range(output_size):
box_grid_x.append(boxes[:, :, 1] +
(i + sample_offset) * boxes[:, :, 3] / output_size)
box_grid_y.append(boxes[:, :, 0] +
(i + sample_offset) * boxes[:, :, 2] / output_size)
box_grid_x = tf.stack(box_grid_x, axis=2)
box_grid_y = tf.stack(box_grid_y, axis=2)
# Compute indices for gather operation.
box_grid_y0 = tf.floor(box_grid_y)
box_grid_x0 = tf.floor(box_grid_x)
box_grid_x0 = tf.maximum(0., box_grid_x0)
box_grid_y0 = tf.maximum(0., box_grid_y0)
box_gridx0x1 = tf.stack(
[tf.minimum(box_grid_x0, tf.expand_dims(boundaries[:, :, 1], -1)),
tf.minimum(box_grid_x0 + 1, tf.expand_dims(boundaries[:, :, 1], -1))],
axis=3)
box_gridy0y1 = tf.stack(
[tf.minimum(box_grid_y0, tf.expand_dims(boundaries[:, :, 0], -1)),
tf.minimum(box_grid_y0 + 1, tf.expand_dims(boundaries[:, :, 0], -1))],
axis=3)
x_indices = tf.cast( x_indices = tf.cast(
tf.reshape(box_gridx0x1, tf.reshape(box_gridx0x1, [batch_size, num_boxes, output_size * 2]),
[batch_size, num_boxes, output_size * 2]), dtype=tf.int32) dtype=tf.int32)
y_indices = tf.cast( y_indices = tf.cast(
tf.reshape(box_gridy0y1, tf.reshape(box_gridy0y1, [batch_size, num_boxes, output_size * 2]),
[batch_size, num_boxes, output_size * 2]), dtype=tf.int32) dtype=tf.int32)
height_dim_offset = max_feature_width if use_einsum_gather:
level_dim_offset = max_feature_height * height_dim_offset # Bilinear interpolation is done during the last two gathers:
batch_dim_offset = num_levels * level_dim_offset # f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T
indices = tf.reshape( # [f10, f11]]
tf.tile(tf.reshape(tf.range(batch_size) * batch_dim_offset, # [[f00, f01],
[batch_size, 1, 1, 1]), # [f10, f11]] = tf.einsum(tf.einsum(features, y_one_hot), x_one_hot)
[1, num_boxes, output_size * 2, output_size * 2]) + # where [hy, ly] and [hx, lx] are the bilinear interpolation kernel.
tf.tile(tf.reshape(box_levels * level_dim_offset,
[batch_size, num_boxes, 1, 1]), # shape is [batch_size, boxes, output_size, 2, 1]
[1, 1, output_size * 2, output_size * 2]) + grid_y_one_hot, grid_x_one_hot = get_grid_one_hot(box_gridy0y1,
tf.tile(tf.reshape(y_indices * height_dim_offset, box_gridx0x1,
[batch_size, num_boxes, output_size * 2, 1]), max_feature_height,
[1, 1, 1, output_size * 2]) + max_feature_width)
tf.tile(tf.reshape(x_indices,
[batch_size, num_boxes, 1, output_size * 2]), # shape is [batch_size, num_boxes, output_size, height]
[1, 1, output_size * 2, 1]), [-1]) grid_y_weight = tf.reduce_sum(
tf.multiply(grid_y_one_hot, kernel_y), axis=-2)
features = tf.reshape(features, [-1, num_filters]) # shape is [batch_size, num_boxes, output_size, width]
features_per_box = tf.reshape( grid_x_weight = tf.reduce_sum(
tf.gather(features, indices), tf.multiply(grid_x_one_hot, kernel_x), axis=-2)
[batch_size, num_boxes, output_size * 2, output_size * 2, num_filters])
# Gather for y_axis.
# The RoIAlign feature f can be computed by bilinear interpolation of four # shape is [batch_size, num_boxes, output_size, width, features]
# neighboring feature points f0, f1, f2, and f3. features_per_box = tf.einsum('bmhwf,bmoh->bmowf', features,
# f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T tf.cast(grid_y_weight, features.dtype))
# [f10, f11]] # Gather for x_axis.
# f(y, x) = (hy*hx)f00 + (hy*lx)f01 + (ly*hx)f10 + (lx*ly)f11 # shape is [batch_size, num_boxes, output_size, output_size, features]
# f(y, x) = w00*f00 + w01*f01 + w10*f10 + w11*f11 features_per_box = tf.einsum('bmhwf,bmow->bmhof', features_per_box,
ly = box_grid_y - box_grid_y0 tf.cast(grid_x_weight, features.dtype))
lx = box_grid_x - box_grid_x0 else:
hy = 1.0 - ly height_dim_offset = max_feature_width
hx = 1.0 - lx level_dim_offset = max_feature_height * height_dim_offset
kernel_x = tf.reshape(tf.stack([hx, lx], axis=3), batch_dim_offset = num_levels * level_dim_offset
[batch_size, num_boxes, 1, output_size*2])
kernel_y = tf.reshape(tf.stack([hy, ly], axis=3), batch_size_offset = tf.tile(
[batch_size, num_boxes, output_size*2, 1]) tf.reshape(
# Uses implicit broadcast to generate the interpolation kernel. The tf.range(batch_size) * batch_dim_offset, [batch_size, 1, 1, 1]),
# multiplier `4` is for avg pooling. [1, num_boxes, output_size * 2, output_size * 2])
interpolation_kernel = kernel_y * kernel_x * 4 box_levels_offset = tf.tile(
tf.reshape(box_levels * level_dim_offset,
# Interpolates the gathered features with computed interpolation kernels. [batch_size, num_boxes, 1, 1]),
features_per_box *= tf.cast( [1, 1, output_size * 2, output_size * 2])
tf.expand_dims(interpolation_kernel, axis=4), y_indices_offset = tf.tile(
dtype=features_per_box.dtype) tf.reshape(y_indices * height_dim_offset,
features_per_box = tf.reshape( [batch_size, num_boxes, output_size * 2, 1]),
features_per_box, [1, 1, 1, output_size * 2])
[batch_size * num_boxes, output_size*2, output_size*2, num_filters]) x_indices_offset = tf.tile(
features_per_box = tf.nn.avg_pool2d( tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]),
input=features_per_box, [1, 1, output_size * 2, 1])
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], indices = tf.reshape(
padding='VALID') batch_size_offset + box_levels_offset + y_indices_offset +
features_per_box = tf.reshape( x_indices_offset, [-1])
features_per_box,
[batch_size, num_boxes, output_size, output_size, num_filters]) features = tf.reshape(features, [-1, num_filters])
# TODO(wangtao): replace tf.gather with tf.gather_nd and try to get similar
# performance.
features_per_box = tf.reshape(
tf.gather(features, indices),
[batch_size, num_boxes, output_size * 2, output_size * 2, num_filters])
features_per_box = feature_bilinear_interpolation(features_per_box,
kernel_y, kernel_x)
return features_per_box return features_per_box
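A toy illustration of the einsum path above: the row and column weights (normally produced from `get_grid_one_hot` and the kernels) select and blend feature points in two contractions, replacing the flattened `tf.gather`. Shapes and values here are arbitrary:

```python
import tensorflow.compat.v2 as tf

b, m, h, w, f, o = 1, 2, 8, 8, 3, 7
feats = tf.random.normal([b, m, h, w, f])
row_w = tf.random.normal([b, m, o, h])  # per-output-row weights over height
col_w = tf.random.normal([b, m, o, w])  # per-output-col weights over width
rows = tf.einsum('bmhwf,bmoh->bmowf', feats, row_w)
crops = tf.einsum('bmhwf,bmow->bmhof', rows, col_w)
print(crops.shape)  # (1, 2, 7, 7, 3)
```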
...@@ -211,29 +329,52 @@ def multilevel_crop_and_resize(features, boxes, output_size=7): ...@@ -211,29 +329,52 @@ def multilevel_crop_and_resize(features, boxes, output_size=7):
and resizing it using the correspoding feature map of that level. and resizing it using the correspoding feature map of that level.
Args: Args:
features: A dictionary with key as pyramid level and value as features. features: A dictionary with key as pyramid level and value as features. The
The features are in shape of [batch_size, height_l, width_l, num_filters]. features are in shape of [batch_size, height_l, width_l, num_filters].
boxes: A 3-D Tensor of shape [batch_size, num_boxes, 4]. Each row boxes: A 3-D Tensor of shape [batch_size, num_boxes, 4]. Each row represents
represents a box with [y1, x1, y2, x2] in un-normalized coordinates. a box with [y1, x1, y2, x2] in un-normalized coordinates.
output_size: A scalar to indicate the output crop size. output_size: A scalar to indicate the output crop size.
Returns: Returns:
A 5-D tensor representing feature crop of shape A 5-D tensor representing feature crop of shape
[batch_size, num_boxes, output_size, output_size, num_filters]. [batch_size, num_boxes, output_size, output_size, num_filters].
""" """
with tf.name_scope('multilevel_crop_and_resize'): with tf.name_scope('multilevel_crop_and_resize'):
levels = features.keys() levels = list(features.keys())
min_level = min(levels) min_level = min(levels)
max_level = max(levels) max_level = max(levels)
_, max_feature_height, max_feature_width, _ = ( batch_size, max_feature_height, max_feature_width, num_filters = (
features[min_level].get_shape().as_list()) features[min_level].get_shape().as_list())
# Stacks feature pyramid into a features_all of shape _, num_boxes, _ = boxes.get_shape().as_list()
# Stack feature pyramid into a features_all of shape
# [batch_size, levels, height, width, num_filters]. # [batch_size, levels, height, width, num_filters].
features_all = [] features_all = []
feature_heights = []
feature_widths = []
for level in range(min_level, max_level + 1): for level in range(min_level, max_level + 1):
features_all.append(tf.image.pad_to_bounding_box( shape = features[level].get_shape().as_list()
features[level], 0, 0, max_feature_height, max_feature_width)) feature_heights.append(shape[1])
features_all = tf.stack(features_all, axis=1) feature_widths.append(shape[2])
# Concat tensor of [batch_size, height_l * width_l, num_filters] for each
# level.
features_all.append(
tf.reshape(features[level], [batch_size, -1, num_filters]))
features_r2 = tf.reshape(tf.concat(features_all, 1), [-1, num_filters])
# Calculate height_l * width_l for each level.
level_dim_sizes = [
feature_widths[i] * feature_heights[i]
for i in range(len(feature_widths))
]
# level_dim_offsets is the cumulative sum of level_dim_sizes.
level_dim_offsets = [0]
for i in range(len(feature_widths) - 1):
level_dim_offsets.append(level_dim_offsets[i] + level_dim_sizes[i])
batch_dim_size = level_dim_offsets[-1] + level_dim_sizes[-1]
level_dim_offsets = tf.constant(level_dim_offsets, tf.int32)
height_dim_sizes = tf.constant(feature_widths, tf.int32)
# Assigns boxes to the right level. # Assigns boxes to the right level.
box_width = boxes[:, :, 3] - boxes[:, :, 1] box_width = boxes[:, :, 3] - boxes[:, :, 1]
...@@ -241,8 +382,8 @@ def multilevel_crop_and_resize(features, boxes, output_size=7): ...@@ -241,8 +382,8 @@ def multilevel_crop_and_resize(features, boxes, output_size=7):
areas_sqrt = tf.sqrt(box_height * box_width) areas_sqrt = tf.sqrt(box_height * box_width)
levels = tf.cast( levels = tf.cast(
tf.math.floordiv( tf.math.floordiv(
tf.math.log(tf.divide(areas_sqrt, 224.0)), tf.math.log(2.0)) tf.math.log(tf.divide(areas_sqrt, 224.0)), tf.math.log(2.0)) +
+ 4.0, 4.0,
dtype=tf.int32) dtype=tf.int32)
# Maps levels between [min_level, max_level]. # Maps levels between [min_level, max_level].
levels = tf.minimum(max_level, tf.maximum(levels, min_level)) levels = tf.minimum(max_level, tf.maximum(levels, min_level))
...@@ -263,17 +404,58 @@ def multilevel_crop_and_resize(features, boxes, output_size=7): ...@@ -263,17 +404,58 @@ def multilevel_crop_and_resize(features, boxes, output_size=7):
level_strides = tf.pow([[2.0]], tf.cast(levels, tf.float32)) level_strides = tf.pow([[2.0]], tf.cast(levels, tf.float32))
boundary = tf.cast( boundary = tf.cast(
tf.concat([ tf.concat([
tf.expand_dims([[tf.cast(max_feature_height, tf.float32)]] / tf.expand_dims(
level_strides - 1, [[tf.cast(max_feature_height, tf.float32)]] / level_strides - 1,
axis=-1), axis=-1),
tf.expand_dims([[tf.cast(max_feature_width, tf.float32)]] / tf.expand_dims(
level_strides - 1, [[tf.cast(max_feature_width, tf.float32)]] / level_strides - 1,
axis=-1), axis=-1),
], axis=-1), ],
boxes.dtype) axis=-1), boxes.dtype)
# Compute grid positions.
kernel_y, kernel_x, box_gridy0y1, box_gridx0x1 = compute_grid_positions(
boxes, boundary, output_size, sample_offset=0.5)
x_indices = tf.cast(
tf.reshape(box_gridx0x1, [batch_size, num_boxes, output_size * 2]),
dtype=tf.int32)
y_indices = tf.cast(
tf.reshape(box_gridy0y1, [batch_size, num_boxes, output_size * 2]),
dtype=tf.int32)
return selective_crop_and_resize( batch_size_offset = tf.tile(
features_all, boxes, levels, boundary, output_size) tf.reshape(
tf.range(batch_size) * batch_dim_size, [batch_size, 1, 1, 1]),
[1, num_boxes, output_size * 2, output_size * 2])
# Get level offset for each box. Each box belongs to one level.
levels_offset = tf.tile(
tf.reshape(
tf.gather(level_dim_offsets, levels),
[batch_size, num_boxes, 1, 1]),
[1, 1, output_size * 2, output_size * 2])
y_indices_offset = tf.tile(
tf.reshape(
y_indices * tf.expand_dims(tf.gather(height_dim_sizes, levels), -1),
[batch_size, num_boxes, output_size * 2, 1]),
[1, 1, 1, output_size * 2])
x_indices_offset = tf.tile(
tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]),
[1, 1, output_size * 2, 1])
indices = tf.reshape(
batch_size_offset + levels_offset + y_indices_offset + x_indices_offset,
[-1])
# TODO(wangtao): replace tf.gather with tf.gather_nd and try to get similar
# performance.
features_per_box = tf.reshape(
tf.gather(features_r2, indices),
[batch_size, num_boxes, output_size * 2, output_size * 2, num_filters])
# Bilinear interpolation.
features_per_box = feature_bilinear_interpolation(features_per_box,
kernel_y, kernel_x)
return features_per_box
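For reference, the level-assignment rule embedded above is the standard FPN heuristic: `level = floor(log2(sqrt(box_area) / 224)) + 4`, clipped to `[min_level, max_level]`. A quick worked example:

```python
import math

box_h, box_w = 112.0, 112.0
level = math.floor(math.log2(math.sqrt(box_h * box_w) / 224.0)) + 4
print(level)  # 3: a 112x112 box maps to P3 before clipping
```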
def single_level_feature_crop(features, level_boxes, detection_prior_levels, def single_level_feature_crop(features, level_boxes, detection_prior_levels,
...@@ -355,7 +537,8 @@ def crop_mask_in_target_box(masks, ...@@ -355,7 +537,8 @@ def crop_mask_in_target_box(masks,
boxes, boxes,
target_boxes, target_boxes,
output_size, output_size,
sample_offset=0): sample_offset=0,
use_einsum=True):
"""Crop masks in target boxes. """Crop masks in target boxes.
Args: Args:
...@@ -370,6 +553,7 @@ def crop_mask_in_target_box(masks, ...@@ -370,6 +553,7 @@ def crop_mask_in_target_box(masks,
supports square output shapes. supports square output shapes.
sample_offset: a float number in [0, 1] indicates the subpixel sample offset sample_offset: a float number in [0, 1] indicates the subpixel sample offset
from grid point. from grid point.
use_einsum: Use einsum to replace gather in selective_crop_and_resize.
Returns: Returns:
A 4-D tensor representing feature crop of shape A 4-D tensor representing feature crop of shape
...@@ -417,7 +601,8 @@ def crop_mask_in_target_box(masks, ...@@ -417,7 +601,8 @@ def crop_mask_in_target_box(masks,
levels, levels,
boundaries, boundaries,
output_size, output_size,
sample_offset=sample_offset) sample_offset=sample_offset,
use_einsum_gather=use_einsum)
cropped_masks = tf.squeeze(cropped_masks, axis=-1) cropped_masks = tf.squeeze(cropped_masks, axis=-1)
return cropped_masks return cropped_masks
...@@ -19,21 +19,49 @@ installed and ...@@ -19,21 +19,49 @@ installed and
### ImageNet preparation ### ImageNet preparation
#### Using TFDS
`classifier_trainer.py` supports ImageNet with
[TensorFlow Datasets (TFDS)](https://www.tensorflow.org/datasets/overview).
Please see the following [example snippet](https://github.com/tensorflow/datasets/blob/master/tensorflow_datasets/scripts/download_and_prepare.py)
for more information on how to use TFDS to download and prepare datasets, and
specifically the [TFDS ImageNet readme](https://github.com/tensorflow/datasets/blob/master/docs/catalog/imagenet2012.md)
for manual download instructions.
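A hedged sketch of the TFDS flow (note that `imagenet2012` requires the manually downloaded archives described in the readme above):

```python
import tensorflow_datasets as tfds

builder = tfds.builder('imagenet2012')
builder.download_and_prepare()  # expects the manual downloads to be in place
train_ds = builder.as_dataset(split='train')
```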
#### Legacy TFRecords
Download the ImageNet dataset and convert it to TFRecord format. Download the ImageNet dataset and convert it to TFRecord format.
The following [script](https://github.com/tensorflow/tpu/blob/master/tools/datasets/imagenet_to_gcs.py) The following [script](https://github.com/tensorflow/tpu/blob/master/tools/datasets/imagenet_to_gcs.py)
and [README](https://github.com/tensorflow/tpu/tree/master/tools/datasets#imagenet_to_gcspy) and [README](https://github.com/tensorflow/tpu/tree/master/tools/datasets#imagenet_to_gcspy)
provide a few options. provide a few options.
Note that the legacy ResNet runners, e.g. [resnet/resnet_ctl_imagenet_main.py](resnet/resnet_ctl_imagenet_main.py),
require TFRecords, whereas `classifier_trainer.py` can use either format by
setting the builder to 'records' or 'tfds' in the configurations.
### Running on Cloud TPUs ### Running on Cloud TPUs
Note: These models will **not** work with TPUs on Colab. Note: These models will **not** work with TPUs on Colab.
You can train image classification models on Cloud TPUs using You can train image classification models on Cloud TPUs using
`tf.distribute.TPUStrategy`. If you are not familiar with Cloud TPUs, it is [tf.distribute.experimental.TPUStrategy](https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/TPUStrategy?version=nightly).
strongly recommended that you go through the If you are not familiar with Cloud TPUs, it is strongly recommended that you go
through the
[quickstart](https://cloud.google.com/tpu/docs/quickstart) to learn how to [quickstart](https://cloud.google.com/tpu/docs/quickstart) to learn how to
create a TPU and GCE VM. create a TPU and GCE VM.
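A minimal connection sketch, assuming a Cloud TPU named `your-tpu-name` (substitute your own):

```python
import tensorflow as tf

resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='your-tpu-name')
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)
```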
### Running on multiple GPU hosts
You can also train these models on multiple hosts, each with GPUs, using
[tf.distribute.experimental.MultiWorkerMirroredStrategy](https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/MultiWorkerMirroredStrategy).
The easiest way to run multi-host benchmarks is to set the
[`TF_CONFIG`](https://www.tensorflow.org/guide/distributed_training#TF_CONFIG)
appropriately on each host. For example, to run using `MultiWorkerMirroredStrategy` on
2 hosts, the `cluster` in `TF_CONFIG` should have 2 `host:port` entries, and
host `i` should have the `task` in `TF_CONFIG` set to `{"type": "worker",
"index": i}`. `MultiWorkerMirroredStrategy` will automatically use all the
available GPUs at each host.
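For instance, host 0 of a 2-host run could set (hostnames and port are placeholders):

```python
import json
import os

os.environ['TF_CONFIG'] = json.dumps({
    'cluster': {'worker': ['host1:2222', 'host2:2222']},
    'task': {'type': 'worker', 'index': 0},  # use index 1 on the second host
})
```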
## MNIST ## MNIST
To download the data and run the MNIST sample model locally for the first time, To download the data and run the MNIST sample model locally for the first time,
...@@ -100,7 +128,7 @@ python3 classifier_trainer.py \ ...@@ -100,7 +128,7 @@ python3 classifier_trainer.py \
--tpu=$TPU_NAME \ --tpu=$TPU_NAME \
--model_dir=$MODEL_DIR \ --model_dir=$MODEL_DIR \
--data_dir=$DATA_DIR \ --data_dir=$DATA_DIR \
--config_file=config/examples/resnet/imagenet/tpu.yaml --config_file=configs/examples/resnet/imagenet/tpu.yaml
``` ```
### EfficientNet ### EfficientNet
...@@ -127,7 +155,7 @@ python3 classifier_trainer.py \ ...@@ -127,7 +155,7 @@ python3 classifier_trainer.py \
--tpu=$TPU_NAME \ --tpu=$TPU_NAME \
--model_dir=$MODEL_DIR \ --model_dir=$MODEL_DIR \
--data_dir=$DATA_DIR \ --data_dir=$DATA_DIR \
--config_file=config/examples/efficientnet/imagenet/efficientnet-b0-tpu.yaml --config_file=configs/examples/efficientnet/imagenet/efficientnet-b0-tpu.yaml
``` ```
Note that the number of GPU devices can be overridden in the command line using Note that the number of GPU devices can be overridden in the command line using
......