Unverified commit 0f0c7745 authored by vivek rathod, committed by GitHub

Merged commit includes the following changes: (#8739)

318417714  by jonathanhuang:

    Internal change.

--
318367213  by sbeery:

    Pointing users to more documentation for Apache Beam

--
318358685  by sbeery:

    Context R-CNN sample config for GPU

--
318309800  by rathodv:

    Internal

--
318303364  by ronnyvotel:

    Adding the option for parsing and including DensePose annotations. http://densepose.org/

--
318291319  by aom:

    Adds a conv_bn_act option for conv_block, and naming convention changes for BiFPN utils.

--
318200598  by ronnyvotel:

    Updating the TF Example Decoder to parse DensePose annotations.

--
318174065  by jonathanhuang:

    Internal change.

--
318167805  by rathodv:

    Add use_tpu flag to TF2 binary.

--
318145285  by aom:

    Adds option for convolutional keras box predictor to force use_bias.

--

PiperOrigin-RevId: 318417714
parent 1e4fd825
@@ -16,14 +16,6 @@
 r"""Creates and runs TF2 object detection models.
-##################################
-NOTE: This module has not been fully tested; please bear with us while we iron
-out the kinks.
-##################################
-When a TPU device is available, this binary uses TPUStrategy. Otherwise, it uses
-GPUS with MirroredStrategy/MultiWorkerMirroredStrategy.
 For local training/evaluation run:
 PIPELINE_CONFIG_PATH=path/to/pipeline.config
 MODEL_DIR=/tmp/model_outputs
@@ -60,6 +52,8 @@ flags.DEFINE_string(
 flags.DEFINE_integer('eval_timeout', 3600, 'Number of seconds to wait for an'
                      'evaluation checkpoint before exiting.')
+flags.DEFINE_bool('use_tpu', False, 'Whether the job is executing on a TPU.')
 flags.DEFINE_integer(
     'num_workers', 1, 'When num_workers > 1, training uses '
     'MultiWorkerMirroredStrategy. When num_workers = 1 it uses '
@@ -84,7 +78,7 @@ def main(unused_argv):
         checkpoint_dir=FLAGS.checkpoint_dir,
         wait_interval=300, timeout=FLAGS.eval_timeout)
   else:
-    if tf.config.get_visible_devices('TPU'):
+    if FLAGS.use_tpu:
       resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
       tf.config.experimental_connect_to_cluster(resolver)
       tf.tpu.experimental.initialize_tpu_system(resolver)
......
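The hunk above replaces TPU device auto-detection with an explicit flag. As a hedged illustration of why, here is a minimal sketch of how such a flag typically drives TF2 strategy selection; the make_strategy helper is hypothetical, and only the TPU branch mirrors the code shown above.

# Sketch (hypothetical helper, not the binary's actual structure): an explicit
# use_tpu flag works even when no TPU device is locally visible, e.g. with a
# remote TPU worker, where tf.config.get_visible_devices('TPU') would return
# an empty list.
import tensorflow as tf

def make_strategy(use_tpu, num_workers):
  if use_tpu:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    return tf.distribute.experimental.TPUStrategy(resolver)
  if num_workers > 1:
    return tf.distribute.experimental.MultiWorkerMirroredStrategy()
  return tf.distribute.MirroredStrategy()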
@@ -73,7 +73,7 @@ class FasterRcnnInceptionResnetV2KerasFeatureExtractorTest(tf.test.TestCase):
     proposal_classifier_features = (
         model(proposal_feature_maps))
     features_shape = tf.shape(proposal_classifier_features)
-    self.assertAllEqual(features_shape.numpy(), [2, 8, 8, 1536])
+    self.assertAllEqual(features_shape.numpy(), [2, 9, 9, 1536])
 if __name__ == '__main__':
......
@@ -175,23 +175,6 @@ class FasterRCNNResnetKerasFeatureExtractor(
         self._variable_dict[variable.name[:-2]] = variable
       return keras_model
-  def restore_from_classification_checkpoint_fn(
-      self,
-      first_stage_feature_extractor_scope,
-      second_stage_feature_extractor_scope):
-    """Returns a map for restoring from an (object-based) checkpoint.
-    Args:
-      first_stage_feature_extractor_scope: A scope name for the first stage
-        feature extractor (unused).
-      second_stage_feature_extractor_scope: A scope name for the second stage
-        feature extractor (unused).
-    Returns:
-      A dict mapping keys to Keras models
-    """
-    return {'feature_extractor': self.classification_backbone}
 class FasterRCNNResnet50KerasFeatureExtractor(
     FasterRCNNResnetKerasFeatureExtractor):
......
@@ -163,14 +163,3 @@ class SSDMobileNetV1KerasFeatureExtractor(
         'Conv2d_13_pointwise': image_features[1]})
     return list(feature_maps.values())
-  def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
-    """Returns a map for restoring from an (object-based) checkpoint.
-    Args:
-      feature_extractor_scope: A scope name for the feature extractor (unused).
-    Returns:
-      A dict mapping keys to Keras models
-    """
-    return {'feature_extractor': self.classification_backbone}
@@ -241,14 +241,3 @@ class SSDMobileNetV2FpnKerasFeatureExtractor(
         last_feature_map = layer(last_feature_map)
       feature_maps.append(last_feature_map)
     return feature_maps
-  def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
-    """Returns a map for restoring from an (object-based) checkpoint.
-    Args:
-      feature_extractor_scope: A scope name for the feature extractor (unused).
-    Returns:
-      A dict mapping keys to Keras models
-    """
-    return {'feature_extractor': self.classification_backbone}
@@ -166,14 +166,3 @@ class SSDMobileNetV2KerasFeatureExtractor(
         'layer_19': image_features[1]})
     return list(feature_maps.values())
-  def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
-    """Returns a map for restoring from an (object-based) checkpoint.
-    Args:
-      feature_extractor_scope: A scope name for the feature extractor (unused).
-    Returns:
-      A dict mapping keys to Keras models
-    """
-    return {'feature_extractor': self.classification_backbone}
@@ -246,17 +246,6 @@ class SSDResNetV1FpnKerasFeatureExtractor(
       feature_maps.append(last_feature_map)
     return feature_maps
-  def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
-    """Returns a map for restoring from an (object-based) checkpoint.
-    Args:
-      feature_extractor_scope: A scope name for the feature extractor (unused).
-    Returns:
-      A dict mapping keys to Keras models
-    """
-    return {'feature_extractor': self.classification_backbone}
 class SSDResNet50V1FpnKerasFeatureExtractor(
     SSDResNetV1FpnKerasFeatureExtractor):
......
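The five deletions above remove identical restore_from_classification_checkpoint_fn methods, each of which returned {'feature_extractor': self.classification_backbone}. As a hedged sketch of how such an object-based restore map is typically consumed, here is hypothetical driver code (not from this commit):

import tensorflow as tf

def restore_backbone(feature_extractor, checkpoint_path):
  # Wrap the backbone under the same attribute name the removed map used,
  # then restore. expect_partial() silences warnings about detection-specific
  # variables that a classification checkpoint does not contain.
  ckpt = tf.train.Checkpoint(
      feature_extractor=feature_extractor.classification_backbone)
  ckpt.restore(checkpoint_path).expect_partial()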
@@ -314,7 +314,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
       self, inserted_layer_counter, target_channel):
     projection_layers = []
     if inserted_layer_counter >= 0:
-      use_bias = False if self._apply_batch_norm else True
+      use_bias = False if (self._apply_batch_norm and not
+                           self._conv_hyperparams.force_use_bias()) else True
       projection_layers.append(keras.Conv2D(
           target_channel, [1, 1], strides=1, padding='SAME',
           name='ProjectionLayer/conv2d_{}'.format(inserted_layer_counter),
@@ -331,7 +332,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
     conv_layers = []
     batch_norm_layers = []
     activation_layers = []
-    use_bias = False if self._apply_batch_norm else True
+    use_bias = False if (self._apply_batch_norm and not
+                         self._conv_hyperparams.force_use_bias()) else True
     for additional_conv_layer_idx in range(self._num_layers_before_predictor):
       layer_name = '{}/conv2d_{}'.format(
           tower_name_scope, additional_conv_layer_idx)
@@ -363,7 +365,9 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
               training=(self._is_training and not self._freeze_batchnorm),
               name='{}/conv2d_{}/BatchNorm/feature_{}'.format(
                   tower_name_scope, additional_conv_layer_idx, feature_index)))
-      activation_layers.append(tf.keras.layers.Lambda(tf.nn.relu6))
+      activation_layers.append(self._conv_hyperparams.build_activation_layer(
+          name='{}/conv2d_{}/activation_{}'.format(
+              tower_name_scope, additional_conv_layer_idx, feature_index)))
     # Set conv layers as the shared conv layers for different feature maps with
     # the same tower_name_scope.
......
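Both edited use_bias expressions above compute the same condition. An equivalent restatement (assuming the surrounding class attributes) may make the intent clearer:

# Biases are now dropped only when batch norm is applied AND the new
# force_use_bias option is off; by De Morgan's law this is equivalent:
use_bias = (not self._apply_batch_norm
            or self._conv_hyperparams.force_use_bias())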
@@ -31,7 +31,7 @@ enum InputType {
   TF_SEQUENCE_EXAMPLE = 2;  // TfSequenceExample Input
 }
-// Next id: 31
+// Next id: 32
 message InputReader {
   // Name of input reader. Typically used to describe the dataset that is read
   // by this input reader.
@@ -119,6 +119,10 @@ message InputReader {
   // Type of instance mask.
   optional InstanceMaskType mask_type = 10 [default = NUMERICAL_MASKS];
+  // Whether to load DensePose data. If set, must also set load_instance_masks
+  // to true.
+  optional bool load_dense_pose = 31 [default = false];
   // Whether to use the display name when decoding examples. This is only used
   // when mapping class text strings to integers.
   optional bool use_display_name = 17 [default = false];
......
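As the new comment notes, load_dense_pose depends on instance masks being loaded. A hedged example of how the field might appear in a pipeline config; paths are placeholders, and load_instance_masks is the existing InputReader field the comment refers to:

train_input_reader {
  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.pbtxt"
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/train.record-?????-of-00100"
  }
  # DensePose annotations ride along with instance masks, so both are set.
  load_instance_masks: true
  load_dense_pose: true
}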
@@ -59,7 +59,8 @@ message TrainConfig {
   // Whether to load all checkpoint vars that match model variable names and
   // sizes. This option is only available if `from_detection_checkpoint` is
-  // True.
+  // True. This option is *not* supported for TF2 --- setting it to true
+  // will raise an error.
   optional bool load_all_detection_checkpoint_vars = 19 [default = false];
   // Number of steps to train the DetectionModel for. If 0, will train the model
......
# Context R-CNN configuration for Snapshot Serengeti Dataset, with sequence
# example input data with context_features.
# This model uses attention into contextual features within the Faster R-CNN
# object detection framework to improve object detection performance.
# See https://arxiv.org/abs/1912.03538 for more information.
# Search for "PATH_TO_BE_CONFIGURED" to find the fields that should be
# configured.
model {
faster_rcnn {
num_classes: 48
image_resizer {
fixed_shape_resizer {
height: 640
width: 640
}
}
feature_extractor {
type: "faster_rcnn_resnet101"
first_stage_features_stride: 16
batch_norm_trainable: true
}
first_stage_anchor_generator {
grid_anchor_generator {
height_stride: 16
width_stride: 16
scales: 0.25
scales: 0.5
scales: 1.0
scales: 2.0
aspect_ratios: 0.5
aspect_ratios: 1.0
aspect_ratios: 2.0
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.00999999977648
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.699999988079
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
use_dropout: false
dropout_keep_probability: 1.0
share_box_across_classes: true
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.600000023842
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
use_matmul_crop_and_resize: true
clip_anchors_to_image: true
use_matmul_gather_in_matcher: true
use_static_balanced_label_sampler: true
use_static_shapes: true
context_config {
max_num_context_features: 2000
context_feature_length: 2057
}
}
}
train_config {
batch_size: 8
data_augmentation_options {
random_horizontal_flip {
}
}
sync_replicas: true
optimizer {
momentum_optimizer {
learning_rate {
manual_step_learning_rate {
initial_learning_rate: 0.0
schedule {
step: 400000
learning_rate: 0.002
}
schedule {
step: 500000
learning_rate: 0.0002
}
schedule {
step: 600000
learning_rate: 0.00002
}
warmup: true
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/faster_rcnn_resnet101_coco_2018_08_14/model.ckpt"
from_detection_checkpoint: true
num_steps: 5000000
replicas_to_aggregate: 8
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
use_bfloat16: true
}
train_input_reader {
label_map_path: "PATH_TO_BE_CONFIGURED/ss_label_map.pbtxt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/snapshot_serengeti_train-?????-of-?????"
}
load_context_features: true
input_type: TF_SEQUENCE_EXAMPLE
}
eval_config {
max_evals: 50
metrics_set: "coco_detection_metrics"
use_moving_averages: false
batch_size: 1
}
eval_input_reader {
label_map_path: "PATH_TO_BE_CONFIGURED/ss_label_map.pbtxt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/snapshot_serengeti_val-?????-of-?????"
}
load_context_features: true
input_type: TF_SEQUENCE_EXAMPLE
}
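To try this sample config, a launch along the following lines should work once the PATH_TO_BE_CONFIGURED values are filled in. The exact entry point is an assumption on our part: the config's TF_SEQUENCE_EXAMPLE input and max_evals field follow the TF1 binary's conventions, so the standard model_main.py flags are shown.

# Hypothetical invocation; flag names follow the standard
# object_detection/model_main.py entry point.
python object_detection/model_main.py \
  --pipeline_config_path=path/to/context_rcnn_resnet101_snapshot_serengeti.config \
  --model_dir=/tmp/context_rcnn \
  --alsologtostderr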
@@ -26,7 +26,8 @@ from object_detection.utils import shape_utils
 def create_conv_block(name, num_filters, kernel_size, strides, padding,
                       use_separable, apply_batchnorm, apply_activation,
-                      conv_hyperparams, is_training, freeze_batchnorm):
+                      conv_hyperparams, is_training, freeze_batchnorm,
+                      conv_bn_act_pattern=True):
   """Create Keras layers for regular or separable convolutions.
   Args:
@@ -50,6 +51,9 @@ def create_conv_block(name, num_filters, kernel_size, strides, padding,
       training or not. When training with a small batch size (e.g. 1), it is
       desirable to freeze batch norm update and use pretrained batch norm
       params.
+    conv_bn_act_pattern: Bool. By default, when True, the layers returned by
+      this function are in the order [conv, batchnorm, activation]. Otherwise,
+      when False, the order of the layers is [activation, conv, batchnorm].
   Returns:
     A list of keras layers, including (regular or seperable) convolution, and
@@ -73,7 +77,7 @@ def create_conv_block(name, num_filters, kernel_size, strides, padding,
             depth_multiplier=1,
             padding=padding,
             strides=strides,
-            name=name + '_separable_conv',
+            name=name + 'separable_conv',
             **kwargs))
   else:
     layers.append(
@@ -82,18 +86,22 @@ def create_conv_block(name, num_filters, kernel_size, strides, padding,
             kernel_size=kernel_size,
             padding=padding,
             strides=strides,
-            name=name + '_conv',
+            name=name + 'conv',
             **conv_hyperparams.params()))
   if apply_batchnorm:
     layers.append(
         conv_hyperparams.build_batch_norm(
             training=(is_training and not freeze_batchnorm),
-            name=name + '_batchnorm'))
+            name=name + 'batchnorm'))
   if apply_activation:
-    layers.append(
-        conv_hyperparams.build_activation_layer(name=name + '_activation'))
+    activation_layer = conv_hyperparams.build_activation_layer(
+        name=name + 'activation')
+    if conv_bn_act_pattern:
+      layers.append(activation_layer)
+    else:
+      layers = [activation_layer] + layers
   return layers
@@ -133,28 +141,28 @@ def create_downsample_feature_map_ops(scale, downsample_method,
             pool_size=kernel_size,
             strides=stride,
             padding=padding,
-            name=name + '_downsample_max_x{}'.format(stride)))
+            name=name + 'downsample_max_x{}'.format(stride)))
   elif downsample_method == 'avg_pooling':
     layers.append(
         tf.keras.layers.AveragePooling2D(
             pool_size=kernel_size,
             strides=stride,
             padding=padding,
-            name=name + '_downsample_avg_x{}'.format(stride)))
+            name=name + 'downsample_avg_x{}'.format(stride)))
   elif downsample_method == 'depthwise_conv':
     layers.append(
         tf.keras.layers.DepthwiseConv2D(
             kernel_size=kernel_size,
             strides=stride,
             padding=padding,
-            name=name + '_downsample_depthwise_x{}'.format(stride)))
+            name=name + 'downsample_depthwise_x{}'.format(stride)))
     layers.append(
         conv_hyperparams.build_batch_norm(
             training=(is_training and not freeze_batchnorm),
-            name=name + '_downsample_batchnorm'))
+            name=name + 'downsample_batchnorm'))
     layers.append(
         conv_hyperparams.build_activation_layer(name=name +
-                                                '_downsample_activation'))
+                                                'downsample_activation'))
   else:
     raise ValueError('Unknown downsample method: {}'.format(downsample_method))
......
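A hedged usage sketch for the new conv_bn_act_pattern argument of create_conv_block. The conv_hyperparams object is assumed to come from the usual hyperparams builder, and the trailing separator in the name reflects the naming-convention change above (callers now supply it themselves):

# Ordering returned for each setting, per the docstring added above:
#   conv_bn_act_pattern=True  -> [conv, batchnorm, activation]
#   conv_bn_act_pattern=False -> [activation, conv, batchnorm]
block = create_conv_block(
    name='node_0/',  # the name prefix now carries the separator itself
    num_filters=64, kernel_size=3, strides=1, padding='SAME',
    use_separable=False, apply_batchnorm=True, apply_activation=True,
    conv_hyperparams=conv_hyperparams, is_training=True,
    freeze_batchnorm=False, conv_bn_act_pattern=False)

The False ordering is useful when the activation should run before the convolution, as in some BiFPN variants.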
@@ -147,6 +147,7 @@ def clear_fine_tune_checkpoint(pipeline_config_path,
   """Clears fine_tune_checkpoint and writes a new pipeline config file."""
   configs = get_configs_from_pipeline_file(pipeline_config_path)
   configs["train_config"].fine_tune_checkpoint = ""
+  configs["train_config"].load_all_detection_checkpoint_vars = False
   pipeline_proto = create_pipeline_proto_from_configs(configs)
   with tf.gfile.Open(new_pipeline_config_path, "wb") as f:
     f.write(text_format.MessageToString(pipeline_proto))
......
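A hedged usage sketch of the updated helper; paths are placeholders:

# After this change, the rewritten config is safe to train with under TF2:
# both fine_tune_checkpoint and load_all_detection_checkpoint_vars (which
# TF2 rejects when true, per the train.proto comment above) are cleared.
clear_fine_tune_checkpoint('path/to/pipeline.config',
                           '/tmp/pipeline_cleared.config')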