Commit 31ca3b97 authored by Kaushik Shivakumar

resolve merge conflicts

parents 3e9d886d 7fcd7cba
@@ -314,7 +314,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
       self, inserted_layer_counter, target_channel):
     projection_layers = []
     if inserted_layer_counter >= 0:
-      use_bias = False if self._apply_batch_norm else True
+      use_bias = False if (self._apply_batch_norm and not
+                           self._conv_hyperparams.force_use_bias()) else True
       projection_layers.append(keras.Conv2D(
           target_channel, [1, 1], strides=1, padding='SAME',
           name='ProjectionLayer/conv2d_{}'.format(inserted_layer_counter),
@@ -331,7 +332,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
     conv_layers = []
     batch_norm_layers = []
     activation_layers = []
-    use_bias = False if self._apply_batch_norm else True
+    use_bias = False if (self._apply_batch_norm and not
+                         self._conv_hyperparams.force_use_bias()) else True
     for additional_conv_layer_idx in range(self._num_layers_before_predictor):
       layer_name = '{}/conv2d_{}'.format(
           tower_name_scope, additional_conv_layer_idx)
@@ -363,7 +365,9 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
             training=(self._is_training and not self._freeze_batchnorm),
             name='{}/conv2d_{}/BatchNorm/feature_{}'.format(
                 tower_name_scope, additional_conv_layer_idx, feature_index)))
-      activation_layers.append(tf.keras.layers.Lambda(tf.nn.relu6))
+      activation_layers.append(self._conv_hyperparams.build_activation_layer(
+          name='{}/conv2d_{}/activation_{}'.format(
+              tower_name_scope, additional_conv_layer_idx, feature_index)))
     # Set conv layers as the shared conv layers for different feature maps with
     # the same tower_name_scope.
......
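The force_use_bias() accessor introduced above is read from the predictor's conv_hyperparams. A minimal pipeline-config sketch of how that override might be expressed, assuming hyperparams.proto exposes a matching force_use_bias field (that field is not shown in this diff); the regularizer and initializer values are illustrative:

conv_hyperparams {
  regularizer {
    l2_regularizer {
      weight: 0.00004
    }
  }
  initializer {
    truncated_normal_initializer {
      stddev: 0.03
    }
  }
  batch_norm {
    scale: true
  }
  # Assumed field backing force_use_bias(): keep conv biases even when
  # batch norm is applied.
  force_use_bias: true
}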
@@ -61,7 +61,7 @@ class Head(object):
     pass


-class KerasHead(tf.keras.Model):
+class KerasHead(tf.keras.layers.Layer):
   """Keras head base class."""

   def call(self, features):
......
@@ -183,6 +183,41 @@ message CenterNet {
     optional float heatmap_bias_init = 3 [default = -2.19];
   }
   optional MaskEstimation mask_estimation_task = 8;
+
+  // Parameters related to the DensePose estimation task.
+  // http://densepose.org/
+  message DensePoseEstimation {
+    // Weight of the task loss. The total loss of the model is the summation
+    // of the task losses weighted by these weights.
+    optional float task_loss_weight = 1 [default = 1.0];
+
+    // Class ID (0-indexed) that corresponds to the object in the label map
+    // that contains DensePose data.
+    optional int32 class_id = 2;
+
+    // Loss configuration for DensePose heatmap and regression losses. Note
+    // that the localization loss is used for surface coordinate losses and
+    // the classification loss is used for part classification losses.
+    optional Loss loss = 3;
+
+    // The number of body parts.
+    optional int32 num_parts = 4 [default = 24];
+
+    // Loss weights for the two DensePose heads.
+    optional float part_loss_weight = 5 [default = 1.0];
+    optional float coordinate_loss_weight = 6 [default = 1.0];
+
+    // Whether to upsample the prediction feature maps back to the original
+    // input dimension prior to applying the loss. This has the benefit of
+    // maintaining finer groundtruth location information.
+    optional bool upsample_to_input_res = 7 [default = true];
+
+    // The initial bias value of the convolution kernel of the class heatmap
+    // prediction head. -2.19 corresponds to predicting foreground with
+    // a probability of 0.1.
+    optional float heatmap_bias_init = 8 [default = -2.19];
+  }
+  optional DensePoseEstimation densepose_estimation_task = 9;
 }

 message CenterNetFeatureExtractor {
......
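For reference, a hedged sketch of how the new densepose_estimation_task block might be enabled in a CenterNet pipeline config; the enclosing center_net block and the specific loss choices are illustrative and not part of this diff:

center_net {
  # ... feature extractor, object detection task, etc. ...
  densepose_estimation_task {
    task_loss_weight: 1.0
    class_id: 0  # label map class that carries DensePose annotations
    num_parts: 24
    part_loss_weight: 1.0
    coordinate_loss_weight: 1.0
    upsample_to_input_res: true
    heatmap_bias_init: -2.19
    loss {
      localization_loss {
        l1_localization_loss {
        }
      }
      classification_loss {
        weighted_sigmoid {
        }
      }
    }
  }
}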
@@ -31,7 +31,7 @@ enum InputType {
   TF_SEQUENCE_EXAMPLE = 2; // TfSequenceExample Input
 }

-// Next id: 31
+// Next id: 33
 message InputReader {
   // Name of input reader. Typically used to describe the dataset that is read
   // by this input reader.
@@ -119,6 +119,10 @@ message InputReader {
   // Type of instance mask.
   optional InstanceMaskType mask_type = 10 [default = NUMERICAL_MASKS];

+  // Whether to load DensePose data. If set, must also set load_instance_masks
+  // to true.
+  optional bool load_dense_pose = 31 [default = false];
+
   // Whether to use the display name when decoding examples. This is only used
   // when mapping class text strings to integers.
   optional bool use_display_name = 17 [default = false];
@@ -129,6 +133,10 @@ message InputReader {
   // Whether input data type is tf.Examples or tf.SequenceExamples
   optional InputType input_type = 30 [default = TF_EXAMPLE];

+  // Which frame to choose from the input if Sequence Example. -1 indicates
+  // random choice.
+  optional int32 frame_index = 32 [default = -1];
+
   oneof input_reader {
     TFRecordInputReader tf_record_input_reader = 8;
     ExternalInputReader external_input_reader = 9;
......
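Two small input-reader sketches using the new fields; the paths are placeholders, and everything except load_dense_pose and frame_index already existed before this change:

# Loading DensePose data (instance masks must be loaded as well).
train_input_reader {
  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.pbtxt"
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/train-?????-of-?????"
  }
  load_instance_masks: true
  load_dense_pose: true
}

# Selecting a frame from sequence-example input; -1 picks a random frame.
train_input_reader {
  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.pbtxt"
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/train_sequences-?????-of-?????"
  }
  input_type: TF_SEQUENCE_EXAMPLE
  frame_index: -1
}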
@@ -4,7 +4,7 @@ package object_detection.protos;

 // Message for defining a preprocessing operation on input data.
 // See: //third_party/tensorflow_models/object_detection/core/preprocessor.py
-// Next ID: 38
+// Next ID: 39
 message PreprocessingStep {
   oneof preprocessing_step {
     NormalizeImage normalize_image = 1;
@@ -44,6 +44,7 @@ message PreprocessingStep {
     RandomDownscaleToTargetPixels random_downscale_to_target_pixels = 35;
     RandomPatchGaussian random_patch_gaussian = 36;
     RandomSquareCropByScale random_square_crop_by_scale = 37;
+    RandomScaleCropAndPadToSquare random_scale_crop_and_pad_to_square = 38;
   }
 }
@@ -572,3 +573,20 @@ message RandomSquareCropByScale {
   // [min_scale, max_scale]
   optional int32 num_scales = 4 [default=8];
 }
+
+// Randomly scale, crop, and then pad an image to the desired square output
+// dimensions. Specifically, this method first samples a random_scale factor
+// from a uniform distribution between scale_min and scale_max, and then
+// resizes the image such that its maximum dimension is (output_size *
+// random_scale). Secondly, a square output_size crop is extracted from the
+// resized image, and finally the cropped region is padded to the desired
+// square output_size. The augmentation is borrowed from [1].
+// [1]: https://arxiv.org/abs/1911.09070
+message RandomScaleCropAndPadToSquare {
+  // The (square) output image size.
+  optional int32 output_size = 1 [default = 512];
+
+  // The minimum and maximum values from which to sample the random scale.
+  optional float scale_min = 2 [default=0.1];
+  optional float scale_max = 3 [default=2.0];
+}
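As a usage sketch, the new step is selected through data_augmentation_options in the train config; the values below simply restate the defaults defined above:

train_config {
  # ... optimizer, batch size, etc. ...
  data_augmentation_options {
    random_scale_crop_and_pad_to_square {
      output_size: 512
      scale_min: 0.1
      scale_max: 2.0
    }
  }
}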
@@ -145,7 +145,7 @@ message Ssd {
   optional MaskHead mask_head_config = 25;
 }

-// Next id: 18.
+// Next id: 20.
 message SsdFeatureExtractor {
   reserved 6;
@@ -185,8 +185,13 @@ message SsdFeatureExtractor {
   // feature maps added by SSD.
   optional bool use_depthwise = 8 [default = false];

-  // Feature Pyramid Networks config.
-  optional FeaturePyramidNetworks fpn = 10;
+  oneof feature_pyramid_oneof {
+    // Feature Pyramid Networks config.
+    FeaturePyramidNetworks fpn = 10;
+
+    // Bidirectional Feature Pyramid Networks config.
+    BidirectionalFeaturePyramidNetworks bifpn = 19;
+  }

   // If true, replace preprocess function of feature extractor with a
   // placeholder. This should only be used if all the image preprocessing steps
@@ -225,3 +230,23 @@ message FeaturePyramidNetworks {
 }
+
+// Configuration for Bidirectional Feature Pyramid Networks.
+message BidirectionalFeaturePyramidNetworks {
+  // minimum level in the feature pyramid.
+  optional int32 min_level = 1 [default = 3];
+
+  // maximum level in the feature pyramid.
+  optional int32 max_level = 2 [default = 7];
+
+  // The number of repeated top-down bottom-up iterations for BiFPN-based
+  // feature extractors (bidirectional feature pyramid networks).
+  optional int32 num_iterations = 3;
+
+  // The number of filters (channels) to use in feature pyramid layers for
+  // BiFPN-based feature extractors (bidirectional feature pyramid networks).
+  optional int32 num_filters = 4;
+
+  // Method used to combine inputs to BiFPN nodes.
+  optional string combine_method = 5 [default = 'fast_attention'];
+}
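Because fpn and bifpn now sit in a oneof, a feature extractor selects exactly one of them. A hedged sketch of a BiFPN-based SSD feature extractor config; the type string and the filter/iteration counts are illustrative, not taken from this diff:

feature_extractor {
  type: "ssd_efficientnet-b0_bifpn_keras"  # hypothetical extractor name
  bifpn {
    min_level: 3
    max_level: 7
    num_iterations: 3
    num_filters: 64
    combine_method: 'fast_attention'
  }
}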
@@ -59,7 +59,8 @@ message TrainConfig {
   // Whether to load all checkpoint vars that match model variable names and
   // sizes. This option is only available if `from_detection_checkpoint` is
-  // True.
+  // True. This option is *not* supported for TF2 --- setting it to true
+  // will raise an error.
   optional bool load_all_detection_checkpoint_vars = 19 [default = false];

   // Number of steps to train the DetectionModel for. If 0, will train the model
......
# Context R-CNN configuration for Snapshot Serengeti Dataset, with sequence
# example input data with context_features.
# This model uses attention into contextual features within the Faster R-CNN
# object detection framework to improve object detection performance.
# See https://arxiv.org/abs/1912.03538 for more information.
# Search for "PATH_TO_BE_CONFIGURED" to find the fields that should be
# configured.
model {
faster_rcnn {
num_classes: 48
image_resizer {
fixed_shape_resizer {
height: 640
width: 640
}
}
feature_extractor {
type: "faster_rcnn_resnet101"
first_stage_features_stride: 16
batch_norm_trainable: true
}
first_stage_anchor_generator {
grid_anchor_generator {
height_stride: 16
width_stride: 16
scales: 0.25
scales: 0.5
scales: 1.0
scales: 2.0
aspect_ratios: 0.5
aspect_ratios: 1.0
aspect_ratios: 2.0
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.00999999977648
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.699999988079
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
use_dropout: false
dropout_keep_probability: 1.0
share_box_across_classes: true
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.600000023842
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
use_matmul_crop_and_resize: true
clip_anchors_to_image: true
use_matmul_gather_in_matcher: true
use_static_balanced_label_sampler: true
use_static_shapes: true
context_config {
max_num_context_features: 2000
context_feature_length: 2057
}
}
}
train_config {
batch_size: 8
data_augmentation_options {
random_horizontal_flip {
}
}
sync_replicas: true
optimizer {
momentum_optimizer {
learning_rate {
manual_step_learning_rate {
initial_learning_rate: 0.0
schedule {
step: 400000
learning_rate: 0.002
}
schedule {
step: 500000
learning_rate: 0.0002
}
schedule {
step: 600000
learning_rate: 0.00002
}
warmup: true
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/faster_rcnn_resnet101_coco_2018_08_14/model.ckpt"
from_detection_checkpoint: true
num_steps: 5000000
replicas_to_aggregate: 8
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
use_bfloat16: true
}
train_input_reader {
label_map_path: "PATH_TO_BE_CONFIGURED/ss_label_map.pbtxt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/snapshot_serengeti_train-?????-of-?????"
}
load_context_features: true
input_type: TF_SEQUENCE_EXAMPLE
}
eval_config {
max_evals: 50
metrics_set: "coco_detection_metrics"
use_moving_averages: false
batch_size: 1
}
eval_input_reader {
label_map_path: "PATH_TO_BE_CONFIGURED/ss_label_map.pbtxt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/snapshot_serengeti_val-?????-of-?????"
}
load_context_features: true
input_type: TF_SEQUENCE_EXAMPLE
}