Unverified Commit 212bd0a6 authored by Jonathan Huang, committed by GitHub

Merge pull request #2619 from tombstone/update_protos

update proto definitions
parents 34f1d6bf 9adf0242
......@@ -54,6 +54,17 @@ py_proto_library(
deps = [":faster_rcnn_box_coder_proto"],
)
proto_library(
name = "keypoint_box_coder_proto",
srcs = ["keypoint_box_coder.proto"],
)
py_proto_library(
name = "keypoint_box_coder_py_pb2",
api_version = 2,
deps = [":keypoint_box_coder_proto"],
)
proto_library(
name = "mean_stddev_box_coder_proto",
srcs = ["mean_stddev_box_coder.proto"],
......@@ -81,6 +92,7 @@ proto_library(
srcs = ["box_coder.proto"],
deps = [
":faster_rcnn_box_coder_proto",
":keypoint_box_coder_proto",
":mean_stddev_box_coder_proto",
":square_box_coder_proto",
],
......
......@@ -3,6 +3,7 @@ syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/faster_rcnn_box_coder.proto";
import "object_detection/protos/keypoint_box_coder.proto";
import "object_detection/protos/mean_stddev_box_coder.proto";
import "object_detection/protos/square_box_coder.proto";
......@@ -13,5 +14,6 @@ message BoxCoder {
FasterRcnnBoxCoder faster_rcnn_box_coder = 1;
MeanStddevBoxCoder mean_stddev_box_coder = 2;
SquareBoxCoder square_box_coder = 3;
KeypointBoxCoder keypoint_box_coder = 4;
}
}
......@@ -48,6 +48,8 @@ message ConvolutionalBoxPredictor {
// Whether to apply sigmoid to the output of class predictions.
// TODO: Do we need this since we have a post-processing module?
optional bool apply_sigmoid_to_scores = 9 [default = false];
optional float class_prediction_bias_init = 10 [default = 0.0];
}
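For context, a hypothetical pipeline-config fragment showing how the new class_prediction_bias_init field might be set; the box_predictor / convolutional_box_predictor nesting and the -4.6 value (a common negative prior for sigmoid-style losses) are illustrative assumptions, not part of this diff:

box_predictor {
  convolutional_box_predictor {
    # Initializes the class-prediction bias to a strong negative prior,
    # often paired with a sigmoid or focal classification loss.
    class_prediction_bias_init: -4.6
  }
}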
message MaskRCNNBoxPredictor {
......
......@@ -38,10 +38,10 @@ message EvalConfig {
optional bool ignore_groundtruth = 10 [default=false];
// Use exponential moving averages of variables for evaluation.
// TODO: When this is false make sure the model is constructed
// without moving averages in restore_fn.
optional bool use_moving_averages = 11 [default=false];
// Whether to evaluate instance masks.
// Note that since there is no evaluation code currently for instance
// segmentation, this option is unused.
optional bool eval_instance_masks = 12 [default=false];
}
......@@ -116,16 +116,34 @@ message FasterRcnn {
// Second stage classification loss weight
optional float second_stage_classification_loss_weight = 26 [default=1.0];
// If not left to default, applies hard example mining.
optional HardExampleMiner hard_example_miner = 27;
// Second stage instance mask loss weight. Note that this is only applicable
// when `MaskRCNNBoxPredictor` is selected for second stage and configured to
// predict instance masks.
optional float second_stage_mask_prediction_loss_weight = 27 [default=1.0];
// If not left to default, applies hard example mining only to classification
// and localization loss.
optional HardExampleMiner hard_example_miner = 28;
// Loss for second stage box classifiers, supports Softmax and Sigmoid.
// Note that the score converter must be consistent with the loss type.
// When multiple labels are assigned to the same boxes, we recommend
// using the sigmoid loss and enabling merge_multiple_label_boxes.
// If not specified, the Softmax loss is used by default.
optional ClassificationLoss second_stage_classification_loss = 29;
}
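A sketch of how the new second-stage fields might appear in a model config, assuming the standard model { faster_rcnn { ... } } layout; the values are illustrative:

model {
  faster_rcnn {
    # Weight for the instance mask loss (only relevant when a
    # MaskRCNNBoxPredictor is configured to predict masks).
    second_stage_mask_prediction_loss_weight: 1.0
    # Sigmoid loss for the second-stage classifier; keep the score converter
    # consistent and, for multi-label boxes, enable merge_multiple_label_boxes.
    second_stage_classification_loss {
      weighted_sigmoid {
      }
    }
  }
}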
message FasterRcnnFeatureExtractor {
// Type of Faster R-CNN model (e.g., 'faster_rcnn_resnet101';
// See models/model_builder.py for expected types).
// See builders/model_builder.py for expected types).
optional string type = 1;
// Output stride of extracted RPN feature map.
optional int32 first_stage_features_stride = 2 [default=16];
// Whether to update batch norm parameters during training or not.
// When training with a relatively large batch size (e.g. 8), it could be
// desirable to enable batch norm update.
optional bool batch_norm_trainable = 3 [default=false];
}
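A minimal feature_extractor sketch using only the fields shown above; the enclosing faster_rcnn block is assumed:

feature_extractor {
  type: "faster_rcnn_resnet101"
  first_stage_features_stride: 16
  # Enable batch norm updates when the batch size is relatively large.
  batch_norm_trainable: true
}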
......@@ -11,6 +11,13 @@ message ImageResizer {
}
}
// Enumeration type for image resizing methods provided in TensorFlow.
enum ResizeType {
BILINEAR = 0; // Corresponds to tf.image.ResizeMethod.BILINEAR
NEAREST_NEIGHBOR = 1; // Corresponds to tf.image.ResizeMethod.NEAREST_NEIGHBOR
BICUBIC = 2; // Corresponds to tf.image.ResizeMethod.BICUBIC
AREA = 3; // Corresponds to tf.image.ResizeMethod.AREA
}
// Configuration proto for image resizer that keeps aspect ratio.
message KeepAspectRatioResizer {
......@@ -19,8 +26,10 @@ message KeepAspectRatioResizer {
// Desired size of the larger image dimension in pixels.
optional int32 max_dimension = 2 [default = 1024];
}
// Desired method when resizing image.
optional ResizeType resize_method = 3 [default = BILINEAR];
}
// Configuration proto for image resizer that resizes to a fixed shape.
message FixedShapeResizer {
......@@ -29,4 +38,7 @@ message FixedShapeResizer {
// Desired width of image in pixels.
optional int32 width = 2 [default = 300];
// Desired method when resizing image.
optional ResizeType resize_method = 3 [default = BILINEAR];
}
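An illustrative image_resizer config using the new resize_method field; the fixed_shape_resizer nesting inside the ImageResizer oneof is assumed:

image_resizer {
  fixed_shape_resizer {
    height: 300
    width: 300
    # Any ResizeType value works here; NEAREST_NEIGHBOR is just an example.
    resize_method: NEAREST_NEIGHBOR
  }
}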
......@@ -49,8 +49,8 @@ message InputReader {
// An input reader that reads TF Example protos from local TFRecord files.
message TFRecordInputReader {
// Path to TFRecordFile.
optional string input_path = 1 [default=""];
// Path(s) to `TFRecordFile`s.
repeated string input_path = 1;
}
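Since input_path is now repeated, multiple TFRecord shards can be listed. A hypothetical reader block (the train_input_reader / tf_record_input_reader nesting and the paths are illustrative):

train_input_reader {
  tf_record_input_reader {
    # Repeated field: list each shard on its own line.
    input_path: "data/train-00000-of-00002.tfrecord"
    input_path: "data/train-00001-of-00002.tfrecord"
  }
}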
// An externally defined input reader. Users may define an extension to this
......
syntax = "proto2";
package object_detection.protos;
// Configuration proto for KeypointBoxCoder. See
// box_coders/keypoint_box_coder.py for details.
message KeypointBoxCoder {
optional int32 num_keypoints = 1;
// Scale factor for anchor encoded box center and keypoints.
optional float y_scale = 2 [default = 10.0];
optional float x_scale = 3 [default = 10.0];
// Scale factor for anchor encoded box height.
optional float height_scale = 4 [default = 5.0];
// Scale factor for anchor encoded box width.
optional float width_scale = 5 [default = 5.0];
}
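A sketch of selecting the new coder via the BoxCoder oneof shown earlier in this diff; the enclosing model block is assumed and the scale values are the proto defaults:

box_coder {
  keypoint_box_coder {
    # Number of keypoints encoded with each box (no proto default).
    num_keypoints: 6
    y_scale: 10.0
    x_scale: 10.0
    height_scale: 5.0
    width_scale: 5.0
  }
}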
......@@ -53,6 +53,7 @@ message ClassificationLoss {
WeightedSigmoidClassificationLoss weighted_sigmoid = 1;
WeightedSoftmaxClassificationLoss weighted_softmax = 2;
BootstrappedSigmoidClassificationLoss bootstrapped_sigmoid = 3;
SigmoidFocalClassificationLoss weighted_sigmoid_focal = 4;
}
}
......@@ -62,10 +63,23 @@ message WeightedSigmoidClassificationLoss {
optional bool anchorwise_output = 1 [default=false];
}
// Sigmoid Focal cross entropy loss as described in
// https://arxiv.org/abs/1708.02002
message SigmoidFocalClassificationLoss {
optional bool anchorwise_output = 1 [default = false];
// modulating factor for the loss.
optional float gamma = 2 [default = 2.0];
// alpha weighting factor for the loss.
optional float alpha = 3;
}
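A hedged example of selecting the new focal loss, assuming the standard loss { classification_loss { ... } } nesting; the gamma/alpha values follow the cited paper's common settings and are only illustrative:

classification_loss {
  weighted_sigmoid_focal {
    # Focusing (modulating) parameter from https://arxiv.org/abs/1708.02002.
    gamma: 2.0
    # Optional class-balancing weight; omit to leave it unset.
    alpha: 0.25
  }
}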
// Classification loss using a softmax function over class predictions.
message WeightedSoftmaxClassificationLoss {
// Output loss per anchor.
optional bool anchorwise_output = 1 [default=false];
// Scale logit (input) value before calculating softmax classification loss.
// Typically used for softmax distillation.
optional float logit_scale = 2 [default = 1.0];
}
// Classification loss using a sigmoid function over the class prediction with
......
......@@ -12,24 +12,24 @@ message Optimizer {
MomentumOptimizer momentum_optimizer = 2;
AdamOptimizer adam_optimizer = 3;
}
optional bool use_moving_average = 4 [default=true];
optional float moving_average_decay = 5 [default=0.9999];
optional bool use_moving_average = 4 [default = true];
optional float moving_average_decay = 5 [default = 0.9999];
}
// Configuration message for the RMSPropOptimizer
// See: https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
message RMSPropOptimizer {
optional LearningRate learning_rate = 1;
optional float momentum_optimizer_value = 2 [default=0.9];
optional float decay = 3 [default=0.9];
optional float epsilon = 4 [default=1.0];
optional float momentum_optimizer_value = 2 [default = 0.9];
optional float decay = 3 [default = 0.9];
optional float epsilon = 4 [default = 1.0];
}
// Configuration message for the MomentumOptimizer
// See: https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer
message MomentumOptimizer {
optional LearningRate learning_rate = 1;
optional float momentum_optimizer_value = 2 [default=0.9];
optional float momentum_optimizer_value = 2 [default = 0.9];
}
// Configuration message for the AdamOptimizer
......@@ -44,30 +44,40 @@ message LearningRate {
ConstantLearningRate constant_learning_rate = 1;
ExponentialDecayLearningRate exponential_decay_learning_rate = 2;
ManualStepLearningRate manual_step_learning_rate = 3;
CosineDecayLearningRate cosine_decay_learning_rate = 4;
}
}
// Configuration message for a constant learning rate.
message ConstantLearningRate {
optional float learning_rate = 1 [default=0.002];
optional float learning_rate = 1 [default = 0.002];
}
// Configuration message for an exponentially decaying learning rate.
// See https://www.tensorflow.org/versions/master/api_docs/python/train/ \
// decaying_the_learning_rate#exponential_decay
message ExponentialDecayLearningRate {
optional float initial_learning_rate = 1 [default=0.002];
optional uint32 decay_steps = 2 [default=4000000];
optional float decay_factor = 3 [default=0.95];
optional bool staircase = 4 [default=true];
optional float initial_learning_rate = 1 [default = 0.002];
optional uint32 decay_steps = 2 [default = 4000000];
optional float decay_factor = 3 [default = 0.95];
optional bool staircase = 4 [default = true];
}
// Configuration message for a manually defined learning rate schedule.
message ManualStepLearningRate {
optional float initial_learning_rate = 1 [default=0.002];
optional float initial_learning_rate = 1 [default = 0.002];
message LearningRateSchedule {
optional uint32 step = 1;
optional float learning_rate = 2 [default=0.002];
optional float learning_rate = 2 [default = 0.002];
}
repeated LearningRateSchedule schedule = 2;
}
// Configuration message for a cosine decaying learning rate as defined in
// object_detection/utils/learning_schedules.py
message CosineDecayLearningRate {
optional float learning_rate_base = 1 [default = 0.002];
optional uint32 total_steps = 2 [default = 4000000];
optional float warmup_learning_rate = 3 [default = 0.0002];
optional uint32 warmup_steps = 4 [default = 10000];
}
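A sketch of the new cosine schedule wired through the Optimizer and MomentumOptimizer messages shown above; the step counts and rates are illustrative:

optimizer {
  momentum_optimizer {
    learning_rate {
      cosine_decay_learning_rate {
        learning_rate_base: 0.004
        total_steps: 200000
        warmup_learning_rate: 0.0004
        warmup_steps: 10000
      }
    }
    momentum_optimizer_value: 0.9
  }
  use_moving_average: false
}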
......@@ -39,4 +39,8 @@ message PostProcessing {
// Score converter to use.
optional ScoreConverter score_converter = 2 [default = IDENTITY];
// Scale logit (input) value before conversion in post-processing step.
// Typically used for softmax distillation, though can be used to scale for
// other reasons.
optional float logit_scale = 3 [default = 1.0];
}
......@@ -29,6 +29,9 @@ message PreprocessingStep {
SSDRandomCrop ssd_random_crop = 21;
SSDRandomCropPad ssd_random_crop_pad = 22;
SSDRandomCropFixedAspectRatio ssd_random_crop_fixed_aspect_ratio = 23;
SSDRandomCropPadFixedAspectRatio ssd_random_crop_pad_fixed_aspect_ratio = 24;
RandomVerticalFlip random_vertical_flip = 25;
RandomRotation90 random_rotation90 = 26;
}
}
......@@ -42,10 +45,44 @@ message NormalizeImage {
optional float target_maxval = 4 [default=1];
}
// Randomly horizontally mirrors the image and detections 50% of the time.
// Randomly horizontally flips the image and detections 50% of the time.
message RandomHorizontalFlip {
// Specifies a mapping from the original keypoint indices to horizontally
// flipped indices. If keypoints are specified, they are permuted with this
// mapping whenever the image is horizontally flipped. E.g. for keypoints
// representing left_eye, right_eye, nose_tip, mouth, left_ear, right_ear
// (in that order), one might specify the keypoint_flip_permutation below:
// keypoint_flip_permutation: 1
// keypoint_flip_permutation: 0
// keypoint_flip_permutation: 2
// keypoint_flip_permutation: 3
// keypoint_flip_permutation: 5
// keypoint_flip_permutation: 4
repeated int32 keypoint_flip_permutation = 1;
}
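The permutation from the comment above, written as a config entry; the train_config / data_augmentation_options nesting is assumed:

data_augmentation_options {
  random_horizontal_flip {
    # left_eye <-> right_eye and left_ear <-> right_ear swap;
    # nose_tip and mouth map to themselves.
    keypoint_flip_permutation: 1
    keypoint_flip_permutation: 0
    keypoint_flip_permutation: 2
    keypoint_flip_permutation: 3
    keypoint_flip_permutation: 5
    keypoint_flip_permutation: 4
  }
}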
// Randomly vertically flips the image and detections 50% of the time.
message RandomVerticalFlip {
// Specifies a mapping from the original keypoint indices to vertically
// flipped indices. If keypoints are specified, they are permuted with this
// mapping whenever the image is vertically flipped. E.g. for keypoints
// representing left_eye, right_eye, nose_tip, mouth, left_ear, right_ear
// (in that order), one might specify the keypoint_flip_permutation below:
// keypoint_flip_permutation: 1
// keypoint_flip_permutation: 0
// keypoint_flip_permutation: 2
// keypoint_flip_permutation: 3
// keypoint_flip_permutation: 5
// keypoint_flip_permutation: 4
repeated int32 keypoint_flip_permutation = 1;
}
// Randomly rotates the image and detections by 90 degrees counter-clockwise
// 50% of the time.
message RandomRotation90 {}
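The two new augmentation steps have no required fields, so enabling them (assuming the same data_augmentation_options nesting) is just:

data_augmentation_options {
  random_vertical_flip {
  }
}
data_augmentation_options {
  random_rotation90 {
  }
}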
// Randomly scales the values of all pixels in the image by some constant value
// between [minval, maxval], then clips the values to the range [0, 1.0].
message RandomPixelValueScale {
......@@ -324,3 +361,45 @@ message SSDRandomCropFixedAspectRatio {
// Aspect ratio to crop to. This value is used for all crop operations.
optional float aspect_ratio = 2 [default=1.0];
}
message SSDRandomCropPadFixedAspectRatioOperation {
// Cropped image must cover at least this fraction of one original bounding
// box.
optional float min_object_covered = 1;
// The aspect ratio of the cropped image must be within the range of
// [min_aspect_ratio, max_aspect_ratio].
optional float min_aspect_ratio = 2;
optional float max_aspect_ratio = 3;
// The area of the cropped image must be within the range of
// [min_area, max_area].
optional float min_area = 4;
optional float max_area = 5;
// Cropped box area ratio must be above this threshold to be kept.
optional float overlap_thresh = 6;
// Probability a crop operation is skipped.
optional float random_coef = 7;
// Min ratio of padded image height and width to the input image's height and
// width. Two entries per operation.
repeated float min_padded_size_ratio = 8;
// Max ratio of padded image height and width to the input image's height and
// width. Two entries per operation.
repeated float max_padded_size_ratio = 9;
}
// Randomly crops and pads an image to a fixed aspect ratio according to:
// Liu et al., SSD: Single shot multibox detector.
// Multiple SSDRandomCropPadFixedAspectRatioOperations are defined by this
// preprocessing step. Only one operation (chosen at random) is actually
// performed on an image.
message SSDRandomCropPadFixedAspectRatio {
repeated SSDRandomCropPadFixedAspectRatioOperation operations = 1;
// Aspect ratio to pad to. This value is used for all crop and pad operations.
optional float aspect_ratio = 2 [default=1.0];
}
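A hypothetical single-operation config for the new step; every numeric value below is illustrative, and min/max_padded_size_ratio take two entries each (height, then width):

data_augmentation_options {
  ssd_random_crop_pad_fixed_aspect_ratio {
    operations {
      min_object_covered: 0.5
      min_aspect_ratio: 0.5
      max_aspect_ratio: 2.0
      min_area: 0.1
      max_area: 1.0
      overlap_thresh: 0.3
      random_coef: 0.15
      # Two entries per operation: height ratio, then width ratio.
      min_padded_size_ratio: 1.0
      min_padded_size_ratio: 1.0
      max_padded_size_ratio: 2.0
      max_padded_size_ratio: 2.0
    }
    aspect_ratio: 1.0
  }
}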
......@@ -62,4 +62,24 @@ message SsdFeatureExtractor {
// Hyperparameters for the feature extractor.
optional Hyperparams conv_hyperparams = 4;
// The nearest multiple to zero-pad the input height and width dimensions to.
// For example, if pad_to_multiple = 2, input dimensions are zero-padded
// until the resulting dimensions are even.
optional int32 pad_to_multiple = 5 [default = 1];
// Whether to update batch norm parameters during training or not.
// When training with a relatively small batch size (e.g. 1), it is
// desirable to disable batch norm update and use pretrained batch norm
// params.
//
// Note: Some feature extractors are used with canned arg_scopes
// (e.g. resnet arg scopes). In these cases, the training behavior of batch
// norm variables may depend on both `batch_norm_trainable` and
// `is_training`.
//
// When canned arg_scopes are used with feature extractors, `conv_hyperparams`
// applies only to the additional layers that are added outside the canned
// arg_scope.
optional bool batch_norm_trainable = 6 [default=true];
}
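A sketch of the two new feature-extractor fields; the type string is only an example and the feature_extractor nesting inside the ssd model block is assumed:

feature_extractor {
  type: "ssd_mobilenet_v1"
  # Zero-pad input height/width up to the nearest multiple of 32.
  pad_to_multiple: 32
  # Freeze batch norm statistics when training with very small batches.
  batch_norm_trainable: false
  # conv_hyperparams omitted for brevity.
}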
......@@ -15,11 +15,41 @@ message SsdAnchorGenerator {
// Scale of anchors corresponding to coarsest resolution
optional float max_scale = 3 [default = 0.95];
// Can be used to override min_scale->max_scale, with an explicitly defined
// set of scales. If empty, then min_scale->max_scale is used.
repeated float scales = 12;
// Aspect ratios for anchors at each grid point.
repeated float aspect_ratios = 4;
// When this aspect ratio is greater than 0, an additional anchor with an
// interpolated scale is added at this aspect ratio.
optional float interpolated_scale_aspect_ratio = 13 [default = 1.0];
// Whether to use the following aspect ratio and scale combination for the
// layer with the finest resolution: (scale=0.1, aspect_ratio=1.0),
// (scale=min_scale, aspect_ratio=2.0), (scale=min_scale, aspect_ratio=0.5).
optional bool reduce_boxes_in_lowest_layer = 5 [default = true];
// The base anchor size in height dimension.
optional float base_anchor_height = 6 [default = 1.0];
// The base anchor size in width dimension.
optional float base_anchor_width = 7 [default = 1.0];
// Anchor stride in height dimension in pixels for each layer. The length of
// this field is expected to be equal to the value of num_layers.
repeated int32 height_stride = 8;
// Anchor stride in width dimension in pixels for each layer. The length of
// this field is expected to be equal to the value of num_layers.
repeated int32 width_stride = 9;
// Anchor height offset in pixels for each layer. The length of this field is
// expected to be equal to the value of num_layers.
repeated int32 height_offset = 10;
// Anchor width offset in pixels for each layer. The length of this field is
// expected to be equal to the value of num_layers.
repeated int32 width_offset = 11;
}
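A sketch showing the new explicit-scales override together with the interpolated-scale option; the anchor_generator / ssd_anchor_generator nesting and the specific scale values are assumptions:

anchor_generator {
  ssd_anchor_generator {
    aspect_ratios: 1.0
    aspect_ratios: 2.0
    aspect_ratios: 0.5
    # Explicit per-layer scales override the min_scale -> max_scale range.
    scales: 0.1
    scales: 0.35
    scales: 0.5
    scales: 0.65
    scales: 0.8
    scales: 0.95
    # Adds one extra anchor per location with an interpolated scale.
    interpolated_scale_aspect_ratio: 1.0
  }
}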
......@@ -54,11 +54,16 @@ message TrainConfig {
optional int32 replicas_to_aggregate = 13 [default=1];
// Maximum number of elements to store within a queue.
optional int32 batch_queue_capacity = 14 [default=600];
optional int32 batch_queue_capacity = 14 [default=150];
// Number of threads to use for batching.
optional int32 num_batch_queue_threads = 15 [default=8];
// Maximum capacity of the queue used to prefetch assembled batches.
optional int32 prefetch_queue_capacity = 16 [default=10];
optional int32 prefetch_queue_capacity = 16 [default=5];
// If true, boxes with the same coordinates will be merged together.
// This is useful when each box can have multiple labels.
// Note that only Sigmoid classification losses should be used.
optional bool merge_multiple_label_boxes = 17 [default=false];
}
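A hypothetical train_config fragment for the multi-label case; per the comment above, this should be paired with a sigmoid-based classification loss, and the queue capacities shown are just the new defaults:

train_config {
  # Merge boxes that share coordinates so each merged box can carry
  # multiple labels; use only with sigmoid-style classification losses.
  merge_multiple_label_boxes: true
  batch_queue_capacity: 150
  prefetch_queue_capacity: 5
}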