Commit 47bc1813 authored by syiming

Merge remote-tracking branch 'upstream/master' into add_multilevel_crop_and_resize

parents d8611151 b035a227
@@ -14,6 +14,7 @@
# ==============================================================================
"""Tests for object_detection.predictors.rfcn_box_predictor."""
import unittest
import numpy as np
import tensorflow.compat.v1 as tf
@@ -22,8 +23,10 @@ from object_detection.builders import hyperparams_builder
from object_detection.predictors import rfcn_keras_box_predictor as box_predictor
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class RfcnKerasBoxPredictorTest(test_case.TestCase):
def _build_conv_hyperparams(self):
@@ -42,8 +45,6 @@ class RfcnKerasBoxPredictorTest(test_case.TestCase):
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def test_get_correct_box_encoding_and_class_prediction_shapes(self):
def graph_fn(image_features, proposal_boxes):
rfcn_box_predictor = box_predictor.RfcnKerasBoxPredictor(
is_training=False,
num_classes=2,
@@ -52,8 +53,9 @@ class RfcnKerasBoxPredictorTest(test_case.TestCase):
num_spatial_bins=[3, 3],
depth=4,
crop_size=[12, 12],
box_code_size=4
)
box_code_size=4)
def graph_fn(image_features, proposal_boxes):
box_predictions = rfcn_box_predictor(
[image_features],
proposal_boxes=proposal_boxes)
......
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/losses.proto";
// Configuration for the CenterNet meta architecture from the "Objects as
// Points" paper [1]. A minimal, illustrative config sketch follows the
// message definition below.
// [1]: https://arxiv.org/abs/1904.07850
message CenterNet {
// Number of classes to predict.
optional int32 num_classes = 1;
// Feature extractor config.
optional CenterNetFeatureExtractor feature_extractor = 2;
// Image resizer for preprocessing the input image.
optional ImageResizer image_resizer = 3;
// Parameters related to the object detection task.
message ObjectDetection {
// The original fields are moved to ObjectCenterParams or deleted.
reserved 2, 5, 6, 7;
// Weight of the task loss. The total loss of the model will be the
// summation of task losses weighted by the weights.
optional float task_loss_weight = 1 [default = 1.0];
// Weight for the offset localization loss.
optional float offset_loss_weight = 3 [default = 1.0];
// Weight for the height/width localization loss.
optional float scale_loss_weight = 4 [default = 0.1];
// Localization loss configuration for object scale and offset losses.
optional LocalizationLoss localization_loss = 8;
}
optional ObjectDetection object_detection_task = 4;
// Parameters related to object center prediction. This is required for both
// object detection and keypoint estimation tasks.
message ObjectCenterParams {
// Weight for the object center loss.
optional float object_center_loss_weight = 1 [default = 1.0];
// Classification loss configuration for object center loss.
optional ClassificationLoss classification_loss = 2;
// The initial bias value of the convolution kernel of the class heatmap
// prediction head. -2.19 corresponds to predicting foreground with
// a probability of 0.1; see "Focal Loss for Dense Object Detection"
// at https://arxiv.org/abs/1708.02002 and the short sketch after this
// message.
optional float heatmap_bias_init = 3 [default = -2.19];
// The minimum IOU overlap boxes need to have to not be penalized.
optional float min_box_overlap_iou = 4 [default = 0.7];
// Maximum number of boxes to predict.
optional int32 max_box_predictions = 5 [default = 100];
// If set, loss is only computed for the labeled classes.
optional bool use_labeled_classes = 6 [default = false];
}
optional ObjectCenterParams object_center_params = 5;
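For context on the -2.19 default above: with a sigmoid output, an initial bias of b = -log((1 - p) / p) makes the head start out predicting foreground with probability p, as in the focal loss paper. A minimal sketch of that arithmetic (plain Python, not code from this diff):

import math

p = 0.1  # desired initial foreground probability
bias = -math.log((1 - p) / p)
print(round(bias, 2))  # -2.2, matching the -2.19 default up to rounding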
// Path of the file that contains the label map along with the keypoint
// information, including the keypoint indices, corresponding labels, and the
// corresponding class. The file should be the same one used in the input
// pipeline. Note that this file is expected to contain a StringIntLabelMap
// proto in plain text format.
// It is required only if the keypoint estimation task is specified.
optional string keypoint_label_map_path = 6;
// Parameters related to the keypoint estimation task.
message KeypointEstimation {
// Name of the task, e.g. "human pose". Note that the task name should be
// unique to each keypoint task.
optional string task_name = 1;
// Weight of the task loss. The total loss of the model will be the
// summation of task losses weighted by the weights.
optional float task_loss_weight = 2 [default = 1.0];
// Loss configuration for the keypoint heatmap, offset, and regression
// losses. Note that the localization loss is used for the offset/regression
// losses and the classification loss is used for the heatmap loss.
optional Loss loss = 3;
// The name of the class that contains the keypoints for this task. This is
// used to retrieve the corresponding keypoint indices from the label map.
// Note that this corresponds to the "name" field, not "display_name".
optional string keypoint_class_name = 4;
// The standard deviation of the Gaussian kernel used to generate the
// keypoint heatmap, measured in pixels of the output image. This provides
// the flexibility of using a different Gaussian kernel size for each
// keypoint class. Note that if provided, the values specified here override
// the keypoint standard deviations; otherwise, the default value 5.0 will
// be used.
// TODO(yuhuic): Update the default value once we find the best value.
map<string, float> keypoint_label_to_std = 5;
// Loss weights corresponding to different heads.
optional float keypoint_regression_loss_weight = 6 [default = 1.0];
optional float keypoint_heatmap_loss_weight = 7 [default = 1.0];
optional float keypoint_offset_loss_weight = 8 [default = 1.0];
// The initial bias value of the convolution kernel of the keypoint heatmap
// prediction head. -2.19 corresponds to predicting foreground with
// a probability of 0.1. See "Focal Loss for Dense Object Detection"
// at https://arxiv.org/abs/1708.02002.
optional float heatmap_bias_init = 9 [default = -2.19];
// The heatmap score threshold for a keypoint to become a valid candidate.
optional float keypoint_candidate_score_threshold = 10 [default = 0.1];
// The maximum number of candidates to retrieve for each keypoint.
optional int32 num_candidates_per_keypoint = 11 [default = 100];
// Max pool kernel size to use to pull off peak score locations in a
// neighborhood (independently for each keypoint type).
optional int32 peak_max_pool_kernel_size = 12 [default = 3];
// The default score to use for regressed keypoints that are not
// successfully snapped to a nearby candidate.
optional float unmatched_keypoint_score = 13 [default = 0.1];
// The multiplier to expand the bounding boxes (either the provided boxes or
// those which tightly cover the regressed keypoints). Note that the newly
// expanded box for an instance becomes the feasible search window for all
// associated keypoints. (A candidate-snapping sketch follows this message.)
optional float box_scale = 14 [default = 1.2];
// The scale parameter that multiplies the largest dimension of a bounding
// box. The resulting distance becomes a search radius for candidates in the
// vicinity of each regressed keypoint.
optional float candidate_search_scale = 15 [default = 0.3];
// One of ['min_distance', 'score_distance_ratio'] indicating how to select
// the keypoint candidate.
optional string candidate_ranking_mode = 16 [default = "min_distance"];
// The radius (in the unit of output pixel) around heatmap peak to assign
// the offset targets. If set to 0, then the offset target will only be
// assigned to the heatmap peak (same behavior as the original paper).
optional int32 offset_peak_radius = 17 [default = 0];
// Indicates whether to assign offsets for each keypoint channel
// separately. If set to False, the output offset target has the shape
// [batch_size, out_height, out_width, 2] (same behavior as the original
// paper). If set to True, the output offset target has the shape [batch_size,
// out_height, out_width, 2 * num_keypoints] (recommended when the
// offset_peak_radius is not zero).
optional bool per_keypoint_offset = 18 [default = false];
}
repeated KeypointEstimation keypoint_estimation_task = 7;
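The fields box_scale, candidate_search_scale, candidate_ranking_mode, and unmatched_keypoint_score above describe how regressed keypoints are snapped to heatmap candidates. A minimal numpy sketch of the "min_distance" ranking path, with all names hypothetical (the actual logic lives in the CenterNet meta architecture, which is not part of this diff):

import numpy as np

def snap_keypoint(regressed_kp, candidates, candidate_scores, box_hw,
                  candidate_search_scale=0.3, unmatched_keypoint_score=0.1):
  # regressed_kp: [2] (y, x); candidates: [N, 2]; candidate_scores: [N];
  # box_hw: (height, width) of the instance's (possibly expanded) box.
  if len(candidates) == 0:
    return regressed_kp, unmatched_keypoint_score
  # Search radius: candidate_search_scale times the box's largest dimension.
  radius = candidate_search_scale * max(box_hw)
  dists = np.linalg.norm(candidates - regressed_kp, axis=1)
  in_radius = dists <= radius
  if not in_radius.any():
    # No candidate nearby: keep the regressed point with the default score.
    return regressed_kp, unmatched_keypoint_score
  # "min_distance": choose the closest candidate inside the radius.
  idx = int(np.argmin(np.where(in_radius, dists, np.inf)))
  return candidates[idx], candidate_scores[idx]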
// Parameters related to the mask estimation task.
// Note: Currently, CenterNet supports only a weak form of instance
// segmentation, where semantic segmentation masks are estimated and then
// cropped based on bounding box detections. Therefore, it is possible for
// the same image pixel to be assigned to multiple instances.
message MaskEstimation {
// Weight of the task loss. The total loss of the model will be the
// summation of task losses weighted by the weights.
optional float task_loss_weight = 1 [default = 1.0];
// Classification loss configuration for segmentation loss.
optional ClassificationLoss classification_loss = 2;
// Each instance mask (one per detection) is cropped and resized (bilinear
// resampling) from the predicted segmentation feature map. After
// resampling, the masks are binarized with the provided score threshold.
optional int32 mask_height = 4 [default = 256];
optional int32 mask_width = 5 [default = 256];
optional float score_threshold = 6 [default = 0.5];
// The initial bias value of the convolution kernel of the class heatmap
// prediction head. -2.19 corresponds to predicting foreground with
// a probability of 0.1.
optional float heatmap_bias_init = 3 [default = -2.19];
}
optional MaskEstimation mask_estimation_task = 8;
}
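To show how the message composes, here is a hedged sketch that parses a minimal CenterNet config with the generated Python bindings. The module path follows the repo's usual proto layout, and the field values (including the feature extractor type) are illustrative assumptions, not a tuned configuration:

from google.protobuf import text_format
from object_detection.protos import center_net_pb2

config_text = '''
  num_classes: 90
  feature_extractor { type: "hourglass_104" }
  object_center_params {
    object_center_loss_weight: 1.0
    min_box_overlap_iou: 0.7
    max_box_predictions: 100
  }
  object_detection_task {
    task_loss_weight: 1.0
    offset_loss_weight: 1.0
    scale_loss_weight: 0.1
  }
'''
config = text_format.Parse(config_text, center_net_pb2.CenterNet())
print(config.object_center_params.max_box_predictions)  # 100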
message CenterNetFeatureExtractor {
optional string type = 1;
// Channel means to be subtracted from each image channel. If not specified,
// we use a default value of 0.
repeated float channel_means = 2;
// Channel standard deviations. Each channel will be normalized by dividing
// it by its standard deviation. If not specified, we use a default value
// of 1.
repeated float channel_stds = 3;
// If set, will change the channel order to be [blue, green, red]. This can
// be useful for compatibility with some pre-trained feature extractors.
// (A short normalization sketch follows this message.)
optional bool bgr_ordering = 4 [default = false];
}
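A minimal numpy sketch of the preprocessing the three fields above describe (mean subtraction, division by standard deviation, optional BGR reordering); the function name is hypothetical:

import numpy as np

def normalize_channels(image, channel_means=(0.0, 0.0, 0.0),
                       channel_stds=(1.0, 1.0, 1.0), bgr_ordering=False):
  # image: [height, width, 3] RGB array.
  image = (np.asarray(image, np.float32) - channel_means) / channel_stds
  if bgr_ordering:
    # Reorder RGB -> BGR for compatibility with some pre-trained backbones.
    image = image[..., ::-1]
  return image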
@@ -188,7 +188,7 @@ message Context {
// Next id: 4
// The maximum number of contextual features per image, used for padding.
optional int32 max_num_context_features = 1 [default = 8500];
optional int32 max_num_context_features = 1 [default = 2000];
// The bottleneck feature dimension of the attention block.
optional int32 attention_bottleneck_dimension = 2 [default = 2048];
......
@@ -52,6 +52,12 @@ message Hyperparams {
// Whether depthwise convolutions should be regularized. If this parameter is
// NOT set then the conv hyperparams will default to the parent scope.
optional bool regularize_depthwise = 6 [default = false];
// By default, use_bias is set to False if batch_norm is not None and
// batch_norm.center is True. When force_use_bias is set to True, this
// behavior will be overridden, and use_bias will be set to True, regardless
// of batch norm parameters. Note that this applies only to
// KerasLayerHyperparams. (A sketch of the rule follows this message.)
optional bool force_use_bias = 8 [default = false];
}
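As a plain-Python sketch of the rule documented above (hypothetical helper; the real logic lives in KerasLayerHyperparams):

def resolve_use_bias(batch_norm, force_use_bias=False):
  # Bias is redundant when a batch norm layer with centering (beta) follows
  # the convolution, so use_bias defaults to False in that case unless
  # force_use_bias overrides it.
  if force_use_bias:
    return True
  return not (batch_norm is not None and batch_norm.center)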
// Proto with one-of field for regularizers.
......
@@ -31,7 +31,7 @@ enum InputType {
TF_SEQUENCE_EXAMPLE = 2; // TfSequenceExample Input
}
// Next id: 31
// Next id: 32
message InputReader {
// Name of input reader. Typically used to describe the dataset that is read
// by this input reader.
@@ -119,6 +119,10 @@ message InputReader {
// Type of instance mask.
optional InstanceMaskType mask_type = 10 [default = NUMERICAL_MASKS];
// Whether to load DensePose data. If set, must also set load_instance_masks
// to true.
optional bool load_dense_pose = 31 [default = false];
// Whether to use the display name when decoding examples. This is only used
// when mapping class text strings to integers.
optional bool use_display_name = 17 [default = false];
......
@@ -2,6 +2,7 @@ syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/center_net.proto";
import "object_detection/protos/faster_rcnn.proto";
import "object_detection/protos/ssd.proto";
@@ -17,6 +18,7 @@ message DetectionModel {
// value to a function that builds your model.
ExperimentalModel experimental_model = 3;
CenterNet center_net = 4;
}
}
......
@@ -57,7 +57,8 @@ message NormalizeImage {
optional float target_maxval = 4 [default=1];
}
// Randomly horizontally flips the image and detections 50% of the time.
// Randomly horizontally flips the image and detections with the specified
// probability, defaulting to 50% of the time.
message RandomHorizontalFlip {
// Specifies a mapping from the original keypoint indices to horizontally
// flipped indices. This is used in the event that keypoints are specified,
@@ -71,10 +72,15 @@ message RandomHorizontalFlip {
// keypoint_flip_permutation: 3
// keypoint_flip_permutation: 5
// keypoint_flip_permutation: 4
// If nothing is specified, the order of the keypoints will be maintained
// (a sketch of applying the permutation follows this message).
repeated int32 keypoint_flip_permutation = 1;
// The probability of running this augmentation for each image.
optional float probability = 2 [default=0.5];
}
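A minimal numpy sketch of applying the flip permutation above (hypothetical function; the actual implementation is in the preprocessing library, not this diff):

import numpy as np

def flip_keypoints_horizontally(keypoints, flip_permutation):
  # keypoints: [num_instances, num_keypoints, 2] with normalized (y, x).
  y, x = keypoints[..., 0], keypoints[..., 1]
  flipped = np.stack([y, 1.0 - x], axis=-1)  # mirror x about the image center
  # Reorder channels so e.g. "left elbow" lands in the "right elbow" slot.
  return flipped[:, flip_permutation, :]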
// Randomly vertically flips the image and detections 50% of the time.
// Randomly vertically flips the image and detections with the specified
// probability, defaulting to 50% of the time.
message RandomVerticalFlip {
// Specifies a mapping from the original keypoint indices to vertically
// flipped indices. This is used in the event that keypoints are specified,
@@ -89,11 +95,23 @@ message RandomVerticalFlip {
// keypoint_flip_permutation: 5
// keypoint_flip_permutation: 4
repeated int32 keypoint_flip_permutation = 1;
// The probability of running this augmentation for each image.
optional float probability = 2 [default=0.5];
}
// Randomly rotates the image and detections by 90 degrees counter-clockwise
// 50% of the time.
message RandomRotation90 {}
// with the specified probability, defaulting to 50% of the time.
message RandomRotation90 {
// Specifies a mapping from the original keypoint indices to the indices
// after a 90-degree counter-clockwise rotation. This is used in the event
// that keypoints are specified, in which case the keypoints might need to
// be permuted when the image is rotated.
repeated int32 keypoint_rot_permutation = 1;
// The probability of running this augmentation for each image.
optional float probability = 2 [default=0.5];
}
// Randomly scales the values of all pixels in the image by some constant
// value between [minval, maxval], then clips the values to [0, 1.0].
@@ -457,7 +475,6 @@ message SSDRandomCropPadFixedAspectRatio {
// Converts class logits to softmax scores, optionally scaling the values by
// a temperature first. (A short sketch follows this message.)
message ConvertClassLogitsToSoftmax {
// Scale to use on logits before applying softmax.
optional float temperature = 1 [default=1.0];
}
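A short numpy sketch of the transform described above (hypothetical helper, shown only to make the temperature parameter concrete):

import numpy as np

def softmax_with_temperature(logits, temperature=1.0):
  # Scale logits by 1/temperature, then apply a numerically stable softmax.
  scaled = np.asarray(logits, np.float64) / temperature
  scaled -= scaled.max(axis=-1, keepdims=True)
  exp = np.exp(scaled)
  return exp / exp.sum(axis=-1, keepdims=True)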
@@ -472,12 +489,10 @@ message RandomSelfConcatImage {
// Apply an Autoaugment policy to the image and bounding boxes.
message AutoAugmentImage {
// Which AutoAugment policy to apply to the image.
optional string policy_name = 1 [default="v0"];
}
// Randomly drops ground truth boxes for a label with some probability.
message DropLabelProbabilistically {
// The label that should be dropped. This corresponds to one of the entries
@@ -487,7 +502,6 @@ message DropLabelProbabilistically {
optional float drop_probability = 2 [default = 1.0];
}
// Remap a set of labels to a new label.
message RemapLabels {
// Labels to be remapped.
......
@@ -59,7 +59,8 @@ message TrainConfig {
// Whether to load all checkpoint vars that match model variable names and
// sizes. This option is only available if `from_detection_checkpoint` is
// True.
// True. This option is *not* supported for TF2; setting it to true
// will raise an error.
optional bool load_all_detection_checkpoint_vars = 19 [default = false];
// Number of steps to train the DetectionModel for. If 0, will train the model
......