Commit f282f6ef authored by Alexander Gorban

Merge branch 'master' of github.com:tensorflow/models

parents 58a5da7b a2970b03
syntax = "proto2";
package object_detection.protos;
// Configuration proto for defining input readers that generate Object Detection
// Examples from input sources. Input readers are expected to generate a
// dictionary of tensors, with the following fields populated:
//
// 'image': an [image_height, image_width, channels] image tensor that detection
// will be run on.
// 'groundtruth_classes': a [num_boxes] int32 tensor storing the class
// labels of detected boxes in the image.
// 'groundtruth_boxes': a [num_boxes, 4] float tensor storing the coordinates of
// detected boxes in the image.
// 'groundtruth_instance_masks': (Optional), a [num_boxes, image_height,
// image_width] float tensor storing binary masks of the objects in boxes.
message InputReader {
// Path to StringIntLabelMap pbtxt file specifying the mapping from string
// labels to integer ids.
optional string label_map_path = 1 [default=""];
// Whether the data should be processed in the order it is read in, or
// shuffled randomly.
optional bool shuffle = 2 [default=true];
// Maximum number of records to keep in reader queue.
optional uint32 queue_capacity = 3 [default=2000];
// Minimum number of records to keep in reader queue. A large value is needed
// to generate a good random shuffle.
optional uint32 min_after_dequeue = 4 [default=1000];
// The number of times a data source is read. If set to zero, the data source
// will be reused indefinitely.
optional uint32 num_epochs = 5 [default=0];
// Number of reader instances to create.
optional uint32 num_readers = 6 [default=8];
// Whether to load groundtruth instance masks.
optional bool load_instance_masks = 7 [default = false];
oneof input_reader {
TFRecordInputReader tf_record_input_reader = 8;
ExternalInputReader external_input_reader = 9;
}
}
// An input reader that reads TF Example protos from local TFRecord files.
message TFRecordInputReader {
// Path to TFRecordFile.
optional string input_path = 1 [default=""];
}
// An externally defined input reader. Users may define an extension to this
// proto to interface their own input readers.
message ExternalInputReader {
extensions 1 to 999;
}
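// A minimal example of an InputReader configuration in text format (the paths
// here are placeholders to be replaced by the user):
//
//   train_input_reader: {
//     tf_record_input_reader {
//       input_path: "/path/to/train.record"
//     }
//     label_map_path: "/path/to/label_map.pbtxt"
//     shuffle: true
//     num_readers: 8
//   }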
syntax = "proto2";
package object_detection.protos;
// Message for configuring the localization loss, classification loss and hard
// example miner used for training object detection models. See core/losses.py
// for details.
message Loss {
// Localization loss to use.
optional LocalizationLoss localization_loss = 1;
// Classification loss to use.
optional ClassificationLoss classification_loss = 2;
// If not left to default, applies hard example mining.
optional HardExampleMiner hard_example_miner = 3;
// Classification loss weight.
optional float classification_weight = 4 [default=1.0];
// Localization loss weight.
optional float localization_weight = 5 [default=1.0];
}
// Configuration for bounding box localization loss function.
message LocalizationLoss {
oneof localization_loss {
WeightedL2LocalizationLoss weighted_l2 = 1;
WeightedSmoothL1LocalizationLoss weighted_smooth_l1 = 2;
WeightedIOULocalizationLoss weighted_iou = 3;
}
}
// L2 location loss: 0.5 * ||weight * (a - b)|| ^ 2
message WeightedL2LocalizationLoss {
// Output loss per anchor.
optional bool anchorwise_output = 1 [default=false];
}
// SmoothL1 (Huber) location loss: .5 * x ^ 2 if |x| < 1 else |x| - .5
message WeightedSmoothL1LocalizationLoss {
// Output loss per anchor.
optional bool anchorwise_output = 1 [default=false];
}
// Intersection over union location loss: 1 - IOU
message WeightedIOULocalizationLoss {
}
// Configuration for class prediction loss function.
message ClassificationLoss {
oneof classification_loss {
WeightedSigmoidClassificationLoss weighted_sigmoid = 1;
WeightedSoftmaxClassificationLoss weighted_softmax = 2;
BootstrappedSigmoidClassificationLoss bootstrapped_sigmoid = 3;
}
}
// Classification loss using a sigmoid function over class predictions.
message WeightedSigmoidClassificationLoss {
// Output loss per anchor.
optional bool anchorwise_output = 1 [default=false];
}
// Classification loss using a softmax function over class predictions.
message WeightedSoftmaxClassificationLoss {
// Output loss per anchor.
optional bool anchorwise_output = 1 [default=false];
}
// Classification loss using a sigmoid function over the class prediction with
// the highest prediction score.
message BootstrappedSigmoidClassificationLoss {
// Interpolation weight between 0 and 1.
optional float alpha = 1;
// Whether hard bootstrapping should be used or not. If true, will only use
// the one class favored by the model. Otherwise, will use all predicted class
// probabilities.
optional bool hard_bootstrap = 2 [default=false];
// Output loss per anchor.
optional bool anchorwise_output = 3 [default=false];
}
// Configuration for hard example miner.
message HardExampleMiner {
// Maximum number of hard examples to be selected per image (prior to
// enforcing max negative to positive ratio constraint). If set to 0,
// all examples obtained after NMS are considered.
optional int32 num_hard_examples = 1 [default=64];
// Minimum intersection over union for an example to be discarded during NMS.
optional float iou_threshold = 2 [default=0.7];
// Whether to use classification losses ('cls', default), localization losses
// ('loc') or both losses ('both'). In the case of 'both', cls_loss_weight and
// loc_loss_weight are used to compute weighted sum of the two losses.
enum LossType {
BOTH = 0;
CLASSIFICATION = 1;
LOCALIZATION = 2;
}
optional LossType loss_type = 3 [default=BOTH];
// Maximum number of negatives to retain for each positive anchor. If
// num_negatives_per_positive is 0 no prespecified negative:positive ratio is
// enforced.
optional int32 max_negatives_per_positive = 4 [default=0];
// Minimum number of negative anchors to sample for a given image. Setting
// this to a positive number allows sampling negatives in an image without any
// positive anchors, and thus avoids biasing the model towards having at least
// one detection per image.
optional int32 min_negatives_per_image = 5 [default=0];
}
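// An illustrative Loss configuration exercising the messages above (the
// specific values are examples, not recommendations):
//
//   loss {
//     localization_loss {
//       weighted_smooth_l1 {
//       }
//     }
//     classification_loss {
//       weighted_sigmoid {
//       }
//     }
//     hard_example_miner {
//       num_hard_examples: 3000
//       iou_threshold: 0.99
//       loss_type: CLASSIFICATION
//       max_negatives_per_positive: 3
//     }
//     classification_weight: 1.0
//     localization_weight: 1.0
//   }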
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/argmax_matcher.proto";
import "object_detection/protos/bipartite_matcher.proto";
// Configuration proto for the matcher to be used in the object detection
// pipeline. See core/matcher.py for details.
message Matcher {
oneof matcher_oneof {
ArgMaxMatcher argmax_matcher = 1;
BipartiteMatcher bipartite_matcher = 2;
}
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for MeanStddevBoxCoder. See
// box_coders/mean_stddev_box_coder.py for details.
message MeanStddevBoxCoder {
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/faster_rcnn.proto";
import "object_detection/protos/ssd.proto";
// Top level configuration for DetectionModels.
message DetectionModel {
oneof model {
FasterRcnn faster_rcnn = 1;
Ssd ssd = 2;
}
}
syntax = "proto2";
package object_detection.protos;
// Messages for configuring the optimizing strategy for training object
// detection models.
// Top level optimizer message.
message Optimizer {
oneof optimizer {
RMSPropOptimizer rms_prop_optimizer = 1;
MomentumOptimizer momentum_optimizer = 2;
AdamOptimizer adam_optimizer = 3;
}
optional bool use_moving_average = 4 [default=true];
optional float moving_average_decay = 5 [default=0.9999];
}
// Configuration message for the RMSPropOptimizer
// See: https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
message RMSPropOptimizer {
optional LearningRate learning_rate = 1;
optional float momentum_optimizer_value = 2 [default=0.9];
optional float decay = 3 [default=0.9];
optional float epsilon = 4 [default=1.0];
}
// Configuration message for the MomentumOptimizer
// See: https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer
message MomentumOptimizer {
optional LearningRate learning_rate = 1;
optional float momentum_optimizer_value = 2 [default=0.9];
}
// Configuration message for the AdamOptimizer
// See: https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer
message AdamOptimizer {
optional LearningRate learning_rate = 1;
}
// Configuration message for optimizer learning rate.
message LearningRate {
oneof learning_rate {
ConstantLearningRate constant_learning_rate = 1;
ExponentialDecayLearningRate exponential_decay_learning_rate = 2;
ManualStepLearningRate manual_step_learning_rate = 3;
}
}
// Configuration message for a constant learning rate.
message ConstantLearningRate {
optional float learning_rate = 1 [default=0.002];
}
// Configuration message for an exponentially decaying learning rate.
// See https://www.tensorflow.org/versions/master/api_docs/python/train/ \
// decaying_the_learning_rate#exponential_decay
message ExponentialDecayLearningRate {
optional float initial_learning_rate = 1 [default=0.002];
optional uint32 decay_steps = 2 [default=4000000];
optional float decay_factor = 3 [default=0.95];
optional bool staircase = 4 [default=true];
}
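// An illustrative optimizer configuration combining RMSPropOptimizer with an
// exponentially decaying learning rate (all values are examples only):
//
//   optimizer {
//     rms_prop_optimizer: {
//       learning_rate: {
//         exponential_decay_learning_rate {
//           initial_learning_rate: 0.004
//           decay_steps: 800720
//           decay_factor: 0.95
//         }
//       }
//       momentum_optimizer_value: 0.9
//       decay: 0.9
//       epsilon: 1.0
//     }
//     use_moving_average: false
//   }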
// Configuration message for a manually defined learning rate schedule.
message ManualStepLearningRate {
optional float initial_learning_rate = 1 [default=0.002];
message LearningRateSchedule {
optional uint32 step = 1;
optional float learning_rate = 2 [default=0.002];
}
repeated LearningRateSchedule schedule = 2;
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/eval.proto";
import "object_detection/protos/input_reader.proto";
import "object_detection/protos/model.proto";
import "object_detection/protos/train.proto";
// Convenience message for configuring a training and eval pipeline. Allows all
// of the pipeline parameters to be configured from one file.
message TrainEvalPipelineConfig {
optional DetectionModel model = 1;
optional TrainConfig train_config = 2;
optional InputReader train_input_reader = 3;
optional EvalConfig eval_config = 4;
optional InputReader eval_input_reader = 5;
}
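// The top-level layout of a pipeline config file implied by this message;
// complete worked examples appear later in this commit:
//
//   model { ... }
//   train_config: { ... }
//   train_input_reader: { ... }
//   eval_config: { ... }
//   eval_input_reader: { ... }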
syntax = "proto2";
package object_detection.protos;
// Configuration proto for non-max-suppression operation on a batch of
// detections.
message BatchNonMaxSuppression {
// Scalar threshold for score (low scoring boxes are removed).
optional float score_threshold = 1 [default = 0.0];
// Scalar threshold for IOU (boxes that have high IOU overlap
// with previously selected boxes are removed).
optional float iou_threshold = 2 [default = 0.6];
// Maximum number of detections to retain per class.
optional int32 max_detections_per_class = 3 [default = 100];
// Maximum number of detections to retain across all classes.
optional int32 max_total_detections = 5 [default = 100];
}
// Configuration proto for post-processing predicted boxes and
// scores.
message PostProcessing {
// Non max suppression parameters.
optional BatchNonMaxSuppression batch_non_max_suppression = 1;
// Enum to specify how to convert the detection scores.
enum ScoreConverter {
// Input scores equal output scores.
IDENTITY = 0;
// Applies a sigmoid on input scores.
SIGMOID = 1;
// Applies a softmax on input scores.
SOFTMAX = 2;
}
// Score converter to use.
optional ScoreConverter score_converter = 2 [default = IDENTITY];
}
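// A sketch of a PostProcessing configuration (the thresholds here are
// illustrative):
//
//   post_processing {
//     batch_non_max_suppression {
//       score_threshold: 1e-8
//       iou_threshold: 0.6
//       max_detections_per_class: 100
//       max_total_detections: 100
//     }
//     score_converter: SIGMOID
//   }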
syntax = "proto2";
package object_detection.protos;
// Message for defining a preprocessing operation on input data.
// See: //object_detection/core/preprocessor.py
message PreprocessingStep {
oneof preprocessing_step {
NormalizeImage normalize_image = 1;
RandomHorizontalFlip random_horizontal_flip = 2;
RandomPixelValueScale random_pixel_value_scale = 3;
RandomImageScale random_image_scale = 4;
RandomRGBtoGray random_rgb_to_gray = 5;
RandomAdjustBrightness random_adjust_brightness = 6;
RandomAdjustContrast random_adjust_contrast = 7;
RandomAdjustHue random_adjust_hue = 8;
RandomAdjustSaturation random_adjust_saturation = 9;
RandomDistortColor random_distort_color = 10;
RandomJitterBoxes random_jitter_boxes = 11;
RandomCropImage random_crop_image = 12;
RandomPadImage random_pad_image = 13;
RandomCropPadImage random_crop_pad_image = 14;
RandomCropToAspectRatio random_crop_to_aspect_ratio = 15;
RandomBlackPatches random_black_patches = 16;
RandomResizeMethod random_resize_method = 17;
ScaleBoxesToPixelCoordinates scale_boxes_to_pixel_coordinates = 18;
ResizeImage resize_image = 19;
SubtractChannelMean subtract_channel_mean = 20;
SSDRandomCrop ssd_random_crop = 21;
SSDRandomCropPad ssd_random_crop_pad = 22;
SSDRandomCropFixedAspectRatio ssd_random_crop_fixed_aspect_ratio = 23;
}
}
// Normalizes pixel values in an image.
// For every channel in the image, moves the pixel values from the range
// [original_minval, original_maxval] to [target_minval, target_maxval].
message NormalizeImage {
optional float original_minval = 1;
optional float original_maxval = 2;
optional float target_minval = 3 [default=0];
optional float target_maxval = 4 [default=1];
}
// Randomly horizontally mirrors the image and detections 50% of the time.
message RandomHorizontalFlip {
}
// Randomly scales the values of all pixels in the image by a constant value
// between [minval, maxval], then clips the values to the range [0, 1.0].
message RandomPixelValueScale {
optional float minval = 1 [default=0.9];
optional float maxval = 2 [default=1.1];
}
// Randomly enlarges or shrinks the image (keeping aspect ratio).
message RandomImageScale {
optional float min_scale_ratio = 1 [default=0.5];
optional float max_scale_ratio = 2 [default=2.0];
}
// Randomly converts the entire image to grayscale.
message RandomRGBtoGray {
optional float probability = 1 [default=0.1];
}
// Randomly changes image brightness by up to max_delta. Image outputs will be
// saturated between 0 and 1.
message RandomAdjustBrightness {
optional float max_delta = 1 [default=0.2];
}
// Randomly scales contrast by a value between [min_delta, max_delta].
message RandomAdjustContrast {
optional float min_delta = 1 [default=0.8];
optional float max_delta = 2 [default=1.25];
}
// Randomly alters hue by a value of up to max_delta.
message RandomAdjustHue {
optional float max_delta = 1 [default=0.02];
}
// Randomly changes saturation by a value between [min_delta, max_delta].
message RandomAdjustSaturation {
optional float min_delta = 1 [default=0.8];
optional float max_delta = 2 [default=1.25];
}
// Performs a random color distortion. color_ordering should be either 0 or 1.
message RandomDistortColor {
optional int32 color_ordering = 1;
}
// Randomly jitters corners of boxes in the image determined by ratio.
// i.e. If a box is [100, 200] and ratio is 0.02, the corners can move by [2, 4].
message RandomJitterBoxes {
optional float ratio = 1 [default=0.05];
}
// Randomly crops the image and bounding boxes.
message RandomCropImage {
// Cropped image must cover at least one box by this fraction.
optional float min_object_covered = 1 [default=1.0];
// Aspect ratio bounds of cropped image.
optional float min_aspect_ratio = 2 [default=0.75];
optional float max_aspect_ratio = 3 [default=1.33];
// Allowed area ratio of cropped image to original image.
optional float min_area = 4 [default=0.1];
optional float max_area = 5 [default=1.0];
// Minimum overlap threshold of cropped boxes to keep in new image. If the
// ratio between a cropped bounding box and the original is less than this
// value, it is removed from the new image.
optional float overlap_thresh = 6 [default=0.3];
// Probability of keeping the original image.
optional float random_coef = 7 [default=0.0];
}
// Randomly adds padding to the image.
message RandomPadImage {
// Minimum dimensions for padded image. If unset, will use original image
// dimension as a lower bound.
optional float min_image_height = 1;
optional float min_image_width = 2;
// Maximum dimensions for padded image. If unset, will use double the original
// image dimension as an upper bound.
optional float max_image_height = 3;
optional float max_image_width = 4;
// Color of the padding. If unset, will pad using average color of the input
// image.
repeated float pad_color = 5;
}
// Randomly crops an image followed by a random pad.
message RandomCropPadImage {
// Cropping operation must cover at least one box by this fraction.
optional float min_object_covered = 1 [default=1.0];
// Aspect ratio bounds of image after cropping operation.
optional float min_aspect_ratio = 2 [default=0.75];
optional float max_aspect_ratio = 3 [default=1.33];
// Allowed area ratio of image after cropping operation.
optional float min_area = 4 [default=0.1];
optional float max_area = 5 [default=1.0];
// Minimum overlap threshold of cropped boxes to keep in new image. If the
// ratio between a cropped bounding box and the original is less than this
// value, it is removed from the new image.
optional float overlap_thresh = 6 [default=0.3];
// Probability of keeping the original image during the crop operation.
optional float random_coef = 7 [default=0.0];
// Maximum dimensions for padded image. If unset, will use double the original
// image dimension as an upper bound. Both of the following fields should be
// length 2.
repeated float min_padded_size_ratio = 8;
repeated float max_padded_size_ratio = 9;
// Color of the padding. If unset, will pad using average color of the input
// image.
repeated float pad_color = 10;
}
// Randomly crops an image to a given aspect ratio.
message RandomCropToAspectRatio {
// Aspect ratio.
optional float aspect_ratio = 1 [default=1.0];
// Minimum overlap threshold of cropped boxes to keep in new image. If the
// ratio between a cropped bounding box and the original is less than this
// value, it is removed from the new image.
optional float overlap_thresh = 2 [default=0.3];
}
// Randomly adds black square patches to an image.
message RandomBlackPatches {
// The maximum number of black patches to add.
optional int32 max_black_patches = 1 [default=10];
// The probability of a black patch being added to an image.
optional float probability = 2 [default=0.5];
// Ratio between the dimension of the black patch and the minimum dimension of
// the image (patch_width = patch_height = size_to_image_ratio *
// min(image_height, image_width)).
optional float size_to_image_ratio = 3 [default=0.1];
}
// Resizes the image to [target_height, target_width] using a randomly
// selected resize method.
message RandomResizeMethod {
optional float target_height = 1;
optional float target_width = 2;
}
// Scales boxes from normalized coordinates to pixel coordinates.
message ScaleBoxesToPixelCoordinates {
}
// Resizes images to [new_height, new_width].
message ResizeImage {
optional int32 new_height = 1;
optional int32 new_width = 2;
enum Method {
AREA = 1;
BICUBIC = 2;
BILINEAR = 3;
NEAREST_NEIGHBOR = 4;
}
optional Method method = 3 [default=BILINEAR];
}
// Normalizes an image by subtracting a mean from each channel.
message SubtractChannelMean {
// The mean to subtract from each channel. Should have one entry per channel
// in the input image.
repeated float means = 1;
}
message SSDRandomCropOperation {
// Cropped image must cover at least this fraction of one original bounding
// box.
optional float min_object_covered = 1;
// The aspect ratio of the cropped image must be within the range of
// [min_aspect_ratio, max_aspect_ratio].
optional float min_aspect_ratio = 2;
optional float max_aspect_ratio = 3;
// The area of the cropped image must be within the range of
// [min_area, max_area].
optional float min_area = 4;
optional float max_area = 5;
// Cropped box area ratio must be above this threshold to be kept.
optional float overlap_thresh = 6;
// Probability a crop operation is skipped.
optional float random_coef = 7;
}
// Randomly crops an image according to:
// Liu et al., SSD: Single shot multibox detector.
// This preprocessing step defines multiple SSDRandomCropOperations. Only one
// operation (chosen at random) is actually performed on an image.
message SSDRandomCrop {
repeated SSDRandomCropOperation operations = 1;
}
message SSDRandomCropPadOperation {
// Cropped image must cover at least this fraction of one original bounding
// box.
optional float min_object_covered = 1;
// The aspect ratio of the cropped image must be within the range of
// [min_aspect_ratio, max_aspect_ratio].
optional float min_aspect_ratio = 2;
optional float max_aspect_ratio = 3;
// The area of the cropped image must be within the range of
// [min_area, max_area].
optional float min_area = 4;
optional float max_area = 5;
// Cropped box area ratio must be above this threshold to be kept.
optional float overlap_thresh = 6;
// Probability a crop operation is skipped.
optional float random_coef = 7;
// Min ratio of padded image height and width to the input image's height and
// width. Two entries per operation.
repeated float min_padded_size_ratio = 8;
// Max ratio of padded image height and width to the input image's height and
// width. Two entries per operation.
repeated float max_padded_size_ratio = 9;
// Padding color.
optional float pad_color_r = 10;
optional float pad_color_g = 11;
optional float pad_color_b = 12;
}
// Randomly crops and pads an image according to:
// Liu et al., SSD: Single shot multibox detector.
// This preprocessing step defines multiple SSDRandomCropPadOperations. Only one
// operation (chosen at random) is actually performed on an image.
message SSDRandomCropPad {
repeated SSDRandomCropPadOperation operations = 1;
}
message SSDRandomCropFixedAspectRatioOperation {
// Cropped image must cover at least this fraction of one original bounding
// box.
optional float min_object_covered = 1;
// The area of the cropped image must be within the range of
// [min_area, max_area].
optional float min_area = 4;
optional float max_area = 5;
// Cropped box area ratio must be above this threshold to be kept.
optional float overlap_thresh = 6;
// Probability a crop operation is skipped.
optional float random_coef = 7;
}
// Randomly crops an image to a fixed aspect ratio according to:
// Liu et al., SSD: Single shot multibox detector.
// Multiple SSDRandomCropFixedAspectRatioOperations are defined by this
// preprocessing step. Only one operation (chosen at random) is actually
// performed on an image.
message SSDRandomCropFixedAspectRatio {
repeated SSDRandomCropFixedAspectRatioOperation operations = 1;
// Aspect ratio to crop to. This value is used for all crop operations.
optional float aspect_ratio = 2 [default=1.0];
}
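// A sketch of how preprocessing steps are attached to training: each repeated
// data_augmentation_options entry in TrainConfig holds one PreprocessingStep,
// e.g.:
//
//   data_augmentation_options {
//     random_horizontal_flip {
//     }
//   }
//   data_augmentation_options {
//     ssd_random_crop {
//     }
//   }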
syntax = "proto2";
package object_detection.protos;
// Configuration proto for region similarity calculators. See
// core/region_similarity_calculator.py for details.
message RegionSimilarityCalculator {
oneof region_similarity {
NegSqDistSimilarity neg_sq_dist_similarity = 1;
IouSimilarity iou_similarity = 2;
IoaSimilarity ioa_similarity = 3;
}
}
// Configuration for negative squared distance similarity calculator.
message NegSqDistSimilarity {
}
// Configuration for intersection-over-union (IOU) similarity calculator.
message IouSimilarity {
}
// Configuration for intersection-over-area (IOA) similarity calculator.
message IoaSimilarity {
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for SquareBoxCoder. See
// box_coders/square_box_coder.py for details.
message SquareBoxCoder {
// Scale factor for anchor encoded box center.
optional float y_scale = 1 [default = 10.0];
optional float x_scale = 2 [default = 10.0];
// Scale factor for anchor encoded box length.
optional float length_scale = 3 [default = 5.0];
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/anchor_generator.proto";
import "object_detection/protos/box_coder.proto";
import "object_detection/protos/box_predictor.proto";
import "object_detection/protos/hyperparams.proto";
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/matcher.proto";
import "object_detection/protos/losses.proto";
import "object_detection/protos/post_processing.proto";
import "object_detection/protos/region_similarity_calculator.proto";
// Configuration for Single Shot Detection (SSD) models.
message Ssd {
// Number of classes to predict.
optional int32 num_classes = 1;
// Image resizer for preprocessing the input image.
optional ImageResizer image_resizer = 2;
// Feature extractor config.
optional SsdFeatureExtractor feature_extractor = 3;
// Box coder to encode the boxes.
optional BoxCoder box_coder = 4;
// Matcher to match groundtruth with anchors.
optional Matcher matcher = 5;
// Region similarity calculator to compute similarity of boxes.
optional RegionSimilarityCalculator similarity_calculator = 6;
// Box predictor to attach to the features.
optional BoxPredictor box_predictor = 7;
// Anchor generator to compute anchors.
optional AnchorGenerator anchor_generator = 8;
// Post processing to apply on the predictions.
optional PostProcessing post_processing = 9;
// Whether to normalize the loss by number of groundtruth boxes that match to
// the anchors.
optional bool normalize_loss_by_num_matches = 10 [default=true];
// Loss configuration for training.
optional Loss loss = 11;
}
message SsdFeatureExtractor {
// Type of ssd feature extractor.
optional string type = 1;
// The factor to alter the depth of the channels in the feature extractor.
optional float depth_multiplier = 2 [default=1.0];
// Minimum number of the channels in the feature extractor.
optional int32 min_depth = 3 [default=16];
// Hyperparameters for the feature extractor.
optional Hyperparams conv_hyperparams = 4;
}
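// A skeletal SSD model configuration assembled from the messages above. Only
// the structure is shown; '...' marks sub-messages detailed in their own
// protos, and the feature extractor type is one plausible value:
//
//   model {
//     ssd {
//       num_classes: 37
//       image_resizer { ... }
//       feature_extractor {
//         type: 'ssd_mobilenet_v1'
//         min_depth: 16
//         depth_multiplier: 1.0
//         conv_hyperparams { ... }
//       }
//       box_coder { ... }
//       matcher { ... }
//       similarity_calculator { iou_similarity { } }
//       anchor_generator { ... }
//       box_predictor { ... }
//       post_processing { ... }
//       normalize_loss_by_num_matches: true
//       loss { ... }
//     }
//   }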
syntax = "proto2";
package object_detection.protos;
// Configuration proto for SSD anchor generator described in
// https://arxiv.org/abs/1512.02325. See
// anchor_generators/multiple_grid_anchor_generator.py for details.
message SsdAnchorGenerator {
// Number of grid layers to create anchors for.
optional int32 num_layers = 1 [default = 6];
// Scale of anchors corresponding to finest resolution.
optional float min_scale = 2 [default = 0.2];
// Scale of anchors corresponding to coarsest resolution.
optional float max_scale = 3 [default = 0.95];
// Aspect ratios for anchors at each grid point.
repeated float aspect_ratios = 4;
// Whether to use the following aspect ratio and scale combination for the
// layer with the finest resolution: (scale=0.1, aspect_ratio=1.0),
// (scale=min_scale, aspect_ratio=2.0), (scale=min_scale, aspect_ratio=0.5).
optional bool reduce_boxes_in_lowest_layer = 5 [default = true];
}
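// An illustrative SsdAnchorGenerator configuration; the repeated aspect_ratios
// field is written once per ratio:
//
//   anchor_generator {
//     ssd_anchor_generator {
//       num_layers: 6
//       min_scale: 0.2
//       max_scale: 0.95
//       aspect_ratios: 1.0
//       aspect_ratios: 2.0
//       aspect_ratios: 0.5
//       aspect_ratios: 3.0
//       aspect_ratios: 0.3333
//     }
//   }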
syntax = "proto2";
package object_detection.protos;
// Message to store the mapping from class label strings to class id. Datasets
// use string labels to represent classes while the object detection framework
// works with class ids. This message maps them so they can be converted back
// and forth as needed.
message StringIntLabelMapItem {
// String name. The most common practice is to set this to a MID or synsets
// id.
optional string name = 1;
// Integer id that maps to the string name above. Label ids should start from
// 1.
optional int32 id = 2;
// Human readable string label.
optional string display_name = 3;
};
message StringIntLabelMap {
repeated StringIntLabelMapItem item = 1;
};
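// An example label map in text format, as referenced by the label_map_path
// field of InputReader (the names and ids here are illustrative):
//
//   item {
//     name: "/m/01yrx"
//     id: 1
//     display_name: "cat"
//   }
//   item {
//     name: "/m/0bt9lr"
//     id: 2
//     display_name: "dog"
//   }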
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/optimizer.proto";
import "object_detection/protos/preprocessor.proto";
// Message for configuring DetectionModel training jobs (train.py).
message TrainConfig {
// Input queue batch size.
optional uint32 batch_size = 1 [default=32];
// Data augmentation options.
repeated PreprocessingStep data_augmentation_options = 2;
// Whether to synchronize replicas during training.
optional bool sync_replicas = 3 [default=false];
// How frequently to keep checkpoints.
optional uint32 keep_checkpoint_every_n_hours = 4 [default=1000];
// Optimizer used to train the DetectionModel.
optional Optimizer optimizer = 5;
// If greater than 0, clips gradients by this value.
optional float gradient_clipping_by_norm = 6 [default=0.0];
// Checkpoint to restore variables from. Typically used to load feature
// extractor variables trained outside of object detection.
optional string fine_tune_checkpoint = 7 [default=""];
// Specifies if the finetune checkpoint is from an object detection model.
// If from an object detection model, the model being trained should have
// the same parameters with the exception of the num_classes parameter.
// If false, it assumes the checkpoint was from an object classification model.
optional bool from_detection_checkpoint = 8 [default=false];
// Number of steps to train the DetectionModel for. If 0, will train the model
// indefinitely.
optional uint32 num_steps = 9 [default=0];
// Number of training steps between replica startup.
// This flag must be set to 0 if sync_replicas is set to true.
optional float startup_delay_steps = 10 [default=15];
// If greater than 0, multiplies the gradient of bias variables by this
// amount.
optional float bias_grad_multiplier = 11 [default=0];
// Variables that should not be updated during training.
repeated string freeze_variables = 12;
// Number of replicas to aggregate before making parameter updates.
optional int32 replicas_to_aggregate = 13 [default=1];
// Maximum number of elements to store within a queue.
optional int32 batch_queue_capacity = 14 [default=600];
// Number of threads to use for batching.
optional int32 num_batch_queue_threads = 15 [default=8];
// Maximum capacity of the queue used to prefetch assembled batches.
optional int32 prefetch_queue_capacity = 16 [default=10];
}
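// A sketch of a synchronous-training TrainConfig; values are illustrative.
// Note that startup_delay_steps must be 0 when sync_replicas is true:
//
//   train_config: {
//     batch_size: 32
//     sync_replicas: true
//     startup_delay_steps: 0
//     replicas_to_aggregate: 8
//     optimizer { ... }
//     fine_tune_checkpoint: "/path/to/model.ckpt"
//     from_detection_checkpoint: true
//   }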
trainingInput:
  runtimeVersion: "1.0"
  scaleTier: CUSTOM
  masterType: standard_gpu
  workerCount: 5
  workerType: standard_gpu
  parameterServerCount: 3
  parameterServerType: standard
# Faster R-CNN with Inception Resnet v2, Atrous version;
# Configured for Oxford-IIIT Pets Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
faster_rcnn {
num_classes: 37
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_inception_resnet_v2'
first_stage_features_stride: 8
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 8
width_stride: 8
}
}
first_stage_atrous_rate: 2
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 17
maxpool_kernel_size: 1
maxpool_stride: 1
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
}
}
train_config: {
batch_size: 1
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0003
schedule {
step: 0
learning_rate: .0003
}
schedule {
step: 900000
learning_rate: .00003
}
schedule {
step: 1200000
learning_rate: .000003
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
from_detection_checkpoint: true
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
eval_config: {
num_examples: 2000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
shuffle: false
num_readers: 1
}
# Faster R-CNN with Resnet-101 (v1) configured for the Oxford-IIIT Pet Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
faster_rcnn {
num_classes: 37
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_resnet101'
first_stage_features_stride: 16
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
}
}
train_config: {
batch_size: 1
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0003
schedule {
step: 0
learning_rate: .0003
}
schedule {
step: 900000
learning_rate: .00003
}
schedule {
step: 1200000
learning_rate: .000003
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
from_detection_checkpoint: true
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
eval_config: {
num_examples: 2000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
shuffle: false
num_readers: 1
}
# Faster R-CNN with Resnet-101 (v1), configured for Pascal VOC Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
faster_rcnn {
num_classes: 20
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_resnet101'
first_stage_features_stride: 16
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
}
}
train_config: {
batch_size: 1
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0001
schedule {
step: 0
learning_rate: .0001
}
schedule {
step: 500000
learning_rate: .00001
}
schedule {
step: 700000
learning_rate: .000001
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
from_detection_checkpoint: true
num_steps: 800000
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pascal_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pascal_label_map.pbtxt"
}
eval_config: {
num_examples: 4952
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pascal_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pascal_label_map.pbtxt"
shuffle: false
num_readers: 1
}
# Faster R-CNN with Resnet-152 (v1), configured for Oxford-IIIT Pets Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
faster_rcnn {
num_classes: 37
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_resnet152'
first_stage_features_stride: 16
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
}
}
train_config: {
batch_size: 1
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0003
schedule {
step: 0
learning_rate: .0003
}
schedule {
step: 900000
learning_rate: .00003
}
schedule {
step: 1200000
learning_rate: .000003
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
from_detection_checkpoint: true
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
eval_config: {
num_examples: 2000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
shuffle: false
num_readers: 1
}