Commit f282f6ef authored by Alexander Gorban

Merge branch 'master' of github.com:tensorflow/models

parents 58a5da7b a2970b03
syntax = "proto2";
package object_detection.protos;
// Configuration proto for defining input readers that generate Object Detection
// Examples from input sources. Input readers are expected to generate a
// dictionary of tensors, with the following fields populated:
//
// 'image': an [image_height, image_width, channels] image tensor that detection
// will be run on.
// 'groundtruth_classes': a [num_boxes] int32 tensor storing the class
// labels of detected boxes in the image.
// 'groundtruth_boxes': a [num_boxes, 4] float tensor storing the coordinates of
// detected boxes in the image.
// 'groundtruth_instance_masks': (Optional), a [num_boxes, image_height,
// image_width] float tensor storing binary masks of the objects in boxes.
message InputReader {
// Path to StringIntLabelMap pbtxt file specifying the mapping from string
// labels to integer ids.
optional string label_map_path = 1 [default=""];
// Whether the data should be processed in the order it is read in, or
// shuffled randomly.
optional bool shuffle = 2 [default=true];
// Maximum number of records to keep in reader queue.
optional uint32 queue_capacity = 3 [default=2000];
// Minimum number of records to keep in reader queue. A large value is needed
// to generate a good random shuffle.
optional uint32 min_after_dequeue = 4 [default=1000];
// The number of times a data source is read. If set to zero, the data source
// will be reused indefinitely.
optional uint32 num_epochs = 5 [default=0];
// Number of reader instances to create.
optional uint32 num_readers = 6 [default=8];
// Whether to load groundtruth instance masks.
optional bool load_instance_masks = 7 [default = false];
oneof input_reader {
TFRecordInputReader tf_record_input_reader = 8;
ExternalInputReader external_input_reader = 9;
}
}
// An input reader that reads TF Example protos from local TFRecord files.
message TFRecordInputReader {
// Path to TFRecordFile.
optional string input_path = 1 [default=""];
}
// An externally defined input reader. Users may define an extension to this
// proto to interface their own input readers.
message ExternalInputReader {
extensions 1 to 999;
}
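// A minimal example of an InputReader configuration in text format (the paths
// here are placeholders to be replaced by the user):
//
//   train_input_reader: {
//     tf_record_input_reader {
//       input_path: "/path/to/train.record"
//     }
//     label_map_path: "/path/to/label_map.pbtxt"
//     shuffle: true
//     num_readers: 8
//   }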
syntax = "proto2";
package object_detection.protos;
// Message for configuring the localization loss, classification loss and hard
// example miner used for training object detection models. See core/losses.py
// for details.
message Loss {
// Localization loss to use.
optional LocalizationLoss localization_loss = 1;
// Classification loss to use.
optional ClassificationLoss classification_loss = 2;
// If not left to default, applies hard example mining.
optional HardExampleMiner hard_example_miner = 3;
// Classification loss weight.
optional float classification_weight = 4 [default=1.0];
// Localization loss weight.
optional float localization_weight = 5 [default=1.0];
}
// Configuration for bounding box localization loss function.
message LocalizationLoss {
oneof localization_loss {
WeightedL2LocalizationLoss weighted_l2 = 1;
WeightedSmoothL1LocalizationLoss weighted_smooth_l1 = 2;
WeightedIOULocalizationLoss weighted_iou = 3;
}
}
// L2 location loss: 0.5 * ||weight * (a - b)|| ^ 2
message WeightedL2LocalizationLoss {
// Output loss per anchor.
optional bool anchorwise_output = 1 [default=false];
}
// SmoothL1 (Huber) location loss: .5 * x ^ 2 if |x| < 1 else |x| - .5
message WeightedSmoothL1LocalizationLoss {
// Output loss per anchor.
optional bool anchorwise_output = 1 [default=false];
}
// Intersection over union location loss: 1 - IOU
message WeightedIOULocalizationLoss {
}
// Configuration for class prediction loss function.
message ClassificationLoss {
oneof classification_loss {
WeightedSigmoidClassificationLoss weighted_sigmoid = 1;
WeightedSoftmaxClassificationLoss weighted_softmax = 2;
BootstrappedSigmoidClassificationLoss bootstrapped_sigmoid = 3;
}
}
// Classification loss using a sigmoid function over class predictions.
message WeightedSigmoidClassificationLoss {
// Output loss per anchor.
optional bool anchorwise_output = 1 [default=false];
}
// Classification loss using a softmax function over class predictions.
message WeightedSoftmaxClassificationLoss {
// Output loss per anchor.
optional bool anchorwise_output = 1 [default=false];
}
// Classification loss using a sigmoid function over the class prediction with
// the highest prediction score.
message BootstrappedSigmoidClassificationLoss {
// Interpolation weight between 0 and 1.
optional float alpha = 1;
// Whether hard bootstrapping should be used or not. If true, will only use
// the one class favored by the model. Otherwise, will use all predicted class
// probabilities.
optional bool hard_bootstrap = 2 [default=false];
// Output loss per anchor.
optional bool anchorwise_output = 3 [default=false];
}
// Configuration for hard example miner.
message HardExampleMiner {
// Maximum number of hard examples to be selected per image (prior to
// enforcing max negative to positive ratio constraint). If set to 0,
// all examples obtained after NMS are considered.
optional int32 num_hard_examples = 1 [default=64];
// Minimum intersection over union for an example to be discarded during NMS.
optional float iou_threshold = 2 [default=0.7];
// Whether to use classification losses ('cls', default), localization losses
// ('loc') or both losses ('both'). In the case of 'both', cls_loss_weight and
// loc_loss_weight are used to compute weighted sum of the two losses.
enum LossType {
BOTH = 0;
CLASSIFICATION = 1;
LOCALIZATION = 2;
}
optional LossType loss_type = 3 [default=BOTH];
// Maximum number of negatives to retain for each positive anchor. If
// num_negatives_per_positive is 0 no prespecified negative:positive ratio is
// enforced.
optional int32 max_negatives_per_positive = 4 [default=0];
// Minimum number of negative anchors to sample for a given image. Setting
// this to a positive number allows sampling negatives in an image without any
// positive anchors, and thus avoids biasing the model towards having at least
// one detection per image.
optional int32 min_negatives_per_image = 5 [default=0];
}
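// An illustrative Loss configuration exercising the messages above (the
// specific values are examples, not recommendations):
//
//   loss {
//     localization_loss {
//       weighted_smooth_l1 {
//       }
//     }
//     classification_loss {
//       weighted_sigmoid {
//       }
//     }
//     hard_example_miner {
//       num_hard_examples: 3000
//       iou_threshold: 0.99
//       loss_type: CLASSIFICATION
//       max_negatives_per_positive: 3
//     }
//     classification_weight: 1.0
//     localization_weight: 1.0
//   }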
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/argmax_matcher.proto";
import "object_detection/protos/bipartite_matcher.proto";
// Configuration proto for the matcher to be used in the object detection
// pipeline. See core/matcher.py for details.
message Matcher {
oneof matcher_oneof {
ArgMaxMatcher argmax_matcher = 1;
BipartiteMatcher bipartite_matcher = 2;
}
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for MeanStddevBoxCoder. See
// box_coders/mean_stddev_box_coder.py for details.
message MeanStddevBoxCoder {
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/faster_rcnn.proto";
import "object_detection/protos/ssd.proto";
// Top level configuration for DetectionModels.
message DetectionModel {
oneof model {
FasterRcnn faster_rcnn = 1;
Ssd ssd = 2;
}
}
syntax = "proto2";
package object_detection.protos;
// Messages for configuring the optimizing strategy for training object
// detection models.
// Top level optimizer message.
message Optimizer {
oneof optimizer {
RMSPropOptimizer rms_prop_optimizer = 1;
MomentumOptimizer momentum_optimizer = 2;
AdamOptimizer adam_optimizer = 3;
}
optional bool use_moving_average = 4 [default=true];
optional float moving_average_decay = 5 [default=0.9999];
}
// Configuration message for the RMSPropOptimizer
// See: https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
message RMSPropOptimizer {
optional LearningRate learning_rate = 1;
optional float momentum_optimizer_value = 2 [default=0.9];
optional float decay = 3 [default=0.9];
optional float epsilon = 4 [default=1.0];
}
// Configuration message for the MomentumOptimizer
// See: https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer
message MomentumOptimizer {
optional LearningRate learning_rate = 1;
optional float momentum_optimizer_value = 2 [default=0.9];
}
// Configuration message for the AdamOptimizer
// See: https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer
message AdamOptimizer {
optional LearningRate learning_rate = 1;
}
// Configuration message for optimizer learning rate.
message LearningRate {
oneof learning_rate {
ConstantLearningRate constant_learning_rate = 1;
ExponentialDecayLearningRate exponential_decay_learning_rate = 2;
ManualStepLearningRate manual_step_learning_rate = 3;
}
}
// Configuration message for a constant learning rate.
message ConstantLearningRate {
optional float learning_rate = 1 [default=0.002];
}
// Configuration message for an exponentially decaying learning rate.
// See https://www.tensorflow.org/versions/master/api_docs/python/train/ \
// decaying_the_learning_rate#exponential_decay
message ExponentialDecayLearningRate {
optional float initial_learning_rate = 1 [default=0.002];
optional uint32 decay_steps = 2 [default=4000000];
optional float decay_factor = 3 [default=0.95];
optional bool staircase = 4 [default=true];
}
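// An illustrative optimizer configuration combining RMSPropOptimizer with an
// exponentially decaying learning rate (all values are examples only):
//
//   optimizer {
//     rms_prop_optimizer: {
//       learning_rate: {
//         exponential_decay_learning_rate {
//           initial_learning_rate: 0.004
//           decay_steps: 800720
//           decay_factor: 0.95
//         }
//       }
//       momentum_optimizer_value: 0.9
//       decay: 0.9
//       epsilon: 1.0
//     }
//     use_moving_average: false
//   }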
// Configuration message for a manually defined learning rate schedule.
message ManualStepLearningRate {
optional float initial_learning_rate = 1 [default=0.002];
message LearningRateSchedule {
optional uint32 step = 1;
optional float learning_rate = 2 [default=0.002];
}
repeated LearningRateSchedule schedule = 2;
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/eval.proto";
import "object_detection/protos/input_reader.proto";
import "object_detection/protos/model.proto";
import "object_detection/protos/train.proto";
// Convenience message for configuring a training and eval pipeline. Allows all
// of the pipeline parameters to be configured from one file.
message TrainEvalPipelineConfig {
optional DetectionModel model = 1;
optional TrainConfig train_config = 2;
optional InputReader train_input_reader = 3;
optional EvalConfig eval_config = 4;
optional InputReader eval_input_reader = 5;
}
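// The top-level layout of a pipeline config file implied by this message;
// complete worked examples appear later in this commit:
//
//   model { ... }
//   train_config: { ... }
//   train_input_reader: { ... }
//   eval_config: { ... }
//   eval_input_reader: { ... }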
syntax = "proto2";
package object_detection.protos;
// Configuration proto for non-max-suppression operation on a batch of
// detections.
message BatchNonMaxSuppression {
// Scalar threshold for score (low scoring boxes are removed).
optional float score_threshold = 1 [default = 0.0];
// Scalar threshold for IOU (boxes that have high IOU overlap
// with previously selected boxes are removed).
optional float iou_threshold = 2 [default = 0.6];
// Maximum number of detections to retain per class.
optional int32 max_detections_per_class = 3 [default = 100];
// Maximum number of detections to retain across all classes.
optional int32 max_total_detections = 5 [default = 100];
}
// Configuration proto for post-processing predicted boxes and
// scores.
message PostProcessing {
// Non max suppression parameters.
optional BatchNonMaxSuppression batch_non_max_suppression = 1;
// Enum to specify how to convert the detection scores.
enum ScoreConverter {
// Input scores equal output scores.
IDENTITY = 0;
// Applies a sigmoid on input scores.
SIGMOID = 1;
// Applies a softmax on input scores.
SOFTMAX = 2;
}
// Score converter to use.
optional ScoreConverter score_converter = 2 [default = IDENTITY];
}
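// A sketch of a PostProcessing configuration (the thresholds here are
// illustrative):
//
//   post_processing {
//     batch_non_max_suppression {
//       score_threshold: 1e-8
//       iou_threshold: 0.6
//       max_detections_per_class: 100
//       max_total_detections: 100
//     }
//     score_converter: SIGMOID
//   }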
syntax = "proto2";
package object_detection.protos;
// Message for defining a preprocessing operation on input data.
// See: //object_detection/core/preprocessor.py
message PreprocessingStep {
oneof preprocessing_step {
NormalizeImage normalize_image = 1;
RandomHorizontalFlip random_horizontal_flip = 2;
RandomPixelValueScale random_pixel_value_scale = 3;
RandomImageScale random_image_scale = 4;
RandomRGBtoGray random_rgb_to_gray = 5;
RandomAdjustBrightness random_adjust_brightness = 6;
RandomAdjustContrast random_adjust_contrast = 7;
RandomAdjustHue random_adjust_hue = 8;
RandomAdjustSaturation random_adjust_saturation = 9;
RandomDistortColor random_distort_color = 10;
RandomJitterBoxes random_jitter_boxes = 11;
RandomCropImage random_crop_image = 12;
RandomPadImage random_pad_image = 13;
RandomCropPadImage random_crop_pad_image = 14;
RandomCropToAspectRatio random_crop_to_aspect_ratio = 15;
RandomBlackPatches random_black_patches = 16;
RandomResizeMethod random_resize_method = 17;
ScaleBoxesToPixelCoordinates scale_boxes_to_pixel_coordinates = 18;
ResizeImage resize_image = 19;
SubtractChannelMean subtract_channel_mean = 20;
SSDRandomCrop ssd_random_crop = 21;
SSDRandomCropPad ssd_random_crop_pad = 22;
SSDRandomCropFixedAspectRatio ssd_random_crop_fixed_aspect_ratio = 23;
}
}
// Normalizes pixel values in an image.
// For every channel in the image, moves the pixel values from the range
// [original_minval, original_maxval] to [target_minval, target_maxval].
message NormalizeImage {
optional float original_minval = 1;
optional float original_maxval = 2;
optional float target_minval = 3 [default=0];
optional float target_maxval = 4 [default=1];
}
// Randomly horizontally mirrors the image and detections 50% of the time.
message RandomHorizontalFlip {
}
// Randomly scales the values of all pixels in the image by a constant value
// between [minval, maxval], then clips the values to the range [0, 1.0].
message RandomPixelValueScale {
optional float minval = 1 [default=0.9];
optional float maxval = 2 [default=1.1];
}
// Randomly enlarges or shrinks the image (keeping aspect ratio).
message RandomImageScale {
optional float min_scale_ratio = 1 [default=0.5];
optional float max_scale_ratio = 2 [default=2.0];
}
// Randomly converts the entire image to grayscale.
message RandomRGBtoGray {
optional float probability = 1 [default=0.1];
}
// Randomly changes image brightness by up to max_delta. Image outputs will be
// saturated between 0 and 1.
message RandomAdjustBrightness {
optional float max_delta = 1 [default=0.2];
}
// Randomly scales contrast by a value between [min_delta, max_delta].
message RandomAdjustContrast {
optional float min_delta = 1 [default=0.8];
optional float max_delta = 2 [default=1.25];
}
// Randomly alters hue by a value of up to max_delta.
message RandomAdjustHue {
optional float max_delta = 1 [default=0.02];
}
// Randomly changes saturation by a value between [min_delta, max_delta].
message RandomAdjustSaturation {
optional float min_delta = 1 [default=0.8];
optional float max_delta = 2 [default=1.25];
}
// Performs a random color distortion. color_ordering should be either 0 or 1.
message RandomDistortColor {
optional int32 color_ordering = 1;
}
// Randomly jitters corners of boxes in the image determined by ratio.
// i.e. If a box is [100, 200] and ratio is 0.02, the corners can move by [2, 4].
message RandomJitterBoxes {
optional float ratio = 1 [default=0.05];
}
// Randomly crops the image and bounding boxes.
message RandomCropImage {
// Cropped image must cover at least one box by this fraction.
optional float min_object_covered = 1 [default=1.0];
// Aspect ratio bounds of cropped image.
optional float min_aspect_ratio = 2 [default=0.75];
optional float max_aspect_ratio = 3 [default=1.33];
// Allowed area ratio of cropped image to original image.
optional float min_area = 4 [default=0.1];
optional float max_area = 5 [default=1.0];
// Minimum overlap threshold of cropped boxes to keep in new image. If the
// ratio between a cropped bounding box and the original is less than this
// value, it is removed from the new image.
optional float overlap_thresh = 6 [default=0.3];
// Probability of keeping the original image.
optional float random_coef = 7 [default=0.0];
}
// Randomly adds padding to the image.
message RandomPadImage {
// Minimum dimensions for padded image. If unset, will use original image
// dimension as a lower bound.
optional float min_image_height = 1;
optional float min_image_width = 2;
// Maximum dimensions for padded image. If unset, will use double the original
// image dimension as an upper bound.
optional float max_image_height = 3;
optional float max_image_width = 4;
// Color of the padding. If unset, will pad using average color of the input
// image.
repeated float pad_color = 5;
}
// Randomly crops an image followed by a random pad.
message RandomCropPadImage {
// Cropping operation must cover at least one box by this fraction.
optional float min_object_covered = 1 [default=1.0];
// Aspect ratio bounds of image after cropping operation.
optional float min_aspect_ratio = 2 [default=0.75];
optional float max_aspect_ratio = 3 [default=1.33];
// Allowed area ratio of image after cropping operation.
optional float min_area = 4 [default=0.1];
optional float max_area = 5 [default=1.0];
// Minimum overlap threshold of cropped boxes to keep in new image. If the
// ratio between a cropped bounding box and the original is less than this
// value, it is removed from the new image.
optional float overlap_thresh = 6 [default=0.3];
// Probability of keeping the original image during the crop operation.
optional float random_coef = 7 [default=0.0];
// Maximum dimensions for padded image. If unset, will use double the original
// image dimension as an upper bound. Both of the following fields should be
// length 2.
repeated float min_padded_size_ratio = 8;
repeated float max_padded_size_ratio = 9;
// Color of the padding. If unset, will pad using average color of the input
// image.
repeated float pad_color = 10;
}
// Randomly crops an image to a given aspect ratio.
message RandomCropToAspectRatio {
// Aspect ratio.
optional float aspect_ratio = 1 [default=1.0];
// Minimum overlap threshold of cropped boxes to keep in new image. If the
// ratio between a cropped bounding box and the original is less than this
// value, it is removed from the new image.
optional float overlap_thresh = 2 [default=0.3];
}
// Randomly adds black square patches to an image.
message RandomBlackPatches {
// The maximum number of black patches to add.
optional int32 max_black_patches = 1 [default=10];
// The probability of a black patch being added to an image.
optional float probability = 2 [default=0.5];
// Ratio between the dimension of the black patch and the minimum dimension of
// the image (patch_width = patch_height = size_to_image_ratio *
// min(image_height, image_width)).
optional float size_to_image_ratio = 3 [default=0.1];
}
// Resizes the image to [target_height, target_width] using a randomly
// selected resize method.
message RandomResizeMethod {
optional float target_height = 1;
optional float target_width = 2;
}
// Scales boxes from normalized coordinates to pixel coordinates.
message ScaleBoxesToPixelCoordinates {
}
// Resizes images to [new_height, new_width].
message ResizeImage {
optional int32 new_height = 1;
optional int32 new_width = 2;
enum Method {
AREA = 1;
BICUBIC = 2;
BILINEAR = 3;
NEAREST_NEIGHBOR = 4;
}
optional Method method = 3 [default=BILINEAR];
}
// Normalizes an image by subtracting a mean from each channel.
message SubtractChannelMean {
// The mean to subtract from each channel. Should have one entry per channel
// in the input image.
repeated float means = 1;
}
message SSDRandomCropOperation {
// Cropped image must cover at least this fraction of one original bounding
// box.
optional float min_object_covered = 1;
// The aspect ratio of the cropped image must be within the range of
// [min_aspect_ratio, max_aspect_ratio].
optional float min_aspect_ratio = 2;
optional float max_aspect_ratio = 3;
// The area of the cropped image must be within the range of
// [min_area, max_area].
optional float min_area = 4;
optional float max_area = 5;
// Cropped box area ratio must be above this threshold to be kept.
optional float overlap_thresh = 6;
// Probability a crop operation is skipped.
optional float random_coef = 7;
}
// Randomly crops an image according to:
// Liu et al., SSD: Single shot multibox detector.
// This preprocessing step defines multiple SSDRandomCropOperations. Only one
// operation (chosen at random) is actually performed on an image.
message SSDRandomCrop {
repeated SSDRandomCropOperation operations = 1;
}
message SSDRandomCropPadOperation {
// Cropped image must cover at least this fraction of one original bounding
// box.
optional float min_object_covered = 1;
// The aspect ratio of the cropped image must be within the range of
// [min_aspect_ratio, max_aspect_ratio].
optional float min_aspect_ratio = 2;
optional float max_aspect_ratio = 3;
// The area of the cropped image must be within the range of
// [min_area, max_area].
optional float min_area = 4;
optional float max_area = 5;
// Cropped box area ratio must be above this threshold to be kept.
optional float overlap_thresh = 6;
// Probability a crop operation is skipped.
optional float random_coef = 7;
// Min ratio of padded image height and width to the input image's height and
// width. Two entries per operation.
repeated float min_padded_size_ratio = 8;
// Max ratio of padded image height and width to the input image's height and
// width. Two entries per operation.
repeated float max_padded_size_ratio = 9;
// Padding color.
optional float pad_color_r = 10;
optional float pad_color_g = 11;
optional float pad_color_b = 12;
}
// Randomly crops and pads an image according to:
// Liu et al., SSD: Single shot multibox detector.
// This preprocessing step defines multiple SSDRandomCropPadOperations. Only one
// operation (chosen at random) is actually performed on an image.
message SSDRandomCropPad {
repeated SSDRandomCropPadOperation operations = 1;
}
message SSDRandomCropFixedAspectRatioOperation {
// Cropped image must cover at least this fraction of one original bounding
// box.
optional float min_object_covered = 1;
// The area of the cropped image must be within the range of
// [min_area, max_area].
optional float min_area = 4;
optional float max_area = 5;
// Cropped box area ratio must be above this threshold to be kept.
optional float overlap_thresh = 6;
// Probability a crop operation is skipped.
optional float random_coef = 7;
}
// Randomly crops an image to a fixed aspect ratio according to:
// Liu et al., SSD: Single shot multibox detector.
// Multiple SSDRandomCropFixedAspectRatioOperations are defined by this
// preprocessing step. Only one operation (chosen at random) is actually
// performed on an image.
message SSDRandomCropFixedAspectRatio {
repeated SSDRandomCropFixedAspectRatioOperation operations = 1;
// Aspect ratio to crop to. This value is used for all crop operations.
optional float aspect_ratio = 2 [default=1.0];
}
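// A sketch of how preprocessing steps are attached to training: each repeated
// data_augmentation_options entry in TrainConfig holds one PreprocessingStep,
// e.g.:
//
//   data_augmentation_options {
//     random_horizontal_flip {
//     }
//   }
//   data_augmentation_options {
//     ssd_random_crop {
//     }
//   }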
syntax = "proto2";
package object_detection.protos;
// Configuration proto for region similarity calculators. See
// core/region_similarity_calculator.py for details.
message RegionSimilarityCalculator {
oneof region_similarity {
NegSqDistSimilarity neg_sq_dist_similarity = 1;
IouSimilarity iou_similarity = 2;
IoaSimilarity ioa_similarity = 3;
}
}
// Configuration for negative squared distance similarity calculator.
message NegSqDistSimilarity {
}
// Configuration for intersection-over-union (IOU) similarity calculator.
message IouSimilarity {
}
// Configuration for intersection-over-area (IOA) similarity calculator.
message IoaSimilarity {
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for SquareBoxCoder. See
// box_coders/square_box_coder.py for details.
message SquareBoxCoder {
// Scale factor for anchor encoded box center.
optional float y_scale = 1 [default = 10.0];
optional float x_scale = 2 [default = 10.0];
// Scale factor for anchor encoded box length.
optional float length_scale = 3 [default = 5.0];
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/anchor_generator.proto";
import "object_detection/protos/box_coder.proto";
import "object_detection/protos/box_predictor.proto";
import "object_detection/protos/hyperparams.proto";
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/matcher.proto";
import "object_detection/protos/losses.proto";
import "object_detection/protos/post_processing.proto";
import "object_detection/protos/region_similarity_calculator.proto";
// Configuration for Single Shot Detection (SSD) models.
message Ssd {
// Number of classes to predict.
optional int32 num_classes = 1;
// Image resizer for preprocessing the input image.
optional ImageResizer image_resizer = 2;
// Feature extractor config.
optional SsdFeatureExtractor feature_extractor = 3;
// Box coder to encode the boxes.
optional BoxCoder box_coder = 4;
// Matcher to match groundtruth with anchors.
optional Matcher matcher = 5;
// Region similarity calculator to compute similarity of boxes.
optional RegionSimilarityCalculator similarity_calculator = 6;
// Box predictor to attach to the features.
optional BoxPredictor box_predictor = 7;
// Anchor generator to compute anchors.
optional AnchorGenerator anchor_generator = 8;
// Post processing to apply on the predictions.
optional PostProcessing post_processing = 9;
// Whether to normalize the loss by number of groundtruth boxes that match to
// the anchors.
optional bool normalize_loss_by_num_matches = 10 [default=true];
// Loss configuration for training.
optional Loss loss = 11;
}
message SsdFeatureExtractor {
// Type of ssd feature extractor.
optional string type = 1;
// The factor to alter the depth of the channels in the feature extractor.
optional float depth_multiplier = 2 [default=1.0];
// Minimum number of the channels in the feature extractor.
optional int32 min_depth = 3 [default=16];
// Hyperparameters for the feature extractor.
optional Hyperparams conv_hyperparams = 4;
}
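// A skeletal SSD model configuration assembled from the messages above. Only
// the structure is shown; '...' marks sub-messages detailed in their own
// protos, and the feature extractor type is one plausible value:
//
//   model {
//     ssd {
//       num_classes: 37
//       image_resizer { ... }
//       feature_extractor {
//         type: 'ssd_mobilenet_v1'
//         min_depth: 16
//         depth_multiplier: 1.0
//         conv_hyperparams { ... }
//       }
//       box_coder { ... }
//       matcher { ... }
//       similarity_calculator { iou_similarity { } }
//       anchor_generator { ... }
//       box_predictor { ... }
//       post_processing { ... }
//       normalize_loss_by_num_matches: true
//       loss { ... }
//     }
//   }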
syntax = "proto2";
package object_detection.protos;
// Configuration proto for SSD anchor generator described in
// https://arxiv.org/abs/1512.02325. See
// anchor_generators/multiple_grid_anchor_generator.py for details.
message SsdAnchorGenerator {
// Number of grid layers to create anchors for.
optional int32 num_layers = 1 [default = 6];
// Scale of anchors corresponding to finest resolution.
optional float min_scale = 2 [default = 0.2];
// Scale of anchors corresponding to coarsest resolution.
optional float max_scale = 3 [default = 0.95];
// Aspect ratios for anchors at each grid point.
repeated float aspect_ratios = 4;
// Whether to use the following aspect ratio and scale combination for the
// layer with the finest resolution: (scale=0.1, aspect_ratio=1.0),
// (scale=min_scale, aspect_ratio=2.0), (scale=min_scale, aspect_ratio=0.5).
optional bool reduce_boxes_in_lowest_layer = 5 [default = true];
}
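// An illustrative SsdAnchorGenerator configuration; the repeated aspect_ratios
// field is written once per ratio:
//
//   anchor_generator {
//     ssd_anchor_generator {
//       num_layers: 6
//       min_scale: 0.2
//       max_scale: 0.95
//       aspect_ratios: 1.0
//       aspect_ratios: 2.0
//       aspect_ratios: 0.5
//       aspect_ratios: 3.0
//       aspect_ratios: 0.3333
//     }
//   }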
syntax = "proto2";
package object_detection.protos;
// Message to store the mapping from class label strings to class id. Datasets
// use string labels to represent classes while the object detection framework
// works with class ids. This message maps them so they can be converted back
// and forth as needed.
message StringIntLabelMapItem {
// String name. The most common practice is to set this to a MID or synsets
// id.
optional string name = 1;
// Integer id that maps to the string name above. Label ids should start from
// 1.
optional int32 id = 2;
// Human readable string label.
optional string display_name = 3;
};
message StringIntLabelMap {
repeated StringIntLabelMapItem item = 1;
};
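// An example label map in text format, as referenced by the label_map_path
// field of InputReader (the names and ids here are illustrative):
//
//   item {
//     name: "/m/01yrx"
//     id: 1
//     display_name: "cat"
//   }
//   item {
//     name: "/m/0bt9lr"
//     id: 2
//     display_name: "dog"
//   }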
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/optimizer.proto";
import "object_detection/protos/preprocessor.proto";
// Message for configuring DetectionModel training jobs (train.py).
message TrainConfig {
// Input queue batch size.
optional uint32 batch_size = 1 [default=32];
// Data augmentation options.
repeated PreprocessingStep data_augmentation_options = 2;
// Whether to synchronize replicas during training.
optional bool sync_replicas = 3 [default=false];
// How frequently to keep checkpoints.
optional uint32 keep_checkpoint_every_n_hours = 4 [default=1000];
// Optimizer used to train the DetectionModel.
optional Optimizer optimizer = 5;
// If greater than 0, clips gradients by this value.
optional float gradient_clipping_by_norm = 6 [default=0.0];
// Checkpoint to restore variables from. Typically used to load feature
// extractor variables trained outside of object detection.
optional string fine_tune_checkpoint = 7 [default=""];
// Specifies if the finetune checkpoint is from an object detection model.
// If from an object detection model, the model being trained should have
// the same parameters with the exception of the num_classes parameter.
// If false, it assumes the checkpoint was from an object classification model.
optional bool from_detection_checkpoint = 8 [default=false];
// Number of steps to train the DetectionModel for. If 0, will train the model
// indefinitely.
optional uint32 num_steps = 9 [default=0];
// Number of training steps between replica startup.
// This flag must be set to 0 if sync_replicas is set to true.
optional float startup_delay_steps = 10 [default=15];
// If greater than 0, multiplies the gradient of bias variables by this
// amount.
optional float bias_grad_multiplier = 11 [default=0];
// Variables that should not be updated during training.
repeated string freeze_variables = 12;
// Number of replicas to aggregate before making parameter updates.
optional int32 replicas_to_aggregate = 13 [default=1];
// Maximum number of elements to store within a queue.
optional int32 batch_queue_capacity = 14 [default=600];
// Number of threads to use for batching.
optional int32 num_batch_queue_threads = 15 [default=8];
// Maximum capacity of the queue used to prefetch assembled batches.
optional int32 prefetch_queue_capacity = 16 [default=10];
}
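// A sketch of a synchronous-training TrainConfig; values are illustrative.
// Note that startup_delay_steps must be 0 when sync_replicas is true:
//
//   train_config: {
//     batch_size: 32
//     sync_replicas: true
//     startup_delay_steps: 0
//     replicas_to_aggregate: 8
//     optimizer { ... }
//     fine_tune_checkpoint: "/path/to/model.ckpt"
//     from_detection_checkpoint: true
//   }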
trainingInput:
  runtimeVersion: "1.0"
  scaleTier: CUSTOM
  masterType: standard_gpu
  workerCount: 5
  workerType: standard_gpu
  parameterServerCount: 3
  parameterServerType: standard
# Faster R-CNN with Inception Resnet v2, Atrous version;
# Configured for Oxford-IIIT Pets Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
faster_rcnn {
num_classes: 37
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_inception_resnet_v2'
first_stage_features_stride: 8
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 8
width_stride: 8
}
}
first_stage_atrous_rate: 2
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 17
maxpool_kernel_size: 1
maxpool_stride: 1
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
}
}
train_config: {
batch_size: 1
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0003
schedule {
step: 0
learning_rate: .0003
}
schedule {
step: 900000
learning_rate: .00003
}
schedule {
step: 1200000
learning_rate: .000003
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
from_detection_checkpoint: true
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
eval_config: {
num_examples: 2000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
shuffle: false
num_readers: 1
}
# Faster R-CNN with Resnet-101 (v1) configured for the Oxford-IIIT Pet Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
faster_rcnn {
num_classes: 37
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_resnet101'
first_stage_features_stride: 16
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
}
}
train_config: {
batch_size: 1
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0003
schedule {
step: 0
learning_rate: .0003
}
schedule {
step: 900000
learning_rate: .00003
}
schedule {
step: 1200000
learning_rate: .000003
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
from_detection_checkpoint: true
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
eval_config: {
num_examples: 2000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
shuffle: false
num_readers: 1
}
# Faster R-CNN with Resnet-101 (v1), configured for Pascal VOC Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
faster_rcnn {
num_classes: 20
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_resnet101'
first_stage_features_stride: 16
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
}
}
train_config: {
batch_size: 1
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0001
schedule {
step: 0
learning_rate: .0001
}
schedule {
step: 500000
learning_rate: .00001
}
schedule {
step: 700000
learning_rate: .000001
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
from_detection_checkpoint: true
num_steps: 800000
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pascal_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pascal_label_map.pbtxt"
}
eval_config: {
num_examples: 4952
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pascal_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pascal_label_map.pbtxt"
shuffle: false
num_readers: 1
}
# Faster R-CNN with Resnet-152 (v1), configured for Oxford-IIIT Pets Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
faster_rcnn {
num_classes: 37
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_resnet152'
first_stage_features_stride: 16
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
}
}
train_config: {
batch_size: 1
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0003
schedule {
step: 0
learning_rate: .0003
}
schedule {
step: 900000
learning_rate: .00003
}
schedule {
step: 1200000
learning_rate: .000003
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
from_detection_checkpoint: true
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
eval_config: {
num_examples: 2000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
shuffle: false
num_readers: 1
}