Commit a4944a57 authored by derekjchow's avatar derekjchow Committed by Sergio Guadarrama
Browse files

Add Tensorflow Object Detection API. (#1561)

For details see our paper:
"Speed/accuracy trade-offs for modern convolutional object detectors."
Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I,
Wojna Z, Song Y, Guadarrama S, Murphy K, CVPR 2017
https://arxiv.org/abs/1611.10012
parent 60c3ed2e
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/argmax_matcher.proto";
import "object_detection/protos/bipartite_matcher.proto";
// Configuration proto for the matcher to be used in the object detection
// pipeline. See core/matcher.py for details.
message Matcher {
// Exactly one matching strategy must be set.
oneof matcher_oneof {
// Matcher that assigns groundtruth to anchors via argmax over similarity.
ArgMaxMatcher argmax_matcher = 1;
// Matcher based on greedy bipartite matching.
BipartiteMatcher bipartite_matcher = 2;
}
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for MeanStddevBoxCoder. See
// box_coders/mean_stddev_box_coder.py for details.
message MeanStddevBoxCoder {
// No options; the presence of this message selects the coder.
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/faster_rcnn.proto";
import "object_detection/protos/ssd.proto";
// Top level configuration for DetectionModels.
message DetectionModel {
// Exactly one meta-architecture must be configured.
oneof model {
// Faster R-CNN meta-architecture.
FasterRcnn faster_rcnn = 1;
// Single Shot Detector (SSD) meta-architecture.
Ssd ssd = 2;
}
}
syntax = "proto2";
package object_detection.protos;
// Messages for configuring the optimizing strategy for training object
// detection models.
// Top level optimizer message.
message Optimizer {
// Exactly one concrete optimizer must be set.
oneof optimizer {
RMSPropOptimizer rms_prop_optimizer = 1;
MomentumOptimizer momentum_optimizer = 2;
AdamOptimizer adam_optimizer = 3;
}
// Whether to keep an exponential moving average of model weights.
optional bool use_moving_average = 4 [default=true];
// Decay rate of the moving average; only meaningful when
// use_moving_average is true.
optional float moving_average_decay = 5 [default=0.9999];
}
// Configuration message for the RMSPropOptimizer
// See: https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
message RMSPropOptimizer {
// Learning rate schedule to use.
optional LearningRate learning_rate = 1;
// Momentum term of the RMSProp update.
optional float momentum_optimizer_value = 2 [default=0.9];
// Discounting factor for the gradient history.
optional float decay = 3 [default=0.9];
// Small constant for numerical stability.
optional float epsilon = 4 [default=1.0];
}
// Configuration message for the MomentumOptimizer
// See: https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer
message MomentumOptimizer {
// Learning rate schedule to use.
optional LearningRate learning_rate = 1;
// Momentum term of the update.
optional float momentum_optimizer_value = 2 [default=0.9];
}
// Configuration message for the AdamOptimizer
// See: https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer
message AdamOptimizer {
// Learning rate schedule to use.
optional LearningRate learning_rate = 1;
}
// Configuration message for optimizer learning rate.
message LearningRate {
// Exactly one learning rate schedule must be set.
oneof learning_rate {
ConstantLearningRate constant_learning_rate = 1;
ExponentialDecayLearningRate exponential_decay_learning_rate = 2;
ManualStepLearningRate manual_step_learning_rate = 3;
}
}
// Configuration message for a constant learning rate.
message ConstantLearningRate {
optional float learning_rate = 1 [default=0.002];
}
// Configuration message for an exponentially decaying learning rate.
// See https://www.tensorflow.org/versions/master/api_docs/python/train/
// decaying_the_learning_rate#exponential_decay
message ExponentialDecayLearningRate {
// Learning rate at global step 0.
optional float initial_learning_rate = 1 [default=0.002];
// Number of steps over which the rate is decayed by decay_factor.
optional uint32 decay_steps = 2 [default=4000000];
// Multiplicative decay factor applied per decay_steps steps.
optional float decay_factor = 3 [default=0.95];
// If true, decay the learning rate at discrete intervals (staircase
// function) rather than continuously.
optional bool staircase = 4 [default=true];
}
// Configuration message for a manually defined learning rate schedule.
message ManualStepLearningRate {
// Learning rate used before the first scheduled step takes effect.
optional float initial_learning_rate = 1 [default=0.002];
// A single (step, learning_rate) entry of the schedule.
message LearningRateSchedule {
// Global step at which learning_rate takes effect.
optional uint32 step = 1;
optional float learning_rate = 2 [default=0.002];
}
// Schedule entries; presumably expected in increasing step order — confirm
// against the training code.
repeated LearningRateSchedule schedule = 2;
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/eval.proto";
import "object_detection/protos/input_reader.proto";
import "object_detection/protos/model.proto";
import "object_detection/protos/train.proto";
// Convenience message for configuring a training and eval pipeline. Allows all
// of the pipeline parameters to be configured from one file.
message TrainEvalPipelineConfig {
// Model architecture and parameters.
optional DetectionModel model = 1;
// Training job parameters.
optional TrainConfig train_config = 2;
// Input source for training data.
optional InputReader train_input_reader = 3;
// Evaluation job parameters.
optional EvalConfig eval_config = 4;
// Input source for evaluation data.
optional InputReader eval_input_reader = 5;
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for non-max-suppression operation on a batch of
// detections.
message BatchNonMaxSuppression {
// Scalar threshold for score (low scoring boxes are removed).
optional float score_threshold = 1 [default = 0.0];
// Scalar threshold for IOU (boxes that have high IOU overlap
// with previously selected boxes are removed).
optional float iou_threshold = 2 [default = 0.6];
// Maximum number of detections to retain per class.
optional int32 max_detections_per_class = 3 [default = 100];
// Maximum number of detections to retain across all classes.
// NOTE(review): field number 4 is skipped; if a field was deleted there it
// should be marked reserved to prevent accidental reuse.
optional int32 max_total_detections = 5 [default = 100];
}
// Configuration proto for post-processing predicted boxes and
// scores.
message PostProcessing {
// Non max suppression parameters.
optional BatchNonMaxSuppression batch_non_max_suppression = 1;
// Enum to specify how to convert the detection scores.
enum ScoreConverter {
// Input scores equal output scores (no conversion).
IDENTITY = 0;
// Applies a sigmoid on input scores.
SIGMOID = 1;
// Applies a softmax on input scores.
SOFTMAX = 2;
}
// Score converter to use.
optional ScoreConverter score_converter = 2 [default = IDENTITY];
}
syntax = "proto2";
package object_detection.protos;
// Message for defining a preprocessing operation on input data.
// See: //object_detection/core/preprocessor.py
message PreprocessingStep {
// Exactly one preprocessing operation must be set per step.
oneof preprocessing_step {
NormalizeImage normalize_image = 1;
RandomHorizontalFlip random_horizontal_flip = 2;
RandomPixelValueScale random_pixel_value_scale = 3;
RandomImageScale random_image_scale = 4;
RandomRGBtoGray random_rgb_to_gray = 5;
RandomAdjustBrightness random_adjust_brightness = 6;
RandomAdjustContrast random_adjust_contrast = 7;
RandomAdjustHue random_adjust_hue = 8;
RandomAdjustSaturation random_adjust_saturation = 9;
RandomDistortColor random_distort_color = 10;
RandomJitterBoxes random_jitter_boxes = 11;
RandomCropImage random_crop_image = 12;
RandomPadImage random_pad_image = 13;
RandomCropPadImage random_crop_pad_image = 14;
RandomCropToAspectRatio random_crop_to_aspect_ratio = 15;
RandomBlackPatches random_black_patches = 16;
RandomResizeMethod random_resize_method = 17;
ScaleBoxesToPixelCoordinates scale_boxes_to_pixel_coordinates = 18;
ResizeImage resize_image = 19;
SubtractChannelMean subtract_channel_mean = 20;
SSDRandomCrop ssd_random_crop = 21;
SSDRandomCropPad ssd_random_crop_pad = 22;
SSDRandomCropFixedAspectRatio ssd_random_crop_fixed_aspect_ratio = 23;
}
}
// Normalizes pixel values in an image.
// For every channel in the image, moves the pixel values from the range
// [original_minval, original_maxval] to [target_minval, target_maxval].
message NormalizeImage {
// Input pixel value range.
optional float original_minval = 1;
optional float original_maxval = 2;
// Output pixel value range.
optional float target_minval = 3 [default=0];
optional float target_maxval = 4 [default=1];
}
// Randomly horizontally mirrors the image and detections 50% of the time.
message RandomHorizontalFlip {
}
// Randomly scales the values of all pixels in the image by some constant value
// between [minval, maxval], then clips the value to a range between [0, 1.0].
message RandomPixelValueScale {
optional float minval = 1 [default=0.9];
optional float maxval = 2 [default=1.1];
}
// Randomly enlarges or shrinks image (keeping aspect ratio).
message RandomImageScale {
optional float min_scale_ratio = 1 [default=0.5];
optional float max_scale_ratio = 2 [default=2.0];
}
// Randomly converts the entire image to grayscale.
message RandomRGBtoGray {
// Probability of applying the conversion to a given image.
optional float probability = 1 [default=0.1];
}
// Randomly adjusts image brightness by a delta of at most max_delta. Output
// pixel values are saturated between 0 and 1.
message RandomAdjustBrightness {
  // Maximum absolute change in brightness.
  optional float max_delta = 1 [default=0.2];
}
// Randomly scales contrast by a value between [min_delta, max_delta].
message RandomAdjustContrast {
optional float min_delta = 1 [default=0.8];
optional float max_delta = 2 [default=1.25];
}
// Randomly alters hue by a value of up to max_delta.
message RandomAdjustHue {
optional float max_delta = 1 [default=0.02];
}
// Randomly changes saturation by a value between [min_delta, max_delta].
message RandomAdjustSaturation {
optional float min_delta = 1 [default=0.8];
optional float max_delta = 2 [default=1.25];
}
// Performs a random color distortion. color_orderings should either be 0 or 1.
message RandomDistortColor {
// Order in which the color adjustments are applied (0 or 1).
optional int32 color_ordering = 1;
}
// Randomly jitters corners of boxes in the image determined by ratio.
// ie. If a box is [100, 200] and ratio is 0.02, the corners can move by [1, 4].
message RandomJitterBoxes {
// Maximum jitter expressed as a fraction of the box dimensions.
optional float ratio = 1 [default=0.05];
}
// Randomly crops the image and bounding boxes.
message RandomCropImage {
// Cropped image must cover at least one box by this fraction.
optional float min_object_covered = 1 [default=1.0];
// Aspect ratio bounds of cropped image.
optional float min_aspect_ratio = 2 [default=0.75];
optional float max_aspect_ratio = 3 [default=1.33];
// Allowed area ratio of cropped image to original image.
optional float min_area = 4 [default=0.1];
optional float max_area = 5 [default=1.0];
// Minimum overlap threshold of cropped boxes to keep in new image. If the
// ratio between a cropped bounding box and the original is less than this
// value, it is removed from the new image.
optional float overlap_thresh = 6 [default=0.3];
// Probability of keeping the original image (i.e. of skipping the crop).
optional float random_coef = 7 [default=0.0];
}
// Randomly adds padding to the image.
message RandomPadImage {
// Minimum dimensions for padded image. If unset, will use original image
// dimension as a lower bound.
optional float min_image_height = 1;
optional float min_image_width = 2;
// Maximum dimensions for padded image. If unset, will use double the original
// image dimension as a lower bound.
// NOTE(review): "lower bound" reads oddly for a maximum — confirm intended
// semantics against preprocessor.py.
optional float max_image_height = 3;
optional float max_image_width = 4;
// Color of the padding. If unset, will pad using average color of the input
// image.
repeated float pad_color = 5;
}
// Randomly crops an image followed by a random pad.
message RandomCropPadImage {
// Cropping operation must cover at least one box by this fraction.
optional float min_object_covered = 1 [default=1.0];
// Aspect ratio bounds of image after cropping operation.
optional float min_aspect_ratio = 2 [default=0.75];
optional float max_aspect_ratio = 3 [default=1.33];
// Allowed area ratio of image after cropping operation.
optional float min_area = 4 [default=0.1];
optional float max_area = 5 [default=1.0];
// Minimum overlap threshold of cropped boxes to keep in new image. If the
// ratio between a cropped bounding box and the original is less than this
// value, it is removed from the new image.
optional float overlap_thresh = 6 [default=0.3];
// Probability of keeping the original image during the crop operation.
optional float random_coef = 7 [default=0.0];
// Maximum dimensions for padded image. If unset, will use double the original
// image dimension as a lower bound. Both of the following fields should be
// length 2 (height ratio, width ratio).
repeated float min_padded_size_ratio = 8;
repeated float max_padded_size_ratio = 9;
// Color of the padding. If unset, will pad using average color of the input
// image.
repeated float pad_color = 10;
}
// Randomly crops an image to a given aspect ratio.
message RandomCropToAspectRatio {
// Aspect ratio (width / height) to crop to.
optional float aspect_ratio = 1 [default=1.0];
// Minimum overlap threshold of cropped boxes to keep in new image. If the
// ratio between a cropped bounding box and the original is less than this
// value, it is removed from the new image.
optional float overlap_thresh = 2 [default=0.3];
}
// Randomly adds black square patches to an image.
message RandomBlackPatches {
// The maximum number of black patches to add.
optional int32 max_black_patches = 1 [default=10];
// The probability of a black patch being added to an image.
optional float probability = 2 [default=0.5];
// Ratio between the dimension of the black patch and the minimum dimension of
// the image
// (patch_width = patch_height = size_to_image_ratio *
//  min(image_height, image_width)).
optional float size_to_image_ratio = 3 [default=0.1];
}
// Randomly resizes the image up to [target_height, target_width];
// presumably the interpolation method is chosen at random — see
// preprocessor.py.
message RandomResizeMethod {
optional float target_height = 1;
optional float target_width = 2;
}
// Scales boxes from normalized coordinates to pixel coordinates.
message ScaleBoxesToPixelCoordinates {
}
// Resizes images to [new_height, new_width].
message ResizeImage {
  // Target output dimensions in pixels.
  optional int32 new_height = 1;
  optional int32 new_width = 2;
  // Interpolation method used when resizing.
  enum Method {
    AREA = 1;
    BICUBIC = 2;
    BILINEAR = 3;
    NEAREST_NEIGHBOR = 4;
  }
  optional Method method = 3 [default=BILINEAR];
}
// Normalizes an image by subtracting a mean from each channel.
message SubtractChannelMean {
// The mean to subtract from each channel. Should be of same dimension as
// the number of channels in the input image.
repeated float means = 1;
}
// A single candidate crop used by SSDRandomCrop.
message SSDRandomCropOperation {
// Cropped image must cover at least this fraction of one original bounding
// box.
optional float min_object_covered = 1;
// The aspect ratio of the cropped image must be within the range of
// [min_aspect_ratio, max_aspect_ratio].
optional float min_aspect_ratio = 2;
optional float max_aspect_ratio = 3;
// The area of the cropped image must be within the range of
// [min_area, max_area].
optional float min_area = 4;
optional float max_area = 5;
// Cropped box area ratio must be above this threshold to be kept.
optional float overlap_thresh = 6;
// Probability a crop operation is skipped.
optional float random_coef = 7;
}
// Randomly crops an image according to:
// Liu et al., SSD: Single shot multibox detector.
// This preprocessing step defines multiple SSDRandomCropOperations. Only one
// operation (chosen at random) is actually performed on an image.
message SSDRandomCrop {
repeated SSDRandomCropOperation operations = 1;
}
// A single candidate crop-and-pad used by SSDRandomCropPad.
message SSDRandomCropPadOperation {
// Cropped image must cover at least this fraction of one original bounding
// box.
optional float min_object_covered = 1;
// The aspect ratio of the cropped image must be within the range of
// [min_aspect_ratio, max_aspect_ratio].
optional float min_aspect_ratio = 2;
optional float max_aspect_ratio = 3;
// The area of the cropped image must be within the range of
// [min_area, max_area].
optional float min_area = 4;
optional float max_area = 5;
// Cropped box area ratio must be above this threshold to be kept.
optional float overlap_thresh = 6;
// Probability a crop operation is skipped.
optional float random_coef = 7;
// Min ratio of padded image height and width to the input image's height and
// width. Two entries per operation.
repeated float min_padded_size_ratio = 8;
// Max ratio of padded image height and width to the input image's height and
// width. Two entries per operation.
repeated float max_padded_size_ratio = 9;
// Padding color (per-channel components).
optional float pad_color_r = 10;
optional float pad_color_g = 11;
optional float pad_color_b = 12;
}
// Randomly crops and pads an image according to:
// Liu et al., SSD: Single shot multibox detector.
// This preprocessing step defines multiple SSDRandomCropPadOperations. Only one
// operation (chosen at random) is actually performed on an image.
message SSDRandomCropPad {
repeated SSDRandomCropPadOperation operations = 1;
}
// A single candidate crop used by SSDRandomCropFixedAspectRatio.
// NOTE(review): field numbers 2-3 are unused here, presumably to keep numbers
// aligned with SSDRandomCropOperation (whose 2-3 are aspect-ratio bounds) —
// confirm, and mark them reserved if so.
message SSDRandomCropFixedAspectRatioOperation {
// Cropped image must cover at least this fraction of one original bounding
// box.
optional float min_object_covered = 1;
// The area of the cropped image must be within the range of
// [min_area, max_area].
optional float min_area = 4;
optional float max_area = 5;
// Cropped box area ratio must be above this threshold to be kept.
optional float overlap_thresh = 6;
// Probability a crop operation is skipped.
optional float random_coef = 7;
}
// Randomly crops an image to a fixed aspect ratio according to:
// Liu et al., SSD: Single shot multibox detector.
// Multiple SSDRandomCropFixedAspectRatioOperations are defined by this
// preprocessing step. Only one operation (chosen at random) is actually
// performed on an image.
message SSDRandomCropFixedAspectRatio {
repeated SSDRandomCropFixedAspectRatioOperation operations = 1;
// Aspect ratio to crop to. This value is used for all crop operations.
optional float aspect_ratio = 2 [default=1.0];
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for region similarity calculators. See
// core/region_similarity_calculator.py for details.
message RegionSimilarityCalculator {
// Exactly one similarity measure must be set.
oneof region_similarity {
NegSqDistSimilarity neg_sq_dist_similarity = 1;
IouSimilarity iou_similarity = 2;
IoaSimilarity ioa_similarity = 3;
}
}
// Configuration for negative squared distance similarity calculator.
message NegSqDistSimilarity {
}
// Configuration for intersection-over-union (IOU) similarity calculator.
message IouSimilarity {
}
// Configuration for intersection-over-area (IOA) similarity calculator.
message IoaSimilarity {
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for SquareBoxCoder. See
// box_coders/square_box_coder.py for details.
message SquareBoxCoder {
// Scale factor for anchor encoded box center.
optional float y_scale = 1 [default = 10.0];
optional float x_scale = 2 [default = 10.0];
// Scale factor for anchor encoded box length.
optional float length_scale = 3 [default = 5.0];
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/anchor_generator.proto";
import "object_detection/protos/box_coder.proto";
import "object_detection/protos/box_predictor.proto";
import "object_detection/protos/hyperparams.proto";
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/matcher.proto";
import "object_detection/protos/losses.proto";
import "object_detection/protos/post_processing.proto";
import "object_detection/protos/region_similarity_calculator.proto";
// Configuration for Single Shot Detection (SSD) models.
message Ssd {
// Number of classes to predict.
optional int32 num_classes = 1;
// Image resizer for preprocessing the input image.
optional ImageResizer image_resizer = 2;
// Feature extractor config.
optional SsdFeatureExtractor feature_extractor = 3;
// Box coder to encode the boxes.
optional BoxCoder box_coder = 4;
// Matcher to match groundtruth with anchors.
optional Matcher matcher = 5;
// Region similarity calculator to compute similarity of boxes.
optional RegionSimilarityCalculator similarity_calculator = 6;
// Box predictor to attach to the features.
optional BoxPredictor box_predictor = 7;
// Anchor generator to compute anchors.
optional AnchorGenerator anchor_generator = 8;
// Post processing to apply on the predictions.
optional PostProcessing post_processing = 9;
// Whether to normalize the loss by number of groundtruth boxes that match to
// the anchors.
optional bool normalize_loss_by_num_matches = 10 [default=true];
// Loss configuration for training.
optional Loss loss = 11;
}
// Feature extractor configuration for SSD models.
message SsdFeatureExtractor {
// Type of ssd feature extractor (a registered extractor name).
optional string type = 1;
// The factor to alter the depth of the channels in the feature extractor.
optional float depth_multiplier = 2 [default=1.0];
// Minimum number of the channels in the feature extractor.
optional int32 min_depth = 3 [default=16];
// Hyperparameters for the feature extractor.
optional Hyperparams conv_hyperparams = 4;
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for SSD anchor generator described in
// https://arxiv.org/abs/1512.02325. See
// anchor_generators/multiple_grid_anchor_generator.py for details.
message SsdAnchorGenerator {
// Number of grid layers to create anchors for.
optional int32 num_layers = 1 [default = 6];
// Scale of anchors corresponding to finest resolution.
optional float min_scale = 2 [default = 0.2];
// Scale of anchors corresponding to coarsest resolution.
optional float max_scale = 3 [default = 0.95];
// Aspect ratios for anchors at each grid point.
repeated float aspect_ratios = 4;
// Whether to use the following aspect ratio and scale combination for the
// layer with the finest resolution : (scale=0.1, aspect_ratio=1.0),
// (scale=min_scale, aspect_ratio=2.0), (scale=min_scale, aspect_ratio=0.5).
optional bool reduce_boxes_in_lowest_layer = 5 [default = true];
}
// Message to store the mapping from class label strings to class id. Datasets
// use string labels to represent classes while the object detection framework
// works with class ids. This message maps them so they can be converted back
// and forth as needed.
syntax = "proto2";
package object_detection.protos;
// A single mapping between a string class label and an integer class id.
message StringIntLabelMapItem {
  // String name. The most common practice is to set this to a MID or synsets
  // id.
  optional string name = 1;
  // Integer id that maps to the string name above. Label ids should start from
  // 1.
  optional int32 id = 2;
  // Human readable string label.
  optional string display_name = 3;
}

// The full label map: a collection of label map items.
message StringIntLabelMap {
  repeated StringIntLabelMapItem item = 1;
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/optimizer.proto";
import "object_detection/protos/preprocessor.proto";
// Message for configuring DetectionModel training jobs (train.py).
message TrainConfig {
// Input queue batch size.
optional uint32 batch_size = 1 [default=32];
// Data augmentation options; applied in the order listed.
repeated PreprocessingStep data_augmentation_options = 2;
// Whether to synchronize replicas during training.
optional bool sync_replicas = 3 [default=false];
// How frequently to keep checkpoints (in hours).
optional uint32 keep_checkpoint_every_n_hours = 4 [default=1000];
// Optimizer used to train the DetectionModel.
optional Optimizer optimizer = 5;
// If greater than 0, clips gradients by this value.
optional float gradient_clipping_by_norm = 6 [default=0.0];
// Checkpoint to restore variables from. Typically used to load feature
// extractor variables trained outside of object detection.
optional string fine_tune_checkpoint = 7 [default=""];
// Specifies if the finetune checkpoint is from an object detection model.
// If from an object detection model, the model being trained should have
// the same parameters with the exception of the num_classes parameter.
// If false, it assumes the checkpoint was a object classification model.
optional bool from_detection_checkpoint = 8 [default=false];
// Number of steps to train the DetectionModel for. If 0, will train the model
// indefinitely.
optional uint32 num_steps = 9 [default=0];
// Number of training steps between replica startup.
// This flag must be set to 0 if sync_replicas is set to true.
// NOTE(review): declared float although it counts steps — confirm whether an
// integer type was intended.
optional float startup_delay_steps = 10 [default=15];
// If greater than 0, multiplies the gradient of bias variables by this
// amount; 0 disables the multiplier.
optional float bias_grad_multiplier = 11 [default=0];
// Variables that should not be updated during training.
repeated string freeze_variables = 12;
// Number of replicas to aggregate before making parameter updates.
optional int32 replicas_to_aggregate = 13 [default=1];
// Maximum number of elements to store within a queue.
optional int32 batch_queue_capacity = 14 [default=600];
// Number of threads to use for batching.
optional int32 num_batch_queue_threads = 15 [default=8];
// Maximum capacity of the queue used to prefetch assembled batches.
optional int32 prefetch_queue_capacity = 16 [default=10];
}
# Google Cloud ML Engine job configuration for distributed training:
# one GPU master, 5 GPU workers, 3 parameter servers.
# The worker fields below are fields of TrainingInput and must be nested
# under trainingInput (flat top-level keys are not read by the service).
trainingInput:
  runtimeVersion: "1.0"
  scaleTier: CUSTOM
  masterType: standard_gpu
  workerCount: 5
  workerType: standard_gpu
  parameterServerCount: 3
  parameterServerType: standard
# Faster R-CNN with Inception Resnet v2, Atrous version;
# Configured for Oxford-IIT Pets Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
faster_rcnn {
num_classes: 37
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_inception_resnet_v2'
first_stage_features_stride: 8
}
# First stage: region proposal network (RPN) settings.
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 8
width_stride: 8
}
}
first_stage_atrous_rate: 2
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
# Second stage: box classifier settings.
initial_crop_size: 17
maxpool_kernel_size: 1
maxpool_stride: 1
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
}
}
# Training configuration: SGD with momentum and a manual step-decay schedule.
train_config: {
batch_size: 1
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0003
schedule {
step: 0
learning_rate: .0003
}
schedule {
step: 900000
learning_rate: .00003
}
schedule {
step: 1200000
learning_rate: .000003
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
from_detection_checkpoint: true
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
eval_config: {
num_examples: 2000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
# Faster R-CNN with Resnet-101 (v1) configured for the Oxford-IIT Pet Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
faster_rcnn {
num_classes: 37
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_resnet101'
first_stage_features_stride: 16
}
# First stage: region proposal network (RPN) settings.
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
# Second stage: box classifier settings.
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
}
}
# Training configuration: SGD with momentum and a manual step-decay schedule.
train_config: {
batch_size: 1
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0003
schedule {
step: 0
learning_rate: .0003
}
schedule {
step: 900000
learning_rate: .00003
}
schedule {
step: 1200000
learning_rate: .000003
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
from_detection_checkpoint: true
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
eval_config: {
num_examples: 2000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
# Faster R-CNN with Resnet-101 (v1), configured for Pascal VOC Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
faster_rcnn {
num_classes: 20
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_resnet101'
first_stage_features_stride: 16
}
# First stage: region proposal network (RPN) settings.
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
# Second stage: box classifier settings.
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
}
}
# Training configuration: SGD with momentum and a manual step-decay schedule.
# Note the lower learning rates and shorter schedule relative to the Pets
# configs, plus a fixed 800k-step budget.
train_config: {
batch_size: 1
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0001
schedule {
step: 0
learning_rate: .0001
}
schedule {
step: 500000
learning_rate: .00001
}
schedule {
step: 700000
learning_rate: .000001
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
from_detection_checkpoint: true
num_steps: 800000
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pascal_voc_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pascal_voc_label_map.pbtxt"
}
# 4952 examples: the size of the VOC 2007 test set.
eval_config: {
num_examples: 4952
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pascal_voc_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pascal_voc_label_map.pbtxt"
}
# Faster R-CNN with Resnet-152 (v1), configured for Oxford-IIT Pets Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
faster_rcnn {
num_classes: 37
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_resnet152'
first_stage_features_stride: 16
}
# First stage: region proposal network (RPN) settings.
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
# Second stage: box classifier settings.
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
}
}
# Training configuration: SGD with momentum and a manual step-decay schedule.
train_config: {
batch_size: 1
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0003
schedule {
step: 0
learning_rate: .0003
}
schedule {
step: 900000
learning_rate: .00003
}
schedule {
step: 1200000
learning_rate: .000003
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
from_detection_checkpoint: true
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
eval_config: {
num_examples: 2000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
# Faster R-CNN with Resnet-50 (v1), configured for Oxford-IIIT Pets Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
faster_rcnn {
num_classes: 37
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_resnet50'
first_stage_features_stride: 16
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
}
}
train_config: {
batch_size: 1
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0003
schedule {
step: 0
learning_rate: .0003
}
schedule {
step: 900000
learning_rate: .00003
}
schedule {
step: 1200000
learning_rate: .000003
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
from_detection_checkpoint: true
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
eval_config: {
num_examples: 2000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
# R-FCN with Resnet-101 (v1), configured for Oxford-IIIT Pets Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
faster_rcnn {
num_classes: 37
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_resnet101'
first_stage_features_stride: 16
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
second_stage_box_predictor {
rfcn_box_predictor {
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
crop_height: 18
crop_width: 18
num_spatial_bins_height: 3
num_spatial_bins_width: 3
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
}
}
train_config: {
batch_size: 1
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0003
schedule {
step: 0
learning_rate: .0003
}
schedule {
step: 900000
learning_rate: .00003
}
schedule {
step: 1200000
learning_rate: .000003
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
from_detection_checkpoint: true
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
eval_config: {
num_examples: 2000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}