ssd.proto 4.35 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
syntax = "proto2";
package object_detection.protos;

import "object_detection/protos/anchor_generator.proto";
import "object_detection/protos/box_coder.proto";
import "object_detection/protos/box_predictor.proto";
import "object_detection/protos/hyperparams.proto";
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/matcher.proto";
import "object_detection/protos/losses.proto";
import "object_detection/protos/post_processing.proto";
import "object_detection/protos/region_similarity_calculator.proto";

// Configuration for Single Shot Detection (SSD) models.
message Ssd {
  // Number of classes to predict.
  optional int32 num_classes = 1;

  // Image resizer for preprocessing the input image.
  optional ImageResizer image_resizer = 2;

  // Feature extractor config.
  optional SsdFeatureExtractor feature_extractor = 3;

  // Box coder to encode the boxes.
  optional BoxCoder box_coder = 4;

  // Matcher to match groundtruth with anchors.
  optional Matcher matcher = 5;

  // Region similarity calculator to compute similarity of boxes.
  optional RegionSimilarityCalculator similarity_calculator = 6;

  // Whether background targets are to be encoded as an all-zeros vector or a
  // one-hot vector (where background is the 0th class).
  optional bool encode_background_as_zeros = 12 [default = false];

  // Classification weight to be associated with negative anchors
  // (default: 1.0). The weight must be in [0., 1.].
  optional float negative_class_weight = 13 [default = 1.0];

  // Box predictor to attach to the features.
  optional BoxPredictor box_predictor = 7;

  // Anchor generator to compute anchors.
  optional AnchorGenerator anchor_generator = 8;

  // Post processing to apply on the predictions.
  optional PostProcessing post_processing = 9;

  // Whether to normalize the loss by the number of groundtruth boxes that
  // match to the anchors.
  optional bool normalize_loss_by_num_matches = 10 [default = true];

  // Whether to normalize the localization loss by the code size of the box
  // encodings. This is applied along with other normalization factors.
  optional bool normalize_loc_loss_by_codesize = 14 [default = false];

  // Loss configuration for training.
  optional Loss loss = 11;

  // Whether to update batch_norm inplace during training. This is required
  // for batch norm to work correctly on TPUs. When this is false, the user
  // must add a control dependency on tf.GraphKeys.UPDATE_OPS for the
  // train/loss op in order to update the batch norm moving average parameters.
  optional bool inplace_batchnorm_update = 15 [default = false];
}


// Configuration for the feature extractor of an SSD model.
message SsdFeatureExtractor {
  // Type of ssd feature extractor.
  optional string type = 1;

  // The factor to alter the depth of the channels in the feature extractor.
  optional float depth_multiplier = 2 [default = 1.0];

  // Minimum number of the channels in the feature extractor.
  optional int32 min_depth = 3 [default = 16];

  // Hyperparameters that affect the layers of feature extractor added on top
  // of the base feature extractor.
  optional Hyperparams conv_hyperparams = 4;

  // The nearest multiple to zero-pad the input height and width dimensions to.
  // For example, if pad_to_multiple = 2, input dimensions are zero-padded
  // until the resulting dimensions are even.
  optional int32 pad_to_multiple = 5 [default = 1];

  // Whether to update batch norm parameters during training or not.
  // When training with a relatively small batch size (e.g. 1), it is
  // desirable to disable batch norm update and use pretrained batch norm
  // params.
  //
  // Note: Some feature extractors are used with canned arg_scopes
  // (e.g. resnet arg scopes). In these cases training behavior of batch norm
  // variables may depend on both values of `batch_norm_trainable` and
  // `is_training`.
  //
  // When canned arg_scopes are used with feature extractors, `conv_hyperparams`
  // will apply only to the additional layers that are added and are outside
  // the canned arg_scope.
  optional bool batch_norm_trainable = 6 [default = true];

  // Whether to use explicit padding when extracting SSD multiresolution
  // features. Note that this does not apply to the base feature extractor.
  optional bool use_explicit_padding = 7 [default = false];

  // Whether to use depthwise separable convolutions to extract additional
  // feature maps added by SSD.
  optional bool use_depthwise = 8 [default = false];
}