syntax = "proto2";

package object_detection.protos;

// Configuration proto for defining input readers that generate Object Detection
// Examples from input sources. Input readers are expected to generate a
// dictionary of tensors, with the following fields populated:
//
// 'image': an [image_height, image_width, channels] image tensor that detection
//    will be run on.
// 'groundtruth_classes': a [num_boxes] int32 tensor storing the class
//    labels of detected boxes in the image.
// 'groundtruth_boxes': a [num_boxes, 4] float tensor storing the coordinates of
//    detected boxes in the image.
// 'groundtruth_instance_masks': (Optional), a [num_boxes, image_height,
//    image_width] float tensor storing binary masks of the objects in boxes.

// Instance mask format. Note that PNG masks are much more space efficient.
enum InstanceMaskType {
  // Default implementation, currently NUMERICAL_MASKS.
  DEFAULT = 0;

  // [num_masks, H, W] float32 binary masks.
  NUMERICAL_MASKS = 1;

  // Encoded PNG masks.
  PNG_MASKS = 2;
}

// Input type format: whether inputs are TfExamples or TfSequenceExamples.
enum InputType {
  INPUT_DEFAULT = 0;        // Default implementation, currently TF_EXAMPLE
  TF_EXAMPLE = 1;           // TfExample input
  TF_SEQUENCE_EXAMPLE = 2;  // TfSequenceExample input
}

// Next id: 38
// Configuration of a single input reader: how records are shuffled, read and
// prefetched, which optional features are loaded, and which concrete reader
// (see the `input_reader` oneof) supplies the data.
message InputReader {
  // Name of input reader. Typically used to describe the dataset that is read
  // by this input reader.
  optional string name = 23 [default = ""];

  // Path to StringIntLabelMap pbtxt file specifying the mapping from string
  // labels to integer ids.
  optional string label_map_path = 1 [default = ""];

  // Whether data should be processed in the order they are read in, or
  // shuffled randomly.
  optional bool shuffle = 2 [default = true];

  // Buffer size to be used when shuffling.
  optional uint32 shuffle_buffer_size = 11 [default = 2048];

  // Buffer size to be used when shuffling file names.
  optional uint32 filenames_shuffle_buffer_size = 12 [default = 100];

  // The number of times a data source is read. If set to zero, the data source
  // will be reused indefinitely.
  optional uint32 num_epochs = 5 [default = 0];

  // Integer representing how often an example should be sampled. To feed
  // only 1/3 of your data into your model, set `sample_1_of_n_examples` to 3.
  // This is particularly useful for evaluation, where you might not prefer to
  // evaluate all of your samples.
  optional uint32 sample_1_of_n_examples = 22 [default = 1];

  // Number of file shards to read in parallel.
  //
  // When sample_from_datasets_weights are configured, num_readers is applied
  // for each dataset.
  optional uint32 num_readers = 6 [default = 64];

  // Number of batches to produce in parallel. If this is run on a 2x2 TPU set
  // this to 8.
  optional uint32 num_parallel_batches = 19 [default = 8];

  // Number of batches to prefetch. Prefetch decouples input pipeline and
  // model so they can be pipelined resulting in higher throughput. Set this
  // to a small constant and increment linearly until the improvements become
  // marginal or you exceed your cpu memory budget. Setting this to -1,
  // automatically tunes this value for you.
  optional int32 num_prefetch_batches = 20 [default = 2];

  // Maximum number of records to keep in reader queue.
  optional uint32 queue_capacity = 3 [default = 2000, deprecated = true];

  // Minimum number of records to keep in reader queue. A large value is needed
  // to generate a good random shuffle.
  optional uint32 min_after_dequeue = 4 [default = 1000, deprecated = true];

  // Number of records to read from each reader at once.
  optional uint32 read_block_length = 15 [default = 32];

  // Number of decoded records to prefetch before batching.
  optional uint32 prefetch_size = 13 [default = 512, deprecated = true];

  // Number of parallel decode ops to apply.
  optional uint32 num_parallel_map_calls = 14 [default = 64, deprecated = true];

  // Drop remainder when batch size does not divide dataset size.
  optional bool drop_remainder = 35 [default = true];

  // If positive, TfExampleDecoder will try to decode rasters of additional
  // channels from tf.Examples.
  optional int32 num_additional_channels = 18 [default = 0];

  // Number of groundtruth keypoints per object.
  optional uint32 num_keypoints = 16 [default = 0];

  // Keypoint weights. These weights can be used to apply per-keypoint loss
  // multipliers. The size of this field should agree with `num_keypoints`.
  repeated float keypoint_type_weight = 26;

  // Maximum number of boxes to pad to during training / evaluation.
  // Set this to at least the maximum amount of boxes in the input data,
  // otherwise some groundtruth boxes may be clipped.
  optional int32 max_number_of_boxes = 21 [default = 100];

  // Whether to load multiclass scores from the dataset.
  optional bool load_multiclass_scores = 24 [default = false];

  // Whether to load context features from the dataset.
  optional bool load_context_features = 25 [default = false];

  // Whether to load context image ids from the dataset.
  optional bool load_context_image_ids = 36 [default = false];

  // Whether to load groundtruth instance masks.
  optional bool load_instance_masks = 7 [default = false];

  // Type of instance mask.
  optional InstanceMaskType mask_type = 10 [default = NUMERICAL_MASKS];

  // Whether to load DensePose data. If set, must also set load_instance_masks
  // to true.
  optional bool load_dense_pose = 31 [default = false];

  // Whether to load track information.
  optional bool load_track_id = 33 [default = false];

  // Whether to load keypoint depth features.
  optional bool load_keypoint_depth_features = 37 [default = false];

  // Whether to use the display name when decoding examples. This is only used
  // when mapping class text strings to integers.
  optional bool use_display_name = 17 [default = false];

  // Whether to include the source_id string in the input features.
  optional bool include_source_id = 27 [default = false];

  // Whether input data type is tf.Examples or tf.SequenceExamples.
  optional InputType input_type = 30 [default = TF_EXAMPLE];

  // Which frame to choose from the input if Sequence Example. -1 indicates
  // random choice.
  optional int32 frame_index = 32 [default = -1];

  // The concrete reader that supplies the data. At most one member can be set.
  oneof input_reader {
    TFRecordInputReader tf_record_input_reader = 8;
    ExternalInputReader external_input_reader = 9;
  }

  // When multiple input files are configured, we can sample across them based
  // on weights.
  //
  // The number of weights must match the number of input files configured.
  //
  // The number of input readers per dataset is num_readers, scaled relative to
  // the dataset weight.
  //
  // When set, shuffling and shuffle buffer size settings are applied
  // individually to each dataset.
  //
  // Implementation follows tf.data.experimental.sample_from_datasets sampling
  // strategy. Weights may take any value - only relative weights matter.
  //
  // Zero weights will result in a dataset not being sampled and no input
  // readers spawned.
  //
  // Examples, assuming two input files configured:
  //
  // Equal weighting:
  // sample_from_datasets_weights: 0.5
  // sample_from_datasets_weights: 0.5
  //
  // 2:1 weighting:
  // sample_from_datasets_weights: 2
  // sample_from_datasets_weights: 1
  //
  // Exclude the second dataset:
  // sample_from_datasets_weights: 1
  // sample_from_datasets_weights: 0
  repeated float sample_from_datasets_weights = 34;

  // Expand labels to ancestors or descendants in the hierarchy for
  // positive and negative labels, respectively.
  optional bool expand_labels_hierarchy = 29 [default = false];
}

// An input reader that reads TF Example or TF Sequence Example protos from
// local TFRecord files.
message TFRecordInputReader {
  // Path(s) to `TFRecordFile`s.
  repeated string input_path = 1;
}

// An externally defined input reader. Users may define an extension to this
// proto to interface their own input readers.
message ExternalInputReader {
  // Field numbers 1 through 999 are set aside for extensions declared outside
  // this message; the message declares no fields of its own.
  extensions 1 to 999;
}