Unverified Commit 8518d053 authored by pkulzc's avatar pkulzc Committed by GitHub
Browse files

Open source MnasFPN and minor fixes to OD API (#8484)

310447280  by lzc:

    Internal change

310420845  by Zhichao Lu:

    Open source the internal Context RCNN code.

--
310362339  by Zhichao Lu:

    Internal change

310259448  by lzc:

    Update required TF version for OD API.

--
310252159  by Zhichao Lu:

    Port patch_ops_test to TF1/TF2 as TPUs.

--
310247180  by Zhichao Lu:

    Ignore keypoint heatmap loss in the regions/bounding boxes with target keypoint
    class but no valid keypoint annotations.

--
310178294  by Zhichao Lu:

    Opensource MnasFPN
    https://arxiv.org/abs/1912.01106

--
310094222  by lzc:

    Internal changes.

--
310085250  by lzc:

    Internal Change.

--
310016447  by huizhongc:

    Remove unrecognized classes from labeled_classes.

--
310009470  by rathodv:

    Mark batcher.py as TF1 only.

--
310001984  by rathodv:

    Update core/preprocessor.py to be compatible with TF1/TF2.

--
309455035  by Zhi...
parent ac5fff19
...@@ -32,6 +32,9 @@ message Hyperparams { ...@@ -32,6 +32,9 @@ message Hyperparams {
// Use tf.nn.relu6 // Use tf.nn.relu6
RELU_6 = 2; RELU_6 = 2;
// Use tf.nn.swish
SWISH = 3;
} }
optional Activation activation = 4 [default = RELU]; optional Activation activation = 4 [default = RELU];
......
...@@ -10,6 +10,7 @@ message ImageResizer { ...@@ -10,6 +10,7 @@ message ImageResizer {
FixedShapeResizer fixed_shape_resizer = 2; FixedShapeResizer fixed_shape_resizer = 2;
IdentityResizer identity_resizer = 3; IdentityResizer identity_resizer = 3;
ConditionalShapeResizer conditional_shape_resizer = 4; ConditionalShapeResizer conditional_shape_resizer = 4;
PadToMultipleResizer pad_to_multiple_resizer = 5;
} }
} }
...@@ -90,3 +91,18 @@ message ConditionalShapeResizer { ...@@ -90,3 +91,18 @@ message ConditionalShapeResizer {
optional bool convert_to_grayscale = 4 [default = false]; optional bool convert_to_grayscale = 4 [default = false];
} }
// An image resizer which resizes inputs by zero padding them such that their
// spatial dimensions are divisible by a specified multiple. This is useful
// when you want to concatenate or compare the input to an output of a
// fully convolutional network.
message PadToMultipleResizer {
// The multiple to which the spatial dimensions will be padded to.
// A multiple of 1 (the default) leaves the image unchanged.
optional int32 multiple = 1 [default = 1];
// Whether to also resize the image channels from 3 to 1 (RGB to grayscale).
// NOTE(review): field number skips 2-3, presumably to keep
// convert_to_grayscale aligned with the other resizer messages in this
// file — confirm before reusing numbers 2 or 3 here.
optional bool convert_to_grayscale = 4 [default = false];
}
...@@ -2,6 +2,8 @@ syntax = "proto2"; ...@@ -2,6 +2,8 @@ syntax = "proto2";
package object_detection.protos; package object_detection.protos;
import "object_detection/protos/image_resizer.proto";
// Configuration proto for defining input readers that generate Object Detection // Configuration proto for defining input readers that generate Object Detection
// Examples from input sources. Input readers are expected to generate a // Examples from input sources. Input readers are expected to generate a
// dictionary of tensors, with the following fields populated: // dictionary of tensors, with the following fields populated:
...@@ -22,19 +24,19 @@ enum InstanceMaskType { ...@@ -22,19 +24,19 @@ enum InstanceMaskType {
PNG_MASKS = 2; // Encoded PNG masks. PNG_MASKS = 2; // Encoded PNG masks.
} }
// Next id: 25 // Next id: 29
message InputReader { message InputReader {
// Name of input reader. Typically used to describe the dataset that is read // Name of input reader. Typically used to describe the dataset that is read
// by this input reader. // by this input reader.
optional string name = 23 [default=""]; optional string name = 23 [default = ""];
// Path to StringIntLabelMap pbtxt file specifying the mapping from string // Path to StringIntLabelMap pbtxt file specifying the mapping from string
// labels to integer ids. // labels to integer ids.
optional string label_map_path = 1 [default=""]; optional string label_map_path = 1 [default = ""];
// Whether data should be processed in the order they are read in, or // Whether data should be processed in the order they are read in, or
// shuffled randomly. // shuffled randomly.
optional bool shuffle = 2 [default=true]; optional bool shuffle = 2 [default = true];
// Buffer size to be used when shuffling. // Buffer size to be used when shuffling.
optional uint32 shuffle_buffer_size = 11 [default = 2048]; optional uint32 shuffle_buffer_size = 11 [default = 2048];
...@@ -44,43 +46,43 @@ message InputReader { ...@@ -44,43 +46,43 @@ message InputReader {
// The number of times a data source is read. If set to zero, the data source // The number of times a data source is read. If set to zero, the data source
// will be reused indefinitely. // will be reused indefinitely.
optional uint32 num_epochs = 5 [default=0]; optional uint32 num_epochs = 5 [default = 0];
// Integer representing how often an example should be sampled. To feed // Integer representing how often an example should be sampled. To feed
// only 1/3 of your data into your model, set `sample_1_of_n_examples` to 3. // only 1/3 of your data into your model, set `sample_1_of_n_examples` to 3.
// This is particularly useful for evaluation, where you might not prefer to // This is particularly useful for evaluation, where you might not prefer to
// evaluate all of your samples. // evaluate all of your samples.
optional uint32 sample_1_of_n_examples = 22 [default=1]; optional uint32 sample_1_of_n_examples = 22 [default = 1];
// Number of file shards to read in parallel. // Number of file shards to read in parallel.
optional uint32 num_readers = 6 [default=64]; optional uint32 num_readers = 6 [default = 64];
// Number of batches to produce in parallel. If this is run on a 2x2 TPU set // Number of batches to produce in parallel. If this is run on a 2x2 TPU set
// this to 8. // this to 8.
optional uint32 num_parallel_batches = 19 [default=8]; optional uint32 num_parallel_batches = 19 [default = 8];
// Number of batches to prefetch. Prefetch decouples input pipeline and // Number of batches to prefetch. Prefetch decouples input pipeline and
// model so they can be pipelined resulting in higher throughput. Set this // model so they can be pipelined resulting in higher throughput. Set this
// to a small constant and increment linearly until the improvements become // to a small constant and increment linearly until the improvements become
// marginal or you exceed your cpu memory budget. Setting this to -1, // marginal or you exceed your cpu memory budget. Setting this to -1,
// automatically tunes this value for you. // automatically tunes this value for you.
optional int32 num_prefetch_batches = 20 [default=2]; optional int32 num_prefetch_batches = 20 [default = 2];
// Maximum number of records to keep in reader queue. // Maximum number of records to keep in reader queue.
optional uint32 queue_capacity = 3 [default=2000, deprecated=true]; optional uint32 queue_capacity = 3 [default = 2000, deprecated = true];
// Minimum number of records to keep in reader queue. A large value is needed // Minimum number of records to keep in reader queue. A large value is needed
// to generate a good random shuffle. // to generate a good random shuffle.
optional uint32 min_after_dequeue = 4 [default=1000, deprecated=true]; optional uint32 min_after_dequeue = 4 [default = 1000, deprecated = true];
// Number of records to read from each reader at once. // Number of records to read from each reader at once.
optional uint32 read_block_length = 15 [default=32]; optional uint32 read_block_length = 15 [default = 32];
// Number of decoded records to prefetch before batching. // Number of decoded records to prefetch before batching.
optional uint32 prefetch_size = 13 [default = 512, deprecated=true]; optional uint32 prefetch_size = 13 [default = 512, deprecated = true];
// Number of parallel decode ops to apply. // Number of parallel decode ops to apply.
optional uint32 num_parallel_map_calls = 14 [default = 64, deprecated=true]; optional uint32 num_parallel_map_calls = 14 [default = 64, deprecated = true];
// If positive, TfExampleDecoder will try to decode rasters of additional // If positive, TfExampleDecoder will try to decode rasters of additional
// channels from tf.Examples. // channels from tf.Examples.
...@@ -89,14 +91,21 @@ message InputReader { ...@@ -89,14 +91,21 @@ message InputReader {
// Number of groundtruth keypoints per object. // Number of groundtruth keypoints per object.
optional uint32 num_keypoints = 16 [default = 0]; optional uint32 num_keypoints = 16 [default = 0];
// Keypoint weights. These weights can be used to apply per-keypoint loss
// multipliers. The size of this field should agree with `num_keypoints`.
repeated float keypoint_type_weight = 26;
// Maximum number of boxes to pad to during training / evaluation. // Maximum number of boxes to pad to during training / evaluation.
// Set this to at least the maximum amount of boxes in the input data, // Set this to at least the maximum amount of boxes in the input data,
// otherwise some groundtruth boxes may be clipped. // otherwise some groundtruth boxes may be clipped.
optional int32 max_number_of_boxes = 21 [default=100]; optional int32 max_number_of_boxes = 21 [default = 100];
// Whether to load multiclass scores from the dataset. // Whether to load multiclass scores from the dataset.
optional bool load_multiclass_scores = 24 [default = false]; optional bool load_multiclass_scores = 24 [default = false];
// Whether to load context features from the dataset.
optional bool load_context_features = 25 [default = false];
// Whether to load groundtruth instance masks. // Whether to load groundtruth instance masks.
optional bool load_instance_masks = 7 [default = false]; optional bool load_instance_masks = 7 [default = false];
...@@ -107,10 +116,15 @@ message InputReader { ...@@ -107,10 +116,15 @@ message InputReader {
// when mapping class text strings to integers. // when mapping class text strings to integers.
optional bool use_display_name = 17 [default = false]; optional bool use_display_name = 17 [default = false];
// Whether to include the source_id string in the input features.
optional bool include_source_id = 27 [default = false];
oneof input_reader { oneof input_reader {
TFRecordInputReader tf_record_input_reader = 8; TFRecordInputReader tf_record_input_reader = 8;
ExternalInputReader external_input_reader = 9; ExternalInputReader external_input_reader = 9;
} }
} }
// An input reader that reads TF Example protos from local TFRecord files. // An input reader that reads TF Example protos from local TFRecord files.
......
...@@ -69,6 +69,7 @@ message LocalizationLoss { ...@@ -69,6 +69,7 @@ message LocalizationLoss {
WeightedL2LocalizationLoss weighted_l2 = 1; WeightedL2LocalizationLoss weighted_l2 = 1;
WeightedSmoothL1LocalizationLoss weighted_smooth_l1 = 2; WeightedSmoothL1LocalizationLoss weighted_smooth_l1 = 2;
WeightedIOULocalizationLoss weighted_iou = 3; WeightedIOULocalizationLoss weighted_iou = 3;
L1LocalizationLoss l1_localization_loss = 4;
} }
} }
...@@ -96,6 +97,10 @@ message WeightedSmoothL1LocalizationLoss { ...@@ -96,6 +97,10 @@ message WeightedSmoothL1LocalizationLoss {
message WeightedIOULocalizationLoss { message WeightedIOULocalizationLoss {
} }
// L1 Localization Loss.
message L1LocalizationLoss {
// Intentionally empty: this loss takes no configuration parameters.
}
// Configuration for class prediction loss function. // Configuration for class prediction loss function.
message ClassificationLoss { message ClassificationLoss {
oneof classification_loss { oneof classification_loss {
...@@ -104,6 +109,7 @@ message ClassificationLoss { ...@@ -104,6 +109,7 @@ message ClassificationLoss {
WeightedSoftmaxClassificationAgainstLogitsLoss weighted_logits_softmax = 5; WeightedSoftmaxClassificationAgainstLogitsLoss weighted_logits_softmax = 5;
BootstrappedSigmoidClassificationLoss bootstrapped_sigmoid = 3; BootstrappedSigmoidClassificationLoss bootstrapped_sigmoid = 3;
SigmoidFocalClassificationLoss weighted_sigmoid_focal = 4; SigmoidFocalClassificationLoss weighted_sigmoid_focal = 4;
PenaltyReducedLogisticFocalLoss penalty_reduced_logistic_focal_loss = 6;
} }
} }
...@@ -162,6 +168,17 @@ message BootstrappedSigmoidClassificationLoss { ...@@ -162,6 +168,17 @@ message BootstrappedSigmoidClassificationLoss {
optional bool anchorwise_output = 3 [default=false]; optional bool anchorwise_output = 3 [default=false];
} }
// Pixelwise logistic focal loss with pixels near the target having a reduced
// penalty.
message PenaltyReducedLogisticFocalLoss {
// Focussing parameter of the focal loss.
// NOTE(review): no default is declared, so an unset value reads as 0.0,
// which would disable focusing entirely — confirm callers always set it.
optional float alpha = 1;
// Penalty reduction factor.
// NOTE(review): likewise defaults to 0.0 when unset — verify against the
// loss implementation's expected range.
optional float beta = 2;
}
// Configuration for hard example miner. // Configuration for hard example miner.
message HardExampleMiner { message HardExampleMiner {
// Maximum number of hard examples to be selected per image (prior to // Maximum number of hard examples to be selected per image (prior to
......
...@@ -13,9 +13,10 @@ message DetectionModel { ...@@ -13,9 +13,10 @@ message DetectionModel {
// This can be used to define experimental models. To define your own // This can be used to define experimental models. To define your own
// experimental meta architecture, populate a key in the // experimental meta architecture, populate a key in the
// model_builder.EXPERIMENTAL_META_ARCHITECURE_BUILDER_MAP dict and set its // model_builder.EXPERIMENTAL_META_ARCH_BUILDER_MAP dict and set its
// value to a function that builds your model. // value to a function that builds your model.
ExperimentalModel experimental_model = 3; ExperimentalModel experimental_model = 3;
} }
} }
......
...@@ -36,6 +36,10 @@ message MomentumOptimizer { ...@@ -36,6 +36,10 @@ message MomentumOptimizer {
// See: https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer // See: https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer
message AdamOptimizer { message AdamOptimizer {
optional LearningRate learning_rate = 1; optional LearningRate learning_rate = 1;
// Default value for epsilon (1e-8) matches default value in
// tf.compat.v1.train.AdamOptimizer. This differs from tf2 default of 1e-7
// in tf.keras.optimizers.Adam .
optional float epsilon = 2 [default = 1e-8];
} }
......
...@@ -10,7 +10,7 @@ import "object_detection/protos/train.proto"; ...@@ -10,7 +10,7 @@ import "object_detection/protos/train.proto";
// Convenience message for configuring a training and eval pipeline. Allows all // Convenience message for configuring a training and eval pipeline. Allows all
// of the pipeline parameters to be configured from one file. // of the pipeline parameters to be configured from one file.
// Next id: 7 // Next id: 8
message TrainEvalPipelineConfig { message TrainEvalPipelineConfig {
optional DetectionModel model = 1; optional DetectionModel model = 1;
optional TrainConfig train_config = 2; optional TrainConfig train_config = 2;
......
...@@ -45,6 +45,10 @@ message BatchNonMaxSuppression { ...@@ -45,6 +45,10 @@ message BatchNonMaxSuppression {
// Whether to use tf.image.combined_non_max_suppression. // Whether to use tf.image.combined_non_max_suppression.
optional bool use_combined_nms = 11 [default = false]; optional bool use_combined_nms = 11 [default = false];
// Whether to change coordinate frame of the boxlist to be relative to
// window's frame.
optional bool change_coordinate_frame = 12 [default = true];
} }
// Configuration proto for post-processing predicted boxes and // Configuration proto for post-processing predicted boxes and
......
...@@ -4,6 +4,7 @@ package object_detection.protos; ...@@ -4,6 +4,7 @@ package object_detection.protos;
// Message for defining a preprocessing operation on input data. // Message for defining a preprocessing operation on input data.
// See: //third_party/tensorflow_models/object_detection/core/preprocessor.py // See: //third_party/tensorflow_models/object_detection/core/preprocessor.py
// Next ID: 38
message PreprocessingStep { message PreprocessingStep {
oneof preprocessing_step { oneof preprocessing_step {
NormalizeImage normalize_image = 1; NormalizeImage normalize_image = 1;
...@@ -42,6 +43,7 @@ message PreprocessingStep { ...@@ -42,6 +43,7 @@ message PreprocessingStep {
RandomJpegQuality random_jpeg_quality = 34; RandomJpegQuality random_jpeg_quality = 34;
RandomDownscaleToTargetPixels random_downscale_to_target_pixels = 35; RandomDownscaleToTargetPixels random_downscale_to_target_pixels = 35;
RandomPatchGaussian random_patch_gaussian = 36; RandomPatchGaussian random_patch_gaussian = 36;
RandomSquareCropByScale random_square_crop_by_scale = 37;
} }
} }
...@@ -533,3 +535,26 @@ message RandomPatchGaussian { ...@@ -533,3 +535,26 @@ message RandomPatchGaussian {
optional float min_gaussian_stddev = 4 [default = 0.0]; optional float min_gaussian_stddev = 4 [default = 0.0];
optional float max_gaussian_stddev = 5 [default = 1.0]; optional float max_gaussian_stddev = 5 [default = 1.0];
} }
// Extract a square sized crop from an image whose side length is sampled by
// randomly scaling the maximum spatial dimension of the image. If part of the
// crop falls outside the image, it is filled with zeros.
// The augmentation is borrowed from [1]
// [1]: https://arxiv.org/abs/1904.07850
message RandomSquareCropByScale {
// The maximum size of the border. The border defines distance in pixels to
// the image boundaries that will not be considered as a center of a crop.
// To make sure that the border does not go over the center of the image,
// we chose the border value by computing the minimum k, such that
// (max_border / (2**k)) < image_dimension/2
optional int32 max_border = 1 [default = 128];
// The minimum and maximum values of scale.
optional float scale_min = 2 [default=0.6];
optional float scale_max = 3 [default=1.3];
// The number of discrete scale values to randomly sample between
// [scale_min, scale_max].
optional int32 num_scales = 4 [default=8];
}
...@@ -145,6 +145,7 @@ message Ssd { ...@@ -145,6 +145,7 @@ message Ssd {
optional MaskHead mask_head_config = 25; optional MaskHead mask_head_config = 25;
} }
// Next id: 18.
message SsdFeatureExtractor { message SsdFeatureExtractor {
reserved 6; reserved 6;
......
...@@ -17,6 +17,22 @@ message StringIntLabelMapItem { ...@@ -17,6 +17,22 @@ message StringIntLabelMapItem {
// Human readable string label. // Human readable string label.
optional string display_name = 3; optional string display_name = 3;
// Name of class specific keypoints for each class object and their respective
// keypoint IDs.
message KeypointMap {
// Id for the keypoint. Id must be unique within a given class, however, it
// could be shared across classes. For example "nose" keypoint can occur
// in both "face" and "person" classes. Hence they can be mapped to the same
// id.
//
// Note: It is advised to assign ids in range [1, num_unique_keypoints] to
// encode keypoint targets efficiently.
optional int32 id = 1;
// Human-readable label for the keypoint, e.g. "nose" or "left_elbow".
optional string label = 2;
}
repeated KeypointMap keypoints = 4;
}; };
message StringIntLabelMap { message StringIntLabelMap {
......
...@@ -5,8 +5,16 @@ package object_detection.protos; ...@@ -5,8 +5,16 @@ package object_detection.protos;
import "object_detection/protos/optimizer.proto"; import "object_detection/protos/optimizer.proto";
import "object_detection/protos/preprocessor.proto"; import "object_detection/protos/preprocessor.proto";
enum CheckpointVersion {
UNKNOWN = 0;
V1 = 1;
V2 = 2;
}
// Message for configuring DetectionModel training jobs (train.py). // Message for configuring DetectionModel training jobs (train.py).
// Next id: 28 // Next id: 30
message TrainConfig { message TrainConfig {
// Effective batch size to use for training. // Effective batch size to use for training.
// For TPU (or sync SGD jobs), the batch size per core (or GPU) is going to be // For TPU (or sync SGD jobs), the batch size per core (or GPU) is going to be
...@@ -37,6 +45,11 @@ message TrainConfig { ...@@ -37,6 +45,11 @@ message TrainConfig {
// Typically used to load feature extractor variables from trained models. // Typically used to load feature extractor variables from trained models.
optional string fine_tune_checkpoint_type = 22 [default=""]; optional string fine_tune_checkpoint_type = 22 [default=""];
// Either "v1" or "v2". If v1, restores the checkpoint using the tensorflow
// v1 style of restoring checkpoints. If v2, uses the eager mode checkpoint
// restoration API.
optional CheckpointVersion fine_tune_checkpoint_version = 28 [default=V1];
// [Deprecated]: use fine_tune_checkpoint_type instead. // [Deprecated]: use fine_tune_checkpoint_type instead.
// Specifies if the finetune checkpoint is from an object detection model. // Specifies if the finetune checkpoint is from an object detection model.
// If from an object detection model, the model being trained should have // If from an object detection model, the model being trained should have
...@@ -119,4 +132,6 @@ message TrainConfig { ...@@ -119,4 +132,6 @@ message TrainConfig {
// Whether to summarize gradients. // Whether to summarize gradients.
optional bool summarize_gradients = 27 [default=false]; optional bool summarize_gradients = 27 [default=false];
} }
# SSD with MnasFPN feature extractor, shared box predictor
# See Chen et al, https://arxiv.org/abs/1912.01106
# Trained on COCO, initialized from scratch.
#
# 0.92B MulAdds, 2.5M Parameters. Latency is 193ms on Pixel 1.
# Achieves 26.6 mAP on COCO14 minival dataset.
# This config is TPU compatible
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 6
anchor_scale: 3.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 3
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 64
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
share_prediction_tower: true
use_depthwise: true
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_mobilenet_v2_mnasfpn'
fpn {
min_level: 3
max_level: 6
additional_layer_depth: 48
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.97,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
batch_size: 1024
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 32
num_steps: 50000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 4.
total_steps: 50000
warmup_learning_rate: .026666
warmup_steps: 5000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
num_examples: 8000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
shuffle: false
num_readers: 1
}
model {
ssd {
num_classes: 2
box_coder {
keypoint_box_coder {
num_keypoints: 23
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
height_stride: 16
height_stride: 32
height_stride: 64
height_stride: 128
height_stride: 256
height_stride: 512
width_stride: 16
width_stride: 32
width_stride: 64
width_stride: 128
width_stride: 256
width_stride: 512
height_offset: 0
height_offset: 0
height_offset: 0
height_offset: 0
height_offset: 0
height_offset: 0
width_offset: 0
width_offset: 0
width_offset: 0
width_offset: 0
width_offset: 0
width_offset: 0
}
}
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 320
max_dimension: 640
convert_to_grayscale: true
}
}
box_predictor {
convolutional_box_predictor {
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: false
kernel_size: 3
box_code_size: 50
apply_sigmoid_to_scores: false
conv_hyperparams {
activation: RELU_6
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true
scale: true
center: true
decay: 0.9997
epsilon: 0.001
}
}
}
}
feature_extractor {
type: "ssd_mobilenet_v1"
min_depth: 16
depth_multiplier: 0.25
use_explicit_padding: true
conv_hyperparams {
activation: RELU_6
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true
scale: true
center: true
decay: 0.9997
epsilon: 0.001
}
}
}
loss {
classification_loss {
weighted_sigmoid {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
hard_example_miner {
num_hard_examples: 3000
iou_threshold: 0.99
loss_type: CLASSIFICATION
max_negatives_per_positive: 3
min_negatives_per_image: 10
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.5
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint: ""
num_steps: 10000000
batch_size: 32
data_augmentation_options {
random_horizontal_flip {
keypoint_flip_permutation: 1
keypoint_flip_permutation: 0
keypoint_flip_permutation: 2
keypoint_flip_permutation: 3
keypoint_flip_permutation: 5
keypoint_flip_permutation: 4
keypoint_flip_permutation: 6
keypoint_flip_permutation: 8
keypoint_flip_permutation: 7
keypoint_flip_permutation: 10
keypoint_flip_permutation: 9
keypoint_flip_permutation: 12
keypoint_flip_permutation: 11
keypoint_flip_permutation: 14
keypoint_flip_permutation: 13
keypoint_flip_permutation: 16
keypoint_flip_permutation: 15
keypoint_flip_permutation: 18
keypoint_flip_permutation: 17
keypoint_flip_permutation: 20
keypoint_flip_permutation: 19
keypoint_flip_permutation: 22
keypoint_flip_permutation: 21
}
}
data_augmentation_options {
ssd_random_crop_fixed_aspect_ratio {
}
}
optimizer {
rms_prop_optimizer {
learning_rate {
exponential_decay_learning_rate {
initial_learning_rate: 0.0004
decay_steps: 800720
decay_factor: 0.95
}
}
momentum_optimizer_value: 0.9
decay: 0.9
epsilon: 1.0
}
}
}
train_input_reader {
label_map_path: "PATH_TO_BE_CONFIGURED/face_person_with_keypoints_label_map.pbtxt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/faces_train.record-?????-of-00010"
}
num_keypoints: 23
}
eval_config {
num_visualizations: 10
metrics_set: "coco_detection_metrics"
use_moving_averages: true
parameterized_metric {
coco_keypoint_metrics {
class_label: "face"
}
}
parameterized_metric {
coco_keypoint_metrics {
class_label: "PERSON"
}
}
}
eval_input_reader {
label_map_path: "PATH_TO_BE_CONFIGURED/face_person_with_keypoints_label_map.pbtxt"
shuffle: true
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/faces_val.record-?????-of-00010"
}
num_keypoints: 23
}
graph_rewriter {
quantization {
delay: 2000000
activation_bits: 8
weight_bits: 8
}
}
...@@ -24,6 +24,7 @@ from google.protobuf import text_format ...@@ -24,6 +24,7 @@ from google.protobuf import text_format
from tensorflow.python.saved_model import loader from tensorflow.python.saved_model import loader
from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import tag_constants from tensorflow.python.saved_model import tag_constants
from tensorflow.python.tpu import tpu
# pylint: enable=g-direct-tensorflow-import # pylint: enable=g-direct-tensorflow-import
from object_detection.protos import pipeline_pb2 from object_detection.protos import pipeline_pb2
from object_detection.tpu_exporters import faster_rcnn from object_detection.tpu_exporters import faster_rcnn
...@@ -160,7 +161,7 @@ def run_inference(inputs, ...@@ -160,7 +161,7 @@ def run_inference(inputs,
saver = tf.train.Saver() saver = tf.train.Saver()
init_op = tf.global_variables_initializer() init_op = tf.global_variables_initializer()
sess.run(tf.contrib.tpu.initialize_system()) sess.run(tpu.initialize_system())
sess.run(init_op) sess.run(init_op)
if ckpt_path is not None: if ckpt_path is not None:
...@@ -170,7 +171,7 @@ def run_inference(inputs, ...@@ -170,7 +171,7 @@ def run_inference(inputs,
tensor_dict_out = sess.run( tensor_dict_out = sess.run(
result_tensor_dict, feed_dict={placeholder_tensor: [inputs]}) result_tensor_dict, feed_dict={placeholder_tensor: [inputs]})
sess.run(tf.contrib.tpu.shutdown_system()) sess.run(tpu.shutdown_system())
return tensor_dict_out return tensor_dict_out
...@@ -194,7 +195,7 @@ def run_inference_from_saved_model(inputs, ...@@ -194,7 +195,7 @@ def run_inference_from_saved_model(inputs,
meta_graph = loader.load(sess, [tag_constants.SERVING, tag_constants.TPU], meta_graph = loader.load(sess, [tag_constants.SERVING, tag_constants.TPU],
saved_model_dir) saved_model_dir)
sess.run(tf.contrib.tpu.initialize_system()) sess.run(tpu.initialize_system())
key_prediction = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY key_prediction = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
...@@ -210,6 +211,6 @@ def run_inference_from_saved_model(inputs, ...@@ -210,6 +211,6 @@ def run_inference_from_saved_model(inputs,
tensor_dict_out = sess.run( tensor_dict_out = sess.run(
tensor_name_output, feed_dict={tensor_name_input: [inputs]}) tensor_name_output, feed_dict={tensor_name_input: [inputs]})
sess.run(tf.contrib.tpu.shutdown_system()) sess.run(tpu.shutdown_system())
return tensor_dict_out return tensor_dict_out
...@@ -31,6 +31,8 @@ if int(major) < 1 or (int(major == 1) and int(minor) < 14): ...@@ -31,6 +31,8 @@ if int(major) < 1 or (int(major == 1) and int(minor) < 14):
from tensorflow.python.framework import function from tensorflow.python.framework import function
from tensorflow.python.tpu import functional as tpu_functional from tensorflow.python.tpu import functional as tpu_functional
from tensorflow.python.tpu import tpu
from tensorflow.python.tpu.bfloat16 import bfloat16_scope
from tensorflow.python.tpu.ops import tpu_ops from tensorflow.python.tpu.ops import tpu_ops
from object_detection import exporter from object_detection import exporter
from object_detection.builders import model_builder from object_detection.builders import model_builder
...@@ -169,12 +171,12 @@ def build_graph(pipeline_config, ...@@ -169,12 +171,12 @@ def build_graph(pipeline_config,
@function.Defun(capture_resource_var_by_value=False) @function.Defun(capture_resource_var_by_value=False)
def tpu_subgraph_predict(): def tpu_subgraph_predict():
if use_bfloat16: if use_bfloat16:
with tf.contrib.tpu.bfloat16_scope(): with bfloat16_scope():
return tf.contrib.tpu.rewrite(tpu_subgraph_predict_fn, return tpu.rewrite(tpu_subgraph_predict_fn,
[preprocessed_inputs, true_image_shapes]) [preprocessed_inputs, true_image_shapes])
else: else:
return tf.contrib.tpu.rewrite(tpu_subgraph_predict_fn, return tpu.rewrite(tpu_subgraph_predict_fn,
[preprocessed_inputs, true_image_shapes]) [preprocessed_inputs, true_image_shapes])
(rpn_box_encodings, rpn_objectness_predictions_with_background, anchors, (rpn_box_encodings, rpn_objectness_predictions_with_background, anchors,
refined_box_encodings, class_predictions_with_background, num_proposals, refined_box_encodings, class_predictions_with_background, num_proposals,
......
...@@ -30,6 +30,8 @@ if int(major) < 1 or (int(major == 1) and int(minor) < 14): ...@@ -30,6 +30,8 @@ if int(major) < 1 or (int(major == 1) and int(minor) < 14):
from tensorflow.python.framework import function from tensorflow.python.framework import function
from tensorflow.python.tpu import functional as tpu_functional from tensorflow.python.tpu import functional as tpu_functional
from tensorflow.python.tpu import tpu
from tensorflow.python.tpu.bfloat16 import bfloat16_scope
from tensorflow.python.tpu.ops import tpu_ops from tensorflow.python.tpu.ops import tpu_ops
from object_detection import exporter from object_detection import exporter
from object_detection.builders import model_builder from object_detection.builders import model_builder
...@@ -171,7 +173,7 @@ def build_graph(pipeline_config, ...@@ -171,7 +173,7 @@ def build_graph(pipeline_config,
# Dimshuffle: (b, c, h, w) -> (b, h, w, c) # Dimshuffle: (b, c, h, w) -> (b, h, w, c)
preprocessed_inputs = tf.transpose(preprocessed_inputs, perm=[0, 2, 3, 1]) preprocessed_inputs = tf.transpose(preprocessed_inputs, perm=[0, 2, 3, 1])
if use_bfloat16: if use_bfloat16:
with tf.contrib.tpu.bfloat16_scope(): with bfloat16_scope():
prediction_dict = detection_model.predict(preprocessed_inputs, prediction_dict = detection_model.predict(preprocessed_inputs,
true_image_shapes) true_image_shapes)
else: else:
...@@ -188,8 +190,8 @@ def build_graph(pipeline_config, ...@@ -188,8 +190,8 @@ def build_graph(pipeline_config,
@function.Defun(capture_resource_var_by_value=False) @function.Defun(capture_resource_var_by_value=False)
def predict_tpu(): def predict_tpu():
return tf.contrib.tpu.rewrite(predict_tpu_subgraph, return tpu.rewrite(predict_tpu_subgraph,
[preprocessed_inputs, true_image_shapes]) [preprocessed_inputs, true_image_shapes])
prediction_outputs = tpu_functional.TPUPartitionedCall( prediction_outputs = tpu_functional.TPUPartitionedCall(
args=predict_tpu.captured_inputs, args=predict_tpu.captured_inputs,
......
# Lint as: python2, python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved. # Copyright 2019 The TensorFlow Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
...@@ -18,6 +19,7 @@ from __future__ import absolute_import ...@@ -18,6 +19,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from six.moves import range
import tensorflow as tf import tensorflow as tf
from object_detection.tpu_exporters import utils from object_detection.tpu_exporters import utils
......
...@@ -24,6 +24,14 @@ import math ...@@ -24,6 +24,14 @@ import math
import six import six
import tensorflow as tf import tensorflow as tf
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import image as contrib_image
from tensorflow.contrib import training as contrib_training
except ImportError:
# TF 2.0 doesn't ship with contrib.
pass
# pylint: enable=g-import-not-at-top
# This signifies the max integer that the controller RNN could predict for the # This signifies the max integer that the controller RNN could predict for the
# augmentation scheme. # augmentation scheme.
...@@ -315,7 +323,7 @@ def rotate(image, degrees, replace): ...@@ -315,7 +323,7 @@ def rotate(image, degrees, replace):
# In practice, we should randomize the rotation degrees by flipping # In practice, we should randomize the rotation degrees by flipping
# it negatively half the time, but that's done on 'degrees' outside # it negatively half the time, but that's done on 'degrees' outside
# of the function. # of the function.
image = tf.contrib.image.rotate(wrap(image), radians) image = contrib_image.rotate(wrap(image), radians)
return unwrap(image, replace) return unwrap(image, replace)
...@@ -870,13 +878,13 @@ def rotate_with_bboxes(image, bboxes, degrees, replace): ...@@ -870,13 +878,13 @@ def rotate_with_bboxes(image, bboxes, degrees, replace):
def translate_x(image, pixels, replace): def translate_x(image, pixels, replace):
"""Equivalent of PIL Translate in X dimension.""" """Equivalent of PIL Translate in X dimension."""
image = tf.contrib.image.translate(wrap(image), [-pixels, 0]) image = contrib_image.translate(wrap(image), [-pixels, 0])
return unwrap(image, replace) return unwrap(image, replace)
def translate_y(image, pixels, replace): def translate_y(image, pixels, replace):
"""Equivalent of PIL Translate in Y dimension.""" """Equivalent of PIL Translate in Y dimension."""
image = tf.contrib.image.translate(wrap(image), [0, -pixels]) image = contrib_image.translate(wrap(image), [0, -pixels])
return unwrap(image, replace) return unwrap(image, replace)
...@@ -961,7 +969,7 @@ def shear_x(image, level, replace): ...@@ -961,7 +969,7 @@ def shear_x(image, level, replace):
# with a matrix form of: # with a matrix form of:
# [1 level # [1 level
# 0 1]. # 0 1].
image = tf.contrib.image.transform( image = contrib_image.transform(
wrap(image), [1., level, 0., 0., 1., 0., 0., 0.]) wrap(image), [1., level, 0., 0., 1., 0., 0., 0.])
return unwrap(image, replace) return unwrap(image, replace)
...@@ -972,7 +980,7 @@ def shear_y(image, level, replace): ...@@ -972,7 +980,7 @@ def shear_y(image, level, replace):
# with a matrix form of: # with a matrix form of:
# [1 0 # [1 0
# level 1]. # level 1].
image = tf.contrib.image.transform( image = contrib_image.transform(
wrap(image), [1., 0., 0., level, 1., 0., 0., 0.]) wrap(image), [1., 0., 0., level, 1., 0., 0., 0.])
return unwrap(image, replace) return unwrap(image, replace)
...@@ -1628,9 +1636,12 @@ def distort_image_with_autoaugment(image, bboxes, augmentation_name): ...@@ -1628,9 +1636,12 @@ def distort_image_with_autoaugment(image, bboxes, augmentation_name):
policy = available_policies[augmentation_name]() policy = available_policies[augmentation_name]()
# Hparams that will be used for AutoAugment. # Hparams that will be used for AutoAugment.
augmentation_hparams = tf.contrib.training.HParams( augmentation_hparams = contrib_training.HParams(
cutout_max_pad_fraction=0.75, cutout_bbox_replace_with_mean=False, cutout_max_pad_fraction=0.75,
cutout_const=100, translate_const=250, cutout_bbox_const=50, cutout_bbox_replace_with_mean=False,
cutout_const=100,
translate_const=250,
cutout_bbox_const=50,
translate_bbox_const=120) translate_bbox_const=120)
augmented_image, augmented_bbox = ( augmented_image, augmented_bbox = (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment