// post_processing.proto

syntax = "proto2";

package object_detection.protos;

import "object_detection/protos/calibration.proto";

// Configuration proto for the non-max-suppression (NMS) operation on a batch
// of detections.
message BatchNonMaxSuppression {
  // Scalar threshold for score (low scoring boxes are removed).
  optional float score_threshold = 1 [default = 0.0];

  // Scalar threshold for IOU (boxes that have high IOU overlap
  // with previously selected boxes are removed).
  optional float iou_threshold = 2 [default = 0.6];

  // Maximum number of detections to retain per class.
  optional int32 max_detections_per_class = 3 [default = 100];

  // NOTE(review): field number 4 is not used by this message. If it belonged
  // to a removed field, it must not be reused; consider `reserved 4;`.

  // Maximum number of detections to retain across all classes.
  optional int32 max_total_detections = 5 [default = 100];

  // Whether to use the implementation of NMS that guarantees static shapes.
  optional bool use_static_shapes = 6 [default = false];

  // Whether to use class-agnostic NMS.
  // With max_classes_per_detection = k, class-agnostic NMS:
  // 1) keeps the top-k class scores for each detection, and
  // 2) uses only the highest class score of each detection for sorting
  //    during NMS.
  // Compared to regular NMS, the worst-case runtime of this version is
  // O(N^2) instead of O(KN^2), where N is the number of detections and K is
  // the number of classes.
  optional bool use_class_agnostic_nms = 7 [default = false];

  // Number of classes retained per detection in class-agnostic NMS (the "k"
  // described on use_class_agnostic_nms).
  optional int32 max_classes_per_detection = 8 [default = 1];

  // Soft NMS sigma parameter (Bodla et al., https://arxiv.org/abs/1704.04503).
  // NOTE(review): the default of 0.0 presumably leaves soft NMS disabled, so
  // that NMS behaves like hard NMS -- confirm against the consuming code.
  optional float soft_nms_sigma = 9 [default = 0.0];

  // Whether to use the partitioned version of non_max_suppression.
  optional bool use_partitioned_nms = 10 [default = false];

  // Whether to use tf.image.combined_non_max_suppression.
  optional bool use_combined_nms = 11 [default = false];

  // Whether to change the coordinate frame of the boxlist to be relative to
  // the window's frame.
  optional bool change_coordinate_frame = 12 [default = true];

  // Use hard NMS. Note that even if this field is set to false, the behavior
  // of NMS will be equivalent to hard NMS. When set to true, this field forces
  // the tf.image.non_max_suppression function to be called instead of
  // tf.image.non_max_suppression_with_scores, and can be used to export
  // models for older versions of TF.
  optional bool use_hard_nms = 13 [default = false];

  // Use CPU NMS. NMSV3/NMSV4 runs on GPU by default, which may cause OOM
  // issues if the model is large and/or the batch size is large during
  // training. Setting this flag to true moves the NMS op to CPU when OOM
  // happens.
  // NOTE(review): assumes true is the CPU setting, since the default (false)
  // corresponds to the GPU behavior described above -- confirm.
  // The flag is not needed if use_hard_nms = false, as soft NMS currently
  // runs on CPU by default.
  optional bool use_cpu_nms = 14 [default = false];
}

// Configuration proto for post-processing predicted boxes and scores.
message PostProcessing {
  // Non-max-suppression parameters.
  optional BatchNonMaxSuppression batch_non_max_suppression = 1;

  // Enum to specify how to convert the detection scores.
  enum ScoreConverter {
    // Output scores equal input scores.
    IDENTITY = 0;

    // Applies a sigmoid on input scores.
    SIGMOID = 1;

    // Applies a softmax on input scores.
    SOFTMAX = 2;
  }

  // Score converter to use.
  optional ScoreConverter score_converter = 2 [default = IDENTITY];
  // Scale logit (input) value before conversion in post-processing step.
  // Typically used for softmax distillation, though can be used to scale for
  // other reasons.
  // NOTE(review): whether logits are multiplied or divided by this value is
  // not visible in this file -- confirm in the consuming code.
  optional float logit_scale = 3 [default = 1.0];
  // Calibrate score outputs. Calibration is applied after score converter
  // and before non max suppression.
  // CalibrationConfig is presumably defined in the imported
  // object_detection/protos/calibration.proto.
  optional CalibrationConfig calibration_config = 4;
}