detectron2_config.py

# -*- coding: utf-8 -*-
def add_layoutlmv2_config(cfg):
    _C = cfg
    # -----------------------------------------------------------------------------
    # Config definition
    # -----------------------------------------------------------------------------
    _C.MODEL.MASK_ON = True

    # When using pre-trained models in Detectron1 or any MSRA models,
    # std has been absorbed into its conv1 weights, so the std needs to be set 1.
    # Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std)
    _C.MODEL.PIXEL_STD = [57.375, 57.120, 58.395]

    # ---------------------------------------------------------------------------- #
    # Backbone options
    # ---------------------------------------------------------------------------- #
    _C.MODEL.BACKBONE.NAME = "build_resnet_fpn_backbone"

    # ---------------------------------------------------------------------------- #
    # FPN options
    # ---------------------------------------------------------------------------- #
    # Names of the input feature maps to be used by FPN
    # They must have contiguous power of 2 strides
    # e.g., ["res2", "res3", "res4", "res5"]
    _C.MODEL.FPN.IN_FEATURES = ["res2", "res3", "res4", "res5"]

    # ---------------------------------------------------------------------------- #
    # Anchor generator options
    # ---------------------------------------------------------------------------- #
    # Anchor sizes (i.e. sqrt of area) in absolute pixels w.r.t. the network input.
    # Format: list[list[float]]. SIZES[i] specifies the list of sizes
    # to use for IN_FEATURES[i]; len(SIZES) == len(IN_FEATURES) must be true,
    # or len(SIZES) == 1 is true and size list SIZES[0] is used for all
    # IN_FEATURES.
    _C.MODEL.ANCHOR_GENERATOR.SIZES = [[32], [64], [128], [256], [512]]

    # ---------------------------------------------------------------------------- #
    # RPN options
    # ---------------------------------------------------------------------------- #
    # Names of the input feature maps to be used by RPN
    # e.g., ["p2", "p3", "p4", "p5", "p6"] for FPN
    _C.MODEL.RPN.IN_FEATURES = ["p2", "p3", "p4", "p5", "p6"]
    # Number of top scoring RPN proposals to keep before applying NMS
    # When FPN is used, this is *per FPN level* (not total)
    _C.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 2000
    _C.MODEL.RPN.PRE_NMS_TOPK_TEST = 1000
    # Number of top scoring RPN proposals to keep after applying NMS
    # When FPN is used, this limit is applied per level and then again to the union
    # of proposals from all levels
    # NOTE: When FPN is used, the meaning of this config is different from Detectron1.
    # It means per-batch topk in Detectron1, but per-image topk here.
    # See the "find_top_rpn_proposals" function for details.
    _C.MODEL.RPN.POST_NMS_TOPK_TRAIN = 1000
    _C.MODEL.RPN.POST_NMS_TOPK_TEST = 1000

    # ---------------------------------------------------------------------------- #
    # ROI HEADS options
    # ---------------------------------------------------------------------------- #
    _C.MODEL.ROI_HEADS.NAME = "StandardROIHeads"
    # Number of foreground classes
    _C.MODEL.ROI_HEADS.NUM_CLASSES = 5
    # Names of the input feature maps to be used by ROI heads
    # Currently all heads (box, mask, ...) use the same input feature map list
    # e.g., ["p2", "p3", "p4", "p5"] is commonly used for FPN
    _C.MODEL.ROI_HEADS.IN_FEATURES = ["p2", "p3", "p4", "p5"]

    # ---------------------------------------------------------------------------- #
    # Box Head
    # ---------------------------------------------------------------------------- #
    # C4 don't use head name option
    # Options for non-C4 models: FastRCNNConvFCHead,
    _C.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
    _C.MODEL.ROI_BOX_HEAD.NUM_FC = 2
    _C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14

    # ---------------------------------------------------------------------------- #
    # Mask Head
    # ---------------------------------------------------------------------------- #
    _C.MODEL.ROI_MASK_HEAD.NAME = "MaskRCNNConvUpsampleHead"
    _C.MODEL.ROI_MASK_HEAD.NUM_CONV = 4  # The number of convs in the mask head
    _C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 7

    # ---------------------------------------------------------------------------- #
    # ResNe[X]t options (ResNets = {ResNet, ResNeXt}
    # Note that parts of a resnet may be used for both the backbone and the head
    # These options apply to both
    # ---------------------------------------------------------------------------- #
    _C.MODEL.RESNETS.DEPTH = 101
    _C.MODEL.RESNETS.SIZES = [[32], [64], [128], [256], [512]]
    _C.MODEL.RESNETS.ASPECT_RATIOS = [[0.5, 1.0, 2.0]]
    _C.MODEL.RESNETS.OUT_FEATURES = ["res2", "res3", "res4", "res5"]  # res4 for C4 backbone, res2..5 for FPN backbone

    # Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
    _C.MODEL.RESNETS.NUM_GROUPS = 32

    # Baseline width of each group.
    # Scaling this parameters will scale the width of all bottleneck layers.
    _C.MODEL.RESNETS.WIDTH_PER_GROUP = 8

    # Place the stride 2 conv on the 1x1 filter
    # Use True only for the original MSRA ResNet; use False for C2 and Torch models
    _C.MODEL.RESNETS.STRIDE_IN_1X1 = False