support v0.6

b634945d · limm · 5b3792fc · b634945d · b634945d · b634945d
Commit b634945d authored Apr 09, 2025 by limm
20 changed files
--- a/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco import dataloader
+from ..common.models.mask_rcnn_fpn import model
+from ..common.train import train
+from detectron2.config import LazyCall as L
+from detectron2.modeling.backbone import RegNet
+from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
+# Replace default ResNet with RegNetX-4GF from the DDS paper. Config source:
+# https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnetx/RegNetX-4.0GF_dds_8gpu.yaml#L4-L9  # noqa
+model.backbone.bottom_up = L(RegNet)(
+    stem_class=SimpleStem,
+    stem_width=32,
+    block_class=ResBottleneckBlock,
+    depth=23,
+    w_a=38.65,
+    w_0=96,
+    w_m=2.43,
+    group_width=40,
+    freeze_at=2,
+    norm="FrozenBN",
+    out_features=["s1", "s2", "s3", "s4"],
+)
+model.pixel_std = [57.375, 57.120, 58.395]
+optimizer.weight_decay = 5e-5
+train.init_checkpoint = (
+    "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906383/RegNetX-4.0GF_dds_8gpu.pyth"
+)
+# RegNets benefit from enabling cudnn benchmark mode
+train.cudnn_benchmark = True
--- a/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco import dataloader
+from ..common.models.mask_rcnn_fpn import model
+from ..common.train import train
+from detectron2.config import LazyCall as L
+from detectron2.modeling.backbone import RegNet
+from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
+# Replace default ResNet with RegNetY-4GF from the DDS paper. Config source:
+# https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml#L4-L10  # noqa
+model.backbone.bottom_up = L(RegNet)(
+    stem_class=SimpleStem,
+    stem_width=32,
+    block_class=ResBottleneckBlock,
+    depth=22,
+    w_a=31.41,
+    w_0=96,
+    w_m=2.24,
+    group_width=64,
+    se_ratio=0.25,
+    freeze_at=2,
+    norm="FrozenBN",
+    out_features=["s1", "s2", "s3", "s4"],
+)
+model.pixel_std = [57.375, 57.120, 58.395]
+optimizer.weight_decay = 5e-5
+train.init_checkpoint = (
+    "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906838/RegNetY-4.0GF_dds_8gpu.pyth"
+)
+# RegNets benefit from enabling cudnn benchmark mode
+train.cudnn_benchmark = True
--- a/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml
+++ b/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  KEYPOINT_ON: True
+  ROI_HEADS:
+    NUM_CLASSES: 1
+  ROI_BOX_HEAD:
+    SMOOTH_L1_BETA: 0.5  # Keypoint AP degrades (though box AP improves) when using plain L1 loss
+  RPN:
+    # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
+    # 1000 proposals per-image is found to hurt box AP.
+    # Therefore we increase it to 1500 per-image.
+    POST_NMS_TOPK_TRAIN: 1500
+DATASETS:
+  TRAIN: ("keypoints_coco_2017_train",)
+  TEST: ("keypoints_coco_2017_val",)
--- a/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml
+++ b/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml
+_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+  RESNETS:
+    DEPTH: 101
+SOLVER:
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
--- a/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.py
+++ b/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.py
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco_keypoint import dataloader
+from ..common.models.keypoint_rcnn_fpn import model
+from ..common.train import train
+model.backbone.bottom_up.freeze_at = 2
+train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
--- a/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml
+++ b/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml
+_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
--- a/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml
+++ b/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml
+_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
+SOLVER:
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
--- a/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml
+++ b/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml
+_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+  PIXEL_STD: [57.375, 57.120, 58.395]
+  RESNETS:
+    STRIDE_IN_1X1: False  # this is a C2 model
+    NUM_GROUPS: 32
+    WIDTH_PER_GROUP: 8
+    DEPTH: 101
+SOLVER:
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
--- a/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml
+++ b/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  META_ARCHITECTURE: "PanopticFPN"
+  MASK_ON: True
+  SEM_SEG_HEAD:
+    LOSS_WEIGHT: 0.5
+DATASETS:
+  TRAIN: ("coco_2017_train_panoptic_separated",)
+  TEST: ("coco_2017_val_panoptic_separated",)
+DATALOADER:
+  FILTER_EMPTY_ANNOTATIONS: False
--- a/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml
+++ b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml
+_BASE_: "Base-Panoptic-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+  RESNETS:
+    DEPTH: 101
+SOLVER:
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
--- a/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.py
+++ b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.py
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco_panoptic_separated import dataloader
+from ..common.models.panoptic_fpn import model
+from ..common.train import train
+model.backbone.bottom_up.freeze_at = 2
+train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
--- a/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml
+++ b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml
+_BASE_: "Base-Panoptic-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
--- a/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml
+++ b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml
+_BASE_: "Base-Panoptic-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
+SOLVER:
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
--- a/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml
+++ b/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  # For better, more stable performance initialize from COCO
+  WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
+  MASK_ON: True
+  ROI_HEADS:
+    NUM_CLASSES: 8
+# This is similar to the setting used in Mask R-CNN paper, Appendix A
+# But there are some differences, e.g., we did not initialize the output
+# layer using the corresponding classes from COCO
+INPUT:
+  MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
+  MIN_SIZE_TRAIN_SAMPLING: "choice"
+  MIN_SIZE_TEST: 1024
+  MAX_SIZE_TRAIN: 2048
+  MAX_SIZE_TEST: 2048
+DATASETS:
+  TRAIN: ("cityscapes_fine_instance_seg_train",)
+  TEST: ("cityscapes_fine_instance_seg_val",)
+SOLVER:
+  BASE_LR: 0.01
+  STEPS: (18000,)
+  MAX_ITER: 24000
+  IMS_PER_BATCH: 8
+TEST:
+  EVAL_PERIOD: 8000
--- a/configs/Detectron1-Comparisons/README.md
+++ b/configs/Detectron1-Comparisons/README.md
+Detectron2 model zoo's experimental settings and a few implementation details are different from Detectron.
+The differences in implementation details are shared in
+[Compatibility with Other Libraries](../../docs/notes/compatibility.md).
+The differences in model zoo's experimental settings include:
+* Use scale augmentation during training. This improves AP with lower training cost.
+* Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may
+  affect other AP.
+* Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP.
+* Use `ROIAlignV2`. This does not significantly affect AP.
+In this directory, we provide a few configs that __do not__ have the above changes.
+They mimic Detectron's behavior as close as possible,
+and provide a fair comparison of accuracy and speed against Detectron.
+<!--
+./gen_html_table.py --config 'Detectron1-Comparisons/*.yaml' --name "Faster R-CNN" "Keypoint R-CNN" "Mask R-CNN" --fields lr_sched train_speed inference_speed mem box_AP mask_AP keypoint_AP --base-dir ../../../configs/Detectron1-Comparisons
+-->
+<table><tbody>
+<!-- START TABLE -->
+<!-- TABLE HEADER -->
+<th valign="bottom">Name</th>
+<th valign="bottom">lr<br/>sched</th>
+<th valign="bottom">train<br/>time<br/>(s/iter)</th>
+<th valign="bottom">inference<br/>time<br/>(s/im)</th>
+<th valign="bottom">train<br/>mem<br/>(GB)</th>
+<th valign="bottom">box<br/>AP</th>
+<th valign="bottom">mask<br/>AP</th>
+<th valign="bottom">kp.<br/>AP</th>
+<th valign="bottom">model id</th>
+<th valign="bottom">download</th>
+<!-- TABLE BODY -->
+<!-- ROW: faster_rcnn_R_50_FPN_noaug_1x -->
+ <tr><td align="left"><a href="faster_rcnn_R_50_FPN_noaug_1x.yaml">Faster R-CNN</a></td>
+<td align="center">1x</td>
+<td align="center">0.219</td>
+<td align="center">0.038</td>
+<td align="center">3.1</td>
+<td align="center">36.9</td>
+<td align="center"></td>
+<td align="center"></td>
+<td align="center">137781054</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x/137781054/model_final_7ab50c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x/137781054/metrics.json">metrics</a></td>
+</tr>
+<!-- ROW: keypoint_rcnn_R_50_FPN_1x -->
+ <tr><td align="left"><a href="keypoint_rcnn_R_50_FPN_1x.yaml">Keypoint R-CNN</a></td>
+<td align="center">1x</td>
+<td align="center">0.313</td>
+<td align="center">0.071</td>
+<td align="center">5.0</td>
+<td align="center">53.1</td>
+<td align="center"></td>
+<td align="center">64.2</td>
+<td align="center">137781195</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x/137781195/model_final_cce136.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x/137781195/metrics.json">metrics</a></td>
+</tr>
+<!-- ROW: mask_rcnn_R_50_FPN_noaug_1x -->
+ <tr><td align="left"><a href="mask_rcnn_R_50_FPN_noaug_1x.yaml">Mask R-CNN</a></td>
+<td align="center">1x</td>
+<td align="center">0.273</td>
+<td align="center">0.043</td>
+<td align="center">3.4</td>
+<td align="center">37.8</td>
+<td align="center">34.9</td>
+<td align="center"></td>
+<td align="center">137781281</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x/137781281/model_final_62ca52.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x/137781281/metrics.json">metrics</a></td>
+</tr>
+</tbody></table>
+## Comparisons:
+* Faster R-CNN: Detectron's AP is 36.7, similar to ours.
+* Keypoint R-CNN: Detectron's AP is box 53.6, keypoint 64.2. Fixing a Detectron's
+  [bug](https://github.com/facebookresearch/Detectron/issues/459) lead to a drop in box AP, and can be
+	compensated back by some parameter tuning.
+* Mask R-CNN: Detectron's AP is box 37.7, mask 33.9. We're 1 AP better in mask AP, due to more correct implementation.
+  See [this article](https://ppwwyyxx.com/blog/2021/Where-are-Pixels/) for details.
+For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html).
--- a/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml
+++ b/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: False
+  RESNETS:
+    DEPTH: 50
+  # Detectron1 uses smooth L1 loss with some magic beta values.
+  # The defaults are changed to L1 loss in Detectron2.
+  RPN:
+    SMOOTH_L1_BETA: 0.1111
+  ROI_BOX_HEAD:
+    SMOOTH_L1_BETA: 1.0
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+INPUT:
+  # no scale augmentation
+  MIN_SIZE_TRAIN: (800, )
--- a/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml
+++ b/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  KEYPOINT_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    NUM_CLASSES: 1
+  ROI_KEYPOINT_HEAD:
+    POOLER_RESOLUTION: 14
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+  # Detectron1 uses smooth L1 loss with some magic beta values.
+  # The defaults are changed to L1 loss in Detectron2.
+  ROI_BOX_HEAD:
+    SMOOTH_L1_BETA: 1.0
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+  RPN:
+    SMOOTH_L1_BETA: 0.1111
+    # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2
+    # 1000 proposals per-image is found to hurt box AP.
+    # Therefore we increase it to 1500 per-image.
+    POST_NMS_TOPK_TRAIN: 1500
+DATASETS:
+  TRAIN: ("keypoints_coco_2017_train",)
+  TEST: ("keypoints_coco_2017_val",)
--- a/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml
+++ b/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  # Detectron1 uses smooth L1 loss with some magic beta values.
+  # The defaults are changed to L1 loss in Detectron2.
+  RPN:
+    SMOOTH_L1_BETA: 0.1111
+  ROI_BOX_HEAD:
+    SMOOTH_L1_BETA: 1.0
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+  ROI_MASK_HEAD:
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+INPUT:
+  # no scale augmentation
+  MIN_SIZE_TRAIN: (800, )
--- a/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
+++ b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 101
+  ROI_HEADS:
+    NUM_CLASSES: 1230
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v0.5_train",)
+  TEST: ("lvis_v0.5_val",)
+TEST:
+  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001
--- a/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
+++ b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    NUM_CLASSES: 1230
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v0.5_train",)
+  TEST: ("lvis_v0.5_val",)
+TEST:
+  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001