push v0.1.3 version commit bd2ea47

c732df65 · limm · 5b3792fc · c732df65 · c732df65 · c732df65
Commit c732df65 authored Jan 18, 2024 by limm
20 changed files
--- a/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml
+++ b/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  META_ARCHITECTURE: "PanopticFPN"
+  MASK_ON: True
+  SEM_SEG_HEAD:
+    LOSS_WEIGHT: 0.5
+DATASETS:
+  TRAIN: ("coco_2017_train_panoptic_separated",)
+  TEST: ("coco_2017_val_panoptic_separated",)
--- a/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml
+++ b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml
+_BASE_: "Base-Panoptic-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+  RESNETS:
+    DEPTH: 101
+SOLVER:
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
--- a/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml
+++ b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml
+_BASE_: "Base-Panoptic-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
--- a/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml
+++ b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml
+_BASE_: "Base-Panoptic-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
+SOLVER:
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
--- a/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml
+++ b/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  # For better, more stable performance initialize from COCO
+  WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
+  MASK_ON: True
+  ROI_HEADS:
+    NUM_CLASSES: 8
+# This is similar to the setting used in Mask R-CNN paper, Appendix A
+# But there are some differences, e.g., we did not initialize the output
+# layer using the corresponding classes from COCO
+INPUT:
+  MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
+  MIN_SIZE_TRAIN_SAMPLING: "choice"
+  MIN_SIZE_TEST: 1024
+  MAX_SIZE_TRAIN: 2048
+  MAX_SIZE_TEST: 2048
+DATASETS:
+  TRAIN: ("cityscapes_fine_instance_seg_train",)
+  TEST: ("cityscapes_fine_instance_seg_val",)
+SOLVER:
+  BASE_LR: 0.01
+  STEPS: (18000,)
+  MAX_ITER: 24000
+  IMS_PER_BATCH: 8
+TEST:
+  EVAL_PERIOD: 8000
--- a/configs/Detectron1-Comparisons/README.md
+++ b/configs/Detectron1-Comparisons/README.md
+
+Detectron2 model zoo's experimental settings and a few implementation details are different from Detectron.
+
+The differences in implementation details are shared in
+[Compatibility with Other Libraries](../../docs/notes/compatibility.md).
+
+The differences in model zoo's experimental settings include:
+* Use scale augmentation during training. This improves AP with lower training cost.
+* Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may
+  affect other AP.
+* Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP.
+* Use `ROIAlignV2`. This does not significantly affect AP.
+
+In this directory, we provide a few configs that __do not__ have the above changes.
+They mimic Detectron's behavior as close as possible,
+and provide a fair comparison of accuracy and speed against Detectron.
+
+<!--
+./gen_html_table.py --config 'Detectron1-Comparisons/*.yaml' --name "Faster R-CNN" "Keypoint R-CNN" "Mask R-CNN" --fields lr_sched train_speed inference_speed mem box_AP mask_AP keypoint_AP --base-dir ../../../configs/Detectron1-Comparisons
+-->
+
+
+<table><tbody>
+<!-- START TABLE -->
+<!-- TABLE HEADER -->
+<th valign="bottom">Name</th>
+<th valign="bottom">lr<br/>sched</th>
+<th valign="bottom">train<br/>time<br/>(s/iter)</th>
+<th valign="bottom">inference<br/>time<br/>(s/im)</th>
+<th valign="bottom">train<br/>mem<br/>(GB)</th>
+<th valign="bottom">box<br/>AP</th>
+<th valign="bottom">mask<br/>AP</th>
+<th valign="bottom">kp.<br/>AP</th>
+<th valign="bottom">model id</th>
+<th valign="bottom">download</th>
+<!-- TABLE BODY -->
+<!-- ROW: faster_rcnn_R_50_FPN_noaug_1x -->
+ <tr><td align="left"><a href="faster_rcnn_R_50_FPN_noaug_1x.yaml">Faster R-CNN</a></td>
+<td align="center">1x</td>
+<td align="center">0.219</td>
+<td align="center">0.038</td>
+<td align="center">3.1</td>
+<td align="center">36.9</td>
+<td align="center"></td>
+<td align="center"></td>
+<td align="center">137781054</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x/137781054/model_final_7ab50c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x/137781054/metrics.json">metrics</a></td>
+</tr>
+<!-- ROW: keypoint_rcnn_R_50_FPN_1x -->
+ <tr><td align="left"><a href="keypoint_rcnn_R_50_FPN_1x.yaml">Keypoint R-CNN</a></td>
+<td align="center">1x</td>
+<td align="center">0.313</td>
+<td align="center">0.071</td>
+<td align="center">5.0</td>
+<td align="center">53.1</td>
+<td align="center"></td>
+<td align="center">64.2</td>
+<td align="center">137781195</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x/137781195/model_final_cce136.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x/137781195/metrics.json">metrics</a></td>
+</tr>
+<!-- ROW: mask_rcnn_R_50_FPN_noaug_1x -->
+ <tr><td align="left"><a href="mask_rcnn_R_50_FPN_noaug_1x.yaml">Mask R-CNN</a></td>
+<td align="center">1x</td>
+<td align="center">0.273</td>
+<td align="center">0.043</td>
+<td align="center">3.4</td>
+<td align="center">37.8</td>
+<td align="center">34.9</td>
+<td align="center"></td>
+<td align="center">137781281</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x/137781281/model_final_62ca52.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x/137781281/metrics.json">metrics</a></td>
+</tr>
+</tbody></table>
+
+## Comparisons:
+
+* Faster R-CNN: Detectron's AP is 36.7, similar to ours.
+* Keypoint R-CNN: Detectron's AP is box 53.6, keypoint 64.2. Fixing a Detectron's
+  [bug](https://github.com/facebookresearch/Detectron/issues/459) lead to a drop in box AP, and can be
+	compensated back by some parameter tuning.
+* Mask R-CNN: Detectron's AP is box 37.7, mask 33.9. We're 1 AP better in mask AP, due to more correct implementation.
+
+For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html).
--- a/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml
+++ b/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: False
+  RESNETS:
+    DEPTH: 50
+  # Detectron1 uses smooth L1 loss with some magic beta values.
+  # The defaults are changed to L1 loss in Detectron2.
+  RPN:
+    SMOOTH_L1_BETA: 0.1111
+  ROI_BOX_HEAD:
+    SMOOTH_L1_BETA: 1.0
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+INPUT:
+  # no scale augmentation
+  MIN_SIZE_TRAIN: (800, )
--- a/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml
+++ b/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  KEYPOINT_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    NUM_CLASSES: 1
+  ROI_KEYPOINT_HEAD:
+    POOLER_RESOLUTION: 14
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+  # Detectron1 uses smooth L1 loss with some magic beta values.
+  # The defaults are changed to L1 loss in Detectron2.
+  ROI_BOX_HEAD:
+    SMOOTH_L1_BETA: 1.0
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+  RPN:
+    SMOOTH_L1_BETA: 0.1111
+    # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2
+    # 1000 proposals per-image is found to hurt box AP.
+    # Therefore we increase it to 1500 per-image.
+    POST_NMS_TOPK_TRAIN: 1500
+DATASETS:
+  TRAIN: ("keypoints_coco_2017_train",)
+  TEST: ("keypoints_coco_2017_val",)
--- a/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml
+++ b/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  # Detectron1 uses smooth L1 loss with some magic beta values.
+  # The defaults are changed to L1 loss in Detectron2.
+  RPN:
+    SMOOTH_L1_BETA: 0.1111
+  ROI_BOX_HEAD:
+    SMOOTH_L1_BETA: 1.0
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+  ROI_MASK_HEAD:
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+INPUT:
+  # no scale augmentation
+  MIN_SIZE_TRAIN: (800, )
--- a/configs/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
+++ b/configs/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 101
+  ROI_HEADS:
+    NUM_CLASSES: 1230
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v0.5_train",)
+  TEST: ("lvis_v0.5_val",)
+TEST:
+  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001
--- a/configs/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
+++ b/configs/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    NUM_CLASSES: 1230
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v0.5_train",)
+  TEST: ("lvis_v0.5_val",)
+TEST:
+  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001
--- a/configs/LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml
+++ b/configs/LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+  PIXEL_STD: [57.375, 57.120, 58.395]
+  MASK_ON: True
+  RESNETS:
+    STRIDE_IN_1X1: False  # this is a C2 model
+    NUM_GROUPS: 32
+    WIDTH_PER_GROUP: 8
+    DEPTH: 101
+  ROI_HEADS:
+    NUM_CLASSES: 1230
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v0.5_train",)
+  TEST: ("lvis_v0.5_val",)
+TEST:
+  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001
--- a/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml
+++ b/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    NAME: CascadeROIHeads
+  ROI_BOX_HEAD:
+    CLS_AGNOSTIC_BBOX_REG: True
+  RPN:
+    POST_NMS_TOPK_TRAIN: 2000
--- a/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml
+++ b/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    NAME: CascadeROIHeads
+  ROI_BOX_HEAD:
+    CLS_AGNOSTIC_BBOX_REG: True
+  RPN:
+    POST_NMS_TOPK_TRAIN: 2000
+SOLVER:
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
--- a/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml
+++ b/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  MASK_ON: True
+  WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"
+  RESNETS:
+    STRIDE_IN_1X1: False  # this is a C2 model
+    NUM_GROUPS: 32
+    WIDTH_PER_GROUP: 8
+    DEPTH: 152
+    DEFORM_ON_PER_STAGE: [False, True, True, True]
+  ROI_HEADS:
+    NAME: "CascadeROIHeads"
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_CONV: 4
+    NUM_FC: 1
+    NORM: "GN"
+    CLS_AGNOSTIC_BBOX_REG: True
+  ROI_MASK_HEAD:
+    NUM_CONV: 8
+    NORM: "GN"
+  RPN:
+    POST_NMS_TOPK_TRAIN: 2000
+SOLVER:
+  IMS_PER_BATCH: 128
+  STEPS: (35000, 45000)
+  MAX_ITER: 50000
+  BASE_LR: 0.16
+INPUT:
+  MIN_SIZE_TRAIN: (640, 864)
+  MIN_SIZE_TRAIN_SAMPLING: "range"
+  MAX_SIZE_TRAIN: 1440
+  CROP:
+    ENABLED: True
+TEST:
+  EVAL_PERIOD: 2500
--- a/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml
+++ b/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_BOX_HEAD:
+    CLS_AGNOSTIC_BBOX_REG: True
+  ROI_MASK_HEAD:
+    CLS_AGNOSTIC_MASK: True
--- a/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml
+++ b/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+    DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
+    DEFORM_MODULATED: False
--- a/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml
+++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+    DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
+    DEFORM_MODULATED: False
+SOLVER:
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
--- a/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml
+++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+    NORM: "GN"
+    STRIDE_IN_1X1: False
+  FPN:
+    NORM: "GN"
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_CONV: 4
+    NUM_FC: 1
+    NORM: "GN"
+  ROI_MASK_HEAD:
+    NORM: "GN"
+SOLVER:
+  # 3x schedule
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
--- a/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml
+++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+    NORM: "SyncBN"
+    STRIDE_IN_1X1: True
+  FPN:
+    NORM: "SyncBN"
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_CONV: 4
+    NUM_FC: 1
+    NORM: "SyncBN"
+  ROI_MASK_HEAD:
+    NORM: "SyncBN"
+SOLVER:
+  # 3x schedule
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
+TEST:
+  PRECISE_BN:
+    ENABLED: True