Commit b634945d authored by limm's avatar limm
Browse files

support v0.6

parent 5b3792fc
from ..common.optim import SGD as optimizer
from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
from ..common.data.coco import dataloader
from ..common.models.mask_rcnn_fpn import model
from ..common.train import train
from detectron2.config import LazyCall as L
from detectron2.modeling.backbone import RegNet
from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
# Replace default ResNet with RegNetX-4GF from the DDS paper. Config source:
# https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnetx/RegNetX-4.0GF_dds_8gpu.yaml#L4-L9 # noqa
model.backbone.bottom_up = L(RegNet)(
stem_class=SimpleStem,
stem_width=32,
block_class=ResBottleneckBlock,
depth=23,
w_a=38.65,
w_0=96,
w_m=2.43,
group_width=40,
freeze_at=2,
norm="FrozenBN",
out_features=["s1", "s2", "s3", "s4"],
)
model.pixel_std = [57.375, 57.120, 58.395]
optimizer.weight_decay = 5e-5
train.init_checkpoint = (
"https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906383/RegNetX-4.0GF_dds_8gpu.pyth"
)
# RegNets benefit from enabling cudnn benchmark mode
train.cudnn_benchmark = True
from ..common.optim import SGD as optimizer
from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
from ..common.data.coco import dataloader
from ..common.models.mask_rcnn_fpn import model
from ..common.train import train
from detectron2.config import LazyCall as L
from detectron2.modeling.backbone import RegNet
from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
# Replace default ResNet with RegNetY-4GF from the DDS paper. Config source:
# https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml#L4-L10 # noqa
model.backbone.bottom_up = L(RegNet)(
stem_class=SimpleStem,
stem_width=32,
block_class=ResBottleneckBlock,
depth=22,
w_a=31.41,
w_0=96,
w_m=2.24,
group_width=64,
se_ratio=0.25,
freeze_at=2,
norm="FrozenBN",
out_features=["s1", "s2", "s3", "s4"],
)
model.pixel_std = [57.375, 57.120, 58.395]
optimizer.weight_decay = 5e-5
train.init_checkpoint = (
"https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906838/RegNetY-4.0GF_dds_8gpu.pyth"
)
# RegNets benefit from enabling cudnn benchmark mode
train.cudnn_benchmark = True
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
KEYPOINT_ON: True
ROI_HEADS:
NUM_CLASSES: 1
ROI_BOX_HEAD:
SMOOTH_L1_BETA: 0.5 # Keypoint AP degrades (though box AP improves) when using plain L1 loss
RPN:
# Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
# 1000 proposals per-image is found to hurt box AP.
# Therefore we increase it to 1500 per-image.
POST_NMS_TOPK_TRAIN: 1500
DATASETS:
TRAIN: ("keypoints_coco_2017_train",)
TEST: ("keypoints_coco_2017_val",)
_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
SOLVER:
STEPS: (210000, 250000)
MAX_ITER: 270000
from ..common.optim import SGD as optimizer
from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
from ..common.data.coco_keypoint import dataloader
from ..common.models.keypoint_rcnn_fpn import model
from ..common.train import train
model.backbone.bottom_up.freeze_at = 2
train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
SOLVER:
STEPS: (210000, 250000)
MAX_ITER: 270000
_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
PIXEL_STD: [57.375, 57.120, 58.395]
RESNETS:
STRIDE_IN_1X1: False # this is a C2 model
NUM_GROUPS: 32
WIDTH_PER_GROUP: 8
DEPTH: 101
SOLVER:
STEPS: (210000, 250000)
MAX_ITER: 270000
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
META_ARCHITECTURE: "PanopticFPN"
MASK_ON: True
SEM_SEG_HEAD:
LOSS_WEIGHT: 0.5
DATASETS:
TRAIN: ("coco_2017_train_panoptic_separated",)
TEST: ("coco_2017_val_panoptic_separated",)
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: False
_BASE_: "Base-Panoptic-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
SOLVER:
STEPS: (210000, 250000)
MAX_ITER: 270000
from ..common.optim import SGD as optimizer
from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
from ..common.data.coco_panoptic_separated import dataloader
from ..common.models.panoptic_fpn import model
from ..common.train import train
model.backbone.bottom_up.freeze_at = 2
train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
_BASE_: "Base-Panoptic-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
_BASE_: "Base-Panoptic-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
SOLVER:
STEPS: (210000, 250000)
MAX_ITER: 270000
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
# WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
# For better, more stable performance initialize from COCO
WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
MASK_ON: True
ROI_HEADS:
NUM_CLASSES: 8
# This is similar to the setting used in Mask R-CNN paper, Appendix A
# But there are some differences, e.g., we did not initialize the output
# layer using the corresponding classes from COCO
INPUT:
MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
MIN_SIZE_TRAIN_SAMPLING: "choice"
MIN_SIZE_TEST: 1024
MAX_SIZE_TRAIN: 2048
MAX_SIZE_TEST: 2048
DATASETS:
TRAIN: ("cityscapes_fine_instance_seg_train",)
TEST: ("cityscapes_fine_instance_seg_val",)
SOLVER:
BASE_LR: 0.01
STEPS: (18000,)
MAX_ITER: 24000
IMS_PER_BATCH: 8
TEST:
EVAL_PERIOD: 8000
Detectron2 model zoo's experimental settings and a few implementation details are different from Detectron.
The differences in implementation details are shared in
[Compatibility with Other Libraries](../../docs/notes/compatibility.md).
The differences in model zoo's experimental settings include:
* Use scale augmentation during training. This improves AP with lower training cost.
* Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may
affect other AP.
* Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP.
* Use `ROIAlignV2`. This does not significantly affect AP.
In this directory, we provide a few configs that __do not__ have the above changes.
They mimic Detectron's behavior as close as possible,
and provide a fair comparison of accuracy and speed against Detectron.
<!--
./gen_html_table.py --config 'Detectron1-Comparisons/*.yaml' --name "Faster R-CNN" "Keypoint R-CNN" "Mask R-CNN" --fields lr_sched train_speed inference_speed mem box_AP mask_AP keypoint_AP --base-dir ../../../configs/Detectron1-Comparisons
-->
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">mask<br/>AP</th>
<th valign="bottom">kp.<br/>AP</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: faster_rcnn_R_50_FPN_noaug_1x -->
<tr><td align="left"><a href="faster_rcnn_R_50_FPN_noaug_1x.yaml">Faster R-CNN</a></td>
<td align="center">1x</td>
<td align="center">0.219</td>
<td align="center">0.038</td>
<td align="center">3.1</td>
<td align="center">36.9</td>
<td align="center"></td>
<td align="center"></td>
<td align="center">137781054</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x/137781054/model_final_7ab50c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x/137781054/metrics.json">metrics</a></td>
</tr>
<!-- ROW: keypoint_rcnn_R_50_FPN_1x -->
<tr><td align="left"><a href="keypoint_rcnn_R_50_FPN_1x.yaml">Keypoint R-CNN</a></td>
<td align="center">1x</td>
<td align="center">0.313</td>
<td align="center">0.071</td>
<td align="center">5.0</td>
<td align="center">53.1</td>
<td align="center"></td>
<td align="center">64.2</td>
<td align="center">137781195</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x/137781195/model_final_cce136.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x/137781195/metrics.json">metrics</a></td>
</tr>
<!-- ROW: mask_rcnn_R_50_FPN_noaug_1x -->
<tr><td align="left"><a href="mask_rcnn_R_50_FPN_noaug_1x.yaml">Mask R-CNN</a></td>
<td align="center">1x</td>
<td align="center">0.273</td>
<td align="center">0.043</td>
<td align="center">3.4</td>
<td align="center">37.8</td>
<td align="center">34.9</td>
<td align="center"></td>
<td align="center">137781281</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x/137781281/model_final_62ca52.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x/137781281/metrics.json">metrics</a></td>
</tr>
</tbody></table>
## Comparisons:
* Faster R-CNN: Detectron's AP is 36.7, similar to ours.
* Keypoint R-CNN: Detectron's AP is box 53.6, keypoint 64.2. Fixing a Detectron's
[bug](https://github.com/facebookresearch/Detectron/issues/459) lead to a drop in box AP, and can be
compensated back by some parameter tuning.
* Mask R-CNN: Detectron's AP is box 37.7, mask 33.9. We're 1 AP better in mask AP, due to more correct implementation.
See [this article](https://ppwwyyxx.com/blog/2021/Where-are-Pixels/) for details.
For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html).
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: False
RESNETS:
DEPTH: 50
# Detectron1 uses smooth L1 loss with some magic beta values.
# The defaults are changed to L1 loss in Detectron2.
RPN:
SMOOTH_L1_BETA: 0.1111
ROI_BOX_HEAD:
SMOOTH_L1_BETA: 1.0
POOLER_SAMPLING_RATIO: 2
POOLER_TYPE: "ROIAlign"
INPUT:
# no scale augmentation
MIN_SIZE_TRAIN: (800, )
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
KEYPOINT_ON: True
RESNETS:
DEPTH: 50
ROI_HEADS:
NUM_CLASSES: 1
ROI_KEYPOINT_HEAD:
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
POOLER_TYPE: "ROIAlign"
# Detectron1 uses smooth L1 loss with some magic beta values.
# The defaults are changed to L1 loss in Detectron2.
ROI_BOX_HEAD:
SMOOTH_L1_BETA: 1.0
POOLER_SAMPLING_RATIO: 2
POOLER_TYPE: "ROIAlign"
RPN:
SMOOTH_L1_BETA: 0.1111
# Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2
# 1000 proposals per-image is found to hurt box AP.
# Therefore we increase it to 1500 per-image.
POST_NMS_TOPK_TRAIN: 1500
DATASETS:
TRAIN: ("keypoints_coco_2017_train",)
TEST: ("keypoints_coco_2017_val",)
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: True
RESNETS:
DEPTH: 50
# Detectron1 uses smooth L1 loss with some magic beta values.
# The defaults are changed to L1 loss in Detectron2.
RPN:
SMOOTH_L1_BETA: 0.1111
ROI_BOX_HEAD:
SMOOTH_L1_BETA: 1.0
POOLER_SAMPLING_RATIO: 2
POOLER_TYPE: "ROIAlign"
ROI_MASK_HEAD:
POOLER_SAMPLING_RATIO: 2
POOLER_TYPE: "ROIAlign"
INPUT:
# no scale augmentation
MIN_SIZE_TRAIN: (800, )
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
MASK_ON: True
RESNETS:
DEPTH: 101
ROI_HEADS:
NUM_CLASSES: 1230
SCORE_THRESH_TEST: 0.0001
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
DATASETS:
TRAIN: ("lvis_v0.5_train",)
TEST: ("lvis_v0.5_val",)
TEST:
DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
DATALOADER:
SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
REPEAT_THRESHOLD: 0.001
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: True
RESNETS:
DEPTH: 50
ROI_HEADS:
NUM_CLASSES: 1230
SCORE_THRESH_TEST: 0.0001
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
DATASETS:
TRAIN: ("lvis_v0.5_train",)
TEST: ("lvis_v0.5_val",)
TEST:
DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
DATALOADER:
SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
REPEAT_THRESHOLD: 0.001
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment