OpenDAS / detectron2, commit 63e10e00

Commit 63e10e00, authored Apr 09, 2025 by limm

Merge branch 'add_v0.6' into 'v0.6-release'

support v0.6. See merge request !3

Parents: 2ca8e9fd, b634945d

Changes: 696
Showing 20 changed files with 637 additions and 0 deletions (+637, -0)
configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml  (+23, -0)
configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml  (+22, -0)
configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml  (+22, -0)
configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml  (+26, -0)
configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml  (+12, -0)
configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml  (+15, -0)
configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml  (+36, -0)
configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml  (+10, -0)
configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml  (+8, -0)
configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml  (+11, -0)
configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml  (+21, -0)
configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml  (+24, -0)
configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py  (+151, -0)
configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml  (+26, -0)
configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml  (+13, -0)
configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml  (+19, -0)
configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml  (+19, -0)
configs/Misc/semantic_R_50_FPN_1x.yaml  (+11, -0)
configs/Misc/torchvision_imagenet_R_50.py  (+150, -0)
configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml  (+18, -0)
configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml  (new file, mode 100644)

_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
  PIXEL_STD: [57.375, 57.120, 58.395]
  MASK_ON: True
  RESNETS:
    STRIDE_IN_1X1: False  # this is a C2 model
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
    DEPTH: 101
  ROI_HEADS:
    NUM_CLASSES: 1230
    SCORE_THRESH_TEST: 0.0001
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
DATASETS:
  TRAIN: ("lvis_v0.5_train",)
  TEST: ("lvis_v0.5_val",)
TEST:
  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
DATALOADER:
  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
  REPEAT_THRESHOLD: 0.001
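
For context, and not part of this merge request: YAML configs like the one above are consumed through detectron2's yacs-style config API. The snippet below is a minimal sketch, assuming detectron2 is installed, the LVIS v0.5 dataset is registered, and using a placeholder output directory.

from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer

# Start from detectron2's built-in defaults, then overlay the config added above.
cfg = get_cfg()
cfg.merge_from_file("configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml")
cfg.OUTPUT_DIR = "./output/lvis_v05_x101"  # placeholder output directory

# DefaultTrainer builds the model, dataloader (using the RepeatFactorTrainingSampler
# requested in the DATALOADER section above), optimizer, and LR schedule from cfg.
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()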
configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml  (new file, mode 100644)

_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 101
  ROI_HEADS:
    NUM_CLASSES: 1203
    SCORE_THRESH_TEST: 0.0001
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
DATASETS:
  TRAIN: ("lvis_v1_train",)
  TEST: ("lvis_v1_val",)
TEST:
  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
DATALOADER:
  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
  REPEAT_THRESHOLD: 0.001
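
For context, and not part of this merge request: the "~ 28.8 epochs" comment on MAX_ITER is a simple unit conversion. The sketch below spells it out, assuming the Base-RCNN-FPN default batch size of 16 and the rough LVIS v1 training-set size of 100k images that the comment uses.

# Iterations-to-epochs conversion behind the comment above.
max_iter = 180_000          # SOLVER.MAX_ITER
ims_per_batch = 16          # Base-RCNN-FPN default batch size
num_train_images = 100_000  # approximate LVIS v1 train set size used in the comment
epochs = max_iter * ims_per_batch / num_train_images
print(epochs)  # 28.8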
configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml  (new file, mode 100644)

_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
  ROI_HEADS:
    NUM_CLASSES: 1203
    SCORE_THRESH_TEST: 0.0001
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
DATASETS:
  TRAIN: ("lvis_v1_train",)
  TEST: ("lvis_v1_val",)
TEST:
  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
DATALOADER:
  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
  REPEAT_THRESHOLD: 0.001
configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml  (new file, mode 100644)

_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
  PIXEL_STD: [57.375, 57.120, 58.395]
  MASK_ON: True
  RESNETS:
    STRIDE_IN_1X1: False  # this is a C2 model
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
    DEPTH: 101
  ROI_HEADS:
    NUM_CLASSES: 1203
    SCORE_THRESH_TEST: 0.0001
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
DATASETS:
  TRAIN: ("lvis_v1_train",)
  TEST: ("lvis_v1_val",)
SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
TEST:
  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
DATALOADER:
  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
  REPEAT_THRESHOLD: 0.001
configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml  (new file, mode 100644)

_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
  ROI_HEADS:
    NAME: CascadeROIHeads
  ROI_BOX_HEAD:
    CLS_AGNOSTIC_BBOX_REG: True
  RPN:
    POST_NMS_TOPK_TRAIN: 2000
configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml  (new file, mode 100644)

_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
  ROI_HEADS:
    NAME: CascadeROIHeads
  ROI_BOX_HEAD:
    CLS_AGNOSTIC_BBOX_REG: True
  RPN:
    POST_NMS_TOPK_TRAIN: 2000
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000
configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml  (new file, mode 100644)

_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  MASK_ON: True
  WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"
  RESNETS:
    STRIDE_IN_1X1: False  # this is a C2 model
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
    DEPTH: 152
    DEFORM_ON_PER_STAGE: [False, True, True, True]
  ROI_HEADS:
    NAME: "CascadeROIHeads"
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_CONV: 4
    NUM_FC: 1
    NORM: "GN"
    CLS_AGNOSTIC_BBOX_REG: True
  ROI_MASK_HEAD:
    NUM_CONV: 8
    NORM: "GN"
  RPN:
    POST_NMS_TOPK_TRAIN: 2000
SOLVER:
  IMS_PER_BATCH: 128
  STEPS: (35000, 45000)
  MAX_ITER: 50000
  BASE_LR: 0.16
INPUT:
  MIN_SIZE_TRAIN: (640, 864)
  MIN_SIZE_TRAIN_SAMPLING: "range"
  MAX_SIZE_TRAIN: 1440
  CROP:
    ENABLED: True
TEST:
  EVAL_PERIOD: 2500
configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml  (new file, mode 100644)

_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
  ROI_BOX_HEAD:
    CLS_AGNOSTIC_BBOX_REG: True
  ROI_MASK_HEAD:
    CLS_AGNOSTIC_MASK: True
configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml  (new file, mode 100644)

_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
    DEFORM_ON_PER_STAGE: [False, True, True, True]  # on Res3,Res4,Res5
    DEFORM_MODULATED: False
configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml  (new file, mode 100644)

_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
    DEFORM_ON_PER_STAGE: [False, True, True, True]  # on Res3,Res4,Res5
    DEFORM_MODULATED: False
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000
configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml  (new file, mode 100644)

_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
    NORM: "GN"
    STRIDE_IN_1X1: False
  FPN:
    NORM: "GN"
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_CONV: 4
    NUM_FC: 1
    NORM: "GN"
  ROI_MASK_HEAD:
    NORM: "GN"
SOLVER:
  # 3x schedule
  STEPS: (210000, 250000)
  MAX_ITER: 270000
configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml  (new file, mode 100644)

_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
    NORM: "SyncBN"
    STRIDE_IN_1X1: True
  FPN:
    NORM: "SyncBN"
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_CONV: 4
    NUM_FC: 1
    NORM: "SyncBN"
  ROI_MASK_HEAD:
    NORM: "SyncBN"
SOLVER:
  # 3x schedule
  STEPS: (210000, 250000)
  MAX_ITER: 270000
TEST:
  PRECISE_BN:
    ENABLED: True
configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py  (new file, mode 100644)

# An example config to train a mmdetection model using detectron2.

from ..common.data.coco import dataloader
from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
from ..common.optim import SGD as optimizer
from ..common.train import train

from detectron2.modeling.mmdet_wrapper import MMDetDetector
from detectron2.config import LazyCall as L

model = L(MMDetDetector)(
    detector=dict(
        type="MaskRCNN",
        pretrained="torchvision://resnet50",
        backbone=dict(
            type="ResNet",
            depth=50,
            num_stages=4,
            out_indices=(0, 1, 2, 3),
            frozen_stages=1,
            norm_cfg=dict(type="BN", requires_grad=True),
            norm_eval=True,
            style="pytorch",
        ),
        neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5),
        rpn_head=dict(
            type="RPNHead",
            in_channels=256,
            feat_channels=256,
            anchor_generator=dict(
                type="AnchorGenerator",
                scales=[8],
                ratios=[0.5, 1.0, 2.0],
                strides=[4, 8, 16, 32, 64],
            ),
            bbox_coder=dict(
                type="DeltaXYWHBBoxCoder",
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[1.0, 1.0, 1.0, 1.0],
            ),
            loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0),
            loss_bbox=dict(type="L1Loss", loss_weight=1.0),
        ),
        roi_head=dict(
            type="StandardRoIHead",
            bbox_roi_extractor=dict(
                type="SingleRoIExtractor",
                roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0),
                out_channels=256,
                featmap_strides=[4, 8, 16, 32],
            ),
            bbox_head=dict(
                type="Shared2FCBBoxHead",
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type="DeltaXYWHBBoxCoder",
                    target_means=[0.0, 0.0, 0.0, 0.0],
                    target_stds=[0.1, 0.1, 0.2, 0.2],
                ),
                reg_class_agnostic=False,
                loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
                loss_bbox=dict(type="L1Loss", loss_weight=1.0),
            ),
            mask_roi_extractor=dict(
                type="SingleRoIExtractor",
                roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0),
                out_channels=256,
                featmap_strides=[4, 8, 16, 32],
            ),
            mask_head=dict(
                type="FCNMaskHead",
                num_convs=4,
                in_channels=256,
                conv_out_channels=256,
                num_classes=80,
                loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0),
            ),
        ),
        # model training and testing settings
        train_cfg=dict(
            rpn=dict(
                assigner=dict(
                    type="MaxIoUAssigner",
                    pos_iou_thr=0.7,
                    neg_iou_thr=0.3,
                    min_pos_iou=0.3,
                    match_low_quality=True,
                    ignore_iof_thr=-1,
                ),
                sampler=dict(
                    type="RandomSampler",
                    num=256,
                    pos_fraction=0.5,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=False,
                ),
                allowed_border=-1,
                pos_weight=-1,
                debug=False,
            ),
            rpn_proposal=dict(
                nms_pre=2000,
                max_per_img=1000,
                nms=dict(type="nms", iou_threshold=0.7),
                min_bbox_size=0,
            ),
            rcnn=dict(
                assigner=dict(
                    type="MaxIoUAssigner",
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.5,
                    min_pos_iou=0.5,
                    match_low_quality=True,
                    ignore_iof_thr=-1,
                ),
                sampler=dict(
                    type="RandomSampler",
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True,
                ),
                mask_size=28,
                pos_weight=-1,
                debug=False,
            ),
        ),
        test_cfg=dict(
            rpn=dict(
                nms_pre=1000,
                max_per_img=1000,
                nms=dict(type="nms", iou_threshold=0.7),
                min_bbox_size=0,
            ),
            rcnn=dict(
                score_thr=0.05,
                nms=dict(type="nms", iou_threshold=0.5),
                max_per_img=100,
                mask_thr_binary=0.5,
            ),
        ),
    ),
    pixel_mean=[123.675, 116.280, 103.530],
    pixel_std=[58.395, 57.120, 57.375],
)

dataloader.train.mapper.image_format = "RGB"  # torchvision pretrained model
train.init_checkpoint = None  # pretrained model is loaded inside backbone
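
For context, and not part of this merge request: this file is a detectron2 "lazy" config, normally run through tools/lazyconfig_train_net.py. As a rough sketch of manual use, assuming detectron2 and mmdetection are installed and the path below is correct, it can also be loaded and materialized directly.

from detectron2.config import LazyConfig, instantiate

# Load the lazy config; relative imports such as ..common.data.coco are resolved by LazyConfig.
cfg = LazyConfig.load("configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py")

# Materialize the pieces declared above.
model = instantiate(cfg.model)                    # MMDetDetector wrapping mmdetection's MaskRCNN
train_loader = instantiate(cfg.dataloader.train)  # COCO dataloader from ..common.data.coco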
configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml  (new file, mode 100644)

# A large PanopticFPN for demo purposes.
# Use GN on backbone to support semantic seg.
# Use Cascade + Deform Conv to improve localization.
_BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml"
MODEL:
  WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN"
  RESNETS:
    DEPTH: 101
    NORM: "GN"
    DEFORM_ON_PER_STAGE: [False, True, True, True]
    STRIDE_IN_1X1: False
  FPN:
    NORM: "GN"
  ROI_HEADS:
    NAME: CascadeROIHeads
  ROI_BOX_HEAD:
    CLS_AGNOSTIC_BBOX_REG: True
  ROI_MASK_HEAD:
    NORM: "GN"
  RPN:
    POST_NMS_TOPK_TRAIN: 2000
SOLVER:
  STEPS: (105000, 125000)
  MAX_ITER: 135000
  IMS_PER_BATCH: 32
  BASE_LR: 0.04
configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml  (new file, mode 100644)

_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
MODEL:
  # Train from random initialization.
  WEIGHTS: ""
  # It makes sense to divide by STD when training from scratch
  # But it seems to make no difference on the results and C2's models didn't do this.
  # So we keep things consistent with C2.
  # PIXEL_STD: [57.375, 57.12, 58.395]
  MASK_ON: True
  BACKBONE:
    FREEZE_AT: 0
# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
# to learn what you need for training from scratch.
configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml  (new file, mode 100644)

_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
MODEL:
  PIXEL_STD: [57.375, 57.12, 58.395]
  WEIGHTS: ""
  MASK_ON: True
  RESNETS:
    STRIDE_IN_1X1: False
  BACKBONE:
    FREEZE_AT: 0
SOLVER:
  # 9x schedule
  IMS_PER_BATCH: 64  # 4x the standard
  STEPS: (187500, 197500)  # last 60/4==15k and last 20/4==5k
  MAX_ITER: 202500  # 90k * 9 / 4
  BASE_LR: 0.08
TEST:
  EVAL_PERIOD: 2500
# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
# to learn what you need for training from scratch.
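
For context, and not part of this merge request: the schedule comments above describe rescaling the standard 1x recipe. A hedged reading, assuming the usual Base-RCNN-FPN defaults of 90k iterations, IMS_PER_BATCH 16, and BASE_LR 0.02:

# 9x schedule rescaled for a 4x larger batch, per the comments above.
base_iters_1x = 90_000                         # standard 1x schedule length
batch_scale = 64 // 16                         # "4x the standard" batch size
max_iter = 9 * base_iters_1x // batch_scale    # 202500, matches MAX_ITER
base_lr = 0.02 * batch_scale                   # 0.08, matches BASE_LR (linear scaling rule)
steps = (max_iter - 60_000 // batch_scale,     # LR drops in the last 15k iterations
         max_iter - 20_000 // batch_scale)     # and again in the last 5k
print(max_iter, base_lr, steps)  # 202500 0.08 (187500, 197500)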
configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml  (new file, mode 100644)

_BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml"
MODEL:
  PIXEL_STD: [57.375, 57.12, 58.395]
  WEIGHTS: ""
  MASK_ON: True
  RESNETS:
    STRIDE_IN_1X1: False
  BACKBONE:
    FREEZE_AT: 0
SOLVER:
  # 9x schedule
  IMS_PER_BATCH: 64  # 4x the standard
  STEPS: (187500, 197500)  # last 60/4==15k and last 20/4==5k
  MAX_ITER: 202500  # 90k * 9 / 4
  BASE_LR: 0.08
TEST:
  EVAL_PERIOD: 2500
# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
# to learn what you need for training from scratch.
configs/Misc/semantic_R_50_FPN_1x.yaml  (new file, mode 100644)

_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  META_ARCHITECTURE: "SemanticSegmentor"
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  RESNETS:
    DEPTH: 50
DATASETS:
  TRAIN: ("coco_2017_train_panoptic_stuffonly",)
  TEST: ("coco_2017_val_panoptic_stuffonly",)
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
configs/Misc/torchvision_imagenet_R_50.py  (new file, mode 100644)

"""
An example config file to train a ImageNet classifier with detectron2.
Model and dataloader both come from torchvision.
This shows how to use detectron2 as a general engine for any new models and tasks.

To run, use the following command:

python tools/lazyconfig_train_net.py --config-file configs/Misc/torchvision_imagenet_R_50.py \
    --num-gpus 8 dataloader.train.dataset.root=/path/to/imagenet/
"""

import torch
from torch import nn
from torch.nn import functional as F
from omegaconf import OmegaConf
import torchvision
from torchvision.transforms import transforms as T
from torchvision.models.resnet import ResNet, Bottleneck
from fvcore.common.param_scheduler import MultiStepParamScheduler

from detectron2.solver import WarmupParamScheduler
from detectron2.solver.build import get_default_optimizer_params
from detectron2.config import LazyCall as L
from detectron2.model_zoo import get_config
from detectron2.data.samplers import TrainingSampler, InferenceSampler
from detectron2.evaluation import DatasetEvaluator
from detectron2.utils import comm


"""
Note: Here we put reusable code (models, evaluation, data) together with configs just as a
proof-of-concept, to easily demonstrate what's needed to train a ImageNet classifier in detectron2.
Writing code in configs offers extreme flexibility but is often not a good engineering practice.
In practice, you might want to put code in your project and import them instead.
"""


def build_data_loader(dataset, batch_size, num_workers, training=True):
    return torch.utils.data.DataLoader(
        dataset,
        sampler=(TrainingSampler if training else InferenceSampler)(len(dataset)),
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=True,
    )


class ClassificationNet(nn.Module):
    def __init__(self, model: nn.Module):
        super().__init__()
        self.model = model

    @property
    def device(self):
        return list(self.model.parameters())[0].device

    def forward(self, inputs):
        image, label = inputs
        pred = self.model(image.to(self.device))
        if self.training:
            label = label.to(self.device)
            return F.cross_entropy(pred, label)
        else:
            return pred


class ClassificationAcc(DatasetEvaluator):
    def reset(self):
        self.corr = self.total = 0

    def process(self, inputs, outputs):
        image, label = inputs
        self.corr += (outputs.argmax(dim=1).cpu() == label.cpu()).sum().item()
        self.total += len(label)

    def evaluate(self):
        all_corr_total = comm.all_gather([self.corr, self.total])
        corr = sum(x[0] for x in all_corr_total)
        total = sum(x[1] for x in all_corr_total)
        return {"accuracy": corr / total}


# --- End of code that could be in a project and be imported


dataloader = OmegaConf.create()
dataloader.train = L(build_data_loader)(
    dataset=L(torchvision.datasets.ImageNet)(
        root="/path/to/imagenet",
        split="train",
        transform=L(T.Compose)(
            transforms=[
                L(T.RandomResizedCrop)(size=224),
                L(T.RandomHorizontalFlip)(),
                T.ToTensor(),
                L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ]
        ),
    ),
    batch_size=256 // 8,
    num_workers=4,
    training=True,
)

dataloader.test = L(build_data_loader)(
    dataset=L(torchvision.datasets.ImageNet)(
        root="${...train.dataset.root}",
        split="val",
        transform=L(T.Compose)(
            transforms=[
                L(T.Resize)(size=256),
                L(T.CenterCrop)(size=224),
                T.ToTensor(),
                L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ]
        ),
    ),
    batch_size=256 // 8,
    num_workers=4,
    training=False,
)

dataloader.evaluator = L(ClassificationAcc)()

model = L(ClassificationNet)(
    model=(ResNet)(block=Bottleneck, layers=[3, 4, 6, 3], zero_init_residual=True)
)

optimizer = L(torch.optim.SGD)(
    params=L(get_default_optimizer_params)(),
    lr=0.1,
    momentum=0.9,
    weight_decay=1e-4,
)

lr_multiplier = L(WarmupParamScheduler)(
    scheduler=L(MultiStepParamScheduler)(
        values=[1.0, 0.1, 0.01, 0.001],
        milestones=[30, 60, 90, 100],
    ),
    warmup_length=1 / 100,
    warmup_factor=0.1,
)

train = get_config("common/train.py").train
train.init_checkpoint = None
train.max_iter = 100 * 1281167 // 256
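
For context, and not part of this merge request: beyond the training command in the docstring, the ClassificationAcc evaluator defined above can be driven by detectron2's generic evaluation loop. A rough sketch, assuming ImageNet is available at the configured root and the config path below is correct:

from detectron2.config import LazyConfig, instantiate
from detectron2.evaluation import inference_on_dataset

cfg = LazyConfig.load("configs/Misc/torchvision_imagenet_R_50.py")
cfg.dataloader.train.dataset.root = "/path/to/imagenet"  # the test root interpolates from this

model = instantiate(cfg.model).eval()  # torchvision ResNet-50 wrapped in ClassificationNet
results = inference_on_dataset(
    model,
    instantiate(cfg.dataloader.test),
    instantiate(cfg.dataloader.evaluator),  # ClassificationAcc
)
print(results)  # {"accuracy": ...}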
configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml  (new file, mode 100644)

_BASE_: "../Base-RCNN-C4.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 50
  ROI_HEADS:
    NUM_CLASSES: 20
INPUT:
  MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
  MIN_SIZE_TEST: 800
DATASETS:
  TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
  TEST: ('voc_2007_test',)
SOLVER:
  STEPS: (12000, 16000)
  MAX_ITER: 18000  # 17.4 epochs
  WARMUP_ITERS: 100