Commit b634945d authored by limm's avatar limm
Browse files

support v0.6

parent 5b3792fc
# Pascal VOC detection config layered on top of the shared R-CNN FPN base config.
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  # ImageNet-pretrained ResNet-50 weights used for initialization.
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 50
  ROI_HEADS:
    NUM_CLASSES: 20
INPUT:
  # Candidate short-edge sizes at training time; a single size is used at test time.
  MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
  MIN_SIZE_TEST: 800
DATASETS:
  TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
  TEST: ('voc_2007_test',)
SOLVER:
  STEPS: (12000, 16000)
  MAX_ITER: 18000  # 17.4 epochs
  WARMUP_ITERS: 100
This directory provides definitions for a few common models, dataloaders, schedulers,
and optimizers that are often used in training.
The definitions of these objects are provided in the form of lazy instantiation:
their arguments can be edited by users before constructing the objects.
They can be imported, or loaded by the `model_zoo.get_config` API, in users' own configs.
from fvcore.common.param_scheduler import MultiStepParamScheduler
from detectron2.config import LazyCall as L
from detectron2.solver import WarmupParamScheduler
def default_X_scheduler(num_X):
    """
    Returns the config for a default multi-step LR scheduler such as "1x", "3x",
    commonly referred to in papers, where every 1x has the total length of 1440k
    training images (~12 COCO epochs). LR is decayed twice at the end of training
    following the strategy defined in "Rethinking ImageNet Pretraining", Sec 4.

    Args:
        num_X: a positive real number

    Returns:
        DictConfig: configs that define the multiplier for LR during training

    Raises:
        ValueError: if ``num_X`` is not strictly positive.
    """
    # Reject zero/negative (and NaN) up front: they would otherwise surface as a
    # ZeroDivisionError in the warmup computation or as negative milestones.
    if not num_X > 0:
        raise ValueError(f"num_X must be a positive number, got {num_X!r}")

    # total number of iterations assuming 16 batch size, using 1440000/16=90000
    total_steps_16bs = num_X * 90000

    if num_X <= 2:
        scheduler = L(MultiStepParamScheduler)(
            values=[1.0, 0.1, 0.01],
            # note that scheduler is scale-invariant. This is equivalent to
            # milestones=[6, 8, 9]
            milestones=[60000, 80000, 90000],
        )
    else:
        # Longer schedules decay at fixed distances (60k and 20k iterations)
        # before the end of training.
        scheduler = L(MultiStepParamScheduler)(
            values=[1.0, 0.1, 0.01],
            milestones=[total_steps_16bs - 60000, total_steps_16bs - 20000, total_steps_16bs],
        )
    return L(WarmupParamScheduler)(
        scheduler=scheduler,
        # 1000 warmup iterations, expressed as a fraction of the whole schedule.
        warmup_length=1000 / total_steps_16bs,
        warmup_method="linear",
        warmup_factor=0.001,
    )
# Ready-made LR multiplier configs for the 1x, 2x, 3x, 6x and 9x schedules,
# each produced by default_X_scheduler with the matching multiple.
lr_multiplier_1x = default_X_scheduler(1)
lr_multiplier_2x = default_X_scheduler(2)
lr_multiplier_3x = default_X_scheduler(3)
lr_multiplier_6x = default_X_scheduler(6)
lr_multiplier_9x = default_X_scheduler(9)
from omegaconf import OmegaConf
import detectron2.data.transforms as T
from detectron2.config import LazyCall as L
from detectron2.data import (
DatasetMapper,
build_detection_test_loader,
build_detection_train_loader,
get_detection_dataset_dicts,
)
from detectron2.evaluation import COCOEvaluator
# Container config; the train loader is attached to it here.
dataloader = OmegaConf.create()
# Lazily-constructed training loader over the "coco_2017_train" dataset.
dataloader.train = L(build_detection_train_loader)(
dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"),
mapper=L(DatasetMapper)(
is_train=True,
# Train-time augmentations: ResizeShortestEdge with the short edge picked from
# the listed lengths (sample_style="choice", max_size=1333), then a horizontal RandomFlip.
augmentations=[
L(T.ResizeShortestEdge)(
short_edge_length=(640, 672, 704, 736, 768, 800),
sample_style="choice",
max_size=1333,
),
L(T.RandomFlip)(horizontal=True),
],
image_format="BGR",
use_instance_mask=True,
),
total_batch_size=16,
num_workers=4,
)
# Lazily-constructed test loader over "coco_2017_val"; filter_empty=False is passed
# to the dataset lookup.
dataloader.test = L(build_detection_test_loader)(
dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False),
mapper=L(DatasetMapper)(
is_train=False,
# Single fixed resize at test time (short edge 800, max_size 1333); no flipping.
augmentations=[
L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333),
],
# Relative interpolation: reuse whatever image_format the train mapper is set to.
image_format="${...train.mapper.image_format}",
),
num_workers=4,
)
# COCO evaluator; its dataset name is interpolated from the test loader's dataset
# names so the two stay in sync when the test dataset is overridden.
dataloader.evaluator = L(COCOEvaluator)(
dataset_name="${..test.dataset.names}",
)
from detectron2.data.detection_utils import create_keypoint_hflip_indices
from .coco import dataloader
# Keypoint variant of the imported COCO dataloader config: the imported object is
# modified in place.
dataloader.train.dataset.min_keypoints = 1
dataloader.train.dataset.names = "keypoints_coco_2017_train"
dataloader.test.dataset.names = "keypoints_coco_2017_val"
# Turn off instance masks, turn on keypoints, and pass the horizontal-flip index
# mapping computed for the training dataset names set just above.
dataloader.train.mapper.update(
use_instance_mask=False,
use_keypoint=True,
keypoint_hflip_indices=create_keypoint_hflip_indices(dataloader.train.dataset.names),
)
from detectron2.config import LazyCall as L
from detectron2.evaluation import (
COCOEvaluator,
COCOPanopticEvaluator,
DatasetEvaluators,
SemSegEvaluator,
)
from .coco import dataloader
# Panoptic variant of the imported COCO dataloader config: point both loaders at the
# "panoptic_separated" datasets and set filter_empty=False on the training dataset.
dataloader.train.dataset.names = "coco_2017_train_panoptic_separated"
dataloader.train.dataset.filter_empty = False
dataloader.test.dataset.names = "coco_2017_val_panoptic_separated"
# Replace the single evaluator with a list of three evaluators, all bound to the
# test dataset names through interpolation.
# NOTE(review): the interpolation uses one more leading dot than a non-list evaluator
# would, presumably because each entry is nested one level deeper inside the list.
dataloader.evaluator = [
L(COCOEvaluator)(
dataset_name="${...test.dataset.names}",
),
L(SemSegEvaluator)(
dataset_name="${...test.dataset.names}",
),
L(COCOPanopticEvaluator)(
dataset_name="${...test.dataset.names}",
),
]
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads
from .mask_rcnn_fpn import model
# Arguments that don't exist for Cascade R-CNN are removed from the inherited
# roi_heads config. A plain loop is used because the pops are run only for their
# side effect; the underscore-prefixed loop variable keeps it out of the public
# names of this config module.
for _name in ("box_head", "box_predictor", "proposal_matcher"):
    model.roi_heads.pop(_name)
# Retarget the ROI heads to CascadeROIHeads and supply per-stage lists
# (three entries each) for the box heads, box predictors and proposal matchers.
model.roi_heads.update(
_target_=CascadeROIHeads,
# Three identical box heads, one per cascade stage.
box_heads=[
L(FastRCNNConvFCHead)(
input_shape=ShapeSpec(channels=256, height=7, width=7),
conv_dims=[],
fc_dims=[1024, 1024],
)
for k in range(3)
],
# One predictor per stage; the box2box_transform weights grow from stage to stage.
box_predictors=[
L(FastRCNNOutputLayers)(
input_shape=ShapeSpec(channels=1024),
test_score_thresh=0.05,
box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)),
cls_agnostic_bbox_reg=True,
num_classes="${...num_classes}",
)
for (w1, w2) in [(10, 5), (20, 10), (30, 15)]
],
# One matcher per stage with an increasing threshold (0.5, 0.6, 0.7).
proposal_matchers=[
L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False)
for th in [0.5, 0.6, 0.7]
],
)
from detectron2.modeling.meta_arch.fcos import FCOS, FCOSHead
from .retinanet import model
# Turn the imported RetinaNet config into an FCOS config: swap the target class and
# delete the RetinaNet-only arguments (anchor-related entries and input_format).
model._target_ = FCOS
del model.anchor_generator
del model.box2box_transform
del model.anchor_matcher
del model.input_format
# Use P5 instead of C5 to compute P6/P7
# (Sec 2.2 of https://arxiv.org/abs/2006.09214)
model.backbone.top_block.in_feature = "p5"
model.backbone.top_block.in_channels = 256
# New score threshold determined based on sqrt(cls_score * centerness)
model.test_score_thresh = 0.2
model.test_nms_thresh = 0.6
# The head is retargeted as well; num_anchors is dropped and the norm is set to "GN".
model.head._target_ = FCOSHead
del model.head.num_anchors
model.head.norm = "GN"
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead
from .mask_rcnn_fpn import model
# Drop the mask-branch arguments inherited from the Mask R-CNN config. A plain loop
# is used because the pops are run only for their side effect; the underscore-prefixed
# loop variable keeps it out of the public names of this config module.
for _name in ("mask_in_features", "mask_pooler", "mask_head"):
    model.roi_heads.pop(_name)
# Add the keypoint branch to the ROI heads and set num_classes to 1.
model.roi_heads.update(
num_classes=1,
keypoint_in_features=["p2", "p3", "p4", "p5"],
# One pooler scale per entry in keypoint_in_features.
keypoint_pooler=L(ROIPooler)(
output_size=14,
scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
sampling_ratio=0,
pooler_type="ROIAlignV2",
),
# The head's input width/height (14) equals the pooler output_size above.
keypoint_head=L(KRCNNConvDeconvUpsampleHead)(
input_shape=ShapeSpec(channels=256, width=14, height=14),
num_keypoints=17,
conv_dims=[512] * 8,
loss_normalizer="visible",
),
)
# Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
# 1000 proposals per-image is found to hurt box AP.
# Therefore we increase it to 1500 per-image.
# NOTE(review): the tuple is presumably (training, testing) — confirm against RPN.
model.proposal_generator.post_nms_topk = (1500, 1000)
# Keypoint AP degrades (though box AP improves) when using plain L1 loss
model.roi_heads.box_predictor.smooth_l1_beta = 0.5
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.meta_arch import GeneralizedRCNN
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
from detectron2.modeling.backbone import BasicStem, BottleneckBlock, ResNet
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
from detectron2.modeling.roi_heads import (
FastRCNNOutputLayers,
MaskRCNNConvUpsampleHead,
Res5ROIHeads,
)
# Lazy config for a GeneralizedRCNN whose backbone, RPN and ROI heads all work on
# the single "res4" feature; the ROI heads (Res5ROIHeads) own the res5 stage.
model = L(GeneralizedRCNN)(
# Backbone: ResNet (depth 50) that only outputs "res4".
backbone=L(ResNet)(
stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
stages=L(ResNet.make_default_stages)(
depth=50,
stride_in_1x1=True,
norm="FrozenBN",
),
out_features=["res4"],
),
# Region proposal network on "res4".
proposal_generator=L(RPN)(
in_features=["res4"],
# num_anchors=15 matches 5 anchor sizes x 3 aspect ratios configured below.
head=L(StandardRPNHead)(in_channels=1024, num_anchors=15),
anchor_generator=L(DefaultAnchorGenerator)(
sizes=[[32, 64, 128, 256, 512]],
aspect_ratios=[0.5, 1.0, 2.0],
strides=[16],
offset=0.0,
),
anchor_matcher=L(Matcher)(
thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
),
box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
batch_size_per_image=256,
positive_fraction=0.5,
pre_nms_topk=(12000, 6000),
post_nms_topk=(2000, 1000),
nms_thresh=0.7,
),
# ROI heads: pool from "res4", run the res5 stage, then predict boxes and masks.
roi_heads=L(Res5ROIHeads)(
num_classes=80,
batch_size_per_image=512,
positive_fraction=0.25,
proposal_matcher=L(Matcher)(
thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
),
in_features=["res4"],
pooler=L(ROIPooler)(
output_size=14,
scales=(1.0 / 16,),
sampling_ratio=0,
pooler_type="ROIAlignV2",
),
res5=L(ResNet.make_stage)(
block_class=BottleneckBlock,
num_blocks=3,
stride_per_block=[2, 1, 1],
in_channels=1024,
bottleneck_channels=512,
out_channels=2048,
norm="FrozenBN",
stride_in_1x1=True,
),
box_predictor=L(FastRCNNOutputLayers)(
# Channel count is interpolated from the res5 stage's out_channels.
input_shape=L(ShapeSpec)(channels="${...res5.out_channels}", height=1, width=1),
test_score_thresh=0.05,
box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
# Interpolated from the num_classes entry of the enclosing roi_heads config.
num_classes="${..num_classes}",
),
mask_head=L(MaskRCNNConvUpsampleHead)(
# Channels follow res5.out_channels; width/height follow pooler.output_size.
input_shape=L(ShapeSpec)(
channels="${...res5.out_channels}",
width="${...pooler.output_size}",
height="${...pooler.output_size}",
),
num_classes="${..num_classes}",
conv_dims=[256],
),
),
pixel_mean=[103.530, 116.280, 123.675],
pixel_std=[1.0, 1.0, 1.0],
input_format="BGR",
)
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.meta_arch import GeneralizedRCNN
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
from detectron2.modeling.backbone.fpn import LastLevelMaxPool
from detectron2.modeling.backbone import BasicStem, FPN, ResNet
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
from detectron2.modeling.roi_heads import (
StandardROIHeads,
FastRCNNOutputLayers,
MaskRCNNConvUpsampleHead,
FastRCNNConvFCHead,
)
# Lazy config for a GeneralizedRCNN with an FPN backbone, an RPN over p2-p6 and
# StandardROIHeads (box + mask branches) over p2-p5.
model = L(GeneralizedRCNN)(
# Backbone: FPN on top of a ResNet (depth 50) exposing res2-res5.
backbone=L(FPN)(
bottom_up=L(ResNet)(
stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
stages=L(ResNet.make_default_stages)(
depth=50,
stride_in_1x1=True,
norm="FrozenBN",
),
out_features=["res2", "res3", "res4", "res5"],
),
# Interpolated from the bottom-up network's out_features so the two stay in sync.
in_features="${.bottom_up.out_features}",
out_channels=256,
top_block=L(LastLevelMaxPool)(),
),
# Region proposal network over five pyramid levels.
proposal_generator=L(RPN)(
in_features=["p2", "p3", "p4", "p5", "p6"],
# num_anchors=3 matches 1 anchor size per level x 3 aspect ratios configured below.
head=L(StandardRPNHead)(in_channels=256, num_anchors=3),
anchor_generator=L(DefaultAnchorGenerator)(
sizes=[[32], [64], [128], [256], [512]],
aspect_ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64],
offset=0.0,
),
anchor_matcher=L(Matcher)(
thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
),
box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
batch_size_per_image=256,
positive_fraction=0.5,
pre_nms_topk=(2000, 1000),
post_nms_topk=(1000, 1000),
nms_thresh=0.7,
),
# ROI heads with separate box and mask branches.
roi_heads=L(StandardROIHeads)(
num_classes=80,
batch_size_per_image=512,
positive_fraction=0.25,
proposal_matcher=L(Matcher)(
thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
),
# Box branch: 7x7 pooling, two FC layers, then the output layers.
box_in_features=["p2", "p3", "p4", "p5"],
box_pooler=L(ROIPooler)(
output_size=7,
scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
sampling_ratio=0,
pooler_type="ROIAlignV2",
),
box_head=L(FastRCNNConvFCHead)(
input_shape=ShapeSpec(channels=256, height=7, width=7),
conv_dims=[],
fc_dims=[1024, 1024],
),
box_predictor=L(FastRCNNOutputLayers)(
input_shape=ShapeSpec(channels=1024),
test_score_thresh=0.05,
box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
# Interpolated from the num_classes entry of the enclosing roi_heads config.
num_classes="${..num_classes}",
),
# Mask branch: 14x14 pooling followed by the conv/upsample mask head.
mask_in_features=["p2", "p3", "p4", "p5"],
mask_pooler=L(ROIPooler)(
output_size=14,
scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
sampling_ratio=0,
pooler_type="ROIAlignV2",
),
mask_head=L(MaskRCNNConvUpsampleHead)(
input_shape=ShapeSpec(channels=256, width=14, height=14),
num_classes="${..num_classes}",
conv_dims=[256, 256, 256, 256, 256],
),
),
pixel_mean=[103.530, 116.280, 123.675],
pixel_std=[1.0, 1.0, 1.0],
input_format="BGR",
)
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling import PanopticFPN
from detectron2.modeling.meta_arch.semantic_seg import SemSegFPNHead
from .mask_rcnn_fpn import model
# Turn the imported Mask R-CNN FPN config into a PanopticFPN config by swapping the
# target class and attaching a semantic segmentation head.
model._target_ = PanopticFPN
model.sem_seg_head = L(SemSegFPNHead)(
# One ShapeSpec per pyramid level p2-p5, pairing each name with its stride; the
# channel count is interpolated from the backbone's out_channels.
input_shape={
f: L(ShapeSpec)(stride=s, channels="${....backbone.out_channels}")
for f, s in zip(["p2", "p3", "p4", "p5"], [4, 8, 16, 32])
},
ignore_value=255,
num_classes=54, # COCO stuff + 1
conv_dims=128,
common_stride=4,
loss_weight=0.5,
norm="GN",
)
# -*- coding: utf-8 -*-
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.meta_arch import RetinaNet
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
from detectron2.modeling.backbone.fpn import LastLevelP6P7
from detectron2.modeling.backbone import BasicStem, FPN, ResNet
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.meta_arch.retinanet import RetinaNetHead
# Lazy config for RetinaNet: FPN backbone over res3-res5 with a P6/P7 top block,
# a shared head over five feature levels, and anchor generation/matching settings.
model = L(RetinaNet)(
backbone=L(FPN)(
bottom_up=L(ResNet)(
stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
stages=L(ResNet.make_default_stages)(
depth=50,
stride_in_1x1=True,
norm="FrozenBN",
),
out_features=["res3", "res4", "res5"],
),
in_features=["res3", "res4", "res5"],
out_channels=256,
# The top block's out_channels is interpolated from the FPN's out_channels.
top_block=L(LastLevelP6P7)(in_channels=2048, out_channels="${..out_channels}"),
),
head=L(RetinaNetHead)(
# Shape for each input feature map
input_shape=[ShapeSpec(channels=256)] * 5,
# Interpolated from the model-level num_classes below.
num_classes="${..num_classes}",
conv_dims=[256, 256, 256, 256],
prior_prob=0.01,
# 9 matches 3 sizes per level x 3 aspect ratios in the anchor generator below.
num_anchors=9,
),
anchor_generator=L(DefaultAnchorGenerator)(
# Three sizes per level: the base size scaled by 2^0, 2^(1/3) and 2^(2/3).
sizes=[[x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] for x in [32, 64, 128, 256, 512]],
aspect_ratios=[0.5, 1.0, 2.0],
strides=[8, 16, 32, 64, 128],
offset=0.0,
),
box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
anchor_matcher=L(Matcher)(
thresholds=[0.4, 0.5], labels=[0, -1, 1], allow_low_quality_matches=True
),
num_classes=80,
head_in_features=["p3", "p4", "p5", "p6", "p7"],
focal_loss_alpha=0.25,
focal_loss_gamma=2.0,
pixel_mean=[103.530, 116.280, 123.675],
pixel_std=[1.0, 1.0, 1.0],
input_format="BGR",
)
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
# Lazy config for torch.optim.SGD. The parameter groups come from
# get_default_optimizer_params, configured here with weight_decay_norm=0.0.
SGD = L(torch.optim.SGD)(
params=L(get_default_optimizer_params)(
# params.model is meant to be set to the model object, before instantiating
# the optimizer.
weight_decay_norm=0.0
),
lr=0.02,
momentum=0.9,
weight_decay=1e-4,
)
# Shared training settings written for "tools/lazyconfig_train_net.py".
# They are only defaults: bring your own values together with your own train_net.py.
train = {
    "output_dir": "./output",
    "init_checkpoint": "",
    "max_iter": 90000,
    # options for Automatic Mixed Precision
    "amp": {"enabled": False},
    # options for DistributedDataParallel
    "ddp": {
        "broadcast_buffers": False,
        "find_unused_parameters": False,
        "fp16_compression": False,
    },
    # options for PeriodicCheckpointer
    "checkpointer": {"period": 5000, "max_to_keep": 100},
    "eval_period": 5000,
    "log_period": 20,
    "device": "cuda",
    # ...
}
from .mask_rcnn_R_50_FPN_100ep_LSJ import (
dataloader,
lr_multiplier,
model,
optimizer,
train,
)
# The only change relative to the imported R50 LSJ config: set the ResNet stage depth to 101.
model.backbone.bottom_up.stages.depth = 101
from .mask_rcnn_R_101_FPN_100ep_LSJ import (
dataloader,
lr_multiplier,
model,
optimizer,
train,
)
# Stretch the imported 100-epoch schedule by 2x: max_iter, every LR milestone and the
# scheduler's num_updates are scaled together.
train.max_iter *= 2 # 100ep -> 200ep
lr_multiplier.scheduler.milestones = [
milestone * 2 for milestone in lr_multiplier.scheduler.milestones
]
# Keep the scheduler length equal to the new training length.
lr_multiplier.scheduler.num_updates = train.max_iter
from .mask_rcnn_R_101_FPN_100ep_LSJ import (
dataloader,
lr_multiplier,
model,
optimizer,
train,
)
# Stretch the imported 100-epoch schedule by 4x: max_iter, every LR milestone and the
# scheduler's num_updates are scaled together.
train.max_iter *= 4 # 100ep -> 400ep
lr_multiplier.scheduler.milestones = [
milestone * 4 for milestone in lr_multiplier.scheduler.milestones
]
# Keep the scheduler length equal to the new training length.
lr_multiplier.scheduler.num_updates = train.max_iter
import detectron2.data.transforms as T
from detectron2.config.lazy import LazyCall as L
from detectron2.layers.batch_norm import NaiveSyncBatchNorm
from detectron2.solver import WarmupParamScheduler
from fvcore.common.param_scheduler import MultiStepParamScheduler
from ..common.data.coco import dataloader
from ..common.models.mask_rcnn_fpn import model
from ..common.optim import SGD as optimizer
from ..common.train import train
# train from scratch
# (empty init checkpoint, mixed precision on, fp16 compression on for DDP,
# and freeze_at=0 on the bottom-up backbone)
train.init_checkpoint = ""
train.amp.enabled = True
train.ddp.fp16_compression = True
model.backbone.bottom_up.freeze_at = 0
# SyncBN
# The stem, the ResNet stages and the FPN all get norm="SyncBN" through one chained
# assignment; the formatter is switched off so the chained layout is preserved.
# fmt: off
model.backbone.bottom_up.stem.norm = \
model.backbone.bottom_up.stages.norm = \
model.backbone.norm = "SyncBN"
# Using NaiveSyncBatchNorm because heads may have empty input. That is not supported by
# torch.nn.SyncBatchNorm. We can remove this after
# https://github.com/pytorch/pytorch/issues/36530 is fixed.
model.roi_heads.box_head.conv_norm = \
model.roi_heads.mask_head.conv_norm = lambda c: NaiveSyncBatchNorm(c,
stats_mode="N")
# fmt: on
# 2conv in RPN:
# https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/modeling/architecture/heads.py#L95-L97 # noqa: E501, B950
# NOTE(review): -1 presumably means "same channel count as the input" — confirm
# against StandardRPNHead.
model.proposal_generator.head.conv_dims = [-1, -1]
# 4conv1fc box head
model.roi_heads.box_head.conv_dims = [256, 256, 256, 256]
model.roi_heads.box_head.fc_dims = [1024]
# resize_and_crop_image in:
# https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/utils/input_utils.py#L127 # noqa: E501, B950
# Side length shared by the resize target and the fixed crop below.
image_size = 1024
# Replace the default training augmentations: random-scale resize (0.1x-2.0x),
# fixed-size crop to image_size x image_size, then a horizontal RandomFlip.
dataloader.train.mapper.augmentations = [
L(T.ResizeScale)(
min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size
),
L(T.FixedSizeCrop)(crop_size=(image_size, image_size)),
L(T.RandomFlip)(horizontal=True),
]
# recompute boxes due to cropping
dataloader.train.mapper.recompute_boxes = True
# larger batch-size.
dataloader.train.total_batch_size = 64
# Equivalent to 100 epochs.
# 100 ep = 184375 iters * 64 images/iter / 118000 images/ep
train.max_iter = 184375
# Multi-step LR multiplier (1.0 -> 0.1 -> 0.01 at the two milestones) wrapped in a
# warmup that lasts 500 iterations, expressed as a fraction of max_iter.
lr_multiplier = L(WarmupParamScheduler)(
scheduler=L(MultiStepParamScheduler)(
values=[1.0, 0.1, 0.01],
milestones=[163889, 177546],
num_updates=train.max_iter,
),
warmup_length=500 / train.max_iter,
warmup_factor=0.067,
)
# Optimizer overrides for this recipe.
optimizer.lr = 0.1
optimizer.weight_decay = 4e-5
from .mask_rcnn_R_50_FPN_100ep_LSJ import (
dataloader,
lr_multiplier,
model,
optimizer,
train,
)
# Stretch the imported 100-epoch schedule by 2x: max_iter, every LR milestone and the
# scheduler's num_updates are scaled together.
train.max_iter *= 2 # 100ep -> 200ep
lr_multiplier.scheduler.milestones = [
milestone * 2 for milestone in lr_multiplier.scheduler.milestones
]
# Keep the scheduler length equal to the new training length.
lr_multiplier.scheduler.num_updates = train.max_iter
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment