Commit b634945d authored by limm's avatar limm
Browse files

support v0.6

parent 5b3792fc
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
PIXEL_STD: [57.375, 57.120, 58.395]
MASK_ON: True
RESNETS:
STRIDE_IN_1X1: False # this is a C2 model
NUM_GROUPS: 32
WIDTH_PER_GROUP: 8
DEPTH: 101
ROI_HEADS:
NUM_CLASSES: 1230
SCORE_THRESH_TEST: 0.0001
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
DATASETS:
TRAIN: ("lvis_v0.5_train",)
TEST: ("lvis_v0.5_val",)
TEST:
DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
DATALOADER:
SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
REPEAT_THRESHOLD: 0.001
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
MASK_ON: True
RESNETS:
DEPTH: 101
ROI_HEADS:
NUM_CLASSES: 1203
SCORE_THRESH_TEST: 0.0001
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
DATASETS:
TRAIN: ("lvis_v1_train",)
TEST: ("lvis_v1_val",)
TEST:
DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
SOLVER:
STEPS: (120000, 160000)
MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs
DATALOADER:
SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
REPEAT_THRESHOLD: 0.001
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: True
RESNETS:
DEPTH: 50
ROI_HEADS:
NUM_CLASSES: 1203
SCORE_THRESH_TEST: 0.0001
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
DATASETS:
TRAIN: ("lvis_v1_train",)
TEST: ("lvis_v1_val",)
TEST:
DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
SOLVER:
STEPS: (120000, 160000)
MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs
DATALOADER:
SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
REPEAT_THRESHOLD: 0.001
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
PIXEL_STD: [57.375, 57.120, 58.395]
MASK_ON: True
RESNETS:
STRIDE_IN_1X1: False # this is a C2 model
NUM_GROUPS: 32
WIDTH_PER_GROUP: 8
DEPTH: 101
ROI_HEADS:
NUM_CLASSES: 1203
SCORE_THRESH_TEST: 0.0001
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
DATASETS:
TRAIN: ("lvis_v1_train",)
TEST: ("lvis_v1_val",)
SOLVER:
STEPS: (120000, 160000)
MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs
TEST:
DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
DATALOADER:
SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
REPEAT_THRESHOLD: 0.001
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: True
RESNETS:
DEPTH: 50
ROI_HEADS:
NAME: CascadeROIHeads
ROI_BOX_HEAD:
CLS_AGNOSTIC_BBOX_REG: True
RPN:
POST_NMS_TOPK_TRAIN: 2000
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: True
RESNETS:
DEPTH: 50
ROI_HEADS:
NAME: CascadeROIHeads
ROI_BOX_HEAD:
CLS_AGNOSTIC_BBOX_REG: True
RPN:
POST_NMS_TOPK_TRAIN: 2000
SOLVER:
STEPS: (210000, 250000)
MAX_ITER: 270000
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
MASK_ON: True
WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"
RESNETS:
STRIDE_IN_1X1: False # this is a C2 model
NUM_GROUPS: 32
WIDTH_PER_GROUP: 8
DEPTH: 152
DEFORM_ON_PER_STAGE: [False, True, True, True]
ROI_HEADS:
NAME: "CascadeROIHeads"
ROI_BOX_HEAD:
NAME: "FastRCNNConvFCHead"
NUM_CONV: 4
NUM_FC: 1
NORM: "GN"
CLS_AGNOSTIC_BBOX_REG: True
ROI_MASK_HEAD:
NUM_CONV: 8
NORM: "GN"
RPN:
POST_NMS_TOPK_TRAIN: 2000
SOLVER:
IMS_PER_BATCH: 128
STEPS: (35000, 45000)
MAX_ITER: 50000
BASE_LR: 0.16
INPUT:
MIN_SIZE_TRAIN: (640, 864)
MIN_SIZE_TRAIN_SAMPLING: "range"
MAX_SIZE_TRAIN: 1440
CROP:
ENABLED: True
TEST:
EVAL_PERIOD: 2500
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: True
RESNETS:
DEPTH: 50
ROI_BOX_HEAD:
CLS_AGNOSTIC_BBOX_REG: True
ROI_MASK_HEAD:
CLS_AGNOSTIC_MASK: True
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: True
RESNETS:
DEPTH: 50
DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
DEFORM_MODULATED: False
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: True
RESNETS:
DEPTH: 50
DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
DEFORM_MODULATED: False
SOLVER:
STEPS: (210000, 250000)
MAX_ITER: 270000
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN"
MASK_ON: True
RESNETS:
DEPTH: 50
NORM: "GN"
STRIDE_IN_1X1: False
FPN:
NORM: "GN"
ROI_BOX_HEAD:
NAME: "FastRCNNConvFCHead"
NUM_CONV: 4
NUM_FC: 1
NORM: "GN"
ROI_MASK_HEAD:
NORM: "GN"
SOLVER:
# 3x schedule
STEPS: (210000, 250000)
MAX_ITER: 270000
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: True
RESNETS:
DEPTH: 50
NORM: "SyncBN"
STRIDE_IN_1X1: True
FPN:
NORM: "SyncBN"
ROI_BOX_HEAD:
NAME: "FastRCNNConvFCHead"
NUM_CONV: 4
NUM_FC: 1
NORM: "SyncBN"
ROI_MASK_HEAD:
NORM: "SyncBN"
SOLVER:
# 3x schedule
STEPS: (210000, 250000)
MAX_ITER: 270000
TEST:
PRECISE_BN:
ENABLED: True
# An example config to train a mmdetection model using detectron2.
from ..common.data.coco import dataloader
from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
from ..common.optim import SGD as optimizer
from ..common.train import train
from detectron2.modeling.mmdet_wrapper import MMDetDetector
from detectron2.config import LazyCall as L
model = L(MMDetDetector)(
detector=dict(
type="MaskRCNN",
pretrained="torchvision://resnet50",
backbone=dict(
type="ResNet",
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type="BN", requires_grad=True),
norm_eval=True,
style="pytorch",
),
neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5),
rpn_head=dict(
type="RPNHead",
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type="AnchorGenerator",
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64],
),
bbox_coder=dict(
type="DeltaXYWHBBoxCoder",
target_means=[0.0, 0.0, 0.0, 0.0],
target_stds=[1.0, 1.0, 1.0, 1.0],
),
loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type="L1Loss", loss_weight=1.0),
),
roi_head=dict(
type="StandardRoIHead",
bbox_roi_extractor=dict(
type="SingleRoIExtractor",
roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32],
),
bbox_head=dict(
type="Shared2FCBBoxHead",
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type="DeltaXYWHBBoxCoder",
target_means=[0.0, 0.0, 0.0, 0.0],
target_stds=[0.1, 0.1, 0.2, 0.2],
),
reg_class_agnostic=False,
loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type="L1Loss", loss_weight=1.0),
),
mask_roi_extractor=dict(
type="SingleRoIExtractor",
roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32],
),
mask_head=dict(
type="FCNMaskHead",
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=80,
loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0),
),
),
# model training and testing settings
train_cfg=dict(
rpn=dict(
assigner=dict(
type="MaxIoUAssigner",
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1,
),
sampler=dict(
type="RandomSampler",
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False,
),
allowed_border=-1,
pos_weight=-1,
debug=False,
),
rpn_proposal=dict(
nms_pre=2000,
max_per_img=1000,
nms=dict(type="nms", iou_threshold=0.7),
min_bbox_size=0,
),
rcnn=dict(
assigner=dict(
type="MaxIoUAssigner",
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=True,
ignore_iof_thr=-1,
),
sampler=dict(
type="RandomSampler",
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True,
),
mask_size=28,
pos_weight=-1,
debug=False,
),
),
test_cfg=dict(
rpn=dict(
nms_pre=1000,
max_per_img=1000,
nms=dict(type="nms", iou_threshold=0.7),
min_bbox_size=0,
),
rcnn=dict(
score_thr=0.05,
nms=dict(type="nms", iou_threshold=0.5),
max_per_img=100,
mask_thr_binary=0.5,
),
),
),
pixel_mean=[123.675, 116.280, 103.530],
pixel_std=[58.395, 57.120, 57.375],
)
dataloader.train.mapper.image_format = "RGB" # torchvision pretrained model
train.init_checkpoint = None # pretrained model is loaded inside backbone
# A large PanopticFPN for demo purposes.
# Use GN on backbone to support semantic seg.
# Use Cascade + Deform Conv to improve localization.
_BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml"
MODEL:
WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN"
RESNETS:
DEPTH: 101
NORM: "GN"
DEFORM_ON_PER_STAGE: [False, True, True, True]
STRIDE_IN_1X1: False
FPN:
NORM: "GN"
ROI_HEADS:
NAME: CascadeROIHeads
ROI_BOX_HEAD:
CLS_AGNOSTIC_BBOX_REG: True
ROI_MASK_HEAD:
NORM: "GN"
RPN:
POST_NMS_TOPK_TRAIN: 2000
SOLVER:
STEPS: (105000, 125000)
MAX_ITER: 135000
IMS_PER_BATCH: 32
BASE_LR: 0.04
_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
MODEL:
# Train from random initialization.
WEIGHTS: ""
# It makes sense to divide by STD when training from scratch
# But it seems to make no difference on the results and C2's models didn't do this.
# So we keep things consistent with C2.
# PIXEL_STD: [57.375, 57.12, 58.395]
MASK_ON: True
BACKBONE:
FREEZE_AT: 0
# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
# to learn what you need for training from scratch.
_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
MODEL:
PIXEL_STD: [57.375, 57.12, 58.395]
WEIGHTS: ""
MASK_ON: True
RESNETS:
STRIDE_IN_1X1: False
BACKBONE:
FREEZE_AT: 0
SOLVER:
# 9x schedule
IMS_PER_BATCH: 64 # 4x the standard
STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k
MAX_ITER: 202500 # 90k * 9 / 4
BASE_LR: 0.08
TEST:
EVAL_PERIOD: 2500
# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
# to learn what you need for training from scratch.
_BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml"
MODEL:
PIXEL_STD: [57.375, 57.12, 58.395]
WEIGHTS: ""
MASK_ON: True
RESNETS:
STRIDE_IN_1X1: False
BACKBONE:
FREEZE_AT: 0
SOLVER:
# 9x schedule
IMS_PER_BATCH: 64 # 4x the standard
STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k
MAX_ITER: 202500 # 90k * 9 / 4
BASE_LR: 0.08
TEST:
EVAL_PERIOD: 2500
# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
# to learn what you need for training from scratch.
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
META_ARCHITECTURE: "SemanticSegmentor"
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
DATASETS:
TRAIN: ("coco_2017_train_panoptic_stuffonly",)
TEST: ("coco_2017_val_panoptic_stuffonly",)
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
"""
An example config file to train a ImageNet classifier with detectron2.
Model and dataloader both come from torchvision.
This shows how to use detectron2 as a general engine for any new models and tasks.
To run, use the following command:
python tools/lazyconfig_train_net.py --config-file configs/Misc/torchvision_imagenet_R_50.py \
--num-gpus 8 dataloader.train.dataset.root=/path/to/imagenet/
"""
import torch
from torch import nn
from torch.nn import functional as F
from omegaconf import OmegaConf
import torchvision
from torchvision.transforms import transforms as T
from torchvision.models.resnet import ResNet, Bottleneck
from fvcore.common.param_scheduler import MultiStepParamScheduler
from detectron2.solver import WarmupParamScheduler
from detectron2.solver.build import get_default_optimizer_params
from detectron2.config import LazyCall as L
from detectron2.model_zoo import get_config
from detectron2.data.samplers import TrainingSampler, InferenceSampler
from detectron2.evaluation import DatasetEvaluator
from detectron2.utils import comm
"""
Note: Here we put reusable code (models, evaluation, data) together with configs just as a
proof-of-concept, to easily demonstrate what's needed to train a ImageNet classifier in detectron2.
Writing code in configs offers extreme flexibility but is often not a good engineering practice.
In practice, you might want to put code in your project and import them instead.
"""
def build_data_loader(dataset, batch_size, num_workers, training=True):
return torch.utils.data.DataLoader(
dataset,
sampler=(TrainingSampler if training else InferenceSampler)(len(dataset)),
batch_size=batch_size,
num_workers=num_workers,
pin_memory=True,
)
class ClassificationNet(nn.Module):
def __init__(self, model: nn.Module):
super().__init__()
self.model = model
@property
def device(self):
return list(self.model.parameters())[0].device
def forward(self, inputs):
image, label = inputs
pred = self.model(image.to(self.device))
if self.training:
label = label.to(self.device)
return F.cross_entropy(pred, label)
else:
return pred
class ClassificationAcc(DatasetEvaluator):
def reset(self):
self.corr = self.total = 0
def process(self, inputs, outputs):
image, label = inputs
self.corr += (outputs.argmax(dim=1).cpu() == label.cpu()).sum().item()
self.total += len(label)
def evaluate(self):
all_corr_total = comm.all_gather([self.corr, self.total])
corr = sum(x[0] for x in all_corr_total)
total = sum(x[1] for x in all_corr_total)
return {"accuracy": corr / total}
# --- End of code that could be in a project and be imported
dataloader = OmegaConf.create()
dataloader.train = L(build_data_loader)(
dataset=L(torchvision.datasets.ImageNet)(
root="/path/to/imagenet",
split="train",
transform=L(T.Compose)(
transforms=[
L(T.RandomResizedCrop)(size=224),
L(T.RandomHorizontalFlip)(),
T.ToTensor(),
L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
]
),
),
batch_size=256 // 8,
num_workers=4,
training=True,
)
dataloader.test = L(build_data_loader)(
dataset=L(torchvision.datasets.ImageNet)(
root="${...train.dataset.root}",
split="val",
transform=L(T.Compose)(
transforms=[
L(T.Resize)(size=256),
L(T.CenterCrop)(size=224),
T.ToTensor(),
L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
]
),
),
batch_size=256 // 8,
num_workers=4,
training=False,
)
dataloader.evaluator = L(ClassificationAcc)()
model = L(ClassificationNet)(
model=(ResNet)(block=Bottleneck, layers=[3, 4, 6, 3], zero_init_residual=True)
)
optimizer = L(torch.optim.SGD)(
params=L(get_default_optimizer_params)(),
lr=0.1,
momentum=0.9,
weight_decay=1e-4,
)
lr_multiplier = L(WarmupParamScheduler)(
scheduler=L(MultiStepParamScheduler)(
values=[1.0, 0.1, 0.01, 0.001], milestones=[30, 60, 90, 100]
),
warmup_length=1 / 100,
warmup_factor=0.1,
)
train = get_config("common/train.py").train
train.init_checkpoint = None
train.max_iter = 100 * 1281167 // 256
_BASE_: "../Base-RCNN-C4.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: False
RESNETS:
DEPTH: 50
ROI_HEADS:
NUM_CLASSES: 20
INPUT:
MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
MIN_SIZE_TEST: 800
DATASETS:
TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
TEST: ('voc_2007_test',)
SOLVER:
STEPS: (12000, 16000)
MAX_ITER: 18000 # 17.4 epochs
WARMUP_ITERS: 100
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment