Commit 3144257c authored by mashun1

catvton
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: True
RESNETS:
DEPTH: 50
NORM: "SyncBN"
STRIDE_IN_1X1: True
FPN:
NORM: "SyncBN"
ROI_BOX_HEAD:
NAME: "FastRCNNConvFCHead"
NUM_CONV: 4
NUM_FC: 1
NORM: "SyncBN"
ROI_MASK_HEAD:
NORM: "SyncBN"
SOLVER:
# 3x schedule
STEPS: (210000, 250000)
MAX_ITER: 270000
TEST:
PRECISE_BN:
ENABLED: True
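The YAML above is a CfgNode-style config (it matches detectron2's Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml). A minimal sketch of loading it programmatically; the file path is an assumption about where the file is saved:

from detectron2.config import get_cfg
from detectron2.modeling import build_model

cfg = get_cfg()
# hypothetical path; merge_from_file follows the _BASE_ reference as well
cfg.merge_from_file("configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml")
model = build_model(cfg)  # note: SyncBN layers require multi-GPU distributed training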
# An example config to train a mmdetection model using detectron2.

from ..common.data.coco import dataloader
from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
from ..common.optim import SGD as optimizer
from ..common.train import train
from ..common.data.constants import constants

from detectron2.modeling.mmdet_wrapper import MMDetDetector
from detectron2.config import LazyCall as L

model = L(MMDetDetector)(
    detector=dict(
        type="MaskRCNN",
        pretrained="torchvision://resnet50",
        backbone=dict(
            type="ResNet",
            depth=50,
            num_stages=4,
            out_indices=(0, 1, 2, 3),
            frozen_stages=1,
            norm_cfg=dict(type="BN", requires_grad=True),
            norm_eval=True,
            style="pytorch",
        ),
        neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5),
        rpn_head=dict(
            type="RPNHead",
            in_channels=256,
            feat_channels=256,
            anchor_generator=dict(
                type="AnchorGenerator",
                scales=[8],
                ratios=[0.5, 1.0, 2.0],
                strides=[4, 8, 16, 32, 64],
            ),
            bbox_coder=dict(
                type="DeltaXYWHBBoxCoder",
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[1.0, 1.0, 1.0, 1.0],
            ),
            loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0),
            loss_bbox=dict(type="L1Loss", loss_weight=1.0),
        ),
        roi_head=dict(
            type="StandardRoIHead",
            bbox_roi_extractor=dict(
                type="SingleRoIExtractor",
                roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0),
                out_channels=256,
                featmap_strides=[4, 8, 16, 32],
            ),
            bbox_head=dict(
                type="Shared2FCBBoxHead",
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type="DeltaXYWHBBoxCoder",
                    target_means=[0.0, 0.0, 0.0, 0.0],
                    target_stds=[0.1, 0.1, 0.2, 0.2],
                ),
                reg_class_agnostic=False,
                loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
                loss_bbox=dict(type="L1Loss", loss_weight=1.0),
            ),
            mask_roi_extractor=dict(
                type="SingleRoIExtractor",
                roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0),
                out_channels=256,
                featmap_strides=[4, 8, 16, 32],
            ),
            mask_head=dict(
                type="FCNMaskHead",
                num_convs=4,
                in_channels=256,
                conv_out_channels=256,
                num_classes=80,
                loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0),
            ),
        ),
        # model training and testing settings
        train_cfg=dict(
            rpn=dict(
                assigner=dict(
                    type="MaxIoUAssigner",
                    pos_iou_thr=0.7,
                    neg_iou_thr=0.3,
                    min_pos_iou=0.3,
                    match_low_quality=True,
                    ignore_iof_thr=-1,
                ),
                sampler=dict(
                    type="RandomSampler",
                    num=256,
                    pos_fraction=0.5,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=False,
                ),
                allowed_border=-1,
                pos_weight=-1,
                debug=False,
            ),
            rpn_proposal=dict(
                nms_pre=2000,
                max_per_img=1000,
                nms=dict(type="nms", iou_threshold=0.7),
                min_bbox_size=0,
            ),
            rcnn=dict(
                assigner=dict(
                    type="MaxIoUAssigner",
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.5,
                    min_pos_iou=0.5,
                    match_low_quality=True,
                    ignore_iof_thr=-1,
                ),
                sampler=dict(
                    type="RandomSampler",
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True,
                ),
                mask_size=28,
                pos_weight=-1,
                debug=False,
            ),
        ),
        test_cfg=dict(
            rpn=dict(
                nms_pre=1000,
                max_per_img=1000,
                nms=dict(type="nms", iou_threshold=0.7),
                min_bbox_size=0,
            ),
            rcnn=dict(
                score_thr=0.05,
                nms=dict(type="nms", iou_threshold=0.5),
                max_per_img=100,
                mask_thr_binary=0.5,
            ),
        ),
    ),
    pixel_mean=constants.imagenet_rgb256_mean,
    pixel_std=constants.imagenet_rgb256_std,
)

dataloader.train.mapper.image_format = "RGB"  # torchvision pretrained model
train.init_checkpoint = None  # pretrained model is loaded inside backbone
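This config is meant for detectron2's lazy-config trainer, e.g. `python tools/lazyconfig_train_net.py --config-file <path-to-this-file> --num-gpus 8` (the exact repo path is an assumption). A minimal sketch of building the wrapped detector directly, assuming mmdet and mmcv are installed:

from detectron2.config import instantiate

# Constructs mmdet's MaskRCNN inside detectron2's MMDetDetector wrapper;
# requires the mmdet package to be importable.
detector = instantiate(model)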
# A large PanopticFPN for demo purposes.
# Use GN on backbone to support semantic seg.
# Use Cascade + Deform Conv to improve localization.
_BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml"
MODEL:
  WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN"
  RESNETS:
    DEPTH: 101
    NORM: "GN"
    DEFORM_ON_PER_STAGE: [False, True, True, True]
    STRIDE_IN_1X1: False
  FPN:
    NORM: "GN"
  ROI_HEADS:
    NAME: CascadeROIHeads
  ROI_BOX_HEAD:
    CLS_AGNOSTIC_BBOX_REG: True
  ROI_MASK_HEAD:
    NORM: "GN"
  RPN:
    POST_NMS_TOPK_TRAIN: 2000
SOLVER:
  STEPS: (105000, 125000)
  MAX_ITER: 135000
  IMS_PER_BATCH: 32
  BASE_LR: 0.04
_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
MODEL:
# Train from random initialization.
WEIGHTS: ""
# It makes sense to divide by STD when training from scratch
# But it seems to make no difference on the results and C2's models didn't do this.
# So we keep things consistent with C2.
# PIXEL_STD: [57.375, 57.12, 58.395]
MASK_ON: True
BACKBONE:
FREEZE_AT: 0
# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
# to learn what you need for training from scratch.
_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
MODEL:
PIXEL_STD: [57.375, 57.12, 58.395]
WEIGHTS: ""
MASK_ON: True
RESNETS:
STRIDE_IN_1X1: False
BACKBONE:
FREEZE_AT: 0
SOLVER:
# 9x schedule
IMS_PER_BATCH: 64 # 4x the standard
STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k
MAX_ITER: 202500 # 90k * 9 / 4
BASE_LR: 0.08
TEST:
EVAL_PERIOD: 2500
# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
# to learn what you need for training from scratch.
_BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml"
MODEL:
PIXEL_STD: [57.375, 57.12, 58.395]
WEIGHTS: ""
MASK_ON: True
RESNETS:
STRIDE_IN_1X1: False
BACKBONE:
FREEZE_AT: 0
SOLVER:
# 9x schedule
IMS_PER_BATCH: 64 # 4x the standard
STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k
MAX_ITER: 202500 # 90k * 9 / 4
BASE_LR: 0.08
TEST:
EVAL_PERIOD: 2500
# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
# to learn what you need for training from scratch.
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
META_ARCHITECTURE: "SemanticSegmentor"
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
DATASETS:
TRAIN: ("coco_2017_train_panoptic_stuffonly",)
TEST: ("coco_2017_val_panoptic_stuffonly",)
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
"""
An example config file to train a ImageNet classifier with detectron2.
Model and dataloader both come from torchvision.
This shows how to use detectron2 as a general engine for any new models and tasks.
To run, use the following command:
python tools/lazyconfig_train_net.py --config-file configs/Misc/torchvision_imagenet_R_50.py \
--num-gpus 8 dataloader.train.dataset.root=/path/to/imagenet/
"""
import torch
from torch import nn
from torch.nn import functional as F
from omegaconf import OmegaConf
import torchvision
from torchvision.transforms import transforms as T
from torchvision.models.resnet import ResNet, Bottleneck
from fvcore.common.param_scheduler import MultiStepParamScheduler
from detectron2.solver import WarmupParamScheduler
from detectron2.solver.build import get_default_optimizer_params
from detectron2.config import LazyCall as L
from detectron2.model_zoo import get_config
from detectron2.data.samplers import TrainingSampler, InferenceSampler
from detectron2.evaluation import DatasetEvaluator
from detectron2.utils import comm
"""
Note: Here we put reusable code (models, evaluation, data) together with configs just as a
proof-of-concept, to easily demonstrate what's needed to train a ImageNet classifier in detectron2.
Writing code in configs offers extreme flexibility but is often not a good engineering practice.
In practice, you might want to put code in your project and import them instead.
"""
def build_data_loader(dataset, batch_size, num_workers, training=True):
return torch.utils.data.DataLoader(
dataset,
sampler=(TrainingSampler if training else InferenceSampler)(len(dataset)),
batch_size=batch_size,
num_workers=num_workers,
pin_memory=True,
)
class ClassificationNet(nn.Module):
def __init__(self, model: nn.Module):
super().__init__()
self.model = model
@property
def device(self):
return list(self.model.parameters())[0].device
def forward(self, inputs):
image, label = inputs
pred = self.model(image.to(self.device))
if self.training:
label = label.to(self.device)
return F.cross_entropy(pred, label)
else:
return pred
class ClassificationAcc(DatasetEvaluator):
def reset(self):
self.corr = self.total = 0
def process(self, inputs, outputs):
image, label = inputs
self.corr += (outputs.argmax(dim=1).cpu() == label.cpu()).sum().item()
self.total += len(label)
def evaluate(self):
all_corr_total = comm.all_gather([self.corr, self.total])
corr = sum(x[0] for x in all_corr_total)
total = sum(x[1] for x in all_corr_total)
return {"accuracy": corr / total}
# --- End of code that could be in a project and be imported
dataloader = OmegaConf.create()
dataloader.train = L(build_data_loader)(
dataset=L(torchvision.datasets.ImageNet)(
root="/path/to/imagenet",
split="train",
transform=L(T.Compose)(
transforms=[
L(T.RandomResizedCrop)(size=224),
L(T.RandomHorizontalFlip)(),
T.ToTensor(),
L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
]
),
),
batch_size=256 // 8,
num_workers=4,
training=True,
)
dataloader.test = L(build_data_loader)(
dataset=L(torchvision.datasets.ImageNet)(
root="${...train.dataset.root}",
split="val",
transform=L(T.Compose)(
transforms=[
L(T.Resize)(size=256),
L(T.CenterCrop)(size=224),
T.ToTensor(),
L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
]
),
),
batch_size=256 // 8,
num_workers=4,
training=False,
)
dataloader.evaluator = L(ClassificationAcc)()
model = L(ClassificationNet)(
model=(ResNet)(block=Bottleneck, layers=[3, 4, 6, 3], zero_init_residual=True)
)
optimizer = L(torch.optim.SGD)(
params=L(get_default_optimizer_params)(),
lr=0.1,
momentum=0.9,
weight_decay=1e-4,
)
lr_multiplier = L(WarmupParamScheduler)(
scheduler=L(MultiStepParamScheduler)(
values=[1.0, 0.1, 0.01, 0.001], milestones=[30, 60, 90, 100]
),
warmup_length=1 / 100,
warmup_factor=0.1,
)
train = get_config("common/train.py").train
train.init_checkpoint = None
train.max_iter = 100 * 1281167 // 256
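A minimal sketch (not part of the original file) of running the evaluator defined above over the torchvision val loader; it assumes a CUDA device and that `dataloader.train.dataset.root` points at an ImageNet directory:

from detectron2.config import instantiate
from detectron2.evaluation import inference_on_dataset

# hypothetical usage: materialize model, test loader, and evaluator, then evaluate
net = instantiate(model).cuda().eval()
metrics = inference_on_dataset(
    net, instantiate(dataloader.test), instantiate(dataloader.evaluator)
)
print(metrics)  # {"accuracy": ...}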
_BASE_: "../Base-RCNN-C4.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: False
RESNETS:
DEPTH: 50
ROI_HEADS:
NUM_CLASSES: 20
INPUT:
MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
MIN_SIZE_TEST: 800
DATASETS:
TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
TEST: ('voc_2007_test',)
SOLVER:
STEPS: (12000, 16000)
MAX_ITER: 18000 # 17.4 epochs
WARMUP_ITERS: 100
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: False
RESNETS:
DEPTH: 50
ROI_HEADS:
NUM_CLASSES: 20
INPUT:
MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
MIN_SIZE_TEST: 800
DATASETS:
TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
TEST: ('voc_2007_test',)
SOLVER:
STEPS: (12000, 16000)
MAX_ITER: 18000 # 17.4 epochs
WARMUP_ITERS: 100
This directory provides definitions for a few common models, dataloaders, schedulers,
and optimizers that are often used in training.
The definitions of these objects are provided in the form of lazy instantiation:
their arguments can be edited by users before the objects are constructed.
They can be imported, or loaded by the `model_zoo.get_config` API in users' own configs.
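For example, a minimal sketch of that workflow (the `common/models/mask_rcnn_fpn.py` path is an assumption about the model zoo layout):

from detectron2 import model_zoo
from detectron2.config import instantiate

model_cfg = model_zoo.get_config("common/models/mask_rcnn_fpn.py").model
model_cfg.roi_heads.num_classes = 20  # edit arguments before the object is constructed
model = instantiate(model_cfg)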
from fvcore.common.param_scheduler import MultiStepParamScheduler

from detectron2.config import LazyCall as L
from detectron2.solver import WarmupParamScheduler


def default_X_scheduler(num_X):
    """
    Returns the config for a default multi-step LR scheduler such as "1x", "3x",
    commonly referred to in papers, where every 1x has the total length of 1440k
    training images (~12 COCO epochs). LR is decayed twice at the end of training
    following the strategy defined in "Rethinking ImageNet Pre-training", Sec 4.

    Args:
        num_X: a positive real number

    Returns:
        DictConfig: configs that define the multiplier for LR during training
    """
    # total number of iterations assuming 16 batch size, using 1440000/16=90000
    total_steps_16bs = num_X * 90000

    if num_X <= 2:
        scheduler = L(MultiStepParamScheduler)(
            values=[1.0, 0.1, 0.01],
            # note that scheduler is scale-invariant. This is equivalent to
            # milestones=[6, 8, 9]
            milestones=[60000, 80000, 90000],
        )
    else:
        scheduler = L(MultiStepParamScheduler)(
            values=[1.0, 0.1, 0.01],
            milestones=[total_steps_16bs - 60000, total_steps_16bs - 20000, total_steps_16bs],
        )
    return L(WarmupParamScheduler)(
        scheduler=scheduler,
        warmup_length=1000 / total_steps_16bs,
        warmup_method="linear",
        warmup_factor=0.001,
    )


lr_multiplier_1x = default_X_scheduler(1)
lr_multiplier_2x = default_X_scheduler(2)
lr_multiplier_3x = default_X_scheduler(3)
lr_multiplier_6x = default_X_scheduler(6)
lr_multiplier_9x = default_X_scheduler(9)
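A minimal sketch of how these multipliers behave once instantiated, assuming fvcore's scale-invariant ParamScheduler interface (queried with a training-progress fraction in [0, 1)):

from detectron2.config import instantiate

# The "1x" schedule decays at 60k/90k and 80k/90k of training.
sched = instantiate(lr_multiplier_1x)
print(sched(0.5))   # 1.0  -> before the first decay (~0.667)
print(sched(0.75))  # 0.1  -> between the two decays
print(sched(0.95))  # 0.01 -> after the second decay (~0.889)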
from omegaconf import OmegaConf

import detectron2.data.transforms as T
from detectron2.config import LazyCall as L
from detectron2.data import (
    DatasetMapper,
    build_detection_test_loader,
    build_detection_train_loader,
    get_detection_dataset_dicts,
)
from detectron2.evaluation import COCOEvaluator

dataloader = OmegaConf.create()

dataloader.train = L(build_detection_train_loader)(
    dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"),
    mapper=L(DatasetMapper)(
        is_train=True,
        augmentations=[
            L(T.ResizeShortestEdge)(
                short_edge_length=(640, 672, 704, 736, 768, 800),
                sample_style="choice",
                max_size=1333,
            ),
            L(T.RandomFlip)(horizontal=True),
        ],
        image_format="BGR",
        use_instance_mask=True,
    ),
    total_batch_size=16,
    num_workers=4,
)

dataloader.test = L(build_detection_test_loader)(
    dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False),
    mapper=L(DatasetMapper)(
        is_train=False,
        augmentations=[
            L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333),
        ],
        image_format="${...train.mapper.image_format}",
    ),
    num_workers=4,
)

dataloader.evaluator = L(COCOEvaluator)(
    dataset_name="${..test.dataset.names}",
)
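The `"${...train.mapper.image_format}"` entry is an OmegaConf interpolation, so the test mapper tracks whatever format the train mapper uses. A minimal sketch of relying on that before instantiation (the override mirrors what the mmdet config above does):

from detectron2.config import instantiate

dataloader.train.mapper.image_format = "RGB"
test_loader = instantiate(dataloader.test)  # its mapper resolves image_format to "RGB"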
from detectron2.data.detection_utils import create_keypoint_hflip_indices
from .coco import dataloader
dataloader.train.dataset.min_keypoints = 1
dataloader.train.dataset.names = "keypoints_coco_2017_train"
dataloader.test.dataset.names = "keypoints_coco_2017_val"
dataloader.train.mapper.update(
    use_instance_mask=False,
    use_keypoint=True,
    keypoint_hflip_indices=create_keypoint_hflip_indices(dataloader.train.dataset.names),
)
from detectron2.config import LazyCall as L
from detectron2.evaluation import (
    COCOEvaluator,
    COCOPanopticEvaluator,
    DatasetEvaluators,
    SemSegEvaluator,
)

from .coco import dataloader

dataloader.train.dataset.names = "coco_2017_train_panoptic_separated"
dataloader.train.dataset.filter_empty = False
dataloader.test.dataset.names = "coco_2017_val_panoptic_separated"

dataloader.evaluator = [
    L(COCOEvaluator)(
        dataset_name="${...test.dataset.names}",
    ),
    L(SemSegEvaluator)(
        dataset_name="${...test.dataset.names}",
    ),
    L(COCOPanopticEvaluator)(
        dataset_name="${...test.dataset.names}",
    ),
]
constants = dict(
    imagenet_rgb256_mean=[123.675, 116.28, 103.53],
    imagenet_rgb256_std=[58.395, 57.12, 57.375],
    imagenet_bgr256_mean=[103.530, 116.280, 123.675],
    # When using pre-trained models in Detectron1 or any MSRA models,
    # std has been absorbed into its conv1 weights, so the std needs to be set to 1.
    # Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std)
    imagenet_bgr256_std=[1.0, 1.0, 1.0],
)
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads

from .mask_rcnn_fpn import model

# arguments that don't exist for Cascade R-CNN
[model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]]

model.roi_heads.update(
    _target_=CascadeROIHeads,
    box_heads=[
        L(FastRCNNConvFCHead)(
            input_shape=ShapeSpec(channels=256, height=7, width=7),
            conv_dims=[],
            fc_dims=[1024, 1024],
        )
        for k in range(3)
    ],
    box_predictors=[
        L(FastRCNNOutputLayers)(
            input_shape=ShapeSpec(channels=1024),
            test_score_thresh=0.05,
            box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)),
            cls_agnostic_bbox_reg=True,
            num_classes="${...num_classes}",
        )
        for (w1, w2) in [(10, 5), (20, 10), (30, 15)]
    ],
    proposal_matchers=[
        L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False)
        for th in [0.5, 0.6, 0.7]
    ],
)
from detectron2.modeling.meta_arch.fcos import FCOS, FCOSHead
from .retinanet import model
model._target_ = FCOS
del model.anchor_generator
del model.box2box_transform
del model.anchor_matcher
del model.input_format
# Use P5 instead of C5 to compute P6/P7
# (Sec 2.2 of https://arxiv.org/abs/2006.09214)
model.backbone.top_block.in_feature = "p5"
model.backbone.top_block.in_channels = 256
# New score threshold determined based on sqrt(cls_score * centerness)
model.test_score_thresh = 0.2
model.test_nms_thresh = 0.6
model.head._target_ = FCOSHead
del model.head.num_anchors
model.head.norm = "GN"
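The comment above notes that FCOS ranks detections by sqrt(cls_score * centerness). A minimal illustration (hypothetical values) of why the geometric mean calls for a higher `test_score_thresh` than raw classification scores would:

import math

# Multiplying classification score by predicted centerness shrinks the score;
# the square root keeps the combined value on the original [0, 1] scale.
cls_score, centerness = 0.6, 0.3
combined = math.sqrt(cls_score * centerness)
print(round(combined, 3))  # 0.424 -> passes the 0.2 threshold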
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead

from .mask_rcnn_fpn import model

[model.roi_heads.pop(x) for x in ["mask_in_features", "mask_pooler", "mask_head"]]

model.roi_heads.update(
    num_classes=1,
    keypoint_in_features=["p2", "p3", "p4", "p5"],
    keypoint_pooler=L(ROIPooler)(
        output_size=14,
        scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
        sampling_ratio=0,
        pooler_type="ROIAlignV2",
    ),
    keypoint_head=L(KRCNNConvDeconvUpsampleHead)(
        input_shape=ShapeSpec(channels=256, width=14, height=14),
        num_keypoints=17,
        conv_dims=[512] * 8,
        loss_normalizer="visible",
    ),
)

# Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
# 1000 proposals per-image is found to hurt box AP.
# Therefore we increase it to 1500 per-image.
model.proposal_generator.post_nms_topk = (1500, 1000)

# Keypoint AP degrades (though box AP improves) when using plain L1 loss
model.roi_heads.box_predictor.smooth_l1_beta = 0.5
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.meta_arch import GeneralizedRCNN
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
from detectron2.modeling.backbone import BasicStem, BottleneckBlock, ResNet
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
from detectron2.modeling.roi_heads import (
    FastRCNNOutputLayers,
    MaskRCNNConvUpsampleHead,
    Res5ROIHeads,
)

from ..data.constants import constants

model = L(GeneralizedRCNN)(
    backbone=L(ResNet)(
        stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
        stages=L(ResNet.make_default_stages)(
            depth=50,
            stride_in_1x1=True,
            norm="FrozenBN",
        ),
        out_features=["res4"],
    ),
    proposal_generator=L(RPN)(
        in_features=["res4"],
        head=L(StandardRPNHead)(in_channels=1024, num_anchors=15),
        anchor_generator=L(DefaultAnchorGenerator)(
            sizes=[[32, 64, 128, 256, 512]],
            aspect_ratios=[0.5, 1.0, 2.0],
            strides=[16],
            offset=0.0,
        ),
        anchor_matcher=L(Matcher)(
            thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
        ),
        box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
        batch_size_per_image=256,
        positive_fraction=0.5,
        pre_nms_topk=(12000, 6000),
        post_nms_topk=(2000, 1000),
        nms_thresh=0.7,
    ),
    roi_heads=L(Res5ROIHeads)(
        num_classes=80,
        batch_size_per_image=512,
        positive_fraction=0.25,
        proposal_matcher=L(Matcher)(
            thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
        ),
        in_features=["res4"],
        pooler=L(ROIPooler)(
            output_size=14,
            scales=(1.0 / 16,),
            sampling_ratio=0,
            pooler_type="ROIAlignV2",
        ),
        res5=L(ResNet.make_stage)(
            block_class=BottleneckBlock,
            num_blocks=3,
            stride_per_block=[2, 1, 1],
            in_channels=1024,
            bottleneck_channels=512,
            out_channels=2048,
            norm="FrozenBN",
            stride_in_1x1=True,
        ),
        box_predictor=L(FastRCNNOutputLayers)(
            input_shape=L(ShapeSpec)(channels="${...res5.out_channels}", height=1, width=1),
            test_score_thresh=0.05,
            box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
            num_classes="${..num_classes}",
        ),
        mask_head=L(MaskRCNNConvUpsampleHead)(
            input_shape=L(ShapeSpec)(
                channels="${...res5.out_channels}",
                width="${...pooler.output_size}",
                height="${...pooler.output_size}",
            ),
            num_classes="${..num_classes}",
            conv_dims=[256],
        ),
    ),
    pixel_mean=constants.imagenet_bgr256_mean,
    pixel_std=constants.imagenet_bgr256_std,
    input_format="BGR",
)
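A minimal sketch of materializing this model; `instantiate` resolves the relative `"${...}"` interpolations (e.g. `res5.out_channels` feeding the box predictor's input shape) while building:

from detectron2.config import instantiate

rcnn = instantiate(model)
print(type(rcnn).__name__)  # GeneralizedRCNN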