Commit 3144257c authored by mashun1's avatar mashun1
Browse files

catvton

parents
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.meta_arch import GeneralizedRCNN
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
from detectron2.modeling.backbone.fpn import LastLevelMaxPool
from detectron2.modeling.backbone import BasicStem, FPN, ResNet
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
from detectron2.modeling.roi_heads import (
StandardROIHeads,
FastRCNNOutputLayers,
MaskRCNNConvUpsampleHead,
FastRCNNConvFCHead,
)
from ..data.constants import constants
model = L(GeneralizedRCNN)(
backbone=L(FPN)(
bottom_up=L(ResNet)(
stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
stages=L(ResNet.make_default_stages)(
depth=50,
stride_in_1x1=True,
norm="FrozenBN",
),
out_features=["res2", "res3", "res4", "res5"],
),
in_features="${.bottom_up.out_features}",
out_channels=256,
top_block=L(LastLevelMaxPool)(),
),
proposal_generator=L(RPN)(
in_features=["p2", "p3", "p4", "p5", "p6"],
head=L(StandardRPNHead)(in_channels=256, num_anchors=3),
anchor_generator=L(DefaultAnchorGenerator)(
sizes=[[32], [64], [128], [256], [512]],
aspect_ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64],
offset=0.0,
),
anchor_matcher=L(Matcher)(
thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
),
box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
batch_size_per_image=256,
positive_fraction=0.5,
pre_nms_topk=(2000, 1000),
post_nms_topk=(1000, 1000),
nms_thresh=0.7,
),
roi_heads=L(StandardROIHeads)(
num_classes=80,
batch_size_per_image=512,
positive_fraction=0.25,
proposal_matcher=L(Matcher)(
thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
),
box_in_features=["p2", "p3", "p4", "p5"],
box_pooler=L(ROIPooler)(
output_size=7,
scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
sampling_ratio=0,
pooler_type="ROIAlignV2",
),
box_head=L(FastRCNNConvFCHead)(
input_shape=ShapeSpec(channels=256, height=7, width=7),
conv_dims=[],
fc_dims=[1024, 1024],
),
box_predictor=L(FastRCNNOutputLayers)(
input_shape=ShapeSpec(channels=1024),
test_score_thresh=0.05,
box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
num_classes="${..num_classes}",
),
mask_in_features=["p2", "p3", "p4", "p5"],
mask_pooler=L(ROIPooler)(
output_size=14,
scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
sampling_ratio=0,
pooler_type="ROIAlignV2",
),
mask_head=L(MaskRCNNConvUpsampleHead)(
input_shape=ShapeSpec(channels=256, width=14, height=14),
num_classes="${..num_classes}",
conv_dims=[256, 256, 256, 256, 256],
),
),
pixel_mean=constants.imagenet_bgr256_mean,
pixel_std=constants.imagenet_bgr256_std,
input_format="BGR",
)
from functools import partial
import torch.nn as nn
from detectron2.config import LazyCall as L
from detectron2.modeling import ViT, SimpleFeaturePyramid
from detectron2.modeling.backbone.fpn import LastLevelMaxPool
from .mask_rcnn_fpn import model
from ..data.constants import constants
model.pixel_mean = constants.imagenet_rgb256_mean
model.pixel_std = constants.imagenet_rgb256_std
model.input_format = "RGB"
# Base
embed_dim, depth, num_heads, dp = 768, 12, 12, 0.1
# Creates Simple Feature Pyramid from ViT backbone
model.backbone = L(SimpleFeaturePyramid)(
net=L(ViT)( # Single-scale ViT backbone
img_size=1024,
patch_size=16,
embed_dim=embed_dim,
depth=depth,
num_heads=num_heads,
drop_path_rate=dp,
window_size=14,
mlp_ratio=4,
qkv_bias=True,
norm_layer=partial(nn.LayerNorm, eps=1e-6),
window_block_indexes=[
# 2, 5, 8 11 for global attention
0,
1,
3,
4,
6,
7,
9,
10,
],
residual_block_indexes=[],
use_rel_pos=True,
out_feature="last_feat",
),
in_feature="${.net.out_feature}",
out_channels=256,
scale_factors=(4.0, 2.0, 1.0, 0.5),
top_block=L(LastLevelMaxPool)(),
norm="LN",
square_pad=1024,
)
model.roi_heads.box_head.conv_norm = model.roi_heads.mask_head.conv_norm = "LN"
# 2conv in RPN:
model.proposal_generator.head.conv_dims = [-1, -1]
# 4conv1fc box head
model.roi_heads.box_head.conv_dims = [256, 256, 256, 256]
model.roi_heads.box_head.fc_dims = [1024]
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling import PanopticFPN
from detectron2.modeling.meta_arch.semantic_seg import SemSegFPNHead
from .mask_rcnn_fpn import model
model._target_ = PanopticFPN
model.sem_seg_head = L(SemSegFPNHead)(
input_shape={
f: L(ShapeSpec)(stride=s, channels="${....backbone.out_channels}")
for f, s in zip(["p2", "p3", "p4", "p5"], [4, 8, 16, 32])
},
ignore_value=255,
num_classes=54, # COCO stuff + 1
conv_dims=128,
common_stride=4,
loss_weight=0.5,
norm="GN",
)
# -*- coding: utf-8 -*-
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.meta_arch import RetinaNet
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
from detectron2.modeling.backbone.fpn import LastLevelP6P7
from detectron2.modeling.backbone import BasicStem, FPN, ResNet
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.meta_arch.retinanet import RetinaNetHead
from ..data.constants import constants
model = L(RetinaNet)(
backbone=L(FPN)(
bottom_up=L(ResNet)(
stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
stages=L(ResNet.make_default_stages)(
depth=50,
stride_in_1x1=True,
norm="FrozenBN",
),
out_features=["res3", "res4", "res5"],
),
in_features=["res3", "res4", "res5"],
out_channels=256,
top_block=L(LastLevelP6P7)(in_channels=2048, out_channels="${..out_channels}"),
),
head=L(RetinaNetHead)(
# Shape for each input feature map
input_shape=[ShapeSpec(channels=256)] * 5,
num_classes="${..num_classes}",
conv_dims=[256, 256, 256, 256],
prior_prob=0.01,
num_anchors=9,
),
anchor_generator=L(DefaultAnchorGenerator)(
sizes=[[x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] for x in [32, 64, 128, 256, 512]],
aspect_ratios=[0.5, 1.0, 2.0],
strides=[8, 16, 32, 64, 128],
offset=0.0,
),
box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
anchor_matcher=L(Matcher)(
thresholds=[0.4, 0.5], labels=[0, -1, 1], allow_low_quality_matches=True
),
num_classes=80,
head_in_features=["p3", "p4", "p5", "p6", "p7"],
focal_loss_alpha=0.25,
focal_loss_gamma=2.0,
pixel_mean=constants.imagenet_bgr256_mean,
pixel_std=constants.imagenet_bgr256_std,
input_format="BGR",
)
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
SGD = L(torch.optim.SGD)(
params=L(get_default_optimizer_params)(
# params.model is meant to be set to the model object, before instantiating
# the optimizer.
weight_decay_norm=0.0
),
lr=0.02,
momentum=0.9,
weight_decay=1e-4,
)
AdamW = L(torch.optim.AdamW)(
params=L(get_default_optimizer_params)(
# params.model is meant to be set to the model object, before instantiating
# the optimizer.
base_lr="${..lr}",
weight_decay_norm=0.0,
),
lr=1e-4,
betas=(0.9, 0.999),
weight_decay=0.1,
)
# Common training-related configs that are designed for "tools/lazyconfig_train_net.py"
# You can use your own instead, together with your own train_net.py
train = dict(
output_dir="./output",
init_checkpoint="",
max_iter=90000,
amp=dict(enabled=False), # options for Automatic Mixed Precision
ddp=dict( # options for DistributedDataParallel
broadcast_buffers=False,
find_unused_parameters=False,
fp16_compression=False,
),
checkpointer=dict(period=5000, max_to_keep=100), # options for PeriodicCheckpointer
eval_period=5000,
log_period=20,
device="cuda",
# ...
)
from .mask_rcnn_R_50_FPN_100ep_LSJ import (
dataloader,
lr_multiplier,
model,
optimizer,
train,
)
model.backbone.bottom_up.stages.depth = 101
from .mask_rcnn_R_101_FPN_100ep_LSJ import (
dataloader,
lr_multiplier,
model,
optimizer,
train,
)
train.max_iter *= 2 # 100ep -> 200ep
lr_multiplier.scheduler.milestones = [
milestone * 2 for milestone in lr_multiplier.scheduler.milestones
]
lr_multiplier.scheduler.num_updates = train.max_iter
from .mask_rcnn_R_101_FPN_100ep_LSJ import (
dataloader,
lr_multiplier,
model,
optimizer,
train,
)
train.max_iter *= 4 # 100ep -> 400ep
lr_multiplier.scheduler.milestones = [
milestone * 4 for milestone in lr_multiplier.scheduler.milestones
]
lr_multiplier.scheduler.num_updates = train.max_iter
import detectron2.data.transforms as T
from detectron2.config.lazy import LazyCall as L
from detectron2.layers.batch_norm import NaiveSyncBatchNorm
from detectron2.solver import WarmupParamScheduler
from fvcore.common.param_scheduler import MultiStepParamScheduler
from ..common.data.coco import dataloader
from ..common.models.mask_rcnn_fpn import model
from ..common.optim import SGD as optimizer
from ..common.train import train
# train from scratch
train.init_checkpoint = ""
train.amp.enabled = True
train.ddp.fp16_compression = True
model.backbone.bottom_up.freeze_at = 0
# SyncBN
# fmt: off
model.backbone.bottom_up.stem.norm = \
model.backbone.bottom_up.stages.norm = \
model.backbone.norm = "SyncBN"
# Using NaiveSyncBatchNorm becase heads may have empty input. That is not supported by
# torch.nn.SyncBatchNorm. We can remove this after
# https://github.com/pytorch/pytorch/issues/36530 is fixed.
model.roi_heads.box_head.conv_norm = \
model.roi_heads.mask_head.conv_norm = lambda c: NaiveSyncBatchNorm(c,
stats_mode="N")
# fmt: on
# 2conv in RPN:
# https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/modeling/architecture/heads.py#L95-L97 # noqa: E501, B950
model.proposal_generator.head.conv_dims = [-1, -1]
# 4conv1fc box head
model.roi_heads.box_head.conv_dims = [256, 256, 256, 256]
model.roi_heads.box_head.fc_dims = [1024]
# resize_and_crop_image in:
# https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/utils/input_utils.py#L127 # noqa: E501, B950
image_size = 1024
dataloader.train.mapper.augmentations = [
L(T.ResizeScale)(
min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size
),
L(T.FixedSizeCrop)(crop_size=(image_size, image_size)),
L(T.RandomFlip)(horizontal=True),
]
# recompute boxes due to cropping
dataloader.train.mapper.recompute_boxes = True
# larger batch-size.
dataloader.train.total_batch_size = 64
# Equivalent to 100 epochs.
# 100 ep = 184375 iters * 64 images/iter / 118000 images/ep
train.max_iter = 184375
lr_multiplier = L(WarmupParamScheduler)(
scheduler=L(MultiStepParamScheduler)(
values=[1.0, 0.1, 0.01],
milestones=[163889, 177546],
num_updates=train.max_iter,
),
warmup_length=500 / train.max_iter,
warmup_factor=0.067,
)
optimizer.lr = 0.1
optimizer.weight_decay = 4e-5
from .mask_rcnn_R_50_FPN_100ep_LSJ import (
dataloader,
lr_multiplier,
model,
optimizer,
train,
)
train.max_iter *= 2 # 100ep -> 200ep
lr_multiplier.scheduler.milestones = [
milestone * 2 for milestone in lr_multiplier.scheduler.milestones
]
lr_multiplier.scheduler.num_updates = train.max_iter
from .mask_rcnn_R_50_FPN_100ep_LSJ import (
dataloader,
lr_multiplier,
model,
optimizer,
train,
)
train.max_iter *= 4 # 100ep -> 400ep
lr_multiplier.scheduler.milestones = [
milestone * 4 for milestone in lr_multiplier.scheduler.milestones
]
lr_multiplier.scheduler.num_updates = train.max_iter
from .mask_rcnn_R_50_FPN_100ep_LSJ import (
dataloader,
lr_multiplier,
model,
optimizer,
train,
)
train.max_iter //= 2 # 100ep -> 50ep
lr_multiplier.scheduler.milestones = [
milestone // 2 for milestone in lr_multiplier.scheduler.milestones
]
lr_multiplier.scheduler.num_updates = train.max_iter
from .mask_rcnn_R_50_FPN_100ep_LSJ import (
dataloader,
lr_multiplier,
model,
optimizer,
train,
)
from detectron2.config import LazyCall as L
from detectron2.modeling.backbone import RegNet
from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
# Config source:
# https://github.com/facebookresearch/detectron2/blob/main/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py # noqa
model.backbone.bottom_up = L(RegNet)(
stem_class=SimpleStem,
stem_width=32,
block_class=ResBottleneckBlock,
depth=23,
w_a=38.65,
w_0=96,
w_m=2.43,
group_width=40,
norm="SyncBN",
out_features=["s1", "s2", "s3", "s4"],
)
model.pixel_std = [57.375, 57.120, 58.395]
# RegNets benefit from enabling cudnn benchmark mode
train.cudnn_benchmark = True
from .mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ import (
dataloader,
lr_multiplier,
model,
optimizer,
train,
)
train.max_iter *= 2 # 100ep -> 200ep
lr_multiplier.scheduler.milestones = [
milestone * 2 for milestone in lr_multiplier.scheduler.milestones
]
lr_multiplier.scheduler.num_updates = train.max_iter
from .mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ import (
dataloader,
lr_multiplier,
model,
optimizer,
train,
)
train.max_iter *= 4 # 100ep -> 400ep
lr_multiplier.scheduler.milestones = [
milestone * 4 for milestone in lr_multiplier.scheduler.milestones
]
lr_multiplier.scheduler.num_updates = train.max_iter
from .mask_rcnn_R_50_FPN_100ep_LSJ import (
dataloader,
lr_multiplier,
model,
optimizer,
train,
)
from detectron2.config import LazyCall as L
from detectron2.modeling.backbone import RegNet
from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
# Config source:
# https://github.com/facebookresearch/detectron2/blob/main/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py # noqa
model.backbone.bottom_up = L(RegNet)(
stem_class=SimpleStem,
stem_width=32,
block_class=ResBottleneckBlock,
depth=22,
w_a=31.41,
w_0=96,
w_m=2.24,
group_width=64,
se_ratio=0.25,
norm="SyncBN",
out_features=["s1", "s2", "s3", "s4"],
)
model.pixel_std = [57.375, 57.120, 58.395]
# RegNets benefit from enabling cudnn benchmark mode
train.cudnn_benchmark = True
from .mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ import (
dataloader,
lr_multiplier,
model,
optimizer,
train,
)
train.max_iter *= 2 # 100ep -> 200ep
lr_multiplier.scheduler.milestones = [
milestone * 2 for milestone in lr_multiplier.scheduler.milestones
]
lr_multiplier.scheduler.num_updates = train.max_iter
from .mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ import (
dataloader,
lr_multiplier,
model,
optimizer,
train,
)
train.max_iter *= 4 # 100ep -> 400ep
lr_multiplier.scheduler.milestones = [
milestone * 4 for milestone in lr_multiplier.scheduler.milestones
]
lr_multiplier.scheduler.num_updates = train.max_iter
These are quick configs for performance or accuracy regression tracking purposes.
* `*instance_test.yaml`: can train on 2 GPUs. They are used to test whether the training can
successfully finish. They are not expected to produce reasonable training results.
* `*inference_acc_test.yaml`: They should be run using `--eval-only`. They run inference using pre-trained models and verify
the results are as expected.
* `*training_acc_test.yaml`: They should be trained on 8 GPUs. They finish in about an hour and verify the training accuracy
is within the normal range.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment