Commit c732df65 authored by limm

push v0.1.3 version commit bd2ea47

parent 5b3792fc
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
TensorMask Training Script.
This script is a simplified version of the training script in detectron2/tools.
"""
import os
import detectron2.utils.comm as comm
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch
from detectron2.evaluation import COCOEvaluator, verify_results
from tensormask import add_tensormask_config
class Trainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        return COCOEvaluator(dataset_name, cfg, True, output_folder)


def setup(args):
    """
    Create configs and perform basic setups.
    """
    cfg = get_cfg()
    add_tensormask_config(cfg)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    default_setup(cfg, args)
    return cfg


def main(args):
    cfg = setup(args)

    if args.eval_only:
        model = Trainer.build_model(cfg)
        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
            cfg.MODEL.WEIGHTS, resume=args.resume
        )
        res = Trainer.test(cfg, model)
        if comm.is_main_process():
            verify_results(cfg, res)
        return res

    trainer = Trainer(cfg)
    trainer.resume_or_load(resume=args.resume)
    return trainer.train()


if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    print("Command Line Args:", args)
    launch(
        main,
        args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args,),
    )
# TridentNet in Detectron2
**Scale-Aware Trident Networks for Object Detection**
Yanghao Li\*, Yuntao Chen\*, Naiyan Wang, Zhaoxiang Zhang
[[`TridentNet`](https://github.com/TuSimple/simpledet/tree/master/models/tridentnet)] [[`arXiv`](https://arxiv.org/abs/1901.01892)] [[`BibTeX`](#CitingTridentNet)]
<div align="center">
<img src="https://drive.google.com/uc?export=view&id=10THEPdIPmf3ooMyNzrfZbpWihEBvixwt" width="700px" />
</div>
In this repository, we implement TridentNet-Fast in Detectron2.
Trident Network (TridentNet) aims to generate scale-specific feature maps with uniform representational power. It constructs a parallel multi-branch architecture in which each branch shares the same transformation parameters but uses a different receptive field. TridentNet-Fast is a fast approximation of TridentNet that achieves significant improvements without any additional parameters or computational cost.
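Because the branches differ only in their dilation rates, the core weight-sharing idea fits in a few lines of PyTorch. The sketch below is an illustration only (the `NaiveTridentConv` name is ours; it omits the norm layers, bias handling, and single-branch fast inference of the repository's actual `TridentConv`):

```python
import torch
import torch.nn.functional as F
from torch import nn


class NaiveTridentConv(nn.Module):
    """One shared 3x3 weight, applied once per branch with a different dilation."""

    def __init__(self, channels, dilations=(1, 2, 3)):
        super().__init__()
        self.dilations = dilations
        # A single parameter tensor shared by all branches: extra branches add
        # receptive-field diversity but zero extra parameters.
        self.weight = nn.Parameter(torch.empty(channels, channels, 3, 3))
        nn.init.kaiming_uniform_(self.weight, nonlinearity="relu")

    def forward(self, x):
        # padding == dilation keeps every branch's output the same spatial size.
        return [F.conv2d(x, self.weight, padding=d, dilation=d) for d in self.dilations]


x = torch.randn(1, 16, 32, 32)
branches = NaiveTridentConv(16)(x)
assert all(b.shape == x.shape for b in branches)  # three scale-specific views of x
```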
## Training
To train a model, run
```bash
python /path/to/detectron2/projects/TridentNet/train_net.py --config-file <config.yaml>
```
For example, to launch end-to-end TridentNet training with ResNet-50 backbone on 8 GPUs,
one should execute:
```bash
python /path/to/detectron2/projects/TridentNet/train_net.py --config-file configs/tridentnet_fast_R_50_C4_1x.yaml --num-gpus 8
```
## Evaluation
Model evaluation can be done similarly:
```bash
python /path/to/detectron2/projects/TridentNet/train_net.py --config-file configs/tridentnet_fast_R_50_C4_1x.yaml --eval-only MODEL.WEIGHTS model.pth
```
## Results on MS-COCO in Detectron2
|Model|Backbone|Head|lr sched|AP|AP50|AP75|APs|APm|APl|download|
|-----|--------|----|--------|--|----|----|---|---|---|--------|
|Faster|R50-C4|C5-512ROI|1X|35.7|56.1|38.0|19.2|40.9|48.7|<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/137257644/model_final_721ade.pkl">model</a>&nbsp;\|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/137257644/metrics.json">metrics</a>|
|TridentFast|R50-C4|C5-128ROI|1X|38.0|58.1|40.8|19.5|42.2|54.6|<a href="https://dl.fbaipublicfiles.com/detectron2/TridentNet/tridentnet_fast_R_50_C4_1x/148572687/model_final_756cda.pkl">model</a>&nbsp;\|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/TridentNet/tridentnet_fast_R_50_C4_1x/148572687/metrics.json">metrics</a>|
|Faster|R50-C4|C5-512ROI|3X|38.4|58.7|41.3|20.7|42.7|53.1|<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_3x/137849393/model_final_f97cb7.pkl">model</a>&nbsp;\|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_3x/137849393/metrics.json">metrics</a>|
|TridentFast|R50-C4|C5-128ROI|3X|40.6|60.8|43.6|23.4|44.7|57.1|<a href="https://dl.fbaipublicfiles.com/detectron2/TridentNet/tridentnet_fast_R_50_C4_3x/148572287/model_final_e1027c.pkl">model</a>&nbsp;\|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/TridentNet/tridentnet_fast_R_50_C4_3x/148572287/metrics.json">metrics</a>|
|Faster|R101-C4|C5-512ROI|3X|41.1|61.4|44.0|22.2|45.5|55.9|<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_C4_3x/138204752/model_final_298dad.pkl">model</a>&nbsp;\|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_C4_3x/138204752/metrics.json">metrics</a>|
|TridentFast|R101-C4|C5-128ROI|3X|43.6|63.4|47.0|24.3|47.8|60.0|<a href="https://dl.fbaipublicfiles.com/detectron2/TridentNet/tridentnet_fast_R_101_C4_3x/148572198/model_final_164568.pkl">model</a>&nbsp;\|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/TridentNet/tridentnet_fast_R_101_C4_3x/148572198/metrics.json">metrics</a>|
## <a name="CitingTridentNet"></a>Citing TridentNet
If you use TridentNet, please use the following BibTeX entry.
```
@InProceedings{li2019scale,
  title={Scale-Aware Trident Networks for Object Detection},
  author={Li, Yanghao and Chen, Yuntao and Wang, Naiyan and Zhang, Zhaoxiang},
  booktitle={The International Conference on Computer Vision (ICCV)},
  year={2019}
}
```
# configs/Base-TridentNet-Fast-C4.yaml
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  BACKBONE:
    NAME: "build_trident_resnet_backbone"
  ROI_HEADS:
    NAME: "TridentRes5ROIHeads"
    POSITIVE_FRACTION: 0.5
    BATCH_SIZE_PER_IMAGE: 128
    PROPOSAL_APPEND_GT: False
  PROPOSAL_GENERATOR:
    NAME: "TridentRPN"
  RPN:
    POST_NMS_TOPK_TRAIN: 500
  TRIDENT:
    NUM_BRANCH: 3
    BRANCH_DILATIONS: [1, 2, 3]
    TEST_BRANCH_IDX: 1
    TRIDENT_STAGE: "res4"
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.02
  STEPS: (60000, 80000)
  MAX_ITER: 90000
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
VERSION: 2
# configs/tridentnet_fast_R_101_C4_3x.yaml
_BASE_: "Base-TridentNet-Fast-C4.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 101
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000
# configs/tridentnet_fast_R_50_C4_1x.yaml
_BASE_: "Base-TridentNet-Fast-C4.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 50
# configs/tridentnet_fast_R_50_C4_3x.yaml
_BASE_: "Base-TridentNet-Fast-C4.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 50
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
TridentNet Training Script.
This script is a simplified version of the training script in detectron2/tools.
"""
import os
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch
from detectron2.evaluation import COCOEvaluator
from tridentnet import add_tridentnet_config
class Trainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        return COCOEvaluator(dataset_name, cfg, True, output_folder)


def setup(args):
    """
    Create configs and perform basic setups.
    """
    cfg = get_cfg()
    add_tridentnet_config(cfg)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    default_setup(cfg, args)
    return cfg


def main(args):
    cfg = setup(args)

    if args.eval_only:
        model = Trainer.build_model(cfg)
        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
            cfg.MODEL.WEIGHTS, resume=args.resume
        )
        res = Trainer.test(cfg, model)
        return res

    trainer = Trainer(cfg)
    trainer.resume_or_load(resume=args.resume)
    return trainer.train()


if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    print("Command Line Args:", args)
    launch(
        main,
        args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args,),
    )
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .config import add_tridentnet_config
from .trident_backbone import (
    TridentBottleneckBlock,
    build_trident_resnet_backbone,
    make_trident_stage,
)
from .trident_rpn import TridentRPN
from .trident_rcnn import TridentRes5ROIHeads, TridentStandardROIHeads
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from detectron2.config import CfgNode as CN
def add_tridentnet_config(cfg):
    """
    Add config for tridentnet.
    """
    _C = cfg

    _C.MODEL.TRIDENT = CN()

    # Number of branches for TridentNet.
    _C.MODEL.TRIDENT.NUM_BRANCH = 3
    # Specify the dilations for each branch.
    _C.MODEL.TRIDENT.BRANCH_DILATIONS = [1, 2, 3]
    # Specify the stage for applying trident blocks. Default stage is Res4 according to the
    # TridentNet paper.
    _C.MODEL.TRIDENT.TRIDENT_STAGE = "res4"
    # Specify the test branch index for TridentNet-Fast inference:
    # - use -1 to aggregate the results of all branches during inference.
    # - otherwise, only the specified branch is used for fast inference; the recommended
    #   setting is the middle branch.
    _C.MODEL.TRIDENT.TEST_BRANCH_IDX = 1
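    # For example, since the training script passes `args.opts` to `cfg.merge_from_list`,
    # the single-branch fast inference can be switched to full three-branch aggregation
    # at test time from the command line (hypothetical invocation):
    #   python train_net.py --config-file configs/tridentnet_fast_R_50_C4_1x.yaml \
    #       --eval-only MODEL.WEIGHTS model.pth MODEL.TRIDENT.TEST_BRANCH_IDX -1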
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import fvcore.nn.weight_init as weight_init
import torch
import torch.nn.functional as F
from detectron2.layers import Conv2d, FrozenBatchNorm2d, get_norm
from detectron2.modeling import BACKBONE_REGISTRY, ResNet, ResNetBlockBase, make_stage
from detectron2.modeling.backbone.resnet import BasicStem, BottleneckBlock, DeformBottleneckBlock
from .trident_conv import TridentConv
__all__ = ["TridentBottleneckBlock", "make_trident_stage", "build_trident_resnet_backbone"]
class TridentBottleneckBlock(ResNetBlockBase):
    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        bottleneck_channels,
        stride=1,
        num_groups=1,
        norm="BN",
        stride_in_1x1=False,
        num_branch=3,
        dilations=(1, 2, 3),
        concat_output=False,
        test_branch_idx=-1,
    ):
        """
        Args:
            num_branch (int): the number of branches in TridentNet.
            dilations (tuple): the dilations of the multiple branches in TridentNet.
            concat_output (bool): whether to concatenate the outputs of the multiple
                branches. Use 'True' for the last trident block.
        """
        super().__init__(in_channels, out_channels, stride)

        assert num_branch == len(dilations)

        self.num_branch = num_branch
        self.concat_output = concat_output
        self.test_branch_idx = test_branch_idx

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv2 = TridentConv(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            paddings=dilations,
            bias=False,
            groups=num_groups,
            dilations=dilations,
            num_branch=num_branch,
            test_branch_idx=test_branch_idx,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)

    def forward(self, x):
        num_branch = self.num_branch if self.training or self.test_branch_idx == -1 else 1
        if not isinstance(x, list):
            x = [x] * num_branch
        out = [self.conv1(b) for b in x]
        out = [F.relu_(b) for b in out]

        out = self.conv2(out)
        out = [F.relu_(b) for b in out]

        out = [self.conv3(b) for b in out]

        if self.shortcut is not None:
            shortcut = [self.shortcut(b) for b in x]
        else:
            shortcut = x

        out = [out_b + shortcut_b for out_b, shortcut_b in zip(out, shortcut)]
        out = [F.relu_(b) for b in out]
        if self.concat_output:
            out = torch.cat(out)
        return out
def make_trident_stage(block_class, num_blocks, first_stride, **kwargs):
    """
    Create a resnet stage by creating many blocks for TridentNet.
    """
    blocks = []
    for i in range(num_blocks - 1):
        blocks.append(block_class(stride=first_stride if i == 0 else 1, **kwargs))
        kwargs["in_channels"] = kwargs["out_channels"]
    blocks.append(block_class(stride=1, concat_output=True, **kwargs))
    return blocks
@BACKBONE_REGISTRY.register()
def build_trident_resnet_backbone(cfg, input_shape):
    """
    Create a ResNet instance from config for TridentNet.

    Returns:
        ResNet: a :class:`ResNet` instance.
    """
    # need registration of new blocks/stems?
    norm = cfg.MODEL.RESNETS.NORM
    stem = BasicStem(
        in_channels=input_shape.channels,
        out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS,
        norm=norm,
    )
    freeze_at = cfg.MODEL.BACKBONE.FREEZE_AT

    if freeze_at >= 1:
        for p in stem.parameters():
            p.requires_grad = False
        stem = FrozenBatchNorm2d.convert_frozen_batchnorm(stem)

    # fmt: off
    out_features        = cfg.MODEL.RESNETS.OUT_FEATURES
    depth               = cfg.MODEL.RESNETS.DEPTH
    num_groups          = cfg.MODEL.RESNETS.NUM_GROUPS
    width_per_group     = cfg.MODEL.RESNETS.WIDTH_PER_GROUP
    bottleneck_channels = num_groups * width_per_group
    in_channels         = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS
    out_channels        = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
    stride_in_1x1       = cfg.MODEL.RESNETS.STRIDE_IN_1X1
    res5_dilation       = cfg.MODEL.RESNETS.RES5_DILATION
    deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE
    deform_modulated    = cfg.MODEL.RESNETS.DEFORM_MODULATED
    deform_num_groups   = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS
    num_branch          = cfg.MODEL.TRIDENT.NUM_BRANCH
    branch_dilations    = cfg.MODEL.TRIDENT.BRANCH_DILATIONS
    trident_stage       = cfg.MODEL.TRIDENT.TRIDENT_STAGE
    test_branch_idx     = cfg.MODEL.TRIDENT.TEST_BRANCH_IDX
    # fmt: on

    assert res5_dilation in {1, 2}, "res5_dilation cannot be {}.".format(res5_dilation)

    num_blocks_per_stage = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}[depth]

    stages = []

    res_stage_idx = {"res2": 2, "res3": 3, "res4": 4, "res5": 5}
    out_stage_idx = [res_stage_idx[f] for f in out_features]
    trident_stage_idx = res_stage_idx[trident_stage]
    max_stage_idx = max(out_stage_idx)
    for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)):
        dilation = res5_dilation if stage_idx == 5 else 1
        first_stride = 1 if idx == 0 or (stage_idx == 5 and dilation == 2) else 2
        stage_kargs = {
            "num_blocks": num_blocks_per_stage[idx],
            "first_stride": first_stride,
            "in_channels": in_channels,
            "bottleneck_channels": bottleneck_channels,
            "out_channels": out_channels,
            "num_groups": num_groups,
            "norm": norm,
            "stride_in_1x1": stride_in_1x1,
            "dilation": dilation,
        }
        if stage_idx == trident_stage_idx:
            assert not deform_on_per_stage[
                idx
            ], "Deformable conv is not supported in Trident blocks yet."
            stage_kargs["block_class"] = TridentBottleneckBlock
            stage_kargs["num_branch"] = num_branch
            stage_kargs["dilations"] = branch_dilations
            stage_kargs["test_branch_idx"] = test_branch_idx
            stage_kargs.pop("dilation")
        elif deform_on_per_stage[idx]:
            stage_kargs["block_class"] = DeformBottleneckBlock
            stage_kargs["deform_modulated"] = deform_modulated
            stage_kargs["deform_num_groups"] = deform_num_groups
        else:
            stage_kargs["block_class"] = BottleneckBlock
        blocks = (
            make_trident_stage(**stage_kargs)
            if stage_idx == trident_stage_idx
            else make_stage(**stage_kargs)
        )
        in_channels = out_channels
        out_channels *= 2
        bottleneck_channels *= 2

        if freeze_at >= stage_idx:
            for block in blocks:
                block.freeze()
        stages.append(blocks)
    return ResNet(stem, stages, out_features=out_features)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import torch
from torch import nn
from torch.nn import functional as F
from torch.nn.modules.utils import _pair
from detectron2.layers.wrappers import _NewEmptyTensorOp
class TridentConv(nn.Module):
    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        paddings=0,
        dilations=1,
        groups=1,
        num_branch=1,
        test_branch_idx=-1,
        bias=False,
        norm=None,
        activation=None,
    ):
        super(TridentConv, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = _pair(kernel_size)
        self.num_branch = num_branch
        self.stride = _pair(stride)
        self.groups = groups
        self.with_bias = bias
        if isinstance(paddings, int):
            paddings = [paddings] * self.num_branch
        if isinstance(dilations, int):
            dilations = [dilations] * self.num_branch
        self.paddings = [_pair(padding) for padding in paddings]
        self.dilations = [_pair(dilation) for dilation in dilations]
        self.test_branch_idx = test_branch_idx
        self.norm = norm
        self.activation = activation

        assert len({self.num_branch, len(self.paddings), len(self.dilations)}) == 1

        self.weight = nn.Parameter(
            torch.Tensor(out_channels, in_channels // groups, *self.kernel_size)
        )
        if bias:
            self.bias = nn.Parameter(torch.Tensor(out_channels))
        else:
            self.bias = None

        nn.init.kaiming_uniform_(self.weight, nonlinearity="relu")
        if self.bias is not None:
            nn.init.constant_(self.bias, 0)

    def forward(self, inputs):
        num_branch = self.num_branch if self.training or self.test_branch_idx == -1 else 1
        assert len(inputs) == num_branch

        if inputs[0].numel() == 0:
            # Every branch has the same output spatial size when paddings match
            # dilations (as in TridentBottleneckBlock), so the first branch's
            # padding/dilation is used for the shape computation.
            output_shape = [
                (i + 2 * p - (di * (k - 1) + 1)) // s + 1
                for i, p, di, k, s in zip(
                    inputs[0].shape[-2:],
                    self.paddings[0],
                    self.dilations[0],
                    self.kernel_size,
                    self.stride,
                )
            ]
            output_shape = [inputs[0].shape[0], self.weight.shape[0]] + output_shape
            return [_NewEmptyTensorOp.apply(input, output_shape) for input in inputs]

        if self.training or self.test_branch_idx == -1:
            outputs = [
                F.conv2d(input, self.weight, self.bias, self.stride, padding, dilation, self.groups)
                for input, dilation, padding in zip(inputs, self.dilations, self.paddings)
            ]
        else:
            outputs = [
                F.conv2d(
                    inputs[0],
                    self.weight,
                    self.bias,
                    self.stride,
                    self.paddings[self.test_branch_idx],
                    self.dilations[self.test_branch_idx],
                    self.groups,
                )
            ]

        if self.norm is not None:
            outputs = [self.norm(x) for x in outputs]
        if self.activation is not None:
            outputs = [self.activation(x) for x in outputs]
        return outputs

    def extra_repr(self):
        tmpstr = "in_channels=" + str(self.in_channels)
        tmpstr += ", out_channels=" + str(self.out_channels)
        tmpstr += ", kernel_size=" + str(self.kernel_size)
        tmpstr += ", num_branch=" + str(self.num_branch)
        tmpstr += ", test_branch_idx=" + str(self.test_branch_idx)
        tmpstr += ", stride=" + str(self.stride)
        tmpstr += ", paddings=" + str(self.paddings)
        tmpstr += ", dilations=" + str(self.dilations)
        tmpstr += ", groups=" + str(self.groups)
        tmpstr += ", bias=" + str(self.with_bias)
        return tmpstr
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from detectron2.layers import batched_nms
from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads
from detectron2.modeling.roi_heads.roi_heads import Res5ROIHeads
from detectron2.structures import Instances
def merge_branch_instances(instances, num_branch, nms_thresh, topk_per_image):
    """
    Merge detection results from the different branches of TridentNet.
    Return detection results by applying non-maximum suppression (NMS) on bounding boxes
    and keeping the unsuppressed boxes and other instances (e.g. masks) if any.

    Args:
        instances (list[Instances]): A list of N * num_branch instances that store detection
            results. It contains N images, and each image has num_branch instances.
        num_branch (int): Number of branches used for merging detection results for each image.
        nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1].
        topk_per_image (int): The number of top scoring detections to return. Set < 0 to return
            all detections.

    Returns:
        results (list[Instances]): A list of N instances, one for each image in the batch,
            that stores the top-k most confident detections after merging results from the
            multiple branches.
    """
    if num_branch == 1:
        return instances

    batch_size = len(instances) // num_branch
    results = []
    for i in range(batch_size):
        instance = Instances.cat([instances[i + batch_size * j] for j in range(num_branch)])

        # Apply per-class NMS
        keep = batched_nms(
            instance.pred_boxes.tensor, instance.scores, instance.pred_classes, nms_thresh
        )
        keep = keep[:topk_per_image]
        result = instance[keep]

        results.append(result)
    return results
@ROI_HEADS_REGISTRY.register()
class TridentRes5ROIHeads(Res5ROIHeads):
    """
    The TridentNet ROIHeads in a typical "C4" R-CNN model.
    See :class:`Res5ROIHeads`.
    """

    def __init__(self, cfg, input_shape):
        super().__init__(cfg, input_shape)

        self.num_branch = cfg.MODEL.TRIDENT.NUM_BRANCH
        self.trident_fast = cfg.MODEL.TRIDENT.TEST_BRANCH_IDX != -1

    def forward(self, images, features, proposals, targets=None):
        """
        See :class:`Res5ROIHeads.forward`.
        """
        num_branch = self.num_branch if self.training or not self.trident_fast else 1
        all_targets = targets * num_branch if targets is not None else None
        pred_instances, losses = super().forward(images, features, proposals, all_targets)
        del images, all_targets, targets

        if self.training:
            return pred_instances, losses
        else:
            pred_instances = merge_branch_instances(
                pred_instances,
                num_branch,
                self.box_predictor.test_nms_thresh,
                self.box_predictor.test_topk_per_image,
            )
            return pred_instances, {}
@ROI_HEADS_REGISTRY.register()
class TridentStandardROIHeads(StandardROIHeads):
    """
    The `StandardROIHeads` for TridentNet.
    See :class:`StandardROIHeads`.
    """

    def __init__(self, cfg, input_shape):
        super(TridentStandardROIHeads, self).__init__(cfg, input_shape)

        self.num_branch = cfg.MODEL.TRIDENT.NUM_BRANCH
        self.trident_fast = cfg.MODEL.TRIDENT.TEST_BRANCH_IDX != -1

    def forward(self, images, features, proposals, targets=None):
        """
        See :class:`StandardROIHeads.forward`.
        """
        # Use 1 branch if using trident_fast during inference.
        num_branch = self.num_branch if self.training or not self.trident_fast else 1
        # Duplicate targets for all branches in TridentNet.
        all_targets = targets * num_branch if targets is not None else None
        pred_instances, losses = super().forward(images, features, proposals, all_targets)
        del images, all_targets, targets

        if self.training:
            return pred_instances, losses
        else:
            pred_instances = merge_branch_instances(
                pred_instances,
                num_branch,
                self.box_predictor.test_nms_thresh,
                self.box_predictor.test_topk_per_image,
            )
            return pred_instances, {}
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import torch
from detectron2.modeling import PROPOSAL_GENERATOR_REGISTRY
from detectron2.modeling.proposal_generator.rpn import RPN
from detectron2.structures import ImageList
@PROPOSAL_GENERATOR_REGISTRY.register()
class TridentRPN(RPN):
    """
    Trident RPN subnetwork.
    """

    def __init__(self, cfg, input_shape):
        super(TridentRPN, self).__init__(cfg, input_shape)

        self.num_branch = cfg.MODEL.TRIDENT.NUM_BRANCH
        self.trident_fast = cfg.MODEL.TRIDENT.TEST_BRANCH_IDX != -1

    def forward(self, images, features, gt_instances=None):
        """
        See :class:`RPN.forward`.
        """
        num_branch = self.num_branch if self.training or not self.trident_fast else 1
        # Duplicate images and gt_instances for all branches in TridentNet.
        all_images = ImageList(
            torch.cat([images.tensor] * num_branch), images.image_sizes * num_branch
        )
        all_gt_instances = gt_instances * num_branch if gt_instances is not None else None

        return super(TridentRPN, self).forward(all_images, features, all_gt_instances)
[isort]
line_length=100
multi_line_output=3
include_trailing_comma=True
known_standard_library=numpy,setuptools,mock
skip=./datasets,docs
skip_glob=*/__init__.py
known_myself=detectron2
known_third_party=fvcore,matplotlib,cv2,torch,torchvision,PIL,pycocotools,yacs,termcolor,cityscapesscripts,tabulate,tqdm,scipy,lvis,psutil,pkg_resources,caffe2,onnx
no_lines_before=STDLIB,THIRDPARTY
sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER
default_section=FIRSTPARTY
[mypy]
python_version=3.6
ignore_missing_imports = True
warn_unused_configs = True
disallow_untyped_defs = True
check_untyped_defs = True
warn_unused_ignores = True
warn_redundant_casts = True
show_column_numbers = True
follow_imports = silent
allow_redefinition = True
; Require all functions to be annotated
disallow_incomplete_defs = True
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import glob
import os
import shutil
from os import path
from setuptools import find_packages, setup
from typing import List
import torch
from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension
torch_ver = [int(x) for x in torch.__version__.split(".")[:2]]
assert torch_ver >= [1, 4], "Requires PyTorch >= 1.4"
def get_version():
    init_py_path = path.join(path.abspath(path.dirname(__file__)), "detectron2", "__init__.py")
    init_py = open(init_py_path, "r").readlines()
    version_line = [l.strip() for l in init_py if l.startswith("__version__")][0]
    version = version_line.split("=")[-1].strip().strip("'\"")

    # The following is used to build release packages.
    # Users should never use it.
    suffix = os.getenv("D2_VERSION_SUFFIX", "")
    version = version + suffix
    if os.getenv("BUILD_NIGHTLY", "0") == "1":
        from datetime import datetime

        date_str = datetime.today().strftime("%y%m%d")
        version = version + ".dev" + date_str

        new_init_py = [l for l in init_py if not l.startswith("__version__")]
        new_init_py.append('__version__ = "{}"\n'.format(version))
        with open(init_py_path, "w") as f:
            f.write("".join(new_init_py))
    return version


def get_extensions():
    this_dir = path.dirname(path.abspath(__file__))
    extensions_dir = path.join(this_dir, "detectron2", "layers", "csrc")

    main_source = path.join(extensions_dir, "vision.cpp")
    sources = glob.glob(path.join(extensions_dir, "**", "*.cpp"))
    source_cuda = glob.glob(path.join(extensions_dir, "**", "*.cu")) + glob.glob(
        path.join(extensions_dir, "*.cu")
    )

    sources = [main_source] + sources

    extension = CppExtension

    extra_compile_args = {"cxx": []}
    define_macros = []

    if (
        torch.cuda.is_available() and CUDA_HOME is not None and os.path.isdir(CUDA_HOME)
    ) or os.getenv("FORCE_CUDA", "0") == "1":
        extension = CUDAExtension
        sources += source_cuda
        define_macros += [("WITH_CUDA", None)]
        extra_compile_args["nvcc"] = [
            "-DCUDA_HAS_FP16=1",
            "-D__CUDA_NO_HALF_OPERATORS__",
            "-D__CUDA_NO_HALF_CONVERSIONS__",
            "-D__CUDA_NO_HALF2_OPERATORS__",
        ]

        # It's better if pytorch can do this by default ..
        CC = os.environ.get("CC", None)
        if CC is not None:
            extra_compile_args["nvcc"].append("-ccbin={}".format(CC))

    include_dirs = [extensions_dir]

    ext_modules = [
        extension(
            "detectron2._C",
            sources,
            include_dirs=include_dirs,
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
        )
    ]

    return ext_modules


def get_model_zoo_configs() -> List[str]:
    """
    Return a list of configs to include in package for model zoo. Copy over these configs inside
    detectron2/model_zoo.
    """
    # Use absolute paths while symlinking.
    source_configs_dir = path.join(path.dirname(path.realpath(__file__)), "configs")
    destination = path.join(
        path.dirname(path.realpath(__file__)), "detectron2", "model_zoo", "configs"
    )
    # Symlink the config directory inside package to have a cleaner pip install.

    # Remove stale symlink/directory from a previous build.
    if path.exists(source_configs_dir):
        if path.islink(destination):
            os.unlink(destination)
        elif path.isdir(destination):
            shutil.rmtree(destination)

    if not path.exists(destination):
        try:
            os.symlink(source_configs_dir, destination)
        except OSError:
            # Fall back to copying if symlink fails: ex. on Windows.
            shutil.copytree(source_configs_dir, destination)

    config_paths = glob.glob("configs/**/*.yaml", recursive=True)

    return config_paths


setup(
    name="detectron2",
    version=get_version(),
    author="FAIR",
    url="https://github.com/facebookresearch/detectron2",
    description="Detectron2 is FAIR's next-generation research "
    "platform for object detection and segmentation.",
    packages=find_packages(exclude=("configs", "tests*")),
    package_data={"detectron2.model_zoo": get_model_zoo_configs()},
    python_requires=">=3.6",
    install_requires=[
        "termcolor>=1.1",
        "Pillow",  # you can also use pillow-simd for better performance
        "yacs>=0.1.6",
        "tabulate",
        "cloudpickle",
        "matplotlib",
        "mock",
        "tqdm>4.29.0",
        "tensorboard",
        "fvcore>=0.1.1",
        "future",  # used by caffe2
        "pydot",  # used to save caffe2 SVGs
    ],
    extras_require={
        "all": ["shapely", "psutil"],
        "dev": [
            "flake8==3.7.9",
            "isort",
            "black @ git+https://github.com/psf/black@673327449f86fce558adde153bb6cbe54bfebad2",
            "flake8-bugbear",
            "flake8-comprehensions",
        ],
    },
    ext_modules=get_extensions(),
    cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
)
## Unit Tests
To run the unittests, do:
```bash
cd detectron2
python -m unittest discover -v -s ./tests
```
There are also end-to-end inference & training tests, in [dev/run_*_tests.sh](../dev).
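A single test module can be run with the same runner, for example (assuming the COCO RLE round-trip test below lives at `tests/test_coco.py`):

```bash
cd detectron2
python -m unittest tests.test_coco -v
```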
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import json
import numpy as np
import os
import tempfile
import unittest
import pycocotools.mask
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets.coco import convert_to_coco_dict, load_coco_json
from detectron2.structures import BoxMode
def make_mask():
    """
    Makes a donut shaped binary mask.
    """
    H = 100
    W = 100
    mask = np.zeros([H, W], dtype=np.uint8)
    for x in range(W):
        for y in range(H):
            d = np.linalg.norm(np.array([W, H]) / 2 - np.array([x, y]))
            if d > 10 and d < 20:
                mask[y, x] = 1

    return mask


def make_dataset_dicts(mask):
    """
    Returns a list of dicts that represents a single COCO data point for
    object detection. The single instance given by `mask` is represented by
    RLE.
    """
    record = {}
    record["file_name"] = "test"
    record["image_id"] = 0
    record["height"] = mask.shape[0]
    record["width"] = mask.shape[1]

    y, x = np.nonzero(mask)
    segmentation = pycocotools.mask.encode(np.asarray(mask, order="F"))
    min_x = np.min(x)
    max_x = np.max(x)
    min_y = np.min(y)
    max_y = np.max(y)
    obj = {
        "bbox": [min_x, min_y, max_x, max_y],
        "bbox_mode": BoxMode.XYXY_ABS,
        "category_id": 0,
        "iscrowd": 0,
        "segmentation": segmentation,
    }
    record["annotations"] = [obj]
    return [record]


class TestRLEToJson(unittest.TestCase):
    def test(self):
        # Make a dummy dataset.
        mask = make_mask()
        DatasetCatalog.register("test_dataset", lambda: make_dataset_dicts(mask))
        MetadataCatalog.get("test_dataset").set(thing_classes=["test_label"])

        # Dump to json.
        json_dict = convert_to_coco_dict("test_dataset")
        with tempfile.TemporaryDirectory() as tmpdir:
            json_file_name = os.path.join(tmpdir, "test.json")
            with open(json_file_name, "w") as f:
                json.dump(json_dict, f)

            # Load from json.
            dicts = load_coco_json(json_file_name, "")

        # Check the loaded mask matches the original.
        anno = dicts[0]["annotations"][0]
        loaded_mask = pycocotools.mask.decode(anno["segmentation"])
        self.assertTrue(np.array_equal(loaded_mask, mask))
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import copy
import numpy as np
import unittest
import pycocotools.mask as mask_util
from detectron2.data import detection_utils
from detectron2.data import transforms as T
from detectron2.structures import BitMasks, BoxMode
class TestTransformAnnotations(unittest.TestCase):
    def test_transform_simple_annotation(self):
        transforms = T.TransformList([T.HFlipTransform(400)])
        anno = {
            "bbox": np.asarray([10, 10, 200, 300]),
            "bbox_mode": BoxMode.XYXY_ABS,
            "category_id": 3,
            "segmentation": [[10, 10, 100, 100, 100, 10], [150, 150, 200, 150, 200, 200]],
        }

        output = detection_utils.transform_instance_annotations(anno, transforms, (400, 400))
        self.assertTrue(np.allclose(output["bbox"], [200, 10, 390, 300]))
        self.assertEqual(len(output["segmentation"]), len(anno["segmentation"]))
        self.assertTrue(np.allclose(output["segmentation"][0], [390, 10, 300, 100, 300, 10]))

        detection_utils.annotations_to_instances([output, output], (400, 400))

    def test_flip_keypoints(self):
        transforms = T.TransformList([T.HFlipTransform(400)])
        anno = {
            "bbox": np.asarray([10, 10, 200, 300]),
            "bbox_mode": BoxMode.XYXY_ABS,
            "keypoints": np.random.rand(17, 3) * 50 + 15,
        }

        output = detection_utils.transform_instance_annotations(
            copy.deepcopy(anno),
            transforms,
            (400, 400),
            keypoint_hflip_indices=detection_utils.create_keypoint_hflip_indices(
                ["keypoints_coco_2017_train"]
            ),
        )
        # The first keypoint is nose
        self.assertTrue(np.allclose(output["keypoints"][0, 0], 400 - anno["keypoints"][0, 0]))
        # The last 16 keypoints are 8 left-right pairs
        self.assertTrue(
            np.allclose(
                output["keypoints"][1:, 0].reshape(-1, 2)[:, ::-1],
                400 - anno["keypoints"][1:, 0].reshape(-1, 2),
            )
        )
        self.assertTrue(
            np.allclose(
                output["keypoints"][1:, 1:].reshape(-1, 2, 2)[:, ::-1, :],
                anno["keypoints"][1:, 1:].reshape(-1, 2, 2),
            )
        )

    def test_transform_RLE(self):
        transforms = T.TransformList([T.HFlipTransform(400)])
        mask = np.zeros((300, 400), order="F").astype("uint8")
        mask[:, :200] = 1

        anno = {
            "bbox": np.asarray([10, 10, 200, 300]),
            "bbox_mode": BoxMode.XYXY_ABS,
            "segmentation": mask_util.encode(mask[:, :, None])[0],
            "category_id": 3,
        }
        output = detection_utils.transform_instance_annotations(
            copy.deepcopy(anno), transforms, (300, 400)
        )
        mask = output["segmentation"]
        self.assertTrue((mask[:, 200:] == 1).all())
        self.assertTrue((mask[:, :200] == 0).all())

        inst = detection_utils.annotations_to_instances(
            [output, output], (400, 400), mask_format="bitmask"
        )
        self.assertTrue(isinstance(inst.gt_masks, BitMasks))

    def test_transform_RLE_resize(self):
        transforms = T.TransformList(
            [T.HFlipTransform(400), T.ScaleTransform(300, 400, 400, 400, "bilinear")]
        )
        mask = np.zeros((300, 400), order="F").astype("uint8")
        mask[:, :200] = 1

        anno = {
            "bbox": np.asarray([10, 10, 200, 300]),
            "bbox_mode": BoxMode.XYXY_ABS,
            "segmentation": mask_util.encode(mask[:, :, None])[0],
            "category_id": 3,
        }
        output = detection_utils.transform_instance_annotations(
            copy.deepcopy(anno), transforms, (400, 400)
        )
        inst = detection_utils.annotations_to_instances(
            [output, output], (400, 400), mask_format="bitmask"
        )
        self.assertTrue(isinstance(inst.gt_masks, BitMasks))

    def test_gen_crop(self):
        instance = {"bbox": [10, 10, 100, 100], "bbox_mode": BoxMode.XYXY_ABS}
        t = detection_utils.gen_crop_transform_with_instance((10, 10), (150, 150), instance)
        # the box center must fall into the cropped region
        self.assertTrue(t.x0 <= 55 <= t.x0 + t.w)

    def test_gen_crop_outside_boxes(self):
        instance = {"bbox": [10, 10, 100, 100], "bbox_mode": BoxMode.XYXY_ABS}
        with self.assertRaises(AssertionError):
            detection_utils.gen_crop_transform_with_instance((10, 10), (15, 15), instance)