Commit 142dcf29 authored Apr 15, 2022 by hepj

Add Conformer code

Parent: 7f99c1c3

Showing 20 changed files with 2217 additions and 0 deletions (+2217 −0)
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/assigners/hungarian_assigner.py  +145 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/assigners/max_iou_assigner.py  +212 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/assigners/point_assigner.py  +133 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/assigners/region_assigner.py  +204 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/builder.py  +20 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/coder/__init__.py  +13 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/coder/base_bbox_coder.py  +19 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/coder/bucketing_bbox_coder.py  +346 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/coder/delta_xywh_bbox_coder.py  +204 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/coder/legacy_delta_xywh_bbox_coder.py  +212 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/coder/pseudo_bbox_coder.py  +18 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/coder/tblr_bbox_coder.py  +172 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/coder/yolo_bbox_coder.py  +86 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/demodata.py  +63 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/iou_calculators/__init__.py  +4 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/iou_calculators/builder.py  +8 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/iou_calculators/iou2d_calculator.py  +159 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/match_costs/__init__.py  +7 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/match_costs/builder.py  +8 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/match_costs/match_cost.py  +184 −0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/assigners/hungarian_assigner.py (new file, mode 100644)

import torch

from ..builder import BBOX_ASSIGNERS
from ..match_costs import build_match_cost
from ..transforms import bbox_cxcywh_to_xyxy
from .assign_result import AssignResult
from .base_assigner import BaseAssigner

try:
    from scipy.optimize import linear_sum_assignment
except ImportError:
    linear_sum_assignment = None


@BBOX_ASSIGNERS.register_module()
class HungarianAssigner(BaseAssigner):
    """Computes one-to-one matching between predictions and ground truth.

    This class computes an assignment between the targets and the predictions
    based on the costs. The costs are a weighted sum of three components:
    classification cost, regression L1 cost and regression IoU cost. The
    targets don't include the no_object, so generally there are more
    predictions than targets. After the one-to-one matching, the un-matched
    are treated as backgrounds. Thus each query prediction will be assigned
    with `0` or a positive integer indicating the ground truth index:

    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt

    Args:
        cls_cost (dict, optional): Config of the classification cost.
            Default dict(type='ClassificationCost', weight=1.0).
        reg_cost (dict, optional): Config of the regression L1 cost.
            Default dict(type='BBoxL1Cost', weight=1.0).
        iou_cost (dict, optional): Config of the regression IoU cost.
            Default dict(type='IoUCost', iou_mode='giou', weight=1.0).
    """

    def __init__(self,
                 cls_cost=dict(type='ClassificationCost', weight=1.),
                 reg_cost=dict(type='BBoxL1Cost', weight=1.0),
                 iou_cost=dict(type='IoUCost', iou_mode='giou', weight=1.0)):
        self.cls_cost = build_match_cost(cls_cost)
        self.reg_cost = build_match_cost(reg_cost)
        self.iou_cost = build_match_cost(iou_cost)

    def assign(self,
               bbox_pred,
               cls_pred,
               gt_bboxes,
               gt_labels,
               img_meta,
               gt_bboxes_ignore=None,
               eps=1e-7):
        """Computes one-to-one matching based on the weighted costs.

        This method assigns each query prediction to a ground truth or
        background. The `assigned_gt_inds` with -1 means don't care,
        0 means negative sample, and a positive number is the index (1-based)
        of the assigned gt.

        The assignment is done in the following steps; the order matters.

        1. assign every prediction to -1
        2. compute the weighted costs
        3. do Hungarian matching on CPU based on the costs
        4. assign all to 0 (background) first, then for each matched pair
           between predictions and gts, treat this prediction as foreground
           and assign the corresponding gt index (plus 1) to it.

        Args:
            bbox_pred (Tensor): Predicted boxes with normalized coordinates
                (cx, cy, w, h), which are all in range [0, 1]. Shape
                [num_query, 4].
            cls_pred (Tensor): Predicted classification logits, shape
                [num_query, num_class].
            gt_bboxes (Tensor): Ground truth boxes with unnormalized
                coordinates (x1, y1, x2, y2). Shape [num_gt, 4].
            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
            img_meta (dict): Meta information for current image.
            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
                labelled as `ignored`. Default None.
            eps (int | float, optional): A value added to the denominator for
                numerical stability. Default 1e-7.

        Returns:
            :obj:`AssignResult`: The assigned result.
        """
        assert gt_bboxes_ignore is None, \
            'Only case when gt_bboxes_ignore is None is supported.'
        num_gts, num_bboxes = gt_bboxes.size(0), bbox_pred.size(0)

        # 1. assign -1 by default
        assigned_gt_inds = bbox_pred.new_full((num_bboxes, ),
                                              -1,
                                              dtype=torch.long)
        assigned_labels = bbox_pred.new_full((num_bboxes, ),
                                             -1,
                                             dtype=torch.long)
        if num_gts == 0 or num_bboxes == 0:
            # No ground truth or boxes, return empty assignment
            if num_gts == 0:
                # No ground truth, assign all to background
                assigned_gt_inds[:] = 0
            return AssignResult(
                num_gts, assigned_gt_inds, None, labels=assigned_labels)
        img_h, img_w, _ = img_meta['img_shape']
        factor = gt_bboxes.new_tensor([img_w, img_h, img_w,
                                       img_h]).unsqueeze(0)

        # 2. compute the weighted costs
        # classification and bbox cost
        cls_cost = self.cls_cost(cls_pred, gt_labels)
        # regression L1 cost
        normalize_gt_bboxes = gt_bboxes / factor
        reg_cost = self.reg_cost(bbox_pred, normalize_gt_bboxes)
        # regression iou cost; giou is used by default in official DETR
        bboxes = bbox_cxcywh_to_xyxy(bbox_pred) * factor
        iou_cost = self.iou_cost(bboxes, gt_bboxes)
        # weighted sum of the above three costs
        cost = cls_cost + reg_cost + iou_cost

        # 3. do Hungarian matching on CPU using linear_sum_assignment
        cost = cost.detach().cpu()
        if linear_sum_assignment is None:
            raise ImportError('Please run "pip install scipy" '
                              'to install scipy first.')
        matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
        matched_row_inds = torch.from_numpy(matched_row_inds).to(
            bbox_pred.device)
        matched_col_inds = torch.from_numpy(matched_col_inds).to(
            bbox_pred.device)

        # 4. assign backgrounds and foregrounds
        # assign all indices to backgrounds first
        assigned_gt_inds[:] = 0
        # assign foregrounds based on matching results
        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]
        return AssignResult(
            num_gts, assigned_gt_inds, None, labels=assigned_labels)
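
For reference, a minimal usage sketch of the assigner above (not part of this commit; it assumes this mmdetection copy is importable as `mmdet`, and the 5.0/2.0 cost weights are the DETR-style values, used here for illustration):

import torch
from mmdet.core.bbox.builder import build_assigner

assigner = build_assigner(
    dict(
        type='HungarianAssigner',
        cls_cost=dict(type='ClassificationCost', weight=1.0),
        reg_cost=dict(type='BBoxL1Cost', weight=5.0),
        iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0)))

bbox_pred = torch.rand(100, 4)   # normalized (cx, cy, w, h) query boxes
cls_pred = torch.rand(100, 80)   # classification logits per query
gt_bboxes = torch.tensor([[10., 10., 50., 60.]])  # unnormalized (x1, y1, x2, y2)
gt_labels = torch.tensor([3])
img_meta = dict(img_shape=(128, 128, 3))

result = assigner.assign(bbox_pred, cls_pred, gt_bboxes, gt_labels, img_meta)
# result.gt_inds is 0 for background queries and i + 1 for the single
# query matched one-to-one to ground truth i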
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/assigners/max_iou_assigner.py (new file, mode 100644)

import torch

from ..builder import BBOX_ASSIGNERS
from ..iou_calculators import build_iou_calculator
from .assign_result import AssignResult
from .base_assigner import BaseAssigner


@BBOX_ASSIGNERS.register_module()
class MaxIoUAssigner(BaseAssigner):
    """Assign a corresponding gt bbox or background to each bbox.

    Each proposal will be assigned with `-1`, or a semi-positive integer
    indicating the ground truth index.

    - -1: negative sample, no assigned gt
    - semi-positive integer: positive sample, index (0-based) of assigned gt

    Args:
        pos_iou_thr (float): IoU threshold for positive bboxes.
        neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
        min_pos_iou (float): Minimum IoU for a bbox to be considered as a
            positive bbox. Positive samples can have smaller IoU than
            pos_iou_thr due to the 4th step (assign max IoU sample to each
            gt).
        gt_max_assign_all (bool): Whether to assign all bboxes with the same
            highest overlap with some gt to that gt.
        ignore_iof_thr (float): IoF threshold for ignoring bboxes (if
            `gt_bboxes_ignore` is specified). Negative values mean not
            ignoring any bboxes.
        ignore_wrt_candidates (bool): Whether to compute the iof between
            `bboxes` and `gt_bboxes_ignore`, or the contrary.
        match_low_quality (bool): Whether to allow low quality matches. This
            is usually allowed for RPN and single stage detectors, but not
            allowed in the second stage. Details are demonstrated in Step 4.
        gpu_assign_thr (int): The upper bound of the number of GT for GPU
            assign. When the number of gt is above this threshold, the
            assignment runs on the CPU device. Negative values mean never
            assigning on CPU.
    """

    def __init__(self,
                 pos_iou_thr,
                 neg_iou_thr,
                 min_pos_iou=.0,
                 gt_max_assign_all=True,
                 ignore_iof_thr=-1,
                 ignore_wrt_candidates=True,
                 match_low_quality=True,
                 gpu_assign_thr=-1,
                 iou_calculator=dict(type='BboxOverlaps2D')):
        self.pos_iou_thr = pos_iou_thr
        self.neg_iou_thr = neg_iou_thr
        self.min_pos_iou = min_pos_iou
        self.gt_max_assign_all = gt_max_assign_all
        self.ignore_iof_thr = ignore_iof_thr
        self.ignore_wrt_candidates = ignore_wrt_candidates
        self.gpu_assign_thr = gpu_assign_thr
        self.match_low_quality = match_low_quality
        self.iou_calculator = build_iou_calculator(iou_calculator)

    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None,
               gt_labels=None):
        """Assign gt to bboxes.

        This method assigns a gt bbox to every bbox (proposal/anchor); each
        bbox will be assigned with -1, or a semi-positive number. -1 means
        negative sample; a semi-positive number is the index (0-based) of the
        assigned gt.

        The assignment is done in the following steps; the order matters.

        1. assign every bbox to the background
        2. assign proposals whose iou with all gts < neg_iou_thr to 0
        3. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
           assign it to that gt
        4. for each gt bbox, assign its nearest proposals (may be more than
           one) to itself

        Args:
            bboxes (Tensor): Bounding boxes to be assigned, shape (n, 4).
            gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
                labelled as `ignored`, e.g., crowd boxes in COCO.
            gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).

        Returns:
            :obj:`AssignResult`: The assign result.

        Example:
            >>> self = MaxIoUAssigner(0.5, 0.5)
            >>> bboxes = torch.Tensor([[0, 0, 10, 10], [10, 10, 20, 20]])
            >>> gt_bboxes = torch.Tensor([[0, 0, 10, 9]])
            >>> assign_result = self.assign(bboxes, gt_bboxes)
            >>> expected_gt_inds = torch.LongTensor([1, 0])
            >>> assert torch.all(assign_result.gt_inds == expected_gt_inds)
        """
        assign_on_cpu = True if (self.gpu_assign_thr > 0) and (
            gt_bboxes.shape[0] > self.gpu_assign_thr) else False
        # compute overlap and assign gt on CPU when number of GT is large
        if assign_on_cpu:
            device = bboxes.device
            bboxes = bboxes.cpu()
            gt_bboxes = gt_bboxes.cpu()
            if gt_bboxes_ignore is not None:
                gt_bboxes_ignore = gt_bboxes_ignore.cpu()
            if gt_labels is not None:
                gt_labels = gt_labels.cpu()

        overlaps = self.iou_calculator(gt_bboxes, bboxes)

        if (self.ignore_iof_thr > 0 and gt_bboxes_ignore is not None
                and gt_bboxes_ignore.numel() > 0 and bboxes.numel() > 0):
            if self.ignore_wrt_candidates:
                ignore_overlaps = self.iou_calculator(
                    bboxes, gt_bboxes_ignore, mode='iof')
                ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
            else:
                ignore_overlaps = self.iou_calculator(
                    gt_bboxes_ignore, bboxes, mode='iof')
                ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
            overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1

        assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
        if assign_on_cpu:
            assign_result.gt_inds = assign_result.gt_inds.to(device)
            assign_result.max_overlaps = assign_result.max_overlaps.to(device)
            if assign_result.labels is not None:
                assign_result.labels = assign_result.labels.to(device)
        return assign_result

    def assign_wrt_overlaps(self, overlaps, gt_labels=None):
        """Assign w.r.t. the overlaps of bboxes with gts.

        Args:
            overlaps (Tensor): Overlaps between k gt_bboxes and n bboxes,
                shape (k, n).
            gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).

        Returns:
            :obj:`AssignResult`: The assign result.
        """
        num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)

        # 1. assign -1 by default
        assigned_gt_inds = overlaps.new_full((num_bboxes, ),
                                             -1,
                                             dtype=torch.long)

        if num_gts == 0 or num_bboxes == 0:
            # No ground truth or boxes, return empty assignment
            max_overlaps = overlaps.new_zeros((num_bboxes, ))
            if num_gts == 0:
                # No truth, assign everything to background
                assigned_gt_inds[:] = 0
            if gt_labels is None:
                assigned_labels = None
            else:
                assigned_labels = overlaps.new_full((num_bboxes, ),
                                                    -1,
                                                    dtype=torch.long)
            return AssignResult(
                num_gts,
                assigned_gt_inds,
                max_overlaps,
                labels=assigned_labels)

        # for each anchor, which gt best overlaps with it
        # for each anchor, the max iou of all gts
        max_overlaps, argmax_overlaps = overlaps.max(dim=0)
        # for each gt, which anchor best overlaps with it
        # for each gt, the max iou of all proposals
        gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)

        # 2. assign negative: below
        # the negative inds are set to be 0
        if isinstance(self.neg_iou_thr, float):
            assigned_gt_inds[(max_overlaps >= 0)
                             & (max_overlaps < self.neg_iou_thr)] = 0
        elif isinstance(self.neg_iou_thr, tuple):
            assert len(self.neg_iou_thr) == 2
            assigned_gt_inds[(max_overlaps >= self.neg_iou_thr[0])
                             & (max_overlaps < self.neg_iou_thr[1])] = 0

        # 3. assign positive: above positive IoU threshold
        pos_inds = max_overlaps >= self.pos_iou_thr
        assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1

        if self.match_low_quality:
            # Low-quality matching will overwrite the assigned_gt_inds
            # assigned in Step 3. Thus, the assigned gt might not be the best
            # one for prediction.
            # For example, if bbox A has 0.9 and 0.8 iou with GT bbox 1 & 2,
            # GT bbox 1 will be assigned as the best target for bbox A in
            # step 3. However, if GT bbox 2's gt_argmax_overlaps = A, bbox A's
            # assigned_gt_inds will be overwritten to be GT bbox 2.
            # This might be the reason that it is not used in ROI Heads.
            for i in range(num_gts):
                if gt_max_overlaps[i] >= self.min_pos_iou:
                    if self.gt_max_assign_all:
                        max_iou_inds = overlaps[i, :] == gt_max_overlaps[i]
                        assigned_gt_inds[max_iou_inds] = i + 1
                    else:
                        assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1

        if gt_labels is not None:
            assigned_labels = assigned_gt_inds.new_full((num_bboxes, ), -1)
            pos_inds = torch.nonzero(
                assigned_gt_inds > 0, as_tuple=False).squeeze()
            if pos_inds.numel() > 0:
                assigned_labels[pos_inds] = gt_labels[
                    assigned_gt_inds[pos_inds] - 1]
        else:
            assigned_labels = None

        return AssignResult(
            num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)
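
A small illustration of the four assignment steps documented above (a sketch, assuming this mmdetection copy is importable). Proposal 1 falls below `pos_iou_thr`, but step 4 (`match_low_quality`) still assigns it to the gt it overlaps best:

import torch
from mmdet.core.bbox.assigners import MaxIoUAssigner

assigner = MaxIoUAssigner(pos_iou_thr=0.7, neg_iou_thr=0.3, min_pos_iou=0.3)
overlaps = torch.tensor([[0.1, 0.3]])  # (k=1 gt, n=2 proposals)
result = assigner.assign_wrt_overlaps(overlaps)
print(result.gt_inds)  # tensor([0, 1]): proposal 0 negative, proposal 1 -> gt 1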
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/assigners/point_assigner.py (new file, mode 100644)

import torch

from ..builder import BBOX_ASSIGNERS
from .assign_result import AssignResult
from .base_assigner import BaseAssigner


@BBOX_ASSIGNERS.register_module()
class PointAssigner(BaseAssigner):
    """Assign a corresponding gt bbox or background to each point.

    Each proposal will be assigned with `0`, or a positive integer
    indicating the ground truth index.

    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt
    """

    def __init__(self, scale=4, pos_num=3):
        self.scale = scale
        self.pos_num = pos_num

    def assign(self, points, gt_bboxes, gt_bboxes_ignore=None,
               gt_labels=None):
        """Assign gt to points.

        This method assigns a gt bbox to every points set; each points set
        will be assigned with the background_label (-1), or a label number.
        -1 is background, and a semi-positive number is the index (0-based)
        of the assigned gt.

        The assignment is done in the following steps; the order matters.

        1. assign every point to the background_label (-1)
        2. A point is assigned to some gt bbox if
           (i) the point is within the k closest points to the gt bbox
           (ii) the distance between this point and the gt is smaller than
           other gt bboxes

        Args:
            points (Tensor): points to be assigned, shape (n, 3) while last
                dimension stands for (x, y, stride).
            gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
                labelled as `ignored`, e.g., crowd boxes in COCO.
                NOTE: currently unused.
            gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).

        Returns:
            :obj:`AssignResult`: The assign result.
        """
        num_points = points.shape[0]
        num_gts = gt_bboxes.shape[0]

        if num_gts == 0 or num_points == 0:
            # If no truth assign everything to the background
            assigned_gt_inds = points.new_full((num_points, ),
                                               0,
                                               dtype=torch.long)
            if gt_labels is None:
                assigned_labels = None
            else:
                assigned_labels = points.new_full((num_points, ),
                                                  -1,
                                                  dtype=torch.long)
            return AssignResult(
                num_gts, assigned_gt_inds, None, labels=assigned_labels)

        points_xy = points[:, :2]
        points_stride = points[:, 2]
        points_lvl = torch.log2(
            points_stride).int()  # [3...,4...,5...,6...,7...]
        lvl_min, lvl_max = points_lvl.min(), points_lvl.max()

        # assign gt box
        gt_bboxes_xy = (gt_bboxes[:, :2] + gt_bboxes[:, 2:]) / 2
        gt_bboxes_wh = (gt_bboxes[:, 2:] - gt_bboxes[:, :2]).clamp(min=1e-6)
        scale = self.scale
        gt_bboxes_lvl = ((torch.log2(gt_bboxes_wh[:, 0] / scale) +
                          torch.log2(gt_bboxes_wh[:, 1] / scale)) / 2).int()
        gt_bboxes_lvl = torch.clamp(gt_bboxes_lvl, min=lvl_min, max=lvl_max)

        # stores the assigned gt index of each point
        assigned_gt_inds = points.new_zeros((num_points, ), dtype=torch.long)
        # stores the assigned gt dist (to this point) of each point
        assigned_gt_dist = points.new_full((num_points, ), float('inf'))
        points_range = torch.arange(points.shape[0])

        for idx in range(num_gts):
            gt_lvl = gt_bboxes_lvl[idx]
            # get the index of points in this level
            lvl_idx = gt_lvl == points_lvl
            points_index = points_range[lvl_idx]
            # get the points in this level
            lvl_points = points_xy[lvl_idx, :]
            # get the center point of gt
            gt_point = gt_bboxes_xy[[idx], :]
            # get width and height of gt
            gt_wh = gt_bboxes_wh[[idx], :]
            # compute the distance between gt center and
            #   all points in this level
            points_gt_dist = ((lvl_points - gt_point) / gt_wh).norm(dim=1)
            # find the nearest k points to gt center in this level
            min_dist, min_dist_index = torch.topk(
                points_gt_dist, self.pos_num, largest=False)
            # the index of nearest k points to gt center in this level
            min_dist_points_index = points_index[min_dist_index]
            # The less_than_recorded_index stores the index
            #   of min_dist that is less than the assigned_gt_dist. Where
            #   assigned_gt_dist stores the dist from previous assigned gt
            #   (if exist) to each point.
            less_than_recorded_index = min_dist < assigned_gt_dist[
                min_dist_points_index]
            # The min_dist_points_index stores the index of points satisfy:
            #   (1) it is k nearest to current gt center in this level.
            #   (2) it is closer to current gt center than other gt center.
            min_dist_points_index = min_dist_points_index[
                less_than_recorded_index]
            # assign the result
            assigned_gt_inds[min_dist_points_index] = idx + 1
            assigned_gt_dist[min_dist_points_index] = min_dist[
                less_than_recorded_index]

        if gt_labels is not None:
            assigned_labels = assigned_gt_inds.new_full((num_points, ), -1)
            pos_inds = torch.nonzero(
                assigned_gt_inds > 0, as_tuple=False).squeeze()
            if pos_inds.numel() > 0:
                assigned_labels[pos_inds] = gt_labels[
                    assigned_gt_inds[pos_inds] - 1]
        else:
            assigned_labels = None

        return AssignResult(
            num_gts, assigned_gt_inds, None, labels=assigned_labels)
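
A sketch of the level-matching behaviour described in the docstring, with hypothetical values (assumes this mmdetection copy is importable): each gt is mapped to the pyramid level whose stride matches its size, and only the `pos_num` nearest points on that level can become positive.

import torch
from mmdet.core.bbox.assigners import PointAssigner

assigner = PointAssigner(scale=4, pos_num=1)
# points are (x, y, stride); both live on the stride-8 level (lvl 3)
points = torch.tensor([[16., 16., 8.], [28., 28., 8.]])
# a 32x32 gt maps to lvl (log2(32/4) + log2(32/4)) / 2 = 3
gt_bboxes = torch.tensor([[0., 0., 32., 32.]])
result = assigner.assign(points, gt_bboxes)
print(result.gt_inds)  # tensor([1, 0]): only the point at the gt center wins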
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/assigners/region_assigner.py (new file, mode 100644)

import torch

from mmdet.core import anchor_inside_flags
from ..builder import BBOX_ASSIGNERS
from .assign_result import AssignResult
from .base_assigner import BaseAssigner


def calc_region(bbox, ratio, stride, featmap_size=None):
    """Calculate region of the box defined by the ratio; the ratio is from
    the center of the box to every edge."""
    # project bbox on the feature map
    f_bbox = bbox / stride
    x1 = torch.round((1 - ratio) * f_bbox[0] + ratio * f_bbox[2])
    y1 = torch.round((1 - ratio) * f_bbox[1] + ratio * f_bbox[3])
    x2 = torch.round(ratio * f_bbox[0] + (1 - ratio) * f_bbox[2])
    y2 = torch.round(ratio * f_bbox[1] + (1 - ratio) * f_bbox[3])
    if featmap_size is not None:
        x1 = x1.clamp(min=0, max=featmap_size[1])
        y1 = y1.clamp(min=0, max=featmap_size[0])
        x2 = x2.clamp(min=0, max=featmap_size[1])
        y2 = y2.clamp(min=0, max=featmap_size[0])
    return (x1, y1, x2, y2)


def anchor_ctr_inside_region_flags(anchors, stride, region):
    """Get the flags indicating whether anchor centers are inside regions."""
    x1, y1, x2, y2 = region
    f_anchors = anchors / stride
    x = (f_anchors[:, 0] + f_anchors[:, 2]) * 0.5
    y = (f_anchors[:, 1] + f_anchors[:, 3]) * 0.5
    flags = (x >= x1) & (x <= x2) & (y >= y1) & (y <= y2)
    return flags


@BBOX_ASSIGNERS.register_module()
class RegionAssigner(BaseAssigner):
    """Assign a corresponding gt bbox or background to each bbox.

    Each proposal will be assigned with `-1`, `0`, or a positive integer
    indicating the ground truth index.

    - -1: don't care
    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt

    Args:
        center_ratio: ratio of the region in the center of the bbox used to
            define positive samples.
        ignore_ratio: ratio of the region used to define ignore samples.
    """

    def __init__(self, center_ratio=0.2, ignore_ratio=0.5):
        self.center_ratio = center_ratio
        self.ignore_ratio = ignore_ratio

    def assign(self,
               mlvl_anchors,
               mlvl_valid_flags,
               gt_bboxes,
               img_meta,
               featmap_sizes,
               anchor_scale,
               anchor_strides,
               gt_bboxes_ignore=None,
               gt_labels=None,
               allowed_border=0):
        """Assign gt to anchors.

        This method assigns a gt bbox to every bbox (proposal/anchor); each
        bbox will be assigned with -1, 0, or a positive number. -1 means
        don't care, 0 means negative sample, and a positive number is the
        index (1-based) of the assigned gt.

        The assignment is done in the following steps; the order matters.

        1. Assign every anchor to 0 (negative)

        For each gt_bboxes:

        2. Compute ignore flags based on ignore_region, then
           assign -1 to anchors w.r.t. ignore flags
        3. Compute pos flags based on center_region, then
           assign gt_bboxes to anchors w.r.t. pos flags
        4. Compute ignore flags based on adjacent anchor level, then
           assign -1 to anchors w.r.t. ignore flags
        5. Assign anchors outside of the image to -1

        Args:
            mlvl_anchors (list[Tensor]): Multi level anchors.
            mlvl_valid_flags (list[Tensor]): Multi level valid flags.
            gt_bboxes (Tensor): Ground truth bboxes of image, shape (k, 4).
            img_meta (dict): Meta info of image.
            featmap_sizes (list[Tensor]): Feature map size of each level.
            anchor_scale (int): Scale of the anchor.
            anchor_strides (list[int]): Stride of the anchor.
            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
                labelled as `ignored`, e.g., crowd boxes in COCO.
            gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
            allowed_border (int, optional): The border to allow the valid
                anchor. Defaults to 0.

        Returns:
            :obj:`AssignResult`: The assign result.
        """
        # TODO support gt_bboxes_ignore
        if gt_bboxes_ignore is not None:
            raise NotImplementedError
        if gt_bboxes.shape[0] == 0:
            raise ValueError('No gt bboxes')
        num_gts = gt_bboxes.shape[0]
        num_lvls = len(mlvl_anchors)
        r1 = (1 - self.center_ratio) / 2
        r2 = (1 - self.ignore_ratio) / 2

        scale = torch.sqrt((gt_bboxes[:, 2] - gt_bboxes[:, 0]) *
                           (gt_bboxes[:, 3] - gt_bboxes[:, 1]))
        min_anchor_size = scale.new_full(
            (1, ), float(anchor_scale * anchor_strides[0]))
        target_lvls = torch.floor(
            torch.log2(scale) - torch.log2(min_anchor_size) + 0.5)
        target_lvls = target_lvls.clamp(min=0, max=num_lvls - 1).long()

        # 1. assign 0 (negative) by default
        mlvl_assigned_gt_inds = []
        mlvl_ignore_flags = []
        for lvl in range(num_lvls):
            h, w = featmap_sizes[lvl]
            assert h * w == mlvl_anchors[lvl].shape[0]
            assigned_gt_inds = gt_bboxes.new_full((h * w, ),
                                                  0,
                                                  dtype=torch.long)
            ignore_flags = torch.zeros_like(assigned_gt_inds)
            mlvl_assigned_gt_inds.append(assigned_gt_inds)
            mlvl_ignore_flags.append(ignore_flags)

        for gt_id in range(num_gts):
            lvl = target_lvls[gt_id].item()
            featmap_size = featmap_sizes[lvl]
            stride = anchor_strides[lvl]
            anchors = mlvl_anchors[lvl]
            gt_bbox = gt_bboxes[gt_id, :4]

            # Compute regions
            ignore_region = calc_region(gt_bbox, r2, stride, featmap_size)
            ctr_region = calc_region(gt_bbox, r1, stride, featmap_size)

            # 2. Assign -1 to ignore flags
            ignore_flags = anchor_ctr_inside_region_flags(
                anchors, stride, ignore_region)
            mlvl_assigned_gt_inds[lvl][ignore_flags] = -1

            # 3. Assign gt_bboxes to pos flags
            pos_flags = anchor_ctr_inside_region_flags(
                anchors, stride, ctr_region)
            mlvl_assigned_gt_inds[lvl][pos_flags] = gt_id + 1

            # 4. Assign -1 to ignore adjacent lvl
            if lvl > 0:
                d_lvl = lvl - 1
                d_anchors = mlvl_anchors[d_lvl]
                d_featmap_size = featmap_sizes[d_lvl]
                d_stride = anchor_strides[d_lvl]
                d_ignore_region = calc_region(gt_bbox, r2, d_stride,
                                              d_featmap_size)
                ignore_flags = anchor_ctr_inside_region_flags(
                    d_anchors, d_stride, d_ignore_region)
                mlvl_ignore_flags[d_lvl][ignore_flags] = 1
            if lvl < num_lvls - 1:
                u_lvl = lvl + 1
                u_anchors = mlvl_anchors[u_lvl]
                u_featmap_size = featmap_sizes[u_lvl]
                u_stride = anchor_strides[u_lvl]
                u_ignore_region = calc_region(gt_bbox, r2, u_stride,
                                              u_featmap_size)
                ignore_flags = anchor_ctr_inside_region_flags(
                    u_anchors, u_stride, u_ignore_region)
                mlvl_ignore_flags[u_lvl][ignore_flags] = 1

        # 4. (cont.) Assign -1 to ignore adjacent lvl
        for lvl in range(num_lvls):
            ignore_flags = mlvl_ignore_flags[lvl]
            # the accumulated flags live in a long tensor; cast to bool so
            # they act as a mask rather than as gather indices
            mlvl_assigned_gt_inds[lvl][ignore_flags.bool()] = -1

        # 5. Assign -1 to anchors outside of the image
        flat_assigned_gt_inds = torch.cat(mlvl_assigned_gt_inds)
        flat_anchors = torch.cat(mlvl_anchors)
        flat_valid_flags = torch.cat(mlvl_valid_flags)
        assert (flat_assigned_gt_inds.shape[0] == flat_anchors.shape[0] ==
                flat_valid_flags.shape[0])
        inside_flags = anchor_inside_flags(flat_anchors, flat_valid_flags,
                                           img_meta['img_shape'],
                                           allowed_border)
        outside_flags = ~inside_flags
        flat_assigned_gt_inds[outside_flags] = -1

        if gt_labels is not None:
            assigned_labels = torch.zeros_like(flat_assigned_gt_inds)
            # use the flattened inds here; the per-level tensor left over
            # from the loop above would have the wrong shape
            pos_flags = flat_assigned_gt_inds > 0
            assigned_labels[pos_flags] = gt_labels[
                flat_assigned_gt_inds[pos_flags] - 1]
        else:
            assigned_labels = None

        return AssignResult(
            num_gts, flat_assigned_gt_inds, None, labels=assigned_labels)
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/builder.py (new file, mode 100644)

from mmcv.utils import Registry, build_from_cfg

BBOX_ASSIGNERS = Registry('bbox_assigner')
BBOX_SAMPLERS = Registry('bbox_sampler')
BBOX_CODERS = Registry('bbox_coder')


def build_assigner(cfg, **default_args):
    """Builder of box assigner."""
    return build_from_cfg(cfg, BBOX_ASSIGNERS, default_args)


def build_sampler(cfg, **default_args):
    """Builder of box sampler."""
    return build_from_cfg(cfg, BBOX_SAMPLERS, default_args)


def build_bbox_coder(cfg, **default_args):
    """Builder of box coder."""
    return build_from_cfg(cfg, BBOX_CODERS, default_args)
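
These registries implement the config-driven construction used throughout the files in this commit: the `type` key of a config dict selects the registered class, and the remaining keys become constructor arguments. A sketch, assuming this mmdetection copy is importable:

from mmdet.core.bbox.builder import build_assigner, build_bbox_coder

# each dict's 'type' names a class registered above via register_module()
assigner = build_assigner(
    dict(type='MaxIoUAssigner', pos_iou_thr=0.7, neg_iou_thr=0.3))
coder = build_bbox_coder(
    dict(type='DeltaXYWHBBoxCoder', target_stds=(0.1, 0.1, 0.2, 0.2)))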
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/coder/__init__.py (new file, mode 100644)

from .base_bbox_coder import BaseBBoxCoder
from .bucketing_bbox_coder import BucketingBBoxCoder
from .delta_xywh_bbox_coder import DeltaXYWHBBoxCoder
from .legacy_delta_xywh_bbox_coder import LegacyDeltaXYWHBBoxCoder
from .pseudo_bbox_coder import PseudoBBoxCoder
from .tblr_bbox_coder import TBLRBBoxCoder
from .yolo_bbox_coder import YOLOBBoxCoder

__all__ = [
    'BaseBBoxCoder', 'PseudoBBoxCoder', 'DeltaXYWHBBoxCoder',
    'LegacyDeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'YOLOBBoxCoder',
    'BucketingBBoxCoder'
]
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/coder/base_bbox_coder.py (new file, mode 100644)

from abc import ABCMeta, abstractmethod


class BaseBBoxCoder(metaclass=ABCMeta):
    """Base bounding box coder."""

    def __init__(self, **kwargs):
        pass

    @abstractmethod
    def encode(self, bboxes, gt_bboxes):
        """Encode deltas between bboxes and ground truth boxes."""
        pass

    @abstractmethod
    def decode(self, bboxes, bboxes_pred):
        """Decode the predicted bboxes according to prediction and base
        boxes."""
        pass
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/coder/bucketing_bbox_coder.py (new file, mode 100644)

import numpy as np
import torch
import torch.nn.functional as F

from ..builder import BBOX_CODERS
from ..transforms import bbox_rescale
from .base_bbox_coder import BaseBBoxCoder


@BBOX_CODERS.register_module()
class BucketingBBoxCoder(BaseBBoxCoder):
    """Bucketing BBox Coder for Side-Aware Boundary Localization (SABL).

    Boundary Localization with Bucketing and Bucketing Guided Rescoring
    are implemented here.

    Please refer to https://arxiv.org/abs/1912.04260 for more details.

    Args:
        num_buckets (int): Number of buckets.
        scale_factor (int): Scale factor of proposals to generate buckets.
        offset_topk (int): Topk buckets are used to generate
            bucket fine regression targets. Defaults to 2.
        offset_upperbound (float): Offset upperbound to generate
            bucket fine regression targets.
            To avoid too large offset displacements. Defaults to 1.0.
        cls_ignore_neighbor (bool): Whether to ignore the second nearest
            bucket. Defaults to True.
        clip_border (bool, optional): Whether clip the objects outside the
            border of the image. Defaults to True.
    """

    def __init__(self,
                 num_buckets,
                 scale_factor,
                 offset_topk=2,
                 offset_upperbound=1.0,
                 cls_ignore_neighbor=True,
                 clip_border=True):
        super(BucketingBBoxCoder, self).__init__()
        self.num_buckets = num_buckets
        self.scale_factor = scale_factor
        self.offset_topk = offset_topk
        self.offset_upperbound = offset_upperbound
        self.cls_ignore_neighbor = cls_ignore_neighbor
        self.clip_border = clip_border

    def encode(self, bboxes, gt_bboxes):
        """Get bucketing estimation and fine regression targets during
        training.

        Args:
            bboxes (torch.Tensor): source boxes, e.g., object proposals.
            gt_bboxes (torch.Tensor): target of the transformation, e.g.,
                ground truth boxes.

        Returns:
            encoded_bboxes (tuple[Tensor]): bucketing estimation
                and fine regression targets and weights
        """
        assert bboxes.size(0) == gt_bboxes.size(0)
        assert bboxes.size(-1) == gt_bboxes.size(-1) == 4
        encoded_bboxes = bbox2bucket(bboxes, gt_bboxes, self.num_buckets,
                                     self.scale_factor, self.offset_topk,
                                     self.offset_upperbound,
                                     self.cls_ignore_neighbor)
        return encoded_bboxes

    def decode(self, bboxes, pred_bboxes, max_shape=None):
        """Apply transformation `pred_bboxes` to `bboxes`.

        Args:
            bboxes (torch.Tensor): Basic boxes.
            pred_bboxes (torch.Tensor): Predictions for bucketing estimation
                and fine regression.
            max_shape (tuple[int], optional): Maximum shape of boxes.
                Defaults to None.

        Returns:
            torch.Tensor: Decoded boxes.
        """
        assert len(pred_bboxes) == 2
        cls_preds, offset_preds = pred_bboxes
        assert cls_preds.size(0) == bboxes.size(0) and offset_preds.size(
            0) == bboxes.size(0)
        decoded_bboxes = bucket2bbox(bboxes, cls_preds, offset_preds,
                                     self.num_buckets, self.scale_factor,
                                     max_shape, self.clip_border)
        return decoded_bboxes


def generat_buckets(proposals, num_buckets, scale_factor=1.0):
    """Generate buckets w.r.t. bucket number and scale factor of proposals.

    Args:
        proposals (Tensor): Shape (n, 4)
        num_buckets (int): Number of buckets.
        scale_factor (float): Scale factor to rescale proposals.

    Returns:
        tuple[Tensor]: (bucket_w, bucket_h, l_buckets, r_buckets,
        t_buckets, d_buckets)

            - bucket_w: Width of buckets on x-axis. Shape (n, ).
            - bucket_h: Height of buckets on y-axis. Shape (n, ).
            - l_buckets: Left buckets. Shape (n, ceil(side_num/2)).
            - r_buckets: Right buckets. Shape (n, ceil(side_num/2)).
            - t_buckets: Top buckets. Shape (n, ceil(side_num/2)).
            - d_buckets: Down buckets. Shape (n, ceil(side_num/2)).
    """
    proposals = bbox_rescale(proposals, scale_factor)

    # number of buckets on each side
    side_num = int(np.ceil(num_buckets / 2.0))
    pw = proposals[..., 2] - proposals[..., 0]
    ph = proposals[..., 3] - proposals[..., 1]
    px1 = proposals[..., 0]
    py1 = proposals[..., 1]
    px2 = proposals[..., 2]
    py2 = proposals[..., 3]

    bucket_w = pw / num_buckets
    bucket_h = ph / num_buckets

    # left buckets
    l_buckets = px1[:, None] + (0.5 + torch.arange(
        0, side_num).to(proposals).float())[None, :] * bucket_w[:, None]
    # right buckets
    r_buckets = px2[:, None] - (0.5 + torch.arange(
        0, side_num).to(proposals).float())[None, :] * bucket_w[:, None]
    # top buckets
    t_buckets = py1[:, None] + (0.5 + torch.arange(
        0, side_num).to(proposals).float())[None, :] * bucket_h[:, None]
    # down buckets
    d_buckets = py2[:, None] - (0.5 + torch.arange(
        0, side_num).to(proposals).float())[None, :] * bucket_h[:, None]
    return bucket_w, bucket_h, l_buckets, r_buckets, t_buckets, d_buckets


def bbox2bucket(proposals,
                gt,
                num_buckets,
                scale_factor,
                offset_topk=2,
                offset_upperbound=1.0,
                cls_ignore_neighbor=True):
    """Generate bucket estimation and fine regression targets.

    Args:
        proposals (Tensor): Shape (n, 4)
        gt (Tensor): Shape (n, 4)
        num_buckets (int): Number of buckets.
        scale_factor (float): Scale factor to rescale proposals.
        offset_topk (int): Topk buckets are used to generate
            bucket fine regression targets. Defaults to 2.
        offset_upperbound (float): Offset allowance to generate
            bucket fine regression targets.
            To avoid too large offset displacements. Defaults to 1.0.
        cls_ignore_neighbor (bool): Whether to ignore the second nearest
            bucket. Defaults to True.

    Returns:
        tuple[Tensor]: (offsets, offsets_weights, bucket_labels, cls_weights).

            - offsets: Fine regression targets. Shape (n, num_buckets*2).
            - offsets_weights: Fine regression weights.
              Shape (n, num_buckets*2).
            - bucket_labels: Bucketing estimation labels.
              Shape (n, num_buckets*2).
            - cls_weights: Bucketing estimation weights.
              Shape (n, num_buckets*2).
    """
    assert proposals.size() == gt.size()

    # generate buckets
    proposals = proposals.float()
    gt = gt.float()
    (bucket_w, bucket_h, l_buckets, r_buckets, t_buckets,
     d_buckets) = generat_buckets(proposals, num_buckets, scale_factor)

    gx1 = gt[..., 0]
    gy1 = gt[..., 1]
    gx2 = gt[..., 2]
    gy2 = gt[..., 3]

    # generate offset targets and weights
    # offsets from buckets to gts
    l_offsets = (l_buckets - gx1[:, None]) / bucket_w[:, None]
    r_offsets = (r_buckets - gx2[:, None]) / bucket_w[:, None]
    t_offsets = (t_buckets - gy1[:, None]) / bucket_h[:, None]
    d_offsets = (d_buckets - gy2[:, None]) / bucket_h[:, None]

    # select top-k nearest buckets
    l_topk, l_label = l_offsets.abs().topk(
        offset_topk, dim=1, largest=False, sorted=True)
    r_topk, r_label = r_offsets.abs().topk(
        offset_topk, dim=1, largest=False, sorted=True)
    t_topk, t_label = t_offsets.abs().topk(
        offset_topk, dim=1, largest=False, sorted=True)
    d_topk, d_label = d_offsets.abs().topk(
        offset_topk, dim=1, largest=False, sorted=True)

    offset_l_weights = l_offsets.new_zeros(l_offsets.size())
    offset_r_weights = r_offsets.new_zeros(r_offsets.size())
    offset_t_weights = t_offsets.new_zeros(t_offsets.size())
    offset_d_weights = d_offsets.new_zeros(d_offsets.size())
    inds = torch.arange(0, proposals.size(0)).to(proposals).long()

    # generate offset weights of top-k nearest buckets
    for k in range(offset_topk):
        if k >= 1:
            offset_l_weights[inds, l_label[:, k]] = (
                l_topk[:, k] < offset_upperbound).float()
            offset_r_weights[inds, r_label[:, k]] = (
                r_topk[:, k] < offset_upperbound).float()
            offset_t_weights[inds, t_label[:, k]] = (
                t_topk[:, k] < offset_upperbound).float()
            offset_d_weights[inds, d_label[:, k]] = (
                d_topk[:, k] < offset_upperbound).float()
        else:
            offset_l_weights[inds, l_label[:, k]] = 1.0
            offset_r_weights[inds, r_label[:, k]] = 1.0
            offset_t_weights[inds, t_label[:, k]] = 1.0
            offset_d_weights[inds, d_label[:, k]] = 1.0

    offsets = torch.cat([l_offsets, r_offsets, t_offsets, d_offsets], dim=-1)
    offsets_weights = torch.cat([
        offset_l_weights, offset_r_weights, offset_t_weights, offset_d_weights
    ], dim=-1)

    # generate bucket labels and weight
    side_num = int(np.ceil(num_buckets / 2.0))
    labels = torch.stack(
        [l_label[:, 0], r_label[:, 0], t_label[:, 0], d_label[:, 0]], dim=-1)

    batch_size = labels.size(0)
    bucket_labels = F.one_hot(labels.view(-1),
                              side_num).view(batch_size, -1).float()
    bucket_cls_l_weights = (l_offsets.abs() < 1).float()
    bucket_cls_r_weights = (r_offsets.abs() < 1).float()
    bucket_cls_t_weights = (t_offsets.abs() < 1).float()
    bucket_cls_d_weights = (d_offsets.abs() < 1).float()
    bucket_cls_weights = torch.cat([
        bucket_cls_l_weights, bucket_cls_r_weights, bucket_cls_t_weights,
        bucket_cls_d_weights
    ], dim=-1)
    # ignore second nearest buckets for cls if necessary
    if cls_ignore_neighbor:
        bucket_cls_weights = (~((bucket_cls_weights == 1) &
                                (bucket_labels == 0))).float()
    else:
        bucket_cls_weights[:] = 1.0
    return offsets, offsets_weights, bucket_labels, bucket_cls_weights


def bucket2bbox(proposals,
                cls_preds,
                offset_preds,
                num_buckets,
                scale_factor=1.0,
                max_shape=None,
                clip_border=True):
    """Apply bucketing estimation (cls preds) and fine regression (offset
    preds) to generate det bboxes.

    Args:
        proposals (Tensor): Boxes to be transformed. Shape (n, 4)
        cls_preds (Tensor): bucketing estimation. Shape (n, num_buckets*2).
        offset_preds (Tensor): fine regression. Shape (n, num_buckets*2).
        num_buckets (int): Number of buckets.
        scale_factor (float): Scale factor to rescale proposals.
        max_shape (tuple[int, int]): Maximum bounds for boxes, specifies
            (H, W).
        clip_border (bool, optional): Whether clip the objects outside the
            border of the image. Defaults to True.

    Returns:
        tuple[Tensor]: (bboxes, loc_confidence).

            - bboxes: predicted bboxes. Shape (n, 4)
            - loc_confidence: localization confidence of predicted bboxes.
              Shape (n,).
    """
    side_num = int(np.ceil(num_buckets / 2.0))
    cls_preds = cls_preds.view(-1, side_num)
    offset_preds = offset_preds.view(-1, side_num)

    scores = F.softmax(cls_preds, dim=1)
    score_topk, score_label = scores.topk(2, dim=1, largest=True, sorted=True)

    rescaled_proposals = bbox_rescale(proposals, scale_factor)

    pw = rescaled_proposals[..., 2] - rescaled_proposals[..., 0]
    ph = rescaled_proposals[..., 3] - rescaled_proposals[..., 1]
    px1 = rescaled_proposals[..., 0]
    py1 = rescaled_proposals[..., 1]
    px2 = rescaled_proposals[..., 2]
    py2 = rescaled_proposals[..., 3]

    bucket_w = pw / num_buckets
    bucket_h = ph / num_buckets

    score_inds_l = score_label[0::4, 0]
    score_inds_r = score_label[1::4, 0]
    score_inds_t = score_label[2::4, 0]
    score_inds_d = score_label[3::4, 0]
    l_buckets = px1 + (0.5 + score_inds_l.float()) * bucket_w
    r_buckets = px2 - (0.5 + score_inds_r.float()) * bucket_w
    t_buckets = py1 + (0.5 + score_inds_t.float()) * bucket_h
    d_buckets = py2 - (0.5 + score_inds_d.float()) * bucket_h

    offsets = offset_preds.view(-1, 4, side_num)
    inds = torch.arange(proposals.size(0)).to(proposals).long()
    l_offsets = offsets[:, 0, :][inds, score_inds_l]
    r_offsets = offsets[:, 1, :][inds, score_inds_r]
    t_offsets = offsets[:, 2, :][inds, score_inds_t]
    d_offsets = offsets[:, 3, :][inds, score_inds_d]

    x1 = l_buckets - l_offsets * bucket_w
    x2 = r_buckets - r_offsets * bucket_w
    y1 = t_buckets - t_offsets * bucket_h
    y2 = d_buckets - d_offsets * bucket_h

    if clip_border and max_shape is not None:
        x1 = x1.clamp(min=0, max=max_shape[1] - 1)
        y1 = y1.clamp(min=0, max=max_shape[0] - 1)
        x2 = x2.clamp(min=0, max=max_shape[1] - 1)
        y2 = y2.clamp(min=0, max=max_shape[0] - 1)
    bboxes = torch.cat([x1[:, None], y1[:, None], x2[:, None], y2[:, None]],
                       dim=-1)

    # bucketing guided rescoring
    loc_confidence = score_topk[:, 0]
    top2_neighbor_inds = (score_label[:, 0] - score_label[:, 1]).abs() == 1
    loc_confidence += score_topk[:, 1] * top2_neighbor_inds.float()
    loc_confidence = loc_confidence.view(-1, 4).mean(dim=1)

    return bboxes, loc_confidence
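
A round-trip sketch for the coder above, with hypothetical numbers (assumes this mmdetection copy is importable). With `num_buckets=14`, the targets returned by `encode` have `4 * ceil(num_buckets / 2) = 28` columns per box, matching the `(n, num_buckets*2)` shapes that `decode` expects for its `(cls_preds, offset_preds)` pair:

import torch
from mmdet.core.bbox.coder import BucketingBBoxCoder

coder = BucketingBBoxCoder(num_buckets=14, scale_factor=1.7)
proposals = torch.tensor([[10., 10., 60., 80.]])
gts = torch.tensor([[12., 8., 58., 84.]])
offsets, offset_weights, bucket_labels, cls_weights = coder.encode(
    proposals, gts)
# feeding the one-hot bucket labels back in as "predictions" exercises
# decode end to end; real inputs would be network outputs
bboxes, loc_conf = coder.decode(proposals, (bucket_labels, offsets))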
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/coder/delta_xywh_bbox_coder.py (new file, mode 100644)

import numpy as np
import torch

from ..builder import BBOX_CODERS
from .base_bbox_coder import BaseBBoxCoder


@BBOX_CODERS.register_module()
class DeltaXYWHBBoxCoder(BaseBBoxCoder):
    """Delta XYWH BBox coder.

    Following the practice in `R-CNN <https://arxiv.org/abs/1311.2524>`_,
    this coder encodes bbox (x1, y1, x2, y2) into delta (dx, dy, dw, dh) and
    decodes delta (dx, dy, dw, dh) back to original bbox (x1, y1, x2, y2).

    Args:
        target_means (Sequence[float]): Denormalizing means of target for
            delta coordinates
        target_stds (Sequence[float]): Denormalizing standard deviation of
            target for delta coordinates
        clip_border (bool, optional): Whether clip the objects outside the
            border of the image. Defaults to True.
    """

    def __init__(self,
                 target_means=(0., 0., 0., 0.),
                 target_stds=(1., 1., 1., 1.),
                 clip_border=True):
        super(BaseBBoxCoder, self).__init__()
        self.means = target_means
        self.stds = target_stds
        self.clip_border = clip_border

    def encode(self, bboxes, gt_bboxes):
        """Get box regression transformation deltas that can be used to
        transform the ``bboxes`` into the ``gt_bboxes``.

        Args:
            bboxes (torch.Tensor): Source boxes, e.g., object proposals.
            gt_bboxes (torch.Tensor): Target of the transformation, e.g.,
                ground-truth boxes.

        Returns:
            torch.Tensor: Box transformation deltas
        """
        assert bboxes.size(0) == gt_bboxes.size(0)
        assert bboxes.size(-1) == gt_bboxes.size(-1) == 4
        encoded_bboxes = bbox2delta(bboxes, gt_bboxes, self.means, self.stds)
        return encoded_bboxes

    def decode(self,
               bboxes,
               pred_bboxes,
               max_shape=None,
               wh_ratio_clip=16 / 1000):
        """Apply transformation `pred_bboxes` to `bboxes`.

        Args:
            bboxes (torch.Tensor): Basic boxes.
            pred_bboxes (torch.Tensor): Encoded boxes with shape
            max_shape (tuple[int], optional): Maximum shape of boxes.
                Defaults to None.
            wh_ratio_clip (float, optional): The allowed ratio between
                width and height.

        Returns:
            torch.Tensor: Decoded boxes.
        """
        assert pred_bboxes.size(0) == bboxes.size(0)
        decoded_bboxes = delta2bbox(bboxes, pred_bboxes, self.means,
                                    self.stds, max_shape, wh_ratio_clip,
                                    self.clip_border)
        return decoded_bboxes


def bbox2delta(proposals, gt, means=(0., 0., 0., 0.), stds=(1., 1., 1., 1.)):
    """Compute deltas of proposals w.r.t. gt.

    We usually compute the deltas of x, y, w, h of proposals w.r.t. ground
    truth bboxes to get regression targets.
    This is the inverse function of :func:`delta2bbox`.

    Args:
        proposals (Tensor): Boxes to be transformed, shape (N, ..., 4)
        gt (Tensor): Gt bboxes to be used as base, shape (N, ..., 4)
        means (Sequence[float]): Denormalizing means for delta coordinates
        stds (Sequence[float]): Denormalizing standard deviation for delta
            coordinates

    Returns:
        Tensor: deltas with shape (N, 4), where columns represent dx, dy,
        dw, dh.
    """
    assert proposals.size() == gt.size()

    proposals = proposals.float()
    gt = gt.float()
    px = (proposals[..., 0] + proposals[..., 2]) * 0.5
    py = (proposals[..., 1] + proposals[..., 3]) * 0.5
    pw = proposals[..., 2] - proposals[..., 0]
    ph = proposals[..., 3] - proposals[..., 1]

    gx = (gt[..., 0] + gt[..., 2]) * 0.5
    gy = (gt[..., 1] + gt[..., 3]) * 0.5
    gw = gt[..., 2] - gt[..., 0]
    gh = gt[..., 3] - gt[..., 1]

    dx = (gx - px) / pw
    dy = (gy - py) / ph
    dw = torch.log(gw / pw)
    dh = torch.log(gh / ph)
    deltas = torch.stack([dx, dy, dw, dh], dim=-1)

    means = deltas.new_tensor(means).unsqueeze(0)
    stds = deltas.new_tensor(stds).unsqueeze(0)
    deltas = deltas.sub_(means).div_(stds)

    return deltas


def delta2bbox(rois,
               deltas,
               means=(0., 0., 0., 0.),
               stds=(1., 1., 1., 1.),
               max_shape=None,
               wh_ratio_clip=16 / 1000,
               clip_border=True):
    """Apply deltas to shift/scale base boxes.

    Typically the rois are anchor or proposed bounding boxes and the deltas
    are network outputs used to shift/scale those boxes.
    This is the inverse function of :func:`bbox2delta`.

    Args:
        rois (Tensor): Boxes to be transformed. Has shape (N, 4)
        deltas (Tensor): Encoded offsets with respect to each roi.
            Has shape (N, 4 * num_classes). Note N = num_anchors * W * H when
            rois is a grid of anchors. Offset encoding follows [1]_.
        means (Sequence[float]): Denormalizing means for delta coordinates
        stds (Sequence[float]): Denormalizing standard deviation for delta
            coordinates
        max_shape (tuple[int, int]): Maximum bounds for boxes, specifies
            (H, W)
        wh_ratio_clip (float): Maximum aspect ratio for boxes.
        clip_border (bool, optional): Whether clip the objects outside the
            border of the image. Defaults to True.

    Returns:
        Tensor: Boxes with shape (N, 4), where columns represent
        tl_x, tl_y, br_x, br_y.

    References:
        .. [1] https://arxiv.org/abs/1311.2524

    Example:
        >>> rois = torch.Tensor([[ 0.,  0.,  1.,  1.],
        >>>                      [ 0.,  0.,  1.,  1.],
        >>>                      [ 0.,  0.,  1.,  1.],
        >>>                      [ 5.,  5.,  5.,  5.]])
        >>> deltas = torch.Tensor([[  0.,   0.,   0.,   0.],
        >>>                        [  1.,   1.,   1.,   1.],
        >>>                        [  0.,   0.,   2.,  -1.],
        >>>                        [ 0.7, -1.9, -0.5,  0.3]])
        >>> delta2bbox(rois, deltas, max_shape=(32, 32))
        tensor([[0.0000, 0.0000, 1.0000, 1.0000],
                [0.1409, 0.1409, 2.8591, 2.8591],
                [0.0000, 0.3161, 4.1945, 0.6839],
                [5.0000, 5.0000, 5.0000, 5.0000]])
    """
    means = deltas.new_tensor(means).view(1, -1).repeat(1,
                                                        deltas.size(1) // 4)
    stds = deltas.new_tensor(stds).view(1, -1).repeat(1, deltas.size(1) // 4)
    denorm_deltas = deltas * stds + means
    dx = denorm_deltas[:, 0::4]
    dy = denorm_deltas[:, 1::4]
    dw = denorm_deltas[:, 2::4]
    dh = denorm_deltas[:, 3::4]
    max_ratio = np.abs(np.log(wh_ratio_clip))
    dw = dw.clamp(min=-max_ratio, max=max_ratio)
    dh = dh.clamp(min=-max_ratio, max=max_ratio)
    # Compute center of each roi
    px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx)
    py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy)
    # Compute width/height of each roi
    pw = (rois[:, 2] - rois[:, 0]).unsqueeze(1).expand_as(dw)
    ph = (rois[:, 3] - rois[:, 1]).unsqueeze(1).expand_as(dh)
    # Use exp(network energy) to enlarge/shrink each roi
    gw = pw * dw.exp()
    gh = ph * dh.exp()
    # Use network energy to shift the center of each roi
    gx = px + pw * dx
    gy = py + ph * dy
    # Convert center-xy/width/height to top-left, bottom-right
    x1 = gx - gw * 0.5
    y1 = gy - gh * 0.5
    x2 = gx + gw * 0.5
    y2 = gy + gh * 0.5
    if clip_border and max_shape is not None:
        x1 = x1.clamp(min=0, max=max_shape[1])
        y1 = y1.clamp(min=0, max=max_shape[0])
        x2 = x2.clamp(min=0, max=max_shape[1])
        y2 = y2.clamp(min=0, max=max_shape[0])
    bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view(deltas.size())
    return bboxes
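
Since `delta2bbox` is documented as the inverse of `bbox2delta`, a quick self-contained check (a sketch, assuming this module is importable under its mmdet path):

import torch
from mmdet.core.bbox.coder.delta_xywh_bbox_coder import bbox2delta, delta2bbox

proposals = torch.tensor([[0., 0., 10., 10.], [5., 5., 20., 30.]])
gts = torch.tensor([[1., 1., 11., 12.], [4., 6., 22., 28.]])
deltas = bbox2delta(proposals, gts)
restored = delta2bbox(proposals, deltas)
# exact inverse up to float error when no clamping is triggered
assert torch.allclose(restored, gts, atol=1e-4)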
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/coder/legacy_delta_xywh_bbox_coder.py
0 → 100644
View file @
142dcf29
import
numpy
as
np
import
torch
from
..builder
import
BBOX_CODERS
from
.base_bbox_coder
import
BaseBBoxCoder
@
BBOX_CODERS
.
register_module
()
class
LegacyDeltaXYWHBBoxCoder
(
BaseBBoxCoder
):
"""Legacy Delta XYWH BBox coder used in MMDet V1.x.
Following the practice in R-CNN [1]_, this coder encodes bbox (x1, y1, x2,
y2) into delta (dx, dy, dw, dh) and decodes delta (dx, dy, dw, dh)
back to original bbox (x1, y1, x2, y2).
Note:
The main difference between :class`LegacyDeltaXYWHBBoxCoder` and
:class:`DeltaXYWHBBoxCoder` is whether ``+ 1`` is used during width and
height calculation. We suggest to only use this coder when testing with
MMDet V1.x models.
References:
.. [1] https://arxiv.org/abs/1311.2524
Args:
target_means (Sequence[float]): denormalizing means of target for
delta coordinates
target_stds (Sequence[float]): denormalizing standard deviation of
target for delta coordinates
"""
def
__init__
(
self
,
target_means
=
(
0.
,
0.
,
0.
,
0.
),
target_stds
=
(
1.
,
1.
,
1.
,
1.
)):
super
(
BaseBBoxCoder
,
self
).
__init__
()
self
.
means
=
target_means
self
.
stds
=
target_stds
def
encode
(
self
,
bboxes
,
gt_bboxes
):
"""Get box regression transformation deltas that can be used to
transform the ``bboxes`` into the ``gt_bboxes``.
Args:
bboxes (torch.Tensor): source boxes, e.g., object proposals.
gt_bboxes (torch.Tensor): target of the transformation, e.g.,
ground-truth boxes.
Returns:
torch.Tensor: Box transformation deltas
"""
assert
bboxes
.
size
(
0
)
==
gt_bboxes
.
size
(
0
)
assert
bboxes
.
size
(
-
1
)
==
gt_bboxes
.
size
(
-
1
)
==
4
encoded_bboxes
=
legacy_bbox2delta
(
bboxes
,
gt_bboxes
,
self
.
means
,
self
.
stds
)
return
encoded_bboxes
def
decode
(
self
,
bboxes
,
pred_bboxes
,
max_shape
=
None
,
wh_ratio_clip
=
16
/
1000
):
"""Apply transformation `pred_bboxes` to `boxes`.
Args:
boxes (torch.Tensor): Basic boxes.
pred_bboxes (torch.Tensor): Encoded boxes with shape
max_shape (tuple[int], optional): Maximum shape of boxes.
Defaults to None.
wh_ratio_clip (float, optional): The allowed ratio between
width and height.
Returns:
torch.Tensor: Decoded boxes.
"""
assert
pred_bboxes
.
size
(
0
)
==
bboxes
.
size
(
0
)
decoded_bboxes
=
legacy_delta2bbox
(
bboxes
,
pred_bboxes
,
self
.
means
,
self
.
stds
,
max_shape
,
wh_ratio_clip
)
return
decoded_bboxes
def
legacy_bbox2delta
(
proposals
,
gt
,
means
=
(
0.
,
0.
,
0.
,
0.
),
stds
=
(
1.
,
1.
,
1.
,
1.
)):
"""Compute deltas of proposals w.r.t. gt in the MMDet V1.x manner.
We usually compute the deltas of x, y, w, h of proposals w.r.t ground
truth bboxes to get regression target.
This is the inverse function of `delta2bbox()`
Args:
proposals (Tensor): Boxes to be transformed, shape (N, ..., 4)
gt (Tensor): Gt bboxes to be used as base, shape (N, ..., 4)
means (Sequence[float]): Denormalizing means for delta coordinates
stds (Sequence[float]): Denormalizing standard deviation for delta
coordinates
Returns:
Tensor: deltas with shape (N, 4), where columns represent dx, dy,
dw, dh.
"""
assert
proposals
.
size
()
==
gt
.
size
()
proposals
=
proposals
.
float
()
gt
=
gt
.
float
()
px
=
(
proposals
[...,
0
]
+
proposals
[...,
2
])
*
0.5
py
=
(
proposals
[...,
1
]
+
proposals
[...,
3
])
*
0.5
pw
=
proposals
[...,
2
]
-
proposals
[...,
0
]
+
1.0
ph
=
proposals
[...,
3
]
-
proposals
[...,
1
]
+
1.0
gx
=
(
gt
[...,
0
]
+
gt
[...,
2
])
*
0.5
gy
=
(
gt
[...,
1
]
+
gt
[...,
3
])
*
0.5
gw
=
gt
[...,
2
]
-
gt
[...,
0
]
+
1.0
gh
=
gt
[...,
3
]
-
gt
[...,
1
]
+
1.0
dx
=
(
gx
-
px
)
/
pw
dy
=
(
gy
-
py
)
/
ph
dw
=
torch
.
log
(
gw
/
pw
)
dh
=
torch
.
log
(
gh
/
ph
)
deltas
=
torch
.
stack
([
dx
,
dy
,
dw
,
dh
],
dim
=-
1
)
means
=
deltas
.
new_tensor
(
means
).
unsqueeze
(
0
)
stds
=
deltas
.
new_tensor
(
stds
).
unsqueeze
(
0
)
deltas
=
deltas
.
sub_
(
means
).
div_
(
stds
)
return
deltas
def legacy_delta2bbox(rois,
                      deltas,
                      means=(0., 0., 0., 0.),
                      stds=(1., 1., 1., 1.),
                      max_shape=None,
                      wh_ratio_clip=16 / 1000):
    """Apply deltas to shift/scale base boxes in the MMDet V1.x manner.

    Typically the rois are anchor or proposed bounding boxes and the deltas
    are network outputs used to shift/scale those boxes. This is the inverse
    function of `legacy_bbox2delta()`.

    Args:
        rois (Tensor): Boxes to be transformed. Has shape (N, 4).
        deltas (Tensor): Encoded offsets with respect to each roi.
            Has shape (N, 4 * num_classes). Note N = num_anchors * W * H
            when rois is a grid of anchors. Offset encoding follows [1]_.
        means (Sequence[float]): Denormalizing means for delta coordinates.
        stds (Sequence[float]): Denormalizing standard deviation for delta
            coordinates.
        max_shape (tuple[int, int]): Maximum bounds for boxes, specifies
            (H, W).
        wh_ratio_clip (float): Maximum aspect ratio for boxes.

    Returns:
        Tensor: Boxes with shape (N, 4), where columns represent
            tl_x, tl_y, br_x, br_y.

    References:
        .. [1] https://arxiv.org/abs/1311.2524

    Example:
        >>> rois = torch.Tensor([[ 0.,  0.,  1.,  1.],
        >>>                      [ 0.,  0.,  1.,  1.],
        >>>                      [ 0.,  0.,  1.,  1.],
        >>>                      [ 5.,  5.,  5.,  5.]])
        >>> deltas = torch.Tensor([[  0.,   0.,   0.,   0.],
        >>>                        [  1.,   1.,   1.,   1.],
        >>>                        [  0.,   0.,   2.,  -1.],
        >>>                        [ 0.7, -1.9, -0.5,  0.3]])
        >>> legacy_delta2bbox(rois, deltas, max_shape=(32, 32))
        tensor([[0.0000, 0.0000, 1.5000, 1.5000],
                [0.0000, 0.0000, 5.2183, 5.2183],
                [0.0000, 0.1321, 7.8891, 0.8679],
                [5.3967, 2.4251, 6.0033, 3.7749]])
    """
    means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4)
    stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4)
    denorm_deltas = deltas * stds + means
    dx = denorm_deltas[:, 0::4]
    dy = denorm_deltas[:, 1::4]
    dw = denorm_deltas[:, 2::4]
    dh = denorm_deltas[:, 3::4]
    max_ratio = np.abs(np.log(wh_ratio_clip))
    dw = dw.clamp(min=-max_ratio, max=max_ratio)
    dh = dh.clamp(min=-max_ratio, max=max_ratio)
    # Compute center of each roi
    px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx)
    py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy)
    # Compute width/height of each roi
    pw = (rois[:, 2] - rois[:, 0] + 1.0).unsqueeze(1).expand_as(dw)
    ph = (rois[:, 3] - rois[:, 1] + 1.0).unsqueeze(1).expand_as(dh)
    # Use exp(network energy) to enlarge/shrink each roi
    gw = pw * dw.exp()
    gh = ph * dh.exp()
    # Use network energy to shift the center of each roi
    gx = px + pw * dx
    gy = py + ph * dy
    # Convert center-xy/width/height to top-left, bottom-right
    # The true legacy box coder should +- 0.5 here.
    # However, current implementation improves the performance when testing
    # the models trained in MMDetection 1.X (~0.5 bbox AP, 0.2 mask AP)
    x1 = gx - gw * 0.5
    y1 = gy - gh * 0.5
    x2 = gx + gw * 0.5
    y2 = gy + gh * 0.5
    if max_shape is not None:
        x1 = x1.clamp(min=0, max=max_shape[1] - 1)
        y1 = y1.clamp(min=0, max=max_shape[0] - 1)
        x2 = x2.clamp(min=0, max=max_shape[1] - 1)
        y2 = y2.clamp(min=0, max=max_shape[0] - 1)
    bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view_as(deltas)
    return bboxes
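As a quick sanity check, the two legacy transforms invert each other up to the half-pixel shift noted in the comment above: decoding the encoded deltas reproduces the ground-truth boxes offset by (-0.5, -0.5, +0.5, +0.5), a consequence of the legacy `+ 1.0` width/height convention. A minimal round-trip sketch (illustrative values only, default means/stds, no clipping):

import torch

proposals = torch.Tensor([[0., 0., 10., 10.], [5., 5., 20., 30.]])
gt = torch.Tensor([[1., 1., 11., 12.], [4., 6., 22., 28.]])

deltas = legacy_bbox2delta(proposals, gt)
restored = legacy_delta2bbox(proposals, deltas)
# The decoded boxes carry the documented half-pixel offset.
offset = torch.tensor([-0.5, -0.5, 0.5, 0.5])
assert torch.allclose(restored, gt + offset, atol=1e-4)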
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/coder/pseudo_bbox_coder.py
0 → 100644
from ..builder import BBOX_CODERS
from .base_bbox_coder import BaseBBoxCoder


@BBOX_CODERS.register_module()
class PseudoBBoxCoder(BaseBBoxCoder):
    """Pseudo bounding box coder."""

    def __init__(self, **kwargs):
        super(PseudoBBoxCoder, self).__init__(**kwargs)

    def encode(self, bboxes, gt_bboxes):
        """torch.Tensor: return the given ``gt_bboxes``"""
        return gt_bboxes

    def decode(self, bboxes, pred_bboxes):
        """torch.Tensor: return the given ``pred_bboxes``"""
        return pred_bboxes
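Since the class is registered in `BBOX_CODERS`, it is normally built from a config dict rather than instantiated directly. A minimal sketch, assuming `build_bbox_coder` is exposed by `mmdet.core.bbox.builder`:

import torch
from mmdet.core.bbox.builder import build_bbox_coder  # assumed import path

coder = build_bbox_coder(dict(type='PseudoBBoxCoder'))
anchors = torch.rand(3, 4)
preds = torch.rand(3, 4)
# Both directions are identity transforms: predictions already are boxes.
assert torch.equal(coder.decode(anchors, preds), preds)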
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/coder/tblr_bbox_coder.py
0 → 100644
import torch

from ..builder import BBOX_CODERS
from .base_bbox_coder import BaseBBoxCoder


@BBOX_CODERS.register_module()
class TBLRBBoxCoder(BaseBBoxCoder):
    """TBLR BBox coder.

    Following the practice in `FSAF <https://arxiv.org/abs/1903.00621>`_,
    this coder encodes gt bboxes (x1, y1, x2, y2) into (top, bottom, left,
    right) and decodes them back to the original format.

    Args:
        normalizer (list | float): Normalization factor to be divided with
            when coding the coordinates. If it is a list, it should have
            length of 4, indicating the normalization factors in the tblr
            dims. Otherwise it is a unified float factor for all dims.
            Default: 4.0.
        clip_border (bool, optional): Whether to clip the objects outside
            the border of the image. Defaults to True.
    """

    def __init__(self, normalizer=4.0, clip_border=True):
        super(TBLRBBoxCoder, self).__init__()
        self.normalizer = normalizer
        self.clip_border = clip_border

    def encode(self, bboxes, gt_bboxes):
        """Get box regression transformation deltas that can be used to
        transform the ``bboxes`` into the ``gt_bboxes`` in the (top, bottom,
        left, right) order.

        Args:
            bboxes (torch.Tensor): Source boxes, e.g., object proposals.
            gt_bboxes (torch.Tensor): Target of the transformation, e.g.,
                ground-truth boxes.

        Returns:
            torch.Tensor: Box transformation deltas
        """
        assert bboxes.size(0) == gt_bboxes.size(0)
        assert bboxes.size(-1) == gt_bboxes.size(-1) == 4
        encoded_bboxes = bboxes2tblr(
            bboxes, gt_bboxes, normalizer=self.normalizer)
        return encoded_bboxes

    def decode(self, bboxes, pred_bboxes, max_shape=None):
        """Apply transformation `pred_bboxes` to `bboxes`.

        Args:
            bboxes (torch.Tensor): Basic boxes.
            pred_bboxes (torch.Tensor): Encoded boxes with shape (N, 4).
            max_shape (tuple[int], optional): Maximum shape of boxes.
                Defaults to None.

        Returns:
            torch.Tensor: Decoded boxes.
        """
        assert pred_bboxes.size(0) == bboxes.size(0)
        decoded_bboxes = tblr2bboxes(
            bboxes,
            pred_bboxes,
            normalizer=self.normalizer,
            max_shape=max_shape,
            clip_border=self.clip_border)
        return decoded_bboxes


def bboxes2tblr(priors, gts, normalizer=4.0, normalize_by_wh=True):
    """Encode ground truth boxes to tblr coordinates.

    It first converts the gt coordinates to the tblr format
    (top, bottom, left, right), relative to the prior box centers. The tblr
    coordinates may be normalized by the side lengths of the prior bboxes
    if `normalize_by_wh` is specified as True, and are then normalized by
    the `normalizer` factor.

    Args:
        priors (Tensor): Prior boxes in point form.
            Shape: (num_proposals, 4).
        gts (Tensor): Coords of ground truth for each prior in point form.
            Shape: (num_proposals, 4).
        normalizer (Sequence[float] | float): Normalization parameter of
            encoded boxes. If it is a list, it has to have length = 4.
            Default: 4.0.
        normalize_by_wh (bool): Whether to normalize tblr coordinates by
            the side lengths (wh) of prior bboxes.

    Return:
        encoded boxes (Tensor), Shape: (num_proposals, 4)
    """
    # dist b/t match center and prior's center
    if not isinstance(normalizer, float):
        normalizer = torch.tensor(normalizer, device=priors.device)
        assert len(normalizer) == 4, 'Normalizer must have length = 4'
    assert priors.size(0) == gts.size(0)
    prior_centers = (priors[:, 0:2] + priors[:, 2:4]) / 2
    xmin, ymin, xmax, ymax = gts.split(1, dim=1)
    top = prior_centers[:, 1].unsqueeze(1) - ymin
    bottom = ymax - prior_centers[:, 1].unsqueeze(1)
    left = prior_centers[:, 0].unsqueeze(1) - xmin
    right = xmax - prior_centers[:, 0].unsqueeze(1)
    loc = torch.cat((top, bottom, left, right), dim=1)
    if normalize_by_wh:
        # Normalize tblr by anchor width and height
        wh = priors[:, 2:4] - priors[:, 0:2]
        w, h = torch.split(wh, 1, dim=1)
        loc[:, :2] /= h  # tb is normalized by h
        loc[:, 2:] /= w  # lr is normalized by w
    # Normalize tblr by the given normalization factor
    return loc / normalizer


def tblr2bboxes(priors,
                tblr,
                normalizer=4.0,
                normalize_by_wh=True,
                max_shape=None,
                clip_border=True):
    """Decode tblr outputs to prediction boxes.

    The process includes 3 steps: 1) De-normalize tblr coordinates by
    multiplying them with `normalizer`; 2) De-normalize tblr coordinates by
    the prior bbox width and height if `normalize_by_wh` is `True`;
    3) Convert the tblr (top, bottom, left, right) pair relative to the
    center of priors back to (xmin, ymin, xmax, ymax) coordinates.

    Args:
        priors (Tensor): Prior boxes in point form (x0, y0, x1, y1).
            Shape: (n, 4).
        tblr (Tensor): Coords of network output in tblr form.
            Shape: (n, 4).
        normalizer (Sequence[float] | float): Normalization parameter of
            encoded boxes. By list, it represents the normalization factors
            at tblr dims. By float, it is the unified normalization factor
            for all dims. Default: 4.0.
        normalize_by_wh (bool): Whether the tblr coordinates have been
            normalized by the side lengths (wh) of prior bboxes.
        max_shape (tuple, optional): Shape of the image. Decoded bboxes
            exceeding it will be clamped.
        clip_border (bool, optional): Whether to clip the objects outside
            the border of the image. Defaults to True.

    Return:
        decoded boxes (Tensor), Shape: (n, 4)
    """
    if not isinstance(normalizer, float):
        normalizer = torch.tensor(normalizer, device=priors.device)
        assert len(normalizer) == 4, 'Normalizer must have length = 4'
    assert priors.size(0) == tblr.size(0)
    loc_decode = tblr * normalizer
    prior_centers = (priors[:, 0:2] + priors[:, 2:4]) / 2
    if normalize_by_wh:
        wh = priors[:, 2:4] - priors[:, 0:2]
        w, h = torch.split(wh, 1, dim=1)
        loc_decode[:, :2] *= h  # tb
        loc_decode[:, 2:] *= w  # lr
    top, bottom, left, right = loc_decode.split((1, 1, 1, 1), dim=1)
    xmin = prior_centers[:, 0].unsqueeze(1) - left
    xmax = prior_centers[:, 0].unsqueeze(1) + right
    ymin = prior_centers[:, 1].unsqueeze(1) - top
    ymax = prior_centers[:, 1].unsqueeze(1) + bottom
    boxes = torch.cat((xmin, ymin, xmax, ymax), dim=1)
    if clip_border and max_shape is not None:
        boxes[:, 0].clamp_(min=0, max=max_shape[1])
        boxes[:, 1].clamp_(min=0, max=max_shape[0])
        boxes[:, 2].clamp_(min=0, max=max_shape[1])
        boxes[:, 3].clamp_(min=0, max=max_shape[0])
    return boxes
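The two helpers are exact inverses when the same `normalizer` and `normalize_by_wh` settings are used and no border clipping kicks in, so a round trip through the coder reproduces the ground truth. A minimal sketch with illustrative values:

import torch

coder = TBLRBBoxCoder(normalizer=4.0)
priors = torch.Tensor([[0., 0., 16., 16.], [8., 8., 24., 40.]])
gts = torch.Tensor([[2., 2., 14., 15.], [10., 6., 22., 38.]])

deltas = coder.encode(priors, gts)       # (top, bottom, left, right)
restored = coder.decode(priors, deltas)  # back to (x1, y1, x2, y2)
assert torch.allclose(restored, gts, atol=1e-4)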
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/coder/yolo_bbox_coder.py
0 → 100644
import torch

from ..builder import BBOX_CODERS
from .base_bbox_coder import BaseBBoxCoder


@BBOX_CODERS.register_module()
class YOLOBBoxCoder(BaseBBoxCoder):
    """YOLO BBox coder.

    Following `YOLO <https://arxiv.org/abs/1506.02640>`_, this coder divides
    the image into grids, and encodes bboxes (x1, y1, x2, y2) into
    (cx, cy, dw, dh). cx, cy in [0., 1.] denote the relative center position
    w.r.t. the center of the bboxes. dw, dh are the same as
    :obj:`DeltaXYWHBBoxCoder`.

    Args:
        eps (float): Min value of cx, cy when encoding.
    """

    def __init__(self, eps=1e-6):
        super(YOLOBBoxCoder, self).__init__()
        self.eps = eps

    def encode(self, bboxes, gt_bboxes, stride):
        """Get box regression transformation deltas that can be used to
        transform the ``bboxes`` into the ``gt_bboxes``.

        Args:
            bboxes (torch.Tensor): Source boxes, e.g., anchors.
            gt_bboxes (torch.Tensor): Target of the transformation, e.g.,
                ground-truth boxes.
            stride (torch.Tensor | int): Stride of bboxes.

        Returns:
            torch.Tensor: Box transformation deltas
        """
        assert bboxes.size(0) == gt_bboxes.size(0)
        assert bboxes.size(-1) == gt_bboxes.size(-1) == 4
        x_center_gt = (gt_bboxes[..., 0] + gt_bboxes[..., 2]) * 0.5
        y_center_gt = (gt_bboxes[..., 1] + gt_bboxes[..., 3]) * 0.5
        w_gt = gt_bboxes[..., 2] - gt_bboxes[..., 0]
        h_gt = gt_bboxes[..., 3] - gt_bboxes[..., 1]
        x_center = (bboxes[..., 0] + bboxes[..., 2]) * 0.5
        y_center = (bboxes[..., 1] + bboxes[..., 3]) * 0.5
        w = bboxes[..., 2] - bboxes[..., 0]
        h = bboxes[..., 3] - bboxes[..., 1]
        w_target = torch.log((w_gt / w).clamp(min=self.eps))
        h_target = torch.log((h_gt / h).clamp(min=self.eps))
        x_center_target = ((x_center_gt - x_center) / stride + 0.5).clamp(
            self.eps, 1 - self.eps)
        y_center_target = ((y_center_gt - y_center) / stride + 0.5).clamp(
            self.eps, 1 - self.eps)
        encoded_bboxes = torch.stack(
            [x_center_target, y_center_target, w_target, h_target], dim=-1)
        return encoded_bboxes

    def decode(self, bboxes, pred_bboxes, stride):
        """Apply transformation `pred_bboxes` to `bboxes`.

        Args:
            bboxes (torch.Tensor): Basic boxes, e.g. anchors.
            pred_bboxes (torch.Tensor): Encoded boxes with shape (N, 4).
            stride (torch.Tensor | int): Strides of bboxes.

        Returns:
            torch.Tensor: Decoded boxes.
        """
        assert pred_bboxes.size(0) == bboxes.size(0)
        assert pred_bboxes.size(-1) == bboxes.size(-1) == 4
        x_center = (bboxes[..., 0] + bboxes[..., 2]) * 0.5
        y_center = (bboxes[..., 1] + bboxes[..., 3]) * 0.5
        w = bboxes[..., 2] - bboxes[..., 0]
        h = bboxes[..., 3] - bboxes[..., 1]
        # Get outputs x, y
        x_center_pred = (pred_bboxes[..., 0] - 0.5) * stride + x_center
        y_center_pred = (pred_bboxes[..., 1] - 0.5) * stride + y_center
        w_pred = torch.exp(pred_bboxes[..., 2]) * w
        h_pred = torch.exp(pred_bboxes[..., 3]) * h
        decoded_bboxes = torch.stack(
            (x_center_pred - w_pred / 2, y_center_pred - h_pred / 2,
             x_center_pred + w_pred / 2, y_center_pred + h_pred / 2),
            dim=-1)
        return decoded_bboxes
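A small sketch of the coder on a stride-32 grid (illustrative values). The [eps, 1 - eps] clamp on cx, cy only bites when the gt center drifts more than half a stride from the anchor center; here the centers coincide, so the round trip is exact:

import torch

coder = YOLOBBoxCoder()
anchors = torch.Tensor([[0., 0., 32., 32.]])
gt = torch.Tensor([[4., 6., 28., 26.]])

encoded = coder.encode(anchors, gt, stride=32)
# encoded = (cx, cy, dw, dh): cx = cy = 0.5, dw = log(24/32), dh = log(20/32)
decoded = coder.decode(anchors, encoded, stride=32)
assert torch.allclose(decoded, gt, atol=1e-4)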
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/demodata.py
0 → 100644
import numpy as np
import torch


def ensure_rng(rng=None):
    """Simple version of the ``kwarray.ensure_rng``

    Args:
        rng (int | numpy.random.RandomState | None):
            if None, then defaults to the global rng. Otherwise this can be
            an integer or a RandomState class.

    Returns:
        (numpy.random.RandomState): rng - a numpy random number generator

    References:
        https://gitlab.kitware.com/computer-vision/kwarray/blob/master/kwarray/util_random.py#L270
    """
    if rng is None:
        rng = np.random.mtrand._rand
    elif isinstance(rng, int):
        rng = np.random.RandomState(rng)
    else:
        rng = rng
    return rng


def random_boxes(num=1, scale=1, rng=None):
    """Simple version of ``kwimage.Boxes.random``

    Returns:
        Tensor: shape (n, 4) in x1, y1, x2, y2 format.

    References:
        https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390

    Example:
        >>> num = 3
        >>> scale = 512
        >>> rng = 0
        >>> boxes = random_boxes(num, scale, rng)
        >>> print(boxes)
        tensor([[280.9925, 278.9802, 308.6148, 366.1769],
                [216.9113, 330.6978, 224.0446, 456.5878],
                [405.3632, 196.3221, 493.3953, 270.7942]])
    """
    rng = ensure_rng(rng)

    tlbr = rng.rand(num, 4).astype(np.float32)

    tl_x = np.minimum(tlbr[:, 0], tlbr[:, 2])
    tl_y = np.minimum(tlbr[:, 1], tlbr[:, 3])
    br_x = np.maximum(tlbr[:, 0], tlbr[:, 2])
    br_y = np.maximum(tlbr[:, 1], tlbr[:, 3])

    tlbr[:, 0] = tl_x * scale
    tlbr[:, 1] = tl_y * scale
    tlbr[:, 2] = br_x * scale
    tlbr[:, 3] = br_y * scale

    boxes = torch.from_numpy(tlbr)
    return boxes
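A short sketch of why `ensure_rng` matters here: passing the same integer seed makes the sampling reproducible, and the min/max construction guarantees well-formed boxes:

import torch

a = random_boxes(num=2, scale=100, rng=42)
b = random_boxes(num=2, scale=100, rng=42)
# Same seed, same RandomState, same boxes.
assert torch.equal(a, b)
# x1 <= x2 and y1 <= y2 hold by construction (min/max over the raw pairs).
assert (a[:, 0] <= a[:, 2]).all() and (a[:, 1] <= a[:, 3]).all()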
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/iou_calculators/__init__.py
0 → 100644
from .builder import build_iou_calculator
from .iou2d_calculator import BboxOverlaps2D, bbox_overlaps

__all__ = ['build_iou_calculator', 'BboxOverlaps2D', 'bbox_overlaps']
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/iou_calculators/builder.py
0 → 100644
from mmcv.utils import Registry, build_from_cfg

IOU_CALCULATORS = Registry('IoU calculator')


def build_iou_calculator(cfg, default_args=None):
    """Builder of IoU calculator."""
    return build_from_cfg(cfg, IOU_CALCULATORS, default_args)
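A registry-driven build, mirroring how assigner configs specify the calculator (a sketch; `BboxOverlaps2D` is defined in `iou2d_calculator.py` below):

import torch

iou_calculator = build_iou_calculator(dict(type='BboxOverlaps2D'))
b1 = torch.FloatTensor([[0, 0, 10, 10]])
b2 = torch.FloatTensor([[5, 5, 15, 15]])
print(iou_calculator(b1, b2))  # pairwise IoU, shape (1, 1): 25 / 175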
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/iou_calculators/iou2d_calculator.py
0 → 100644
import torch

from .builder import IOU_CALCULATORS


@IOU_CALCULATORS.register_module()
class BboxOverlaps2D(object):
    """2D Overlaps (e.g. IoUs, GIoUs) Calculator."""

    def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False):
        """Calculate IoU between 2D bboxes.

        Args:
            bboxes1 (Tensor): bboxes have shape (m, 4) in <x1, y1, x2, y2>
                format, or shape (m, 5) in <x1, y1, x2, y2, score> format.
            bboxes2 (Tensor): bboxes have shape (n, 4) in <x1, y1, x2, y2>
                format, shape (n, 5) in <x1, y1, x2, y2, score> format, or
                be empty. If ``is_aligned`` is ``True``, then m and n must
                be equal.
            mode (str): "iou" (intersection over union), "iof" (intersection
                over foreground), or "giou" (generalized intersection over
                union).
            is_aligned (bool, optional): If True, then m and n must be
                equal. Default False.

        Returns:
            Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,)
        """
        assert bboxes1.size(-1) in [0, 4, 5]
        assert bboxes2.size(-1) in [0, 4, 5]
        if bboxes2.size(-1) == 5:
            bboxes2 = bboxes2[..., :4]
        if bboxes1.size(-1) == 5:
            bboxes1 = bboxes1[..., :4]
        return bbox_overlaps(bboxes1, bboxes2, mode, is_aligned)

    def __repr__(self):
        """str: a string describing the module"""
        repr_str = self.__class__.__name__ + '()'
        return repr_str


def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):
    """Calculate overlap between two sets of bboxes.

    If ``is_aligned`` is ``False``, then calculate the overlaps between each
    bbox of bboxes1 and bboxes2, otherwise the overlaps between each aligned
    pair of bboxes1 and bboxes2.

    Args:
        bboxes1 (Tensor): shape (B, m, 4) in <x1, y1, x2, y2> format or
            empty.
        bboxes2 (Tensor): shape (B, n, 4) in <x1, y1, x2, y2> format or
            empty. B indicates the batch dim, in shape (B1, B2, ..., Bn).
            If ``is_aligned`` is ``True``, then m and n must be equal.
        mode (str): "iou" (intersection over union), "iof" (intersection
            over foreground) or "giou" (generalized intersection over
            union). Default "iou".
        is_aligned (bool, optional): If True, then m and n must be equal.
            Default False.
        eps (float, optional): A value added to the denominator for
            numerical stability. Default 1e-6.

    Returns:
        Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,)

    Example:
        >>> bboxes1 = torch.FloatTensor([
        >>>     [0, 0, 10, 10],
        >>>     [10, 10, 20, 20],
        >>>     [32, 32, 38, 42],
        >>> ])
        >>> bboxes2 = torch.FloatTensor([
        >>>     [0, 0, 10, 20],
        >>>     [0, 10, 10, 19],
        >>>     [10, 10, 20, 20],
        >>> ])
        >>> overlaps = bbox_overlaps(bboxes1, bboxes2)
        >>> assert overlaps.shape == (3, 3)
        >>> overlaps = bbox_overlaps(bboxes1, bboxes2, is_aligned=True)
        >>> assert overlaps.shape == (3, )

    Example:
        >>> empty = torch.empty(0, 4)
        >>> nonempty = torch.FloatTensor([[0, 0, 10, 9]])
        >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1)
        >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0)
        >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0)
    """
    assert mode in ['iou', 'iof', 'giou'], f'Unsupported mode {mode}'
    # Either the boxes are empty or the length of boxes's last dimension is 4
    assert (bboxes1.size(-1) == 4 or bboxes1.size(0) == 0)
    assert (bboxes2.size(-1) == 4 or bboxes2.size(0) == 0)

    # Batch dim must be the same
    # Batch dim: (B1, B2, ... Bn)
    assert bboxes1.shape[:-2] == bboxes2.shape[:-2]
    batch_shape = bboxes1.shape[:-2]

    rows = bboxes1.size(-2)
    cols = bboxes2.size(-2)
    if is_aligned:
        assert rows == cols

    if rows * cols == 0:
        if is_aligned:
            return bboxes1.new(batch_shape + (rows, ))
        else:
            return bboxes1.new(batch_shape + (rows, cols))

    area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (
        bboxes1[..., 3] - bboxes1[..., 1])
    area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (
        bboxes2[..., 3] - bboxes2[..., 1])

    if is_aligned:
        lt = torch.max(bboxes1[..., :2], bboxes2[..., :2])  # [B, rows, 2]
        rb = torch.min(bboxes1[..., 2:], bboxes2[..., 2:])  # [B, rows, 2]

        wh = (rb - lt).clamp(min=0)  # [B, rows, 2]
        overlap = wh[..., 0] * wh[..., 1]

        if mode in ['iou', 'giou']:
            union = area1 + area2 - overlap
        else:
            union = area1
        if mode == 'giou':
            enclosed_lt = torch.min(bboxes1[..., :2], bboxes2[..., :2])
            enclosed_rb = torch.max(bboxes1[..., 2:], bboxes2[..., 2:])
    else:
        lt = torch.max(bboxes1[..., :, None, :2],
                       bboxes2[..., None, :, :2])  # [B, rows, cols, 2]
        rb = torch.min(bboxes1[..., :, None, 2:],
                       bboxes2[..., None, :, 2:])  # [B, rows, cols, 2]

        wh = (rb - lt).clamp(min=0)  # [B, rows, cols, 2]
        overlap = wh[..., 0] * wh[..., 1]

        if mode in ['iou', 'giou']:
            union = area1[..., None] + area2[..., None, :] - overlap
        else:
            union = area1[..., None]
        if mode == 'giou':
            enclosed_lt = torch.min(bboxes1[..., :, None, :2],
                                    bboxes2[..., None, :, :2])
            enclosed_rb = torch.max(bboxes1[..., :, None, 2:],
                                    bboxes2[..., None, :, 2:])

    eps = union.new_tensor([eps])
    union = torch.max(union, eps)
    ious = overlap / union
    if mode in ['iou', 'iof']:
        return ious
    # calculate gious
    enclose_wh = (enclosed_rb - enclosed_lt).clamp(min=0)
    enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
    enclose_area = torch.max(enclose_area, eps)
    gious = ious - (enclose_area - union) / enclose_area
    return gious
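One consequence of the `giou` branch worth spelling out: disjoint boxes score 0 under plain IoU but strictly negative under GIoU, because the empty part of the smallest enclosing box is penalized. A small worked sketch:

import torch

b1 = torch.FloatTensor([[0., 0., 2., 2.]])
b2 = torch.FloatTensor([[3., 0., 5., 2.]])  # disjoint from b1

iou = bbox_overlaps(b1, b2, mode='iou')    # tensor([[0.]])
giou = bbox_overlaps(b1, b2, mode='giou')
# GIoU = IoU - (enclose_area - union) / enclose_area = 0 - (10 - 8) / 10
assert torch.allclose(giou, torch.tensor([[-0.2]]))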
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/match_costs/__init__.py
0 → 100644
from .builder import build_match_cost
from .match_cost import (BBoxL1Cost, ClassificationCost, FocalLossCost,
                         IoUCost)

__all__ = [
    'build_match_cost', 'ClassificationCost', 'BBoxL1Cost', 'IoUCost',
    'FocalLossCost'
]
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/match_costs/builder.py
0 → 100644
from mmcv.utils import Registry, build_from_cfg

MATCH_COST = Registry('Match Cost')


def build_match_cost(cfg, default_args=None):
    """Builder of matching cost."""
    return build_from_cfg(cfg, MATCH_COST, default_args)
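Costs are likewise built from config dicts, e.g. the cls/reg/iou triple that a DETR-style assigner config passes in. A sketch (weights are illustrative defaults, not prescribed values):

cls_cost = build_match_cost(dict(type='ClassificationCost', weight=1.))
reg_cost = build_match_cost(dict(type='BBoxL1Cost', weight=5.0))
iou_cost = build_match_cost(dict(type='IoUCost', iou_mode='giou', weight=2.0))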
PyTorch/NLP/Conformer-main/mmdetection/mmdet/core/bbox/match_costs/match_cost.py
0 → 100644
import torch

from mmdet.core.bbox.iou_calculators import bbox_overlaps
from mmdet.core.bbox.transforms import bbox_cxcywh_to_xyxy, bbox_xyxy_to_cxcywh
from .builder import MATCH_COST


@MATCH_COST.register_module()
class BBoxL1Cost(object):
    """BBoxL1Cost.

    Args:
        weight (int | float, optional): loss_weight
        box_format (str, optional): 'xyxy' for DETR, 'xywh' for Sparse_RCNN

    Examples:
        >>> from mmdet.core.bbox.match_costs.match_cost import BBoxL1Cost
        >>> import torch
        >>> self = BBoxL1Cost()
        >>> bbox_pred = torch.rand(1, 4)
        >>> gt_bboxes = torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]])
        >>> factor = torch.tensor([10, 8, 10, 8])
        >>> self(bbox_pred, gt_bboxes, factor)
        tensor([[1.6172, 1.6422]])
    """

    def __init__(self, weight=1., box_format='xyxy'):
        self.weight = weight
        assert box_format in ['xyxy', 'xywh']
        self.box_format = box_format

    def __call__(self, bbox_pred, gt_bboxes):
        """
        Args:
            bbox_pred (Tensor): Predicted boxes with normalized coordinates
                (cx, cy, w, h), which are all in range [0, 1]. Shape
                [num_query, 4].
            gt_bboxes (Tensor): Ground truth boxes with normalized
                coordinates (x1, y1, x2, y2). Shape [num_gt, 4].

        Returns:
            torch.Tensor: bbox_cost value with weight
        """
        if self.box_format == 'xywh':
            gt_bboxes = bbox_xyxy_to_cxcywh(gt_bboxes)
        elif self.box_format == 'xyxy':
            bbox_pred = bbox_cxcywh_to_xyxy(bbox_pred)
        bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1)
        return bbox_cost * self.weight


@MATCH_COST.register_module()
class FocalLossCost(object):
    """FocalLossCost.

    Args:
        weight (int | float, optional): loss_weight
        alpha (int | float, optional): focal_loss alpha
        gamma (int | float, optional): focal_loss gamma
        eps (float, optional): default 1e-12

    Examples:
        >>> from mmdet.core.bbox.match_costs.match_cost import FocalLossCost
        >>> import torch
        >>> self = FocalLossCost()
        >>> cls_pred = torch.rand(4, 3)
        >>> gt_labels = torch.tensor([0, 1, 2])
        >>> factor = torch.tensor([10, 8, 10, 8])
        >>> self(cls_pred, gt_labels)
        tensor([[-0.3236, -0.3364, -0.2699],
                [-0.3439, -0.3209, -0.4807],
                [-0.4099, -0.3795, -0.2929],
                [-0.1950, -0.1207, -0.2626]])
    """

    def __init__(self, weight=1., alpha=0.25, gamma=2, eps=1e-12):
        self.weight = weight
        self.alpha = alpha
        self.gamma = gamma
        self.eps = eps

    def __call__(self, cls_pred, gt_labels):
        """
        Args:
            cls_pred (Tensor): Predicted classification logits, shape
                [num_query, num_class].
            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).

        Returns:
            torch.Tensor: cls_cost value with weight
        """
        cls_pred = cls_pred.sigmoid()
        neg_cost = -(1 - cls_pred + self.eps).log() * (
            1 - self.alpha) * cls_pred.pow(self.gamma)
        pos_cost = -(cls_pred + self.eps).log() * self.alpha * (
            1 - cls_pred).pow(self.gamma)
        cls_cost = pos_cost[:, gt_labels] - neg_cost[:, gt_labels]
        return cls_cost * self.weight


@MATCH_COST.register_module()
class ClassificationCost(object):
    """ClsSoftmaxCost.

    Args:
        weight (int | float, optional): loss_weight

    Examples:
        >>> from mmdet.core.bbox.match_costs.match_cost import \
        ...     ClassificationCost
        >>> import torch
        >>> self = ClassificationCost()
        >>> cls_pred = torch.rand(4, 3)
        >>> gt_labels = torch.tensor([0, 1, 2])
        >>> factor = torch.tensor([10, 8, 10, 8])
        >>> self(cls_pred, gt_labels)
        tensor([[-0.3430, -0.3525, -0.3045],
                [-0.3077, -0.2931, -0.3992],
                [-0.3664, -0.3455, -0.2881],
                [-0.3343, -0.2701, -0.3956]])
    """

    def __init__(self, weight=1.):
        self.weight = weight

    def __call__(self, cls_pred, gt_labels):
        """
        Args:
            cls_pred (Tensor): Predicted classification logits, shape
                [num_query, num_class].
            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).

        Returns:
            torch.Tensor: cls_cost value with weight
        """
        # Following the official DETR repo, contrary to the loss, where
        # NLL is used, we approximate it with 1 - cls_score[gt_label].
        # The 1 is a constant that doesn't change the matching,
        # so it can be omitted.
        cls_score = cls_pred.softmax(-1)
        cls_cost = -cls_score[:, gt_labels]
        return cls_cost * self.weight


@MATCH_COST.register_module()
class IoUCost(object):
    """IoUCost.

    Args:
        iou_mode (str, optional): iou mode such as 'iou' | 'giou'
        weight (int | float, optional): loss weight

    Examples:
        >>> from mmdet.core.bbox.match_costs.match_cost import IoUCost
        >>> import torch
        >>> self = IoUCost()
        >>> bboxes = torch.FloatTensor([[1, 1, 2, 2], [2, 2, 3, 4]])
        >>> gt_bboxes = torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]])
        >>> self(bboxes, gt_bboxes)
        tensor([[-0.1250, 0.1667],
                [ 0.1667, -0.5000]])
    """

    def __init__(self, iou_mode='giou', weight=1.):
        self.weight = weight
        self.iou_mode = iou_mode

    def __call__(self, bboxes, gt_bboxes):
        """
        Args:
            bboxes (Tensor): Predicted boxes with unnormalized coordinates
                (x1, y1, x2, y2). Shape [num_query, 4].
            gt_bboxes (Tensor): Ground truth boxes with unnormalized
                coordinates (x1, y1, x2, y2). Shape [num_gt, 4].

        Returns:
            torch.Tensor: iou_cost value with weight
        """
        # overlaps: [num_bboxes, num_gt]
        overlaps = bbox_overlaps(
            bboxes, gt_bboxes, mode=self.iou_mode, is_aligned=False)
        # The 1 is a constant that doesn't change the matching, so omitted.
        iou_cost = -overlaps
        return iou_cost * self.weight
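These cost classes each return a [num_query, num_gt] matrix; a Hungarian-style assigner (cf. `hungarian_assigner.py` in this commit) sums them and minimizes the total with `scipy.optimize.linear_sum_assignment`. A minimal sketch of that combination with made-up predictions; the weights here are illustrative, not prescribed:

import torch
from scipy.optimize import linear_sum_assignment

num_query, num_gt, num_class = 5, 2, 3
cls_pred = torch.rand(num_query, num_class)
bbox_pred = torch.rand(num_query, 4)  # normalized (cx, cy, w, h)
gt_labels = torch.tensor([0, 2])
gt_bboxes = torch.FloatTensor([[0.1, 0.1, 0.4, 0.5],
                               [0.5, 0.2, 0.9, 0.8]])  # normalized xyxy

# Sum of per-term cost matrices, each shaped [num_query, num_gt].
cost = (ClassificationCost(weight=1.)(cls_pred, gt_labels) +
        BBoxL1Cost(weight=5.)(bbox_pred, gt_bboxes))
row, col = linear_sum_assignment(cost.numpy())  # one query per gt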