ModelZoo / RT-DETR_pytorch

Commit 46260e34, authored Nov 28, 2024 by suily

    Initial commit

Pipeline #2006 failed in 0 seconds. Changes: 81 files · Pipelines: 1

Showing 20 changed files with 1829 additions and 0 deletions (+1829 −0). This is page 1 of 5; the commit touches 81 files in total.
Changed files on this page:

rtdetr_pytorch/src/data/coco/coco_eval.py (+269 −0)
rtdetr_pytorch/src/data/coco/coco_utils.py (+184 −0)
rtdetr_pytorch/src/data/dataloader.py (+28 −0)
rtdetr_pytorch/src/data/functional.py (+169 −0)
rtdetr_pytorch/src/data/transforms.py (+161 −0)
rtdetr_pytorch/src/misc/__init__.py (+3 −0)
rtdetr_pytorch/src/misc/dist.py (+190 −0)
rtdetr_pytorch/src/misc/logger.py (+239 −0)
rtdetr_pytorch/src/misc/visualizer.py (+34 −0)
rtdetr_pytorch/src/nn/__init__.py (+7 −0)
rtdetr_pytorch/src/nn/arch/__init__.py (+1 −0)
rtdetr_pytorch/src/nn/arch/classification.py (+41 −0)
rtdetr_pytorch/src/nn/backbone/__init__.py (+6 −0)
rtdetr_pytorch/src/nn/backbone/common.py (+102 −0)
rtdetr_pytorch/src/nn/backbone/presnet.py (+225 −0)
rtdetr_pytorch/src/nn/backbone/test_resnet.py (+81 −0)
rtdetr_pytorch/src/nn/backbone/utils.py (+58 −0)
rtdetr_pytorch/src/nn/criterion/__init__.py (+6 −0)
rtdetr_pytorch/src/nn/criterion/utils.py (+20 −0)
rtdetr_pytorch/src/optim/__init__.py (+5 −0)
rtdetr_pytorch/src/data/coco/coco_eval.py — new file, mode 100644 (+269 −0)

```python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
COCO evaluator that works in distributed mode.
Mostly copy-paste from https://github.com/pytorch/vision/blob/edfd5a7/references/detection/coco_eval.py
The difference is that there is less copy-pasting from pycocotools
in the end of the file, as python3 can suppress prints with contextlib
"""
import os
import contextlib
import copy
import numpy as np
import torch

from pycocotools.cocoeval import COCOeval
from pycocotools.coco import COCO
import pycocotools.mask as mask_util

from src.misc import dist

__all__ = ['CocoEvaluator',]


class CocoEvaluator(object):
    def __init__(self, coco_gt, iou_types):
        assert isinstance(iou_types, (list, tuple))
        coco_gt = copy.deepcopy(coco_gt)
        self.coco_gt = coco_gt
        self.iou_types = iou_types
        self.coco_eval = {}
        for iou_type in iou_types:
            self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)

        self.img_ids = []
        self.eval_imgs = {k: [] for k in iou_types}

    def update(self, predictions):
        img_ids = list(np.unique(list(predictions.keys())))
        self.img_ids.extend(img_ids)

        for iou_type in self.iou_types:
            results = self.prepare(predictions, iou_type)

            # suppress pycocotools prints
            with open(os.devnull, 'w') as devnull:
                with contextlib.redirect_stdout(devnull):
                    coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO()
            coco_eval = self.coco_eval[iou_type]

            coco_eval.cocoDt = coco_dt
            coco_eval.params.imgIds = list(img_ids)
            img_ids, eval_imgs = evaluate(coco_eval)

            self.eval_imgs[iou_type].append(eval_imgs)

    def synchronize_between_processes(self):
        for iou_type in self.iou_types:
            self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
            create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])

    def accumulate(self):
        for coco_eval in self.coco_eval.values():
            coco_eval.accumulate()

    def summarize(self):
        for iou_type, coco_eval in self.coco_eval.items():
            print("IoU metric: {}".format(iou_type))
            coco_eval.summarize()

    def prepare(self, predictions, iou_type):
        if iou_type == "bbox":
            return self.prepare_for_coco_detection(predictions)
        elif iou_type == "segm":
            return self.prepare_for_coco_segmentation(predictions)
        elif iou_type == "keypoints":
            return self.prepare_for_coco_keypoint(predictions)
        else:
            raise ValueError("Unknown iou type {}".format(iou_type))

    def prepare_for_coco_detection(self, predictions):
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            boxes = prediction["boxes"]
            boxes = convert_to_xywh(boxes).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()

            coco_results.extend(
                [
                    {
                        "image_id": original_id,
                        "category_id": labels[k],
                        "bbox": box,
                        "score": scores[k],
                    }
                    for k, box in enumerate(boxes)
                ]
            )
        return coco_results

    def prepare_for_coco_segmentation(self, predictions):
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            scores = prediction["scores"]
            labels = prediction["labels"]
            masks = prediction["masks"]

            masks = masks > 0.5

            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()

            rles = [
                mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
                for mask in masks
            ]
            for rle in rles:
                rle["counts"] = rle["counts"].decode("utf-8")

            coco_results.extend(
                [
                    {
                        "image_id": original_id,
                        "category_id": labels[k],
                        "segmentation": rle,
                        "score": scores[k],
                    }
                    for k, rle in enumerate(rles)
                ]
            )
        return coco_results

    def prepare_for_coco_keypoint(self, predictions):
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            boxes = prediction["boxes"]
            boxes = convert_to_xywh(boxes).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()
            keypoints = prediction["keypoints"]
            keypoints = keypoints.flatten(start_dim=1).tolist()

            coco_results.extend(
                [
                    {
                        "image_id": original_id,
                        "category_id": labels[k],
                        'keypoints': keypoint,
                        "score": scores[k],
                    }
                    for k, keypoint in enumerate(keypoints)
                ]
            )
        return coco_results


def convert_to_xywh(boxes):
    xmin, ymin, xmax, ymax = boxes.unbind(1)
    return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)


def merge(img_ids, eval_imgs):
    all_img_ids = dist.all_gather(img_ids)
    all_eval_imgs = dist.all_gather(eval_imgs)

    merged_img_ids = []
    for p in all_img_ids:
        merged_img_ids.extend(p)

    merged_eval_imgs = []
    for p in all_eval_imgs:
        merged_eval_imgs.append(p)

    merged_img_ids = np.array(merged_img_ids)
    merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)

    # keep only unique (and in sorted order) images
    merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)
    merged_eval_imgs = merged_eval_imgs[..., idx]

    return merged_img_ids, merged_eval_imgs


def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
    img_ids, eval_imgs = merge(img_ids, eval_imgs)
    img_ids = list(img_ids)
    eval_imgs = list(eval_imgs.flatten())

    coco_eval.evalImgs = eval_imgs
    coco_eval.params.imgIds = img_ids
    coco_eval._paramsEval = copy.deepcopy(coco_eval.params)


#################################################################
# From pycocotools, just removed the prints and fixed
# a Python3 bug about unicode not defined
#################################################################

# import io
# from contextlib import redirect_stdout
# def evaluate(imgs):
#     with redirect_stdout(io.StringIO()):
#         imgs.evaluate()
#     return imgs.params.imgIds, np.asarray(imgs.evalImgs).reshape(-1, len(imgs.params.areaRng), len(imgs.params.imgIds))


def evaluate(self):
    '''
    Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
    :return: None
    '''
    # tic = time.time()
    # print('Running per image evaluation...')
    p = self.params
    # add backward compatibility if useSegm is specified in params
    if p.useSegm is not None:
        p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
        print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))
    # print('Evaluate annotation type *{}*'.format(p.iouType))
    p.imgIds = list(np.unique(p.imgIds))
    if p.useCats:
        p.catIds = list(np.unique(p.catIds))
    p.maxDets = sorted(p.maxDets)
    self.params = p

    self._prepare()
    # loop through images, area range, max detection number
    catIds = p.catIds if p.useCats else [-1]

    if p.iouType == 'segm' or p.iouType == 'bbox':
        computeIoU = self.computeIoU
    elif p.iouType == 'keypoints':
        computeIoU = self.computeOks
    self.ious = {
        (imgId, catId): computeIoU(imgId, catId)
        for imgId in p.imgIds
        for catId in catIds}

    evaluateImg = self.evaluateImg
    maxDet = p.maxDets[-1]
    evalImgs = [
        evaluateImg(imgId, catId, areaRng, maxDet)
        for catId in catIds
        for areaRng in p.areaRng
        for imgId in p.imgIds
    ]
    # this is NOT in the pycocotools code, but could be done outside
    evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))
    self._paramsEval = copy.deepcopy(self.params)
    # toc = time.time()
    # print('DONE (t={:0.2f}s).'.format(toc-tic))
    return p.imgIds, evalImgs

#################################################################
# end of straight copy from pycocotools, just removing the prints
#################################################################
```
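For orientation, a minimal usage sketch follows (not part of this commit). It assumes a COCO annotation file at a hypothetical path; predictions are keyed by image id and carry xyxy boxes, which `prepare_for_coco_detection` converts to xywh internally.

```python
from pycocotools.coco import COCO
import torch

coco_gt = COCO('annotations/instances_val2017.json')  # hypothetical path
evaluator = CocoEvaluator(coco_gt, iou_types=['bbox'])

predictions = {
    139: {  # an image_id that exists in the ground-truth file
        'boxes': torch.tensor([[10.0, 20.0, 110.0, 220.0]]),  # xyxy
        'scores': torch.tensor([0.9]),
        'labels': torch.tensor([1]),
    }
}
evaluator.update(predictions)
evaluator.synchronize_between_processes()  # gathers across ranks; trivial in single-process mode
evaluator.accumulate()
evaluator.summarize()
```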
rtdetr_pytorch/src/data/coco/coco_utils.py — new file, mode 100644 (+184 −0)

```python
import os

import torch
import torch.utils.data
import torchvision
from pycocotools import mask as coco_mask
from pycocotools.coco import COCO


def convert_coco_poly_to_mask(segmentations, height, width):
    masks = []
    for polygons in segmentations:
        rles = coco_mask.frPyObjects(polygons, height, width)
        mask = coco_mask.decode(rles)
        if len(mask.shape) < 3:
            mask = mask[..., None]
        mask = torch.as_tensor(mask, dtype=torch.uint8)
        mask = mask.any(dim=2)
        masks.append(mask)
    if masks:
        masks = torch.stack(masks, dim=0)
    else:
        masks = torch.zeros((0, height, width), dtype=torch.uint8)
    return masks


class ConvertCocoPolysToMask:
    def __call__(self, image, target):
        w, h = image.size

        image_id = target["image_id"]

        anno = target["annotations"]

        anno = [obj for obj in anno if obj["iscrowd"] == 0]

        boxes = [obj["bbox"] for obj in anno]
        # guard against no boxes via resizing
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        boxes[:, 2:] += boxes[:, :2]
        boxes[:, 0::2].clamp_(min=0, max=w)
        boxes[:, 1::2].clamp_(min=0, max=h)

        classes = [obj["category_id"] for obj in anno]
        classes = torch.tensor(classes, dtype=torch.int64)

        segmentations = [obj["segmentation"] for obj in anno]
        masks = convert_coco_poly_to_mask(segmentations, h, w)

        keypoints = None
        if anno and "keypoints" in anno[0]:
            keypoints = [obj["keypoints"] for obj in anno]
            keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
            num_keypoints = keypoints.shape[0]
            if num_keypoints:
                keypoints = keypoints.view(num_keypoints, -1, 3)

        keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
        boxes = boxes[keep]
        classes = classes[keep]
        masks = masks[keep]
        if keypoints is not None:
            keypoints = keypoints[keep]

        target = {}
        target["boxes"] = boxes
        target["labels"] = classes
        target["masks"] = masks
        target["image_id"] = image_id
        if keypoints is not None:
            target["keypoints"] = keypoints

        # for conversion to coco api
        area = torch.tensor([obj["area"] for obj in anno])
        iscrowd = torch.tensor([obj["iscrowd"] for obj in anno])
        target["area"] = area
        target["iscrowd"] = iscrowd

        return image, target


def _coco_remove_images_without_annotations(dataset, cat_list=None):
    def _has_only_empty_bbox(anno):
        return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno)

    def _count_visible_keypoints(anno):
        return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)

    min_keypoints_per_image = 10

    def _has_valid_annotation(anno):
        # if it's empty, there is no annotation
        if len(anno) == 0:
            return False
        # if all boxes have close to zero area, there is no annotation
        if _has_only_empty_bbox(anno):
            return False
        # keypoints task have a slight different criteria for considering
        # if an annotation is valid
        if "keypoints" not in anno[0]:
            return True
        # for keypoint detection tasks, only consider valid images those
        # containing at least min_keypoints_per_image
        if _count_visible_keypoints(anno) >= min_keypoints_per_image:
            return True
        return False

    ids = []
    for ds_idx, img_id in enumerate(dataset.ids):
        ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
        anno = dataset.coco.loadAnns(ann_ids)
        if cat_list:
            anno = [obj for obj in anno if obj["category_id"] in cat_list]
        if _has_valid_annotation(anno):
            ids.append(ds_idx)

    dataset = torch.utils.data.Subset(dataset, ids)
    return dataset


def convert_to_coco_api(ds):
    coco_ds = COCO()
    # annotation IDs need to start at 1, not 0, see torchvision issue #1530
    ann_id = 1
    dataset = {"images": [], "categories": [], "annotations": []}
    categories = set()
    for img_idx in range(len(ds)):
        # find better way to get target
        # targets = ds.get_annotations(img_idx)
        img, targets = ds[img_idx]
        image_id = targets["image_id"].item()
        img_dict = {}
        img_dict["id"] = image_id
        img_dict["height"] = img.shape[-2]
        img_dict["width"] = img.shape[-1]
        dataset["images"].append(img_dict)
        bboxes = targets["boxes"].clone()
        bboxes[:, 2:] -= bboxes[:, :2]
        bboxes = bboxes.tolist()
        labels = targets["labels"].tolist()
        areas = targets["area"].tolist()
        iscrowd = targets["iscrowd"].tolist()
        if "masks" in targets:
            masks = targets["masks"]
            # make masks Fortran contiguous for coco_mask
            masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1)
        if "keypoints" in targets:
            keypoints = targets["keypoints"]
            keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist()
        num_objs = len(bboxes)
        for i in range(num_objs):
            ann = {}
            ann["image_id"] = image_id
            ann["bbox"] = bboxes[i]
            ann["category_id"] = labels[i]
            categories.add(labels[i])
            ann["area"] = areas[i]
            ann["iscrowd"] = iscrowd[i]
            ann["id"] = ann_id
            if "masks" in targets:
                ann["segmentation"] = coco_mask.encode(masks[i].numpy())
            if "keypoints" in targets:
                ann["keypoints"] = keypoints[i]
                ann["num_keypoints"] = sum(k != 0 for k in keypoints[i][2::3])
            dataset["annotations"].append(ann)
            ann_id += 1
    dataset["categories"] = [{"id": i} for i in sorted(categories)]
    coco_ds.dataset = dataset
    coco_ds.createIndex()
    return coco_ds


def get_coco_api_from_dataset(dataset):
    # FIXME: This is... awful?
    for _ in range(10):
        if isinstance(dataset, torchvision.datasets.CocoDetection):
            break
        if isinstance(dataset, torch.utils.data.Subset):
            dataset = dataset.dataset
    if isinstance(dataset, torchvision.datasets.CocoDetection):
        return dataset.coco
    return convert_to_coco_api(dataset)
```
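`get_coco_api_from_dataset` unwraps up to ten nested `Subset`s to find a `CocoDetection` and reuse its COCO index; anything else goes through the slower `convert_to_coco_api`. A small sketch, assuming hypothetical local paths to the val2017 images and annotations:

```python
import torchvision

# Hypothetical paths; any torchvision CocoDetection works here.
ds = torchvision.datasets.CocoDetection('val2017/', 'annotations/instances_val2017.json')
coco_api = get_coco_api_from_dataset(ds)  # fast path: returns ds.coco directly
print(len(coco_api.getImgIds()))

# ConvertCocoPolysToMask expects the raw annotation list wrapped as below:
image, anns = ds[0]
image, target = ConvertCocoPolysToMask()(image, {'image_id': ds.ids[0], 'annotations': anns})
print(target['boxes'].shape, target['masks'].shape)
```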
rtdetr_pytorch/src/data/dataloader.py — new file, mode 100644 (+28 −0)

```python
import torch
import torch.utils.data as data

from src.core import register


__all__ = ['DataLoader']


@register
class DataLoader(data.DataLoader):
    __inject__ = ['dataset', 'collate_fn']

    def __repr__(self) -> str:
        format_string = self.__class__.__name__ + "("
        for n in ['dataset', 'batch_size', 'num_workers', 'drop_last', 'collate_fn']:
            format_string += "\n"
            format_string += "    {0}: {1}".format(n, getattr(self, n))
        format_string += "\n)"
        return format_string


@register
def default_collate_fn(items):
    '''default collate_fn
    '''
    return torch.cat([x[0][None] for x in items], dim=0), [x[1] for x in items]
```
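`default_collate_fn` stacks same-sized image tensors along a new batch dimension and leaves the targets as a plain list, which suits detection targets of varying length. A quick sketch with dummy data:

```python
import torch

items = [(torch.rand(3, 640, 640), {'labels': torch.tensor([1])}),
         (torch.rand(3, 640, 640), {'labels': torch.tensor([2, 3])})]
images, targets = default_collate_fn(items)
print(images.shape)  # torch.Size([2, 3, 640, 640])
print(len(targets))  # 2; each entry keeps its own number of labels
```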
rtdetr_pytorch/src/data/functional.py — new file, mode 100644 (+169 −0)

```python
import torch
import torchvision.transforms.functional as F

from packaging import version
from typing import Optional, List
from torch import Tensor

# needed due to empty tensor bug in pytorch and torchvision 0.5
import torchvision
if version.parse(torchvision.__version__) < version.parse('0.7'):
    from torchvision.ops import _new_empty_tensor
    from torchvision.ops.misc import _output_size


def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None):
    # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor
    """
    Equivalent to nn.functional.interpolate, but with support for empty batch sizes.
    This will eventually be supported natively by PyTorch, and this
    class can go away.
    """
    if version.parse(torchvision.__version__) < version.parse('0.7'):
        if input.numel() > 0:
            return torch.nn.functional.interpolate(input, size, scale_factor, mode, align_corners)

        output_shape = _output_size(2, input, size, scale_factor)
        output_shape = list(input.shape[:-2]) + list(output_shape)
        return _new_empty_tensor(input, output_shape)
    else:
        return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners)


def crop(image, target, region):
    cropped_image = F.crop(image, *region)

    target = target.copy()
    i, j, h, w = region

    # should we do something wrt the original size?
    target["size"] = torch.tensor([h, w])

    fields = ["labels", "area", "iscrowd"]

    if "boxes" in target:
        boxes = target["boxes"]
        max_size = torch.as_tensor([w, h], dtype=torch.float32)
        cropped_boxes = boxes - torch.as_tensor([j, i, j, i])
        cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size)
        cropped_boxes = cropped_boxes.clamp(min=0)
        area = (cropped_boxes[:, 1, :] - cropped_boxes[:, 0, :]).prod(dim=1)
        target["boxes"] = cropped_boxes.reshape(-1, 4)
        target["area"] = area
        fields.append("boxes")

    if "masks" in target:
        # FIXME should we update the area here if there are no boxes?
        target['masks'] = target['masks'][:, i:i + h, j:j + w]
        fields.append("masks")

    # remove elements for which the boxes or masks that have zero area
    if "boxes" in target or "masks" in target:
        # favor boxes selection when defining which elements to keep
        # this is compatible with previous implementation
        if "boxes" in target:
            cropped_boxes = target['boxes'].reshape(-1, 2, 2)
            keep = torch.all(cropped_boxes[:, 1, :] > cropped_boxes[:, 0, :], dim=1)
        else:
            keep = target['masks'].flatten(1).any(1)

        for field in fields:
            target[field] = target[field][keep]

    return cropped_image, target


def hflip(image, target):
    flipped_image = F.hflip(image)

    w, h = image.size

    target = target.copy()
    if "boxes" in target:
        boxes = target["boxes"]
        boxes = boxes[:, [2, 1, 0, 3]] * torch.as_tensor([-1, 1, -1, 1]) + torch.as_tensor([w, 0, w, 0])
        target["boxes"] = boxes

    if "masks" in target:
        target['masks'] = target['masks'].flip(-1)

    return flipped_image, target


def resize(image, target, size, max_size=None):
    # size can be min_size (scalar) or (w, h) tuple

    def get_size_with_aspect_ratio(image_size, size, max_size=None):
        w, h = image_size
        if max_size is not None:
            min_original_size = float(min((w, h)))
            max_original_size = float(max((w, h)))
            if max_original_size / min_original_size * size > max_size:
                size = int(round(max_size * min_original_size / max_original_size))

        if (w <= h and w == size) or (h <= w and h == size):
            return (h, w)

        if w < h:
            ow = size
            oh = int(size * h / w)
        else:
            oh = size
            ow = int(size * w / h)

        # r = min(size / min(h, w), max_size / max(h, w))
        # ow = int(w * r)
        # oh = int(h * r)

        return (oh, ow)

    def get_size(image_size, size, max_size=None):
        if isinstance(size, (list, tuple)):
            return size[::-1]
        else:
            return get_size_with_aspect_ratio(image_size, size, max_size)

    size = get_size(image.size, size, max_size)
    rescaled_image = F.resize(image, size)

    if target is None:
        return rescaled_image, None

    ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size))
    ratio_width, ratio_height = ratios

    target = target.copy()
    if "boxes" in target:
        boxes = target["boxes"]
        scaled_boxes = boxes * torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height])
        target["boxes"] = scaled_boxes

    if "area" in target:
        area = target["area"]
        scaled_area = area * (ratio_width * ratio_height)
        target["area"] = scaled_area

    h, w = size
    target["size"] = torch.tensor([h, w])

    if "masks" in target:
        target['masks'] = interpolate(
            target['masks'][:, None].float(), size, mode="nearest")[:, 0] > 0.5

    return rescaled_image, target


def pad(image, target, padding):
    # assumes that we only pad on the bottom right corners
    padded_image = F.pad(image, (0, 0, padding[0], padding[1]))
    if target is None:
        return padded_image, None
    target = target.copy()
    # should we do something wrt the original size?
    target["size"] = torch.tensor(padded_image.size[::-1])
    if "masks" in target:
        target['masks'] = torch.nn.functional.pad(target['masks'], (0, padding[0], 0, padding[1]))
    return padded_image, target
```
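These helpers operate on a (PIL image, target dict) pair and keep boxes, sizes, and areas consistent with the geometric change. A small sketch with a synthetic target (values are illustrative only):

```python
import torch
from PIL import Image

image = Image.new('RGB', (640, 480))
target = {'boxes': torch.tensor([[100.0, 100.0, 200.0, 200.0]]),
          'labels': torch.tensor([1]),
          'area': torch.tensor([10000.0]),
          'iscrowd': torch.tensor([0])}

flipped, target_f = hflip(image, target)  # boxes mirrored around the vertical axis
resized, target_r = resize(image, target, size=320, max_size=640)  # min-side resize
print(target_f['boxes'])   # tensor([[440., 100., 540., 200.]])
print(target_r['size'])    # new (h, w), aspect ratio preserved
```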
rtdetr_pytorch/src/data/transforms.py — new file, mode 100644 (+161 −0)

```python
""""by lyuwenyu
"""

import torch
import torch.nn as nn

import torchvision
torchvision.disable_beta_transforms_warning()
# TODO: modified library:
# from torchvision import datapoints
from torchvision import tv_tensors
import torchvision.transforms.v2 as T
import torchvision.transforms.v2.functional as F

from PIL import Image
from typing import Any, Dict, List, Optional

from src.core import register, GLOBAL_CONFIG


__all__ = ['Compose', ]


RandomPhotometricDistort = register(T.RandomPhotometricDistort)
RandomZoomOut = register(T.RandomZoomOut)
# RandomIoUCrop = register(T.RandomIoUCrop)
RandomHorizontalFlip = register(T.RandomHorizontalFlip)
Resize = register(T.Resize)
# ToImageTensor = register(T.ToImageTensor)
ToImageTensor = register(T.ToImage)
# TODO: modified
# ConvertDtype = register(T.ConvertDtype)
ConvertDtype = register(T.ConvertImageDtype)
# TODO: modified
# SanitizeBoundingBox = register(T.SanitizeBoundingBox)
SanitizeBoundingBox = register(T.SanitizeBoundingBoxes)
# TODO: modified
RandomCrop = register(T.RandomCrop)
Normalize = register(T.Normalize)


@register
class Compose(T.Compose):
    def __init__(self, ops) -> None:
        transforms = []
        if ops is not None:
            for op in ops:
                if isinstance(op, dict):
                    name = op.pop('type')
                    transfom = getattr(GLOBAL_CONFIG[name]['_pymodule'], name)(**op)
                    transforms.append(transfom)
                    # op['type'] = name
                elif isinstance(op, nn.Module):
                    transforms.append(op)
                else:
                    raise ValueError('')
        else:
            transforms = [EmptyTransform(), ]

        super().__init__(transforms=transforms)


@register
class EmptyTransform(T.Transform):
    def __init__(self, ) -> None:
        super().__init__()

    def forward(self, *inputs):
        inputs = inputs if len(inputs) > 1 else inputs[0]
        return inputs


@register
class PadToSize(T.Pad):
    # _transformed_types = (
    #     Image.Image,
    #     datapoints.Image,
    #     datapoints.Video,
    #     datapoints.Mask,
    #     datapoints.BoundingBox,
    # )
    _transformed_types = (
        # TODO: modified
        Image.Image,
        tv_tensors.Image,
        tv_tensors.Video,
        tv_tensors.Mask,
        tv_tensors.BoundingBoxes,
    )

    def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
        # sz = F.get_spatial_size(flat_inputs[0])
        sz = F.get_image_size(flat_inputs[0])
        # TODO: modified
        h, w = self.spatial_size[0] - sz[0], self.spatial_size[1] - sz[1]
        self.padding = [0, 0, w, h]
        return dict(padding=self.padding)

    def __init__(self, spatial_size, fill=0, padding_mode='constant') -> None:
        if isinstance(spatial_size, int):
            spatial_size = (spatial_size, spatial_size)
        self.spatial_size = spatial_size
        super().__init__(0, fill, padding_mode)

    def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
        fill = self._fill[type(inpt)]
        padding = params['padding']
        return F.pad(inpt, padding=padding, fill=fill, padding_mode=self.padding_mode)  # type: ignore[arg-type]

    def __call__(self, *inputs: Any) -> Any:
        outputs = super().forward(*inputs)
        if len(outputs) > 1 and isinstance(outputs[1], dict):
            outputs[1]['padding'] = torch.tensor(self.padding)
        return outputs


@register
class RandomIoUCrop(T.RandomIoUCrop):
    def __init__(self, min_scale: float = 0.3, max_scale: float = 1, min_aspect_ratio: float = 0.5,
                 max_aspect_ratio: float = 2, sampler_options: Optional[List[float]] = None,
                 trials: int = 40, p: float = 1.0):
        super().__init__(min_scale, max_scale, min_aspect_ratio, max_aspect_ratio, sampler_options, trials)
        self.p = p

    def __call__(self, *inputs: Any) -> Any:
        if torch.rand(1) >= self.p:
            return inputs if len(inputs) > 1 else inputs[0]

        return super().forward(*inputs)


@register
class ConvertBox(T.Transform):
    _transformed_types = (
        # datapoints.BoundingBox,
        tv_tensors.BoundingBoxes,
        # TODO: modified
    )

    def __init__(self, out_fmt='', normalize=False) -> None:
        super().__init__()
        self.out_fmt = out_fmt
        self.normalize = normalize

        self.data_fmt = {
            # 'xyxy': datapoints.BoundingBoxFormat.XYXY,
            # 'cxcywh': datapoints.BoundingBoxFormat.CXCYWH
            'xyxy': tv_tensors.BoundingBoxFormat.XYXY,
            # TODO: modified
            'cxcywh': tv_tensors.BoundingBoxFormat.CXCYWH
        }

    def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
        if self.out_fmt:
            # spatial_size = inpt.spatial_size
            spatial_size = inpt.canvas_size
            # TODO: modified
            in_fmt = inpt.format.value.lower()
            inpt = torchvision.ops.box_convert(inpt, in_fmt=in_fmt, out_fmt=self.out_fmt)
            # inpt = datapoints.BoundingBox(inpt, format=self.data_fmt[self.out_fmt], spatial_size=spatial_size)
            inpt = tv_tensors.BoundingBoxes(inpt, format=self.data_fmt[self.out_fmt], canvas_size=spatial_size)
            # TODO: modified

        if self.normalize:
            # inpt = inpt / torch.tensor(inpt.spatial_size[::-1]).tile(2)[None]
            inpt = inpt / torch.tensor(inpt.canvas_size[::-1]).tile(2)[None]
            # TODO: modified

        return inpt
```
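`Compose` accepts either registry dicts (resolved by `'type'` through `GLOBAL_CONFIG`) or ready transform modules. A sketch using the module path, which sidesteps the config machinery:

```python
import torchvision.transforms.v2 as T

pipeline = Compose(ops=[
    T.RandomHorizontalFlip(p=0.5),
    T.Resize(size=(640, 640)),
])
# In a YAML config the equivalent dict form would be, e.g.,
# {'type': 'RandomHorizontalFlip', 'p': 0.5}, looked up via GLOBAL_CONFIG.
```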
rtdetr_pytorch/src/misc/__init__.py — new file, mode 100644 (+3 −0)

```python
from .logger import *
from .visualizer import *
```
rtdetr_pytorch/src/misc/dist.py — new file, mode 100644 (+190 −0)

```python
"""
reference
- https://github.com/pytorch/vision/blob/main/references/detection/utils.py
- https://github.com/facebookresearch/detr/blob/master/util/misc.py#L406

by lyuwenyu
"""

import random
import numpy as np

import torch
import torch.nn as nn
import torch.distributed
import torch.distributed as tdist

from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data import DistributedSampler
from torch.utils.data.dataloader import DataLoader


def init_distributed():
    '''
    distributed setup
    args:
        backend (str), ('nccl', 'gloo')
    '''
    try:
        # # https://pytorch.org/docs/stable/elastic/run.html
        # LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))
        # RANK = int(os.getenv('RANK', -1))
        # WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))

        tdist.init_process_group(init_method='env://', )
        torch.distributed.barrier()

        rank = get_rank()
        device = torch.device(f'cuda:{rank}')
        torch.cuda.set_device(device)

        setup_print(rank == 0)
        print('Initialized distributed mode...')

        return True

    except:
        print('Not init distributed mode.')
        return False


def setup_print(is_main):
    '''This function disables printing when not in master process
    '''
    import builtins as __builtin__
    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        force = kwargs.pop('force', False)
        if is_main or force:
            builtin_print(*args, **kwargs)

    __builtin__.print = print


def is_dist_available_and_initialized():
    if not tdist.is_available():
        return False
    if not tdist.is_initialized():
        return False
    return True


def get_rank():
    if not is_dist_available_and_initialized():
        return 0
    return tdist.get_rank()


def get_world_size():
    if not is_dist_available_and_initialized():
        return 1
    return tdist.get_world_size()


def is_main_process():
    return get_rank() == 0


def save_on_master(*args, **kwargs):
    if is_main_process():
        torch.save(*args, **kwargs)


def warp_model(model, find_unused_parameters=False, sync_bn=False, ):
    if is_dist_available_and_initialized():
        rank = get_rank()
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model) if sync_bn else model
        model = DDP(model, device_ids=[rank], output_device=rank,
                    find_unused_parameters=find_unused_parameters)
    return model


def warp_loader(loader, shuffle=False):
    if is_dist_available_and_initialized():
        sampler = DistributedSampler(loader.dataset, shuffle=shuffle)
        loader = DataLoader(loader.dataset,
                            loader.batch_size,
                            sampler=sampler,
                            drop_last=loader.drop_last,
                            collate_fn=loader.collate_fn,
                            pin_memory=loader.pin_memory,
                            num_workers=loader.num_workers, )
    return loader


def is_parallel(model) -> bool:
    # Returns True if model is of type DP or DDP
    return type(model) in (torch.nn.parallel.DataParallel, torch.nn.parallel.DistributedDataParallel)


def de_parallel(model) -> nn.Module:
    # De-parallelize a model: returns single-GPU model if model is of type DP or DDP
    return model.module if is_parallel(model) else model


def reduce_dict(data, avg=True):
    '''
    Args
        data dict: input, {k: v, ...}
        avg bool: true
    '''
    world_size = get_world_size()
    if world_size < 2:
        return data

    with torch.no_grad():
        keys, values = [], []
        for k in sorted(data.keys()):
            keys.append(k)
            values.append(data[k])

        values = torch.stack(values, dim=0)
        tdist.all_reduce(values)

        if avg is True:
            values /= world_size

        _data = {k: v for k, v in zip(keys, values)}

    return _data


def all_gather(data):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors)
    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank
    """
    world_size = get_world_size()
    if world_size == 1:
        return [data]
    data_list = [None] * world_size
    tdist.all_gather_object(data_list, data)
    return data_list


import time


def sync_time():
    '''sync_time
    '''
    if torch.cuda.is_available():
        torch.cuda.synchronize()

    return time.time()


def set_seed(seed):
    # fix the seed for reproducibility
    seed = seed + get_rank()
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
```
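These helpers assume the `env://` variables that `torchrun` exports. A minimal entry-point sketch (the model and dataset here are stand-ins, not part of this commit):

```python
# Launch with: torchrun --nproc_per_node=2 train.py
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

def main():
    is_dist = init_distributed()  # falls back to single-process mode if env:// vars are absent
    set_seed(42)                  # the seed is offset by the process rank

    model = nn.Linear(8, 2)       # stand-in model
    loader = DataLoader(TensorDataset(torch.rand(32, 8)), batch_size=4)
    if is_dist:
        model = warp_model(model.cuda(), sync_bn=True)   # DDP wrap + SyncBatchNorm
        loader = warp_loader(loader, shuffle=True)       # rebuild with DistributedSampler

if __name__ == '__main__':
    main()
```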
rtdetr_pytorch/src/misc/logger.py — new file, mode 100644 (+239 −0)

```python
"""
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
https://github.com/facebookresearch/detr/blob/main/util/misc.py
Mostly copy-paste from torchvision references.
"""

import time
import pickle
import datetime
from collections import defaultdict, deque
from typing import Dict

import torch
import torch.distributed as tdist

from .dist import is_dist_available_and_initialized, get_world_size


class SmoothedValue(object):
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.
    """

    def __init__(self, window_size=20, fmt=None):
        if fmt is None:
            fmt = "{median:.4f} ({global_avg:.4f})"
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        self.fmt = fmt

    def update(self, value, n=1):
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Warning: does not synchronize the deque!
        """
        if not is_dist_available_and_initialized():
            return
        t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        tdist.barrier()
        tdist.all_reduce(t)
        t = t.tolist()
        self.count = int(t[0])
        self.total = t[1]

    @property
    def median(self):
        d = torch.tensor(list(self.deque))
        return d.median().item()

    @property
    def avg(self):
        d = torch.tensor(list(self.deque), dtype=torch.float32)
        return d.mean().item()

    @property
    def global_avg(self):
        return self.total / self.count

    @property
    def max(self):
        return max(self.deque)

    @property
    def value(self):
        return self.deque[-1]

    def __str__(self):
        return self.fmt.format(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value)


def all_gather(data):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors)
    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank
    """
    world_size = get_world_size()
    if world_size == 1:
        return [data]

    # serialized to a Tensor
    buffer = pickle.dumps(data)
    storage = torch.ByteStorage.from_buffer(buffer)
    tensor = torch.ByteTensor(storage).to("cuda")

    # obtain Tensor size of each rank
    local_size = torch.tensor([tensor.numel()], device="cuda")
    size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
    tdist.all_gather(size_list, local_size)
    size_list = [int(size.item()) for size in size_list]
    max_size = max(size_list)

    # receiving Tensor from all ranks
    # we pad the tensor because torch all_gather does not support
    # gathering tensors of different shapes
    tensor_list = []
    for _ in size_list:
        tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
    if local_size != max_size:
        padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
        tensor = torch.cat((tensor, padding), dim=0)
    tdist.all_gather(tensor_list, tensor)

    data_list = []
    for size, tensor in zip(size_list, tensor_list):
        buffer = tensor.cpu().numpy().tobytes()[:size]
        data_list.append(pickle.loads(buffer))

    return data_list


def reduce_dict(input_dict, average=True) -> Dict[str, torch.Tensor]:
    """
    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Reduce the values in the dictionary from all processes so that all processes
    have the averaged results. Returns a dict with the same fields as
    input_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        return input_dict
    with torch.no_grad():
        names = []
        values = []
        # sort the keys so that they are consistent across processes
        for k in sorted(input_dict.keys()):
            names.append(k)
            values.append(input_dict[k])
        values = torch.stack(values, dim=0)
        tdist.all_reduce(values)
        if average:
            values /= world_size
        reduced_dict = {k: v for k, v in zip(names, values)}
    return reduced_dict


class MetricLogger(object):
    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        if attr in self.meters:
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "{}: {}".format(name, str(meter))
            )
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        i = 0
        if not header:
            header = ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
        if torch.cuda.is_available():
            log_msg = self.delimiter.join([
                header,
                '[{0' + space_fmt + '}/{1}]',
                'eta: {eta}',
                '{meters}',
                'time: {time}',
                'data: {data}',
                'max mem: {memory:.0f}'
            ])
        else:
            log_msg = self.delimiter.join([
                header,
                '[{0' + space_fmt + '}/{1}]',
                'eta: {eta}',
                '{meters}',
                'time: {time}',
                'data: {data}'
            ])
        MB = 1024.0 * 1024.0
        for obj in iterable:
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == len(iterable) - 1:
                eta_seconds = iter_time.global_avg * (len(iterable) - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                if torch.cuda.is_available():
                    print(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time),
                        memory=torch.cuda.max_memory_allocated() / MB))
                else:
                    print(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time)))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {} ({:.4f} s / it)'.format(
            header, total_time_str, total_time / len(iterable)))
```
rtdetr_pytorch/src/misc/visualizer.py — new file, mode 100644 (+34 −0)

```python
""""by lyuwenyu
"""

import torch
import torch.utils.data

import torchvision
torchvision.disable_beta_transforms_warning()

import PIL

__all__ = ['show_sample']


def show_sample(sample):
    """for coco dataset/dataloader
    """
    import matplotlib.pyplot as plt
    from torchvision.transforms.v2 import functional as F
    from torchvision.utils import draw_bounding_boxes

    image, target = sample
    if isinstance(image, PIL.Image.Image):
        image = F.to_image_tensor(image)

    image = F.convert_dtype(image, torch.uint8)
    annotated_image = draw_bounding_boxes(image, target["boxes"], colors="yellow", width=3)

    fig, ax = plt.subplots()
    ax.imshow(annotated_image.permute(1, 2, 0).numpy())
    ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])
    fig.tight_layout()

    fig.show()
    plt.show()
```
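A quick sketch of calling it with a synthetic sample. Note this assumes a torchvision build that still exposes the beta v2 names used above (`to_image_tensor`, `convert_dtype`); boxes must be absolute xyxy pixel coordinates for `draw_bounding_boxes`:

```python
import torch
from PIL import Image

image = Image.new('RGB', (320, 240), 'gray')
target = {'boxes': torch.tensor([[40.0, 40.0, 160.0, 120.0]])}
show_sample((image, target))  # opens a matplotlib window with the yellow box drawn
```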
rtdetr_pytorch/src/nn/__init__.py — new file, mode 100644 (+7 −0)

```python
from .arch import *
from .criterion import *

#
from .backbone import *
```
rtdetr_pytorch/src/nn/arch/__init__.py — new file, mode 100644 (+1 −0)

```python
from .classification import *
```
rtdetr_pytorch/src/nn/arch/classification.py — new file, mode 100644 (+41 −0)

```python
import torch
import torch.nn as nn

from src.core import register


__all__ = ['Classification', 'ClassHead']


@register
class Classification(nn.Module):
    __inject__ = ['backbone', 'head']

    def __init__(self, backbone: nn.Module, head: nn.Module = None):
        super().__init__()

        self.backbone = backbone
        self.head = head

    def forward(self, x):
        x = self.backbone(x)

        if self.head is not None:
            x = self.head(x)

        return x


@register
class ClassHead(nn.Module):
    def __init__(self, hidden_dim, num_classes):
        super().__init__()
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.proj = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        x = x[0] if isinstance(x, (list, tuple)) else x
        x = self.pool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.proj(x)
        return x
```
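Both modules are registered for config injection, but they also compose directly. A sketch with a trivial stand-in backbone (not part of this commit):

```python
import torch
import torch.nn as nn

backbone = nn.Sequential(nn.Conv2d(3, 64, 3, 2, 1), nn.ReLU())  # placeholder backbone
model = Classification(backbone=backbone, head=ClassHead(hidden_dim=64, num_classes=10))

x = torch.rand(2, 3, 32, 32)
print(model(x).shape)  # torch.Size([2, 10]); head pools, flattens, then projects
```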
rtdetr_pytorch/src/nn/backbone/__init__.py — new file, mode 100644 (+6 −0)

```python
from .presnet import *
from .test_resnet import *

from .common import *
```
(no newline at end of file)
rtdetr_pytorch/src/nn/backbone/common.py — new file, mode 100644 (+102 −0)

```python
'''by lyuwenyu
'''

import torch
import torch.nn as nn


class ConvNormLayer(nn.Module):
    def __init__(self, ch_in, ch_out, kernel_size, stride, padding=None, bias=False, act=None):
        super().__init__()
        self.conv = nn.Conv2d(
            ch_in,
            ch_out,
            kernel_size,
            stride,
            padding=(kernel_size - 1) // 2 if padding is None else padding,
            bias=bias)
        self.norm = nn.BatchNorm2d(ch_out)
        self.act = nn.Identity() if act is None else get_activation(act)

    def forward(self, x):
        return self.act(self.norm(self.conv(x)))


class FrozenBatchNorm2d(nn.Module):
    """copy and modified from https://github.com/facebookresearch/detr/blob/master/models/backbone.py
    BatchNorm2d where the batch statistics and the affine parameters are fixed.
    Copy-paste from torchvision.misc.ops with added eps before rqsrt,
    without which any other models than torchvision.models.resnet[18,34,50,101]
    produce nans.
    """
    def __init__(self, num_features, eps=1e-5):
        super(FrozenBatchNorm2d, self).__init__()
        n = num_features
        self.register_buffer("weight", torch.ones(n))
        self.register_buffer("bias", torch.zeros(n))
        self.register_buffer("running_mean", torch.zeros(n))
        self.register_buffer("running_var", torch.ones(n))
        self.eps = eps
        self.num_features = n

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        num_batches_tracked_key = prefix + 'num_batches_tracked'
        if num_batches_tracked_key in state_dict:
            del state_dict[num_batches_tracked_key]

        super(FrozenBatchNorm2d, self)._load_from_state_dict(
            state_dict, prefix, local_metadata, strict,
            missing_keys, unexpected_keys, error_msgs)

    def forward(self, x):
        # move reshapes to the beginning
        # to make it fuser-friendly
        w = self.weight.reshape(1, -1, 1, 1)
        b = self.bias.reshape(1, -1, 1, 1)
        rv = self.running_var.reshape(1, -1, 1, 1)
        rm = self.running_mean.reshape(1, -1, 1, 1)
        scale = w * (rv + self.eps).rsqrt()
        bias = b - rm * scale
        return x * scale + bias

    def extra_repr(self):
        return (
            "{num_features}, eps={eps}".format(**self.__dict__)
        )


def get_activation(act: str, inpace: bool = True):
    '''get activation
    '''
    act = act.lower()

    if act == 'silu':
        m = nn.SiLU()
    elif act == 'relu':
        m = nn.ReLU()
    elif act == 'leaky_relu':
        m = nn.LeakyReLU()
    elif act == 'silu':
        m = nn.SiLU()
    elif act == 'gelu':
        m = nn.GELU()
    elif act is None:
        m = nn.Identity()
    elif isinstance(act, nn.Module):
        m = act
    else:
        raise RuntimeError('')

    if hasattr(m, 'inplace'):
        m.inplace = inpace

    return m
```
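`ConvNormLayer` defaults to `(kernel_size - 1) // 2` padding, the 'same'-style choice for odd kernels, and `get_activation` resolves a lowercase string name. A quick sketch:

```python
import torch

layer = ConvNormLayer(ch_in=3, ch_out=16, kernel_size=3, stride=2, act='silu')
x = torch.rand(1, 3, 224, 224)
print(layer(x).shape)  # torch.Size([1, 16, 112, 112]); the default padding halves the map exactly
```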
rtdetr_pytorch/src/nn/backbone/presnet.py — new file, mode 100644 (+225 −0)

```python
'''by lyuwenyu
'''

import torch
import torch.nn as nn
import torch.nn.functional as F

from collections import OrderedDict

from .common import get_activation, ConvNormLayer, FrozenBatchNorm2d
from src.core import register


__all__ = ['PResNet']


ResNet_cfg = {
    18: [2, 2, 2, 2],
    34: [3, 4, 6, 3],
    50: [3, 4, 6, 3],
    101: [3, 4, 23, 3],
    # 152: [3, 8, 36, 3],
}


donwload_url = {
    18: 'https://github.com/lyuwenyu/storage/releases/download/v0.1/ResNet18_vd_pretrained_from_paddle.pth',
    34: 'https://github.com/lyuwenyu/storage/releases/download/v0.1/ResNet34_vd_pretrained_from_paddle.pth',
    50: 'https://github.com/lyuwenyu/storage/releases/download/v0.1/ResNet50_vd_ssld_v2_pretrained_from_paddle.pth',
    101: 'https://github.com/lyuwenyu/storage/releases/download/v0.1/ResNet101_vd_ssld_pretrained_from_paddle.pth',
}


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, ch_in, ch_out, stride, shortcut, act='relu', variant='b'):
        super().__init__()

        self.shortcut = shortcut

        if not shortcut:
            if variant == 'd' and stride == 2:
                self.short = nn.Sequential(OrderedDict([
                    ('pool', nn.AvgPool2d(2, 2, 0, ceil_mode=True)),
                    ('conv', ConvNormLayer(ch_in, ch_out, 1, 1))
                ]))
            else:
                self.short = ConvNormLayer(ch_in, ch_out, 1, stride)

        self.branch2a = ConvNormLayer(ch_in, ch_out, 3, stride, act=act)
        self.branch2b = ConvNormLayer(ch_out, ch_out, 3, 1, act=None)
        self.act = nn.Identity() if act is None else get_activation(act)

    def forward(self, x):
        out = self.branch2a(x)
        out = self.branch2b(out)
        if self.shortcut:
            short = x
        else:
            short = self.short(x)

        out = out + short
        out = self.act(out)

        return out


class BottleNeck(nn.Module):
    expansion = 4

    def __init__(self, ch_in, ch_out, stride, shortcut, act='relu', variant='b'):
        super().__init__()

        if variant == 'a':
            stride1, stride2 = stride, 1
        else:
            stride1, stride2 = 1, stride

        width = ch_out

        self.branch2a = ConvNormLayer(ch_in, width, 1, stride1, act=act)
        self.branch2b = ConvNormLayer(width, width, 3, stride2, act=act)
        self.branch2c = ConvNormLayer(width, ch_out * self.expansion, 1, 1)

        self.shortcut = shortcut
        if not shortcut:
            if variant == 'd' and stride == 2:
                self.short = nn.Sequential(OrderedDict([
                    ('pool', nn.AvgPool2d(2, 2, 0, ceil_mode=True)),
                    ('conv', ConvNormLayer(ch_in, ch_out * self.expansion, 1, 1))
                ]))
            else:
                self.short = ConvNormLayer(ch_in, ch_out * self.expansion, 1, stride)

        self.act = nn.Identity() if act is None else get_activation(act)

    def forward(self, x):
        out = self.branch2a(x)
        out = self.branch2b(out)
        out = self.branch2c(out)

        if self.shortcut:
            short = x
        else:
            short = self.short(x)

        out = out + short
        out = self.act(out)

        return out


class Blocks(nn.Module):
    def __init__(self, block, ch_in, ch_out, count, stage_num, act='relu', variant='b'):
        super().__init__()

        self.blocks = nn.ModuleList()
        for i in range(count):
            self.blocks.append(
                block(
                    ch_in,
                    ch_out,
                    stride=2 if i == 0 and stage_num != 2 else 1,
                    shortcut=False if i == 0 else True,
                    variant=variant,
                    act=act)
            )

            if i == 0:
                ch_in = ch_out * block.expansion

    def forward(self, x):
        out = x
        for block in self.blocks:
            out = block(out)
        return out


@register
class PResNet(nn.Module):
    def __init__(self,
                 depth,
                 variant='d',
                 num_stages=4,
                 return_idx=[0, 1, 2, 3],
                 act='relu',
                 freeze_at=-1,
                 freeze_norm=True,
                 pretrained=False):
        super().__init__()

        block_nums = ResNet_cfg[depth]
        ch_in = 64
        if variant in ['c', 'd']:
            conv_def = [
                [3, ch_in // 2, 3, 2, "conv1_1"],
                [ch_in // 2, ch_in // 2, 3, 1, "conv1_2"],
                [ch_in // 2, ch_in, 3, 1, "conv1_3"],
            ]
        else:
            conv_def = [[3, ch_in, 7, 2, "conv1_1"]]

        self.conv1 = nn.Sequential(OrderedDict([
            (_name, ConvNormLayer(c_in, c_out, k, s, act=act))
            for c_in, c_out, k, s, _name in conv_def
        ]))

        ch_out_list = [64, 128, 256, 512]
        block = BottleNeck if depth >= 50 else BasicBlock

        _out_channels = [block.expansion * v for v in ch_out_list]
        _out_strides = [4, 8, 16, 32]

        self.res_layers = nn.ModuleList()
        for i in range(num_stages):
            stage_num = i + 2
            self.res_layers.append(
                Blocks(block, ch_in, ch_out_list[i], block_nums[i], stage_num, act=act, variant=variant)
            )
            ch_in = _out_channels[i]

        self.return_idx = return_idx
        self.out_channels = [_out_channels[_i] for _i in return_idx]
        self.out_strides = [_out_strides[_i] for _i in return_idx]

        if freeze_at >= 0:
            self._freeze_parameters(self.conv1)
            for i in range(min(freeze_at, num_stages)):
                self._freeze_parameters(self.res_layers[i])

        if freeze_norm:
            self._freeze_norm(self)

        if pretrained:
            state = torch.hub.load_state_dict_from_url(donwload_url[depth])
            self.load_state_dict(state)
            print(f'Load PResNet{depth} state_dict')

    def _freeze_parameters(self, m: nn.Module):
        for p in m.parameters():
            p.requires_grad = False

    def _freeze_norm(self, m: nn.Module):
        if isinstance(m, nn.BatchNorm2d):
            m = FrozenBatchNorm2d(m.num_features)
        else:
            for name, child in m.named_children():
                _child = self._freeze_norm(child)
                if _child is not child:
                    setattr(m, name, _child)
        return m

    def forward(self, x):
        conv1 = self.conv1(x)
        x = F.max_pool2d(conv1, kernel_size=3, stride=2, padding=1)
        outs = []
        for idx, stage in enumerate(self.res_layers):
            x = stage(x)
            if idx in self.return_idx:
                outs.append(x)
        return outs
```
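`PResNet` returns a list of feature maps selected by `return_idx`, at strides 4/8/16/32 relative to the input. A sketch (with `pretrained=False` so nothing is downloaded):

```python
import torch

backbone = PResNet(depth=50, variant='d', return_idx=[1, 2, 3], pretrained=False)
feats = backbone(torch.rand(1, 3, 640, 640))
print([f.shape for f in feats])
# [torch.Size([1, 512, 80, 80]), torch.Size([1, 1024, 40, 40]), torch.Size([1, 2048, 20, 20])]
print(backbone.out_channels, backbone.out_strides)  # [512, 1024, 2048] [8, 16, 32]
```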
rtdetr_pytorch/src/nn/backbone/test_resnet.py — new file, mode 100644 (+81 −0)

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

from collections import OrderedDict

from src.core import register


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class _ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


@register
class MResNet(nn.Module):
    def __init__(self, num_classes=10, num_blocks=[2, 2, 2, 2]) -> None:
        super().__init__()
        self.model = _ResNet(BasicBlock, num_blocks, num_classes)

    def forward(self, x):
        return self.model(x)
```
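This is the classic CIFAR-style ResNet layout (3x3 stem, no max-pool, 4x4 average pool at the end), so it expects 32x32 inputs:

```python
import torch

model = MResNet(num_classes=10, num_blocks=[2, 2, 2, 2])  # ResNet-18 block counts
print(model(torch.rand(4, 3, 32, 32)).shape)  # torch.Size([4, 10])
```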
rtdetr_pytorch/src/nn/backbone/utils.py — new file, mode 100644 (+58 −0)

```python
"""
https://github.com/pytorch/vision/blob/main/torchvision/models/_utils.py

by lyuwenyu
"""

from collections import OrderedDict
from typing import Dict, List

import torch.nn as nn


class IntermediateLayerGetter(nn.ModuleDict):
    """
    Module wrapper that returns intermediate layers from a model
    It has a strong assumption that the modules have been registered
    into the model in the same order as they are used.
    This means that one should **not** reuse the same nn.Module
    twice in the forward if you want this to work.
    Additionally, it is only able to query submodules that are directly
    assigned to the model. So if `model` is passed, `model.feature1` can
    be returned, but not `model.feature1.layer2`.
    """

    _version = 3

    def __init__(self, model: nn.Module, return_layers: List[str]) -> None:
        if not set(return_layers).issubset([name for name, _ in model.named_children()]):
            raise ValueError("return_layers are not present in model. {}"
                             .format([name for name, _ in model.named_children()]))
        orig_return_layers = return_layers
        return_layers = {str(k): str(k) for k in return_layers}
        layers = OrderedDict()
        for name, module in model.named_children():
            layers[name] = module
            if name in return_layers:
                del return_layers[name]
            if not return_layers:
                break

        super().__init__(layers)
        self.return_layers = orig_return_layers

    def forward(self, x):
        # out = OrderedDict()
        outputs = []
        for name, module in self.items():
            x = module(x)
            if name in self.return_layers:
                # out_name = self.return_layers[name]
                # out[out_name] = x
                outputs.append(x)

        return outputs
```
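A sketch with a stock torchvision ResNet, pulling two intermediate stages; modules after the last requested layer are dropped entirely:

```python
import torch
from torchvision.models import resnet18

body = IntermediateLayerGetter(resnet18(), return_layers=['layer2', 'layer3'])
feats = body(torch.rand(1, 3, 224, 224))
print([f.shape for f in feats])
# [torch.Size([1, 128, 28, 28]), torch.Size([1, 256, 14, 14])]
```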
rtdetr_pytorch/src/nn/criterion/__init__.py — new file, mode 100644 (+6 −0)

```python
import torch.nn as nn

from src.core import register


CrossEntropyLoss = register(nn.CrossEntropyLoss)
```
rtdetr_pytorch/src/nn/criterion/utils.py — new file, mode 100644 (+20 −0)

```python
import torch
import torchvision


def format_target(targets):
    '''
    Args:
        targets (List[Dict]),
    Return:
        tensor (Tensor), [im_id, label, bbox,]
    '''
    outputs = []
    for i, tgt in enumerate(targets):
        boxes = torchvision.ops.box_convert(tgt['boxes'], in_fmt='xyxy', out_fmt='cxcywh')
        labels = tgt['labels'].reshape(-1, 1)
        im_ids = torch.ones_like(labels) * i
        outputs.append(torch.cat([im_ids, labels, boxes], dim=1))

    return torch.cat(outputs, dim=0)
```
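`format_target` flattens per-image target dicts into one `[im_id, label, cx, cy, w, h]` tensor, YOLO-style. A sketch; note the labels are kept float here so the `torch.cat` with the float boxes succeeds:

```python
import torch

targets = [
    {'boxes': torch.tensor([[0.0, 0.0, 10.0, 10.0]]), 'labels': torch.tensor([3.0])},
    {'boxes': torch.tensor([[5.0, 5.0, 15.0, 25.0]]), 'labels': torch.tensor([7.0])},
]
out = format_target(targets)
print(out)
# tensor([[ 0.,  3.,  5.,  5., 10., 10.],
#         [ 1.,  7., 10., 15., 10., 20.]])
```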
rtdetr_pytorch/src/optim/__init__.py — new file, mode 100644 (+5 −0)

```python
from .ema import *
from .optim import *
from .amp import *
```
(no newline at end of file)