ModelZoo / OOTDiffusion_pytorch · Commits

Commit c50c08d9, authored May 22, 2024 by mashun1
ootd
parent fb08b1e6

Showing 20 changed files with 2400 additions and 6 deletions (+2400, -6)
.gitignore (+3, -1)
ootd/inference_ootd_hd.py (+2, -1)
preprocess/humanparsing/datasets/__init__.py (+0, -0)
preprocess/humanparsing/datasets/datasets.py (+201, -0)
preprocess/humanparsing/datasets/simple_extractor_dataset.py (+89, -0)
preprocess/humanparsing/datasets/target_generation.py (+40, -0)
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/README.md (+9, -0)
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/__init__.py (+9, -0)
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/builtin.py (+220, -0)
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/builtin_meta.py (+267, -0)
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/cityscapes.py (+329, -0)
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/coco.py (+466, -0)
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/lvis.py (+209, -0)
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/lvis_v0_5_categories.py (+13, -0)
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/pascal_voc.py (+80, -0)
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/register_coco.py (+129, -0)
preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/__init__.py (+5, -0)
preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/builtin.py (+10, -0)
preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/coco.py (+314, -0)
train/main.py (+5, -4)
.gitignore

@@ -7,4 +7,6 @@ __pycache__/
 checkpoints/
 *logs*
 train.txt
-datasets
\ No newline at end of file
+VITON*
+eval_output
+eval_ootd.py
\ No newline at end of file
ootd/inference_ootd_hd.py

@@ -30,7 +30,8 @@ sys.path.append(str(OOTD_ROOT))
 # VIT_PATH = "../checkpoints/clip-vit-large-patch14"
 VIT_PATH = os.path.join(OOTD_ROOT, "checkpoints/clip-vit-large-patch14")
 VAE_PATH = "../checkpoints/ootd"
-UNET_PATH = "../checkpoints/ootd/ootd_hd/checkpoint-36000"
+# UNET_PATH = "../checkpoints/ootd/ootd_hd/checkpoint-36000"
+UNET_PATH = "../train/checkpoints"
 MODEL_PATH = "../checkpoints/ootd"

 class OOTDiffusionHD:
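
Worth noting about this hunk: VIT_PATH was already made location-independent via os.path.join(OOTD_ROOT, ...), while the repointed UNET_PATH (and VAE_PATH/MODEL_PATH) stay relative to the working directory, so they only resolve when the script runs from inside ootd/. A minimal sketch of anchoring them the same way, purely a suggestion and not part of the commit (it assumes OOTD_ROOT is the repo root, as the VIT_PATH comment implies):

    # Hypothetical follow-up, not in this commit: resolve checkpoint paths
    # against OOTD_ROOT so inference works from any working directory.
    VAE_PATH = os.path.join(OOTD_ROOT, "checkpoints/ootd")
    UNET_PATH = os.path.join(OOTD_ROOT, "train/checkpoints")
    MODEL_PATH = os.path.join(OOTD_ROOT, "checkpoints/ootd")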
preprocess/humanparsing/datasets/__init__.py
0 → 100644 (new empty file)
preprocess/humanparsing/datasets/datasets.py
0 → 100644

#!/usr/bin/env python
# -*- encoding: utf-8 -*-

"""
@Author  : Peike Li
@Contact : peike.li@yahoo.com
@File    : datasets.py
@Time    : 8/4/19 3:35 PM
@Desc    :
@License : This source code is licensed under the license found in the
           LICENSE file in the root directory of this source tree.
"""

import os
import numpy as np
import random
import torch
import cv2
from torch.utils import data
from utils.transforms import get_affine_transform


class LIPDataSet(data.Dataset):
    def __init__(self, root, dataset, crop_size=[473, 473], scale_factor=0.25,
                 rotation_factor=30, ignore_label=255, transform=None):
        self.root = root
        self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0]
        self.crop_size = np.asarray(crop_size)
        self.ignore_label = ignore_label
        self.scale_factor = scale_factor
        self.rotation_factor = rotation_factor
        self.flip_prob = 0.5
        self.transform = transform
        self.dataset = dataset

        list_path = os.path.join(self.root, self.dataset + '_id.txt')
        train_list = [i_id.strip() for i_id in open(list_path)]

        self.train_list = train_list
        self.number_samples = len(self.train_list)

    def __len__(self):
        return self.number_samples

    def _box2cs(self, box):
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array([w * 1.0, h * 1.0], dtype=np.float32)
        return center, scale

    def __getitem__(self, index):
        train_item = self.train_list[index]

        im_path = os.path.join(self.root, self.dataset + '_images', train_item + '.jpg')
        parsing_anno_path = os.path.join(self.root, self.dataset + '_segmentations', train_item + '.png')

        im = cv2.imread(im_path, cv2.IMREAD_COLOR)
        h, w, _ = im.shape
        parsing_anno = np.zeros((h, w), dtype=np.long)

        # Get person center and scale
        person_center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0

        if self.dataset != 'test':
            # Get pose annotation
            parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE)
            if self.dataset == 'train' or self.dataset == 'trainval':
                sf = self.scale_factor
                rf = self.rotation_factor
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
                r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) if random.random() <= 0.6 else 0

                if random.random() <= self.flip_prob:
                    im = im[:, ::-1, :]
                    parsing_anno = parsing_anno[:, ::-1]
                    person_center[0] = im.shape[1] - person_center[0] - 1
                    right_idx = [15, 17, 19]
                    left_idx = [14, 16, 18]
                    for i in range(0, 3):
                        right_pos = np.where(parsing_anno == right_idx[i])
                        left_pos = np.where(parsing_anno == left_idx[i])
                        parsing_anno[right_pos[0], right_pos[1]] = left_idx[i]
                        parsing_anno[left_pos[0], left_pos[1]] = right_idx[i]

        trans = get_affine_transform(person_center, s, r, self.crop_size)
        input = cv2.warpAffine(
            im,
            trans,
            (int(self.crop_size[1]), int(self.crop_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))

        if self.transform:
            input = self.transform(input)

        meta = {
            'name': train_item,
            'center': person_center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }

        if self.dataset == 'val' or self.dataset == 'test':
            return input, meta
        else:
            label_parsing = cv2.warpAffine(
                parsing_anno,
                trans,
                (int(self.crop_size[1]), int(self.crop_size[0])),
                flags=cv2.INTER_NEAREST,
                borderMode=cv2.BORDER_CONSTANT,
                borderValue=(255))

            label_parsing = torch.from_numpy(label_parsing)

            return input, label_parsing, meta


class LIPDataValSet(data.Dataset):
    def __init__(self, root, dataset='val', crop_size=[473, 473], transform=None, flip=False):
        self.root = root
        self.crop_size = crop_size
        self.transform = transform
        self.flip = flip
        self.dataset = dataset
        self.root = root
        self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0]
        self.crop_size = np.asarray(crop_size)

        list_path = os.path.join(self.root, self.dataset + '_id.txt')
        val_list = [i_id.strip() for i_id in open(list_path)]

        self.val_list = val_list
        self.number_samples = len(self.val_list)

    def __len__(self):
        return len(self.val_list)

    def _box2cs(self, box):
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array([w * 1.0, h * 1.0], dtype=np.float32)
        return center, scale

    def __getitem__(self, index):
        val_item = self.val_list[index]
        # Load training image
        im_path = os.path.join(self.root, self.dataset + '_images', val_item + '.jpg')
        im = cv2.imread(im_path, cv2.IMREAD_COLOR)
        h, w, _ = im.shape
        # Get person center and scale
        person_center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0
        trans = get_affine_transform(person_center, s, r, self.crop_size)
        input = cv2.warpAffine(
            im,
            trans,
            (int(self.crop_size[1]), int(self.crop_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))
        input = self.transform(input)
        flip_input = input.flip(dims=[-1])
        if self.flip:
            batch_input_im = torch.stack([input, flip_input])
        else:
            batch_input_im = input

        meta = {
            'name': val_item,
            'center': person_center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }

        return batch_input_im, meta
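
For orientation, a minimal sketch of how LIPDataSet is typically driven; the transform, normalization constants, and data root below are assumptions, not part of this commit:

    import torchvision.transforms as transforms
    from torch.utils.data import DataLoader

    # Normalization constants are an assumption here; use whatever the
    # training pipeline in this repo actually expects.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.406, 0.456, 0.485], std=[0.225, 0.224, 0.229]),
    ])
    # Expects <root>/train_id.txt plus <root>/train_images/*.jpg
    # and <root>/train_segmentations/*.png
    train_set = LIPDataSet(root='data/LIP', dataset='train',
                           crop_size=[473, 473], transform=transform)
    loader = DataLoader(train_set, batch_size=16, shuffle=True, num_workers=4)
    images, labels, meta = next(iter(loader))
    # images: (16, 3, 473, 473); labels: (16, 473, 473); meta: dict of batched fields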
preprocess/humanparsing/datasets/simple_extractor_dataset.py
0 → 100644

#!/usr/bin/env python
# -*- encoding: utf-8 -*-

"""
@Author  : Peike Li
@Contact : peike.li@yahoo.com
@File    : dataset.py
@Time    : 8/30/19 9:12 PM
@Desc    : Dataset Definition
@License : This source code is licensed under the license found in the
           LICENSE file in the root directory of this source tree.
"""

import os
import pdb

import cv2
import numpy as np
from PIL import Image
from torch.utils import data
from utils.transforms import get_affine_transform


class SimpleFolderDataset(data.Dataset):
    def __init__(self, root, input_size=[512, 512], transform=None):
        self.root = root
        self.input_size = input_size
        self.transform = transform
        self.aspect_ratio = input_size[1] * 1.0 / input_size[0]
        self.input_size = np.asarray(input_size)
        self.is_pil_image = False
        if isinstance(root, Image.Image):
            self.file_list = [root]
            self.is_pil_image = True
        elif os.path.isfile(root):
            self.file_list = [os.path.basename(root)]
            self.root = os.path.dirname(root)
        else:
            self.file_list = os.listdir(self.root)

    def __len__(self):
        return len(self.file_list)

    def _box2cs(self, box):
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array([w, h], dtype=np.float32)
        return center, scale

    def __getitem__(self, index):
        if self.is_pil_image:
            img = np.asarray(self.file_list[index])[:, :, [2, 1, 0]]
        else:
            img_name = self.file_list[index]
            img_path = os.path.join(self.root, img_name)
            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        h, w, _ = img.shape

        # Get person center and scale
        person_center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0
        trans = get_affine_transform(person_center, s, r, self.input_size)
        input = cv2.warpAffine(
            img,
            trans,
            (int(self.input_size[1]), int(self.input_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))

        input = self.transform(input)
        meta = {
            'center': person_center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }

        return input, meta
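
The constructor's three branches make this usable on a directory, a single image path, or an in-memory PIL image (note the PIL branch flips RGB to BGR to match cv2.imread). A small sketch, with the file name and transform assumed rather than taken from this commit:

    from PIL import Image

    pil_img = Image.open('model_person.jpg').convert('RGB')  # hypothetical input
    ds = SimpleFolderDataset(root=pil_img, input_size=[512, 512], transform=transform)
    input_tensor, meta = ds[0]
    # meta['center'] and meta['scale'] let the caller invert the affine warp on
    # the predicted parsing map to recover original-resolution output.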
preprocess/humanparsing/datasets/target_generation.py
0 → 100644

import torch
from torch.nn import functional as F


def generate_edge_tensor(label, edge_width=3):
    label = label.type(torch.cuda.FloatTensor)
    if len(label.shape) == 2:
        label = label.unsqueeze(0)
    n, h, w = label.shape
    edge = torch.zeros(label.shape, dtype=torch.float).cuda()
    # right
    edge_right = edge[:, 1:h, :]
    edge_right[(label[:, 1:h, :] != label[:, :h - 1, :]) & (label[:, 1:h, :] != 255)
               & (label[:, :h - 1, :] != 255)] = 1

    # up
    edge_up = edge[:, :, :w - 1]
    edge_up[(label[:, :, :w - 1] != label[:, :, 1:w])
            & (label[:, :, :w - 1] != 255)
            & (label[:, :, 1:w] != 255)] = 1

    # upright
    edge_upright = edge[:, :h - 1, :w - 1]
    edge_upright[(label[:, :h - 1, :w - 1] != label[:, 1:h, 1:w])
                 & (label[:, :h - 1, :w - 1] != 255)
                 & (label[:, 1:h, 1:w] != 255)] = 1

    # bottomright
    edge_bottomright = edge[:, :h - 1, 1:w]
    edge_bottomright[(label[:, :h - 1, 1:w] != label[:, 1:h, :w - 1])
                     & (label[:, :h - 1, 1:w] != 255)
                     & (label[:, 1:h, :w - 1] != 255)] = 1

    kernel = torch.ones((1, 1, edge_width, edge_width), dtype=torch.float).cuda()
    with torch.no_grad():
        edge = edge.unsqueeze(1)
        edge = F.conv2d(edge, kernel, stride=1, padding=1)
    edge[edge != 0] = 1
    edge = edge.squeeze()
    return edge
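
A quick sketch of what the function produces; the shapes and label count are assumed, and note the implementation requires CUDA via torch.cuda.FloatTensor:

    import torch

    parsing = torch.randint(0, 20, (4, 473, 473)).cuda()  # parsing label maps, 255 = ignore
    edges = generate_edge_tensor(parsing, edge_width=3)
    # edges: (4, 473, 473), with 1 on class boundaries dilated by the
    # edge_width x edge_width box filter (255-labeled pixels are excluded).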
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/README.md
0 → 100644

### Common Datasets

The datasets implemented here do not need to load the data into the final format.
Each should provide the minimal data structure needed to use the dataset, so loading can be very efficient.
For example, for an image dataset, just provide the file names and labels, but don't read the images.
Let the downstream decide how to read.
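
In code, that convention looks roughly like the sketch below: the loader returns lightweight records and a downstream mapper reads pixels later. The function and field names here are illustrative, and parse_annotations is hypothetical:

    import os

    def load_my_image_dataset(image_root, ann_file):
        # Return minimal records; do NOT read image bytes here.
        return [
            {"file_name": os.path.join(image_root, name), "label": label}
            for name, label in parse_annotations(ann_file)  # hypothetical parser
        ]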
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/__init__.py
0 → 100644

# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .cityscapes import load_cityscapes_instances
from .coco import load_coco_json, load_sem_seg
from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta
from .register_coco import register_coco_instances, register_coco_panoptic_separated
from . import builtin  # ensure the builtin data are registered

__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")]
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/builtin.py
0 → 100644

# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

"""
This file registers pre-defined data at hard-coded paths, and their metadata.

We hard-code metadata for common data. This will enable:
1. Consistency check when loading the data
2. Use models on these standard data directly and run demos,
   without having to download the dataset annotations

We hard-code some paths to the dataset that's assumed to
exist in "./data/".

Users SHOULD NOT use this file to create new dataset / metadata for new dataset.
To add new dataset, refer to the tutorial "docs/DATASETS.md".
"""

import os

from detectron2.data import DatasetCatalog, MetadataCatalog

from .builtin_meta import _get_builtin_metadata
from .cityscapes import load_cityscapes_instances, load_cityscapes_semantic
from .lvis import get_lvis_instances_meta, register_lvis_instances
from .pascal_voc import register_pascal_voc
from .register_coco import register_coco_instances, register_coco_panoptic_separated

# ==== Predefined data and splits for COCO ==========

_PREDEFINED_SPLITS_COCO = {}
_PREDEFINED_SPLITS_COCO["coco"] = {
    "coco_2014_train": ("coco/train2014", "coco/annotations/instances_train2014.json"),
    "coco_2014_val": ("coco/val2014", "coco/annotations/instances_val2014.json"),
    "coco_2014_minival": ("coco/val2014", "coco/annotations/instances_minival2014.json"),
    "coco_2014_minival_100": ("coco/val2014", "coco/annotations/instances_minival2014_100.json"),
    "coco_2014_valminusminival": (
        "coco/val2014",
        "coco/annotations/instances_valminusminival2014.json",
    ),
    "coco_2017_train": ("coco/train2017", "coco/annotations/instances_train2017.json"),
    "coco_2017_val": ("coco/val2017", "coco/annotations/instances_val2017.json"),
    "coco_2017_test": ("coco/test2017", "coco/annotations/image_info_test2017.json"),
    "coco_2017_test-dev": ("coco/test2017", "coco/annotations/image_info_test-dev2017.json"),
    "coco_2017_val_100": ("coco/val2017", "coco/annotations/instances_val2017_100.json"),
}

_PREDEFINED_SPLITS_COCO["coco_person"] = {
    "keypoints_coco_2014_train": (
        "coco/train2014",
        "coco/annotations/person_keypoints_train2014.json",
    ),
    "keypoints_coco_2014_val": ("coco/val2014", "coco/annotations/person_keypoints_val2014.json"),
    "keypoints_coco_2014_minival": (
        "coco/val2014",
        "coco/annotations/person_keypoints_minival2014.json",
    ),
    "keypoints_coco_2014_valminusminival": (
        "coco/val2014",
        "coco/annotations/person_keypoints_valminusminival2014.json",
    ),
    "keypoints_coco_2014_minival_100": (
        "coco/val2014",
        "coco/annotations/person_keypoints_minival2014_100.json",
    ),
    "keypoints_coco_2017_train": (
        "coco/train2017",
        "coco/annotations/person_keypoints_train2017.json",
    ),
    "keypoints_coco_2017_val": ("coco/val2017", "coco/annotations/person_keypoints_val2017.json"),
    "keypoints_coco_2017_val_100": (
        "coco/val2017",
        "coco/annotations/person_keypoints_val2017_100.json",
    ),
}

_PREDEFINED_SPLITS_COCO_PANOPTIC = {
    "coco_2017_train_panoptic": (
        # This is the original panoptic annotation directory
        "coco/panoptic_train2017",
        "coco/annotations/panoptic_train2017.json",
        # This directory contains semantic annotations that are
        # converted from panoptic annotations.
        # It is used by PanopticFPN.
        # You can use the script at detectron2/data/prepare_panoptic_fpn.py
        # to create these directories.
        "coco/panoptic_stuff_train2017",
    ),
    "coco_2017_val_panoptic": (
        "coco/panoptic_val2017",
        "coco/annotations/panoptic_val2017.json",
        "coco/panoptic_stuff_val2017",
    ),
    "coco_2017_val_100_panoptic": (
        "coco/panoptic_val2017_100",
        "coco/annotations/panoptic_val2017_100.json",
        "coco/panoptic_stuff_val2017_100",
    ),
}


def register_all_coco(root):
    for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_COCO.items():
        for key, (image_root, json_file) in splits_per_dataset.items():
            # Assume pre-defined data live in `./data`.
            register_coco_instances(
                key,
                _get_builtin_metadata(dataset_name),
                os.path.join(root, json_file) if "://" not in json_file else json_file,
                os.path.join(root, image_root),
            )

    for (
        prefix,
        (panoptic_root, panoptic_json, semantic_root),
    ) in _PREDEFINED_SPLITS_COCO_PANOPTIC.items():
        prefix_instances = prefix[: -len("_panoptic")]
        instances_meta = MetadataCatalog.get(prefix_instances)
        image_root, instances_json = instances_meta.image_root, instances_meta.json_file
        register_coco_panoptic_separated(
            prefix,
            _get_builtin_metadata("coco_panoptic_separated"),
            image_root,
            os.path.join(root, panoptic_root),
            os.path.join(root, panoptic_json),
            os.path.join(root, semantic_root),
            instances_json,
        )


# ==== Predefined data and splits for LVIS ==========

_PREDEFINED_SPLITS_LVIS = {
    "lvis_v0.5": {
        "lvis_v0.5_train": ("coco/train2017", "lvis/lvis_v0.5_train.json"),
        "lvis_v0.5_val": ("coco/val2017", "lvis/lvis_v0.5_val.json"),
        "lvis_v0.5_val_rand_100": ("coco/val2017", "lvis/lvis_v0.5_val_rand_100.json"),
        "lvis_v0.5_test": ("coco/test2017", "lvis/lvis_v0.5_image_info_test.json"),
    },
    "lvis_v0.5_cocofied": {
        "lvis_v0.5_train_cocofied": ("coco/train2017", "lvis/lvis_v0.5_train_cocofied.json"),
        "lvis_v0.5_val_cocofied": ("coco/val2017", "lvis/lvis_v0.5_val_cocofied.json"),
    },
}


def register_all_lvis(root):
    for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_LVIS.items():
        for key, (image_root, json_file) in splits_per_dataset.items():
            # Assume pre-defined data live in `./data`.
            register_lvis_instances(
                key,
                get_lvis_instances_meta(dataset_name),
                os.path.join(root, json_file) if "://" not in json_file else json_file,
                os.path.join(root, image_root),
            )


# ==== Predefined splits for raw cityscapes images ===========

_RAW_CITYSCAPES_SPLITS = {
    "cityscapes_fine_{task}_train": ("cityscapes/leftImg8bit/train", "cityscapes/gtFine/train"),
    "cityscapes_fine_{task}_val": ("cityscapes/leftImg8bit/val", "cityscapes/gtFine/val"),
    "cityscapes_fine_{task}_test": ("cityscapes/leftImg8bit/test", "cityscapes/gtFine/test"),
}


def register_all_cityscapes(root):
    for key, (image_dir, gt_dir) in _RAW_CITYSCAPES_SPLITS.items():
        meta = _get_builtin_metadata("cityscapes")
        image_dir = os.path.join(root, image_dir)
        gt_dir = os.path.join(root, gt_dir)

        inst_key = key.format(task="instance_seg")
        DatasetCatalog.register(
            inst_key,
            lambda x=image_dir, y=gt_dir: load_cityscapes_instances(
                x, y, from_json=True, to_polygons=True
            ),
        )
        MetadataCatalog.get(inst_key).set(
            image_dir=image_dir, gt_dir=gt_dir, evaluator_type="cityscapes_instance", **meta
        )

        sem_key = key.format(task="sem_seg")
        DatasetCatalog.register(
            sem_key, lambda x=image_dir, y=gt_dir: load_cityscapes_semantic(x, y)
        )
        MetadataCatalog.get(sem_key).set(
            image_dir=image_dir, gt_dir=gt_dir, evaluator_type="cityscapes_sem_seg", **meta
        )


# ==== Predefined splits for PASCAL VOC ===========
def register_all_pascal_voc(root):
    SPLITS = [
        ("voc_2007_trainval", "VOC2007", "trainval"),
        ("voc_2007_train", "VOC2007", "train"),
        ("voc_2007_val", "VOC2007", "val"),
        ("voc_2007_test", "VOC2007", "test"),
        ("voc_2012_trainval", "VOC2012", "trainval"),
        ("voc_2012_train", "VOC2012", "train"),
        ("voc_2012_val", "VOC2012", "val"),
    ]
    for name, dirname, split in SPLITS:
        year = 2007 if "2007" in name else 2012
        register_pascal_voc(name, os.path.join(root, dirname), split, year)
        MetadataCatalog.get(name).evaluator_type = "pascal_voc"


# Register them all under "./data"
_root = os.getenv("DETECTRON2_DATASETS", "data")
register_all_coco(_root)
register_all_lvis(_root)
register_all_cityscapes(_root)
register_all_pascal_voc(_root)
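
Once this module has been imported, the registered names are ordinary catalog entries. A minimal sketch of consuming one, assuming the COCO annotations actually exist under $DETECTRON2_DATASETS:

    from detectron2.data import DatasetCatalog, MetadataCatalog

    dicts = DatasetCatalog.get("coco_2017_val")  # lazily invokes the registered loader
    meta = MetadataCatalog.get("coco_2017_val")  # thing_classes, id mappings, ...
    print(len(dicts), meta.thing_classes[:3])

Note also the `lambda x=image_dir, y=gt_dir: ...` pattern in register_all_cityscapes: binding the loop variables as default arguments freezes their current values, avoiding Python's late-binding closure pitfall where every lambda would otherwise see only the last split's paths.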
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/builtin_meta.py
0 → 100644

# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

# All coco categories, together with their nice-looking visualization colors
# It's from https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json
COCO_CATEGORIES = [
    {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"},
    {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"},
    {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"},
    {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"},
    {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"},
    {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"},
    {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"},
    {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"},
    {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"},
    {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"},
    {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"},
    {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"},
    {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"},
    {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"},
    {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"},
    {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"},
    {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"},
    {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"},
    {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"},
    {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"},
    {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"},
    {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"},
    {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"},
    {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"},
    {"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"},
    {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"},
    {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"},
    {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"},
    {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"},
    {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"},
    {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"},
    {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"},
    {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"},
    {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"},
    {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"},
    {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"},
    {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"},
    {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"},
    {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"},
    {"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"},
    {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"},
    {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"},
    {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"},
    {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"},
    {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"},
    {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"},
    {"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"},
    {"color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"},
    {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"},
    {"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"},
    {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"},
    {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"},
    {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"},
    {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"},
    {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"},
    {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"},
    {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"},
    {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"},
    {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"},
    {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"},
    {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"},
    {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"},
    {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"},
    {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"},
    {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"},
    {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"},
    {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"},
    {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"},
    {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"},
    {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"},
    {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"},
    {"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"},
    {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"},
    {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"},
    {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"},
    {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"},
    {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"},
    {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"},
    {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"},
    {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"},
    {"color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"},
    {"color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"},
    {"color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"},
    {"color": [168, 171, 172], "isthing": 0, "id": 100, "name": "cardboard"},
    {"color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"},
    {"color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"},
    {"color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"},
    {"color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"},
    {"color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"},
    {"color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"},
    {"color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"},
    {"color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"},
    {"color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"},
    {"color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"},
    {"color": [193, 0, 92], "isthing": 0, "id": 138, "name": "net"},
    {"color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"},
    {"color": [255, 180, 195], "isthing": 0, "id": 144, "name": "platform"},
    {"color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"},
    {"color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"},
    {"color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"},
    {"color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"},
    {"color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"},
    {"color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"},
    {"color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"},
    {"color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"},
    {"color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"},
    {"color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"},
    {"color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"},
    {"color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"},
    {"color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"},
    {"color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"},
    {"color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"},
    {"color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"},
    {"color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"},
    {"color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"},
    {"color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"},
    {"color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"},
    {"color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"},
    {"color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"},
    {"color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"},
    {"color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"},
    {"color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"},
    {"color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"},
    {"color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"},
    {"color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"},
    {"color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"},
    {"color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"},
    {"color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"},
    {"color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"},
    {"color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"},
    {"color": [0, 114, 143], "isthing": 0, "id": 198, "name": "rock-merged"},
    {"color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"},
    {"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"},
]

# fmt: off
COCO_PERSON_KEYPOINT_NAMES = (
    "nose",
    "left_eye", "right_eye",
    "left_ear", "right_ear",
    "left_shoulder", "right_shoulder",
    "left_elbow", "right_elbow",
    "left_wrist", "right_wrist",
    "left_hip", "right_hip",
    "left_knee", "right_knee",
    "left_ankle", "right_ankle",
)
# fmt: on

# Pairs of keypoints that should be exchanged under horizontal flipping
COCO_PERSON_KEYPOINT_FLIP_MAP = (
    ("left_eye", "right_eye"),
    ("left_ear", "right_ear"),
    ("left_shoulder", "right_shoulder"),
    ("left_elbow", "right_elbow"),
    ("left_wrist", "right_wrist"),
    ("left_hip", "right_hip"),
    ("left_knee", "right_knee"),
    ("left_ankle", "right_ankle"),
)

# rules for pairs of keypoints to draw a line between, and the line color to use.
KEYPOINT_CONNECTION_RULES = [
    # face
    ("left_ear", "left_eye", (102, 204, 255)),
    ("right_ear", "right_eye", (51, 153, 255)),
    ("left_eye", "nose", (102, 0, 204)),
    ("nose", "right_eye", (51, 102, 255)),
    # upper-body
    ("left_shoulder", "right_shoulder", (255, 128, 0)),
    ("left_shoulder", "left_elbow", (153, 255, 204)),
    ("right_shoulder", "right_elbow", (128, 229, 255)),
    ("left_elbow", "left_wrist", (153, 255, 153)),
    ("right_elbow", "right_wrist", (102, 255, 224)),
    # lower-body
    ("left_hip", "right_hip", (255, 102, 0)),
    ("left_hip", "left_knee", (255, 255, 77)),
    ("right_hip", "right_knee", (153, 255, 204)),
    ("left_knee", "left_ankle", (191, 255, 128)),
    ("right_knee", "right_ankle", (255, 195, 77)),
]


def _get_coco_instances_meta():
    thing_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 1]
    thing_colors = [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 1]
    assert len(thing_ids) == 80, len(thing_ids)
    # Mapping from the incontiguous COCO category id to an id in [0, 79]
    thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)}
    thing_classes = [k["name"] for k in COCO_CATEGORIES if k["isthing"] == 1]
    ret = {
        "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id,
        "thing_classes": thing_classes,
        "thing_colors": thing_colors,
    }
    return ret


def _get_coco_panoptic_separated_meta():
    """
    Returns metadata for "separated" version of the panoptic segmentation dataset.
    """
    stuff_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 0]
    assert len(stuff_ids) == 53, len(stuff_ids)

    # For semantic segmentation, this mapping maps from contiguous stuff id
    # (in [0, 53], used in models) to ids in the dataset (used for processing results)
    # The id 0 is mapped to an extra category "thing".
    stuff_dataset_id_to_contiguous_id = {k: i + 1 for i, k in enumerate(stuff_ids)}
    # When converting COCO panoptic annotations to semantic annotations
    # We label the "thing" category to 0
    stuff_dataset_id_to_contiguous_id[0] = 0

    # 54 names for COCO stuff categories (including "things")
    stuff_classes = ["things"] + [
        k["name"].replace("-other", "").replace("-merged", "")
        for k in COCO_CATEGORIES
        if k["isthing"] == 0
    ]

    # NOTE: I randomly picked a color for things
    stuff_colors = [[82, 18, 128]] + [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 0]
    ret = {
        "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id,
        "stuff_classes": stuff_classes,
        "stuff_colors": stuff_colors,
    }
    ret.update(_get_coco_instances_meta())
    return ret


def _get_builtin_metadata(dataset_name):
    if dataset_name == "coco":
        return _get_coco_instances_meta()
    if dataset_name == "coco_panoptic_separated":
        return _get_coco_panoptic_separated_meta()
    elif dataset_name == "coco_person":
        return {
            "thing_classes": ["person"],
            "keypoint_names": COCO_PERSON_KEYPOINT_NAMES,
            "keypoint_flip_map": COCO_PERSON_KEYPOINT_FLIP_MAP,
            "keypoint_connection_rules": KEYPOINT_CONNECTION_RULES,
        }
    elif dataset_name == "cityscapes":
        # fmt: off
        CITYSCAPES_THING_CLASSES = [
            "person", "rider", "car", "truck",
            "bus", "train", "motorcycle", "bicycle",
        ]
        CITYSCAPES_STUFF_CLASSES = [
            "road", "sidewalk", "building", "wall", "fence", "pole", "traffic light",
            "traffic sign", "vegetation", "terrain", "sky", "person", "rider", "car",
            "truck", "bus", "train", "motorcycle", "bicycle", "license plate",
        ]
        # fmt: on
        return {
            "thing_classes": CITYSCAPES_THING_CLASSES,
            "stuff_classes": CITYSCAPES_STUFF_CLASSES,
        }
    raise KeyError("No built-in metadata for dataset {}".format(dataset_name))
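
A small sketch of what the metadata lookup yields, just to make the contiguous-id mapping concrete:

    meta = _get_builtin_metadata("coco")
    # COCO's raw category ids skip values (12, 26, 29, ...), so raw id 1
    # ("person") maps to contiguous id 0 in the 80-way mapping.
    assert meta["thing_dataset_id_to_contiguous_id"][1] == 0
    assert meta["thing_classes"][0] == "person"
    assert len(meta["thing_classes"]) == 80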
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/cityscapes.py
0 → 100644

# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import functools
import json
import logging
import multiprocessing as mp
import numpy as np
import os
from itertools import chain
import pycocotools.mask as mask_util
from fvcore.common.file_io import PathManager
from PIL import Image

from detectron2.structures import BoxMode
from detectron2.utils.comm import get_world_size
from detectron2.utils.logger import setup_logger

try:
    import cv2  # noqa
except ImportError:
    # OpenCV is an optional dependency at the moment
    pass


logger = logging.getLogger(__name__)


def get_cityscapes_files(image_dir, gt_dir):
    files = []
    # scan through the directory
    cities = PathManager.ls(image_dir)
    logger.info(f"{len(cities)} cities found in '{image_dir}'.")
    for city in cities:
        city_img_dir = os.path.join(image_dir, city)
        city_gt_dir = os.path.join(gt_dir, city)
        for basename in PathManager.ls(city_img_dir):
            image_file = os.path.join(city_img_dir, basename)

            suffix = "leftImg8bit.png"
            assert basename.endswith(suffix)
            basename = basename[: -len(suffix)]

            instance_file = os.path.join(city_gt_dir, basename + "gtFine_instanceIds.png")
            label_file = os.path.join(city_gt_dir, basename + "gtFine_labelIds.png")
            json_file = os.path.join(city_gt_dir, basename + "gtFine_polygons.json")

            files.append((image_file, instance_file, label_file, json_file))
    assert len(files), "No images found in {}".format(image_dir)
    for f in files[0]:
        assert PathManager.isfile(f), f
    return files


def load_cityscapes_instances(image_dir, gt_dir, from_json=True, to_polygons=True):
    """
    Args:
        image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train".
        gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train".
        from_json (bool): whether to read annotations from the raw json file or the png files.
        to_polygons (bool): whether to represent the segmentation as polygons
            (COCO's format) instead of masks (cityscapes's format).

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets </tutorials/data.html>`_ )
    """
    if from_json:
        assert to_polygons, (
            "Cityscapes's json annotations are in polygon format. "
            "Converting to mask format is not supported now."
        )
    files = get_cityscapes_files(image_dir, gt_dir)

    logger.info("Preprocessing cityscapes annotations ...")
    # This is still not fast: all workers will execute duplicate works and will
    # take up to 10m on a 8GPU server.
    pool = mp.Pool(processes=max(mp.cpu_count() // get_world_size() // 2, 4))

    ret = pool.map(
        functools.partial(cityscapes_files_to_dict, from_json=from_json, to_polygons=to_polygons),
        files,
    )
    logger.info("Loaded {} images from {}".format(len(ret), image_dir))

    # Map cityscape ids to contiguous ids
    from cityscapesscripts.helpers.labels import labels

    labels = [l for l in labels if l.hasInstances and not l.ignoreInEval]
    dataset_id_to_contiguous_id = {l.id: idx for idx, l in enumerate(labels)}
    for dict_per_image in ret:
        for anno in dict_per_image["annotations"]:
            anno["category_id"] = dataset_id_to_contiguous_id[anno["category_id"]]
    return ret


def load_cityscapes_semantic(image_dir, gt_dir):
    """
    Args:
        image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train".
        gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train".

    Returns:
        list[dict]: a list of dict, each has "file_name" and
            "sem_seg_file_name".
    """
    ret = []
    # gt_dir is small and contain many small files. make sense to fetch to local first
    gt_dir = PathManager.get_local_path(gt_dir)
    for image_file, _, label_file, json_file in get_cityscapes_files(image_dir, gt_dir):
        label_file = label_file.replace("labelIds", "labelTrainIds")

        with PathManager.open(json_file, "r") as f:
            jsonobj = json.load(f)
        ret.append(
            {
                "file_name": image_file,
                "sem_seg_file_name": label_file,
                "height": jsonobj["imgHeight"],
                "width": jsonobj["imgWidth"],
            }
        )
    assert len(ret), f"No images found in {image_dir}!"
    assert PathManager.isfile(
        ret[0]["sem_seg_file_name"]
    ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py"  # noqa
    return ret


def cityscapes_files_to_dict(files, from_json, to_polygons):
    """
    Parse cityscapes annotation files to a instance segmentation dataset dict.

    Args:
        files (tuple): consists of (image_file, instance_id_file, label_id_file, json_file)
        from_json (bool): whether to read annotations from the raw json file or the png files.
        to_polygons (bool): whether to represent the segmentation as polygons
            (COCO's format) instead of masks (cityscapes's format).

    Returns:
        A dict in Detectron2 Dataset format.
    """
    from cityscapesscripts.helpers.labels import id2label, name2label

    image_file, instance_id_file, _, json_file = files

    annos = []

    if from_json:
        from shapely.geometry import MultiPolygon, Polygon

        with PathManager.open(json_file, "r") as f:
            jsonobj = json.load(f)
        ret = {
            "file_name": image_file,
            "image_id": os.path.basename(image_file),
            "height": jsonobj["imgHeight"],
            "width": jsonobj["imgWidth"],
        }

        # `polygons_union` contains the union of all valid polygons.
        polygons_union = Polygon()

        # CityscapesScripts draw the polygons in sequential order
        # and each polygon *overwrites* existing ones. See
        # (https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/json2instanceImg.py) # noqa
        # We use reverse order, and each polygon *avoids* early ones.
        # This will resolve the ploygon overlaps in the same way as CityscapesScripts.
        for obj in jsonobj["objects"][::-1]:
            if "deleted" in obj:  # cityscapes data format specific
                continue
            label_name = obj["label"]

            try:
                label = name2label[label_name]
            except KeyError:
                if label_name.endswith("group"):  # crowd area
                    label = name2label[label_name[: -len("group")]]
                else:
                    raise
            if label.id < 0:  # cityscapes data format
                continue

            # Cityscapes's raw annotations uses integer coordinates
            # Therefore +0.5 here
            poly_coord = np.asarray(obj["polygon"], dtype="f4") + 0.5
            # CityscapesScript uses PIL.ImageDraw.polygon to rasterize
            # polygons for evaluation. This function operates in integer space
            # and draws each pixel whose center falls into the polygon.
            # Therefore it draws a polygon which is 0.5 "fatter" in expectation.
            # We therefore dilate the input polygon by 0.5 as our input.
            poly = Polygon(poly_coord).buffer(0.5, resolution=4)

            if not label.hasInstances or label.ignoreInEval:
                # even if we won't store the polygon it still contributes to overlaps resolution
                polygons_union = polygons_union.union(poly)
                continue

            # Take non-overlapping part of the polygon
            poly_wo_overlaps = poly.difference(polygons_union)
            if poly_wo_overlaps.is_empty:
                continue
            polygons_union = polygons_union.union(poly)

            anno = {}
            anno["iscrowd"] = label_name.endswith("group")
            anno["category_id"] = label.id

            if isinstance(poly_wo_overlaps, Polygon):
                poly_list = [poly_wo_overlaps]
            elif isinstance(poly_wo_overlaps, MultiPolygon):
                poly_list = poly_wo_overlaps.geoms
            else:
                raise NotImplementedError("Unknown geometric structure {}".format(poly_wo_overlaps))

            poly_coord = []
            for poly_el in poly_list:
                # COCO API can work only with exterior boundaries now, hence we store only them.
                # TODO: store both exterior and interior boundaries once other parts of the
                # codebase support holes in polygons.
                poly_coord.append(list(chain(*poly_el.exterior.coords)))
            anno["segmentation"] = poly_coord
            (xmin, ymin, xmax, ymax) = poly_wo_overlaps.bounds

            anno["bbox"] = (xmin, ymin, xmax, ymax)
            anno["bbox_mode"] = BoxMode.XYXY_ABS

            annos.append(anno)
    else:
        # See also the official annotation parsing scripts at
        # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/instances2dict.py  # noqa
        with PathManager.open(instance_id_file, "rb") as f:
            inst_image = np.asarray(Image.open(f), order="F")
        # ids < 24 are stuff labels (filtering them first is about 5% faster)
        flattened_ids = np.unique(inst_image[inst_image >= 24])

        ret = {
            "file_name": image_file,
            "image_id": os.path.basename(image_file),
            "height": inst_image.shape[0],
            "width": inst_image.shape[1],
        }

        for instance_id in flattened_ids:
            # For non-crowd annotations, instance_id // 1000 is the label_id
            # Crowd annotations have <1000 instance ids
            label_id = instance_id // 1000 if instance_id >= 1000 else instance_id
            label = id2label[label_id]
            if not label.hasInstances or label.ignoreInEval:
                continue

            anno = {}
            anno["iscrowd"] = instance_id < 1000
            anno["category_id"] = label.id

            mask = np.asarray(inst_image == instance_id, dtype=np.uint8, order="F")

            inds = np.nonzero(mask)
            ymin, ymax = inds[0].min(), inds[0].max()
            xmin, xmax = inds[1].min(), inds[1].max()
            anno["bbox"] = (xmin, ymin, xmax, ymax)
            if xmax <= xmin or ymax <= ymin:
                continue
            anno["bbox_mode"] = BoxMode.XYXY_ABS
            if to_polygons:
                # This conversion comes from D4809743 and D5171122,
                # when Mask-RCNN was first developed.
                contours = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
                polygons = [c.reshape(-1).tolist() for c in contours if len(c) >= 3]
                # opencv's can produce invalid polygons
                if len(polygons) == 0:
                    continue
                anno["segmentation"] = polygons
            else:
                anno["segmentation"] = mask_util.encode(mask[:, :, None])[0]
            annos.append(anno)

    ret["annotations"] = annos
    return ret


if __name__ == "__main__":
    """
    Test the cityscapes dataset loader.

    Usage:
        python -m detectron2.data.data.cityscapes \
            cityscapes/leftImg8bit/train cityscapes/gtFine/train
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("image_dir")
    parser.add_argument("gt_dir")
    parser.add_argument("--type", choices=["instance", "semantic"], default="instance")
    args = parser.parse_args()
    from detectron2.data.catalog import Metadata
    from detectron2.utils.visualizer import Visualizer
    from cityscapesscripts.helpers.labels import labels

    logger = setup_logger(name=__name__)

    dirname = "cityscapes-data-vis"
    os.makedirs(dirname, exist_ok=True)

    if args.type == "instance":
        dicts = load_cityscapes_instances(
            args.image_dir, args.gt_dir, from_json=True, to_polygons=True
        )
        logger.info("Done loading {} samples.".format(len(dicts)))

        thing_classes = [k.name for k in labels if k.hasInstances and not k.ignoreInEval]
        meta = Metadata().set(thing_classes=thing_classes)

    else:
        dicts = load_cityscapes_semantic(args.image_dir, args.gt_dir)
        logger.info("Done loading {} samples.".format(len(dicts)))

        stuff_names = [k.name for k in labels if k.trainId != 255]
        stuff_colors = [k.color for k in labels if k.trainId != 255]
        meta = Metadata().set(stuff_names=stuff_names, stuff_colors=stuff_colors)

    for d in dicts:
        img = np.array(Image.open(PathManager.open(d["file_name"], "rb")))
        visualizer = Visualizer(img, metadata=meta)
        vis = visualizer.draw_dataset_dict(d)
        # cv2.imshow("a", vis.get_image()[:, :, ::-1])
        # cv2.waitKey()
        fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
        vis.save(fpath)
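
The png branch's instance-id convention (label_id * 1000 + instance index for non-crowd instances, a bare label_id for crowd regions) can be sanity-checked in isolation; the pixel value below is assumed:

    instance_id = 26004  # a pixel value read from a gtFine_instanceIds.png
    label_id = instance_id // 1000 if instance_id >= 1000 else instance_id
    iscrowd = instance_id < 1000
    print(label_id, iscrowd)  # 26 (cityscapes "car"), False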
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/coco.py
0 → 100644
View file @
c50c08d9
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import
contextlib
import
datetime
import
io
import
json
import
logging
import
numpy
as
np
import
os
import
pycocotools.mask
as
mask_util
from
fvcore.common.file_io
import
PathManager
,
file_lock
from
fvcore.common.timer
import
Timer
from
PIL
import
Image
from
detectron2.structures
import
Boxes
,
BoxMode
,
PolygonMasks
from
..
import
DatasetCatalog
,
MetadataCatalog
"""
This file contains functions to parse COCO-format annotations into dicts in "Detectron2 format".
"""
logger
=
logging
.
getLogger
(
__name__
)
__all__
=
[
"load_coco_json"
,
"load_sem_seg"
,
"convert_to_coco_json"
]
def
load_coco_json
(
json_file
,
image_root
,
dataset_name
=
None
,
extra_annotation_keys
=
None
):
"""
Load a json file with COCO's instances annotation format.
Currently supports instance detection, instance segmentation,
and person keypoints annotations.
Args:
json_file (str): full path to the json file in COCO instances annotation format.
image_root (str or path-like): the directory where the images in this json file exists.
dataset_name (str): the name of the dataset (e.g., coco_2017_train).
If provided, this function will also put "thing_classes" into
the metadata associated with this dataset.
extra_annotation_keys (list[str]): list of per-annotation keys that should also be
loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints",
"category_id", "segmentation"). The values for these keys will be returned as-is.
For example, the densepose annotations are loaded in this way.
Returns:
list[dict]: a list of dicts in Detectron2 standard dataset dicts format. (See
`Using Custom Datasets </tutorials/data.html>`_ )
Notes:
1. This function does not read the image files.
The results do not have the "image" field.
"""
from
pycocotools.coco
import
COCO
timer
=
Timer
()
json_file
=
PathManager
.
get_local_path
(
json_file
)
with
contextlib
.
redirect_stdout
(
io
.
StringIO
()):
coco_api
=
COCO
(
json_file
)
if
timer
.
seconds
()
>
1
:
logger
.
info
(
"Loading {} takes {:.2f} seconds."
.
format
(
json_file
,
timer
.
seconds
()))
id_map
=
None
if
dataset_name
is
not
None
:
meta
=
MetadataCatalog
.
get
(
dataset_name
)
cat_ids
=
sorted
(
coco_api
.
getCatIds
())
cats
=
coco_api
.
loadCats
(
cat_ids
)
# The categories in a custom json file may not be sorted.
thing_classes
=
[
c
[
"name"
]
for
c
in
sorted
(
cats
,
key
=
lambda
x
:
x
[
"id"
])]
meta
.
thing_classes
=
thing_classes
# In COCO, certain category ids are artificially removed,
# and by convention they are always ignored.
# We deal with COCO's id issue and translate
# the category ids to contiguous ids in [0, 80).
        # It works by looking at the "categories" field in the json, therefore
        # if users' own json also have incontiguous ids, we'll
        # apply this mapping as well but print a warning.
        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            if "coco" not in dataset_name:
                logger.warning(
                    """
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
"""
                )
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = coco_api.loadImgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'iscrowd': 0,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986},
    #  ...]
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]

    if "minival" not in json_file:
        # The popular valminusminival & minival annotations for COCO2014 contain this bug.
        # However the ratio of buggy annotations there is tiny and does not affect accuracy.
        # Therefore we explicitly white-list them.
        ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
        assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
            json_file
        )

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file))

    dataset_dicts = []

    ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or [])

    num_instances_without_valid_segmentation = 0

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.

            # The original COCO valminusminival2014 & minival2014 annotation files
            # actually contains bugs that, together with certain ways of using COCO API,
            # can trigger this assertion.
            assert anno["image_id"] == image_id

            assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.'

            obj = {key: anno[key] for key in ann_keys if key in anno}

            segm = anno.get("segmentation", None)
            if segm:  # either list[list[float]] or dict(RLE)
                if not isinstance(segm, dict):
                    # filter out invalid polygons (< 3 points)
                    segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue  # ignore this instance
                obj["segmentation"] = segm

            keypts = anno.get("keypoints", None)
            if keypts:  # list[int]
                for idx, v in enumerate(keypts):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # Therefore we assume the coordinates are "pixel indices" and
                        # add 0.5 to convert to floating point coordinates.
                        keypts[idx] = v + 0.5
                obj["keypoints"] = keypts

            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_segmentation
            )
        )
    return dataset_dicts
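For orientation, a minimal hedged sketch of calling this loader directly and inspecting one record; the json path, image root, and dataset name below are placeholders, not files shipped with this commit:

from detectron2.data.datasets.coco import load_coco_json

# Hypothetical paths -- substitute a real COCO-style annotation file and image dir.
dicts = load_coco_json("annotations/instances_val2017.json", "images/val2017")
record = dicts[0]
# Each record carries file_name/height/width/image_id, plus per-instance dicts
# under "annotations" with bbox (XYWH_ABS), category_id, and optional
# segmentation/keypoints fields.
print(record["file_name"], len(record["annotations"]))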
def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg"):
    """
    Load semantic segmentation data. All files under "gt_root" with "gt_ext" extension are
    treated as ground truth annotations and all files under "image_root" with "image_ext" extension
    as input images. Ground truth and input images are matched using file paths relative to
    "gt_root" and "image_root" respectively without taking into account file extensions.
    This works for COCO as well as some other data.

    Args:
        gt_root (str): full path to ground truth semantic segmentation files. Semantic segmentation
            annotations are stored as images with integer values in pixels that represent
            corresponding semantic labels.
        image_root (str): the directory where the input images are.
        gt_ext (str): file extension for ground truth annotations.
        image_ext (str): file extension for input images.

    Returns:
        list[dict]:
            a list of dicts in detectron2 standard format without instance-level
            annotation.

    Notes:
        1. This function does not read the image and ground truth files.
           The results do not have the "image" and "sem_seg" fields.
    """

    # We match input images with ground truth based on their relative filepaths (without file
    # extensions) starting from 'image_root' and 'gt_root' respectively.
    def file2id(folder_path, file_path):
        # extract relative path starting from `folder_path`
        image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path))
        # remove file extension
        image_id = os.path.splitext(image_id)[0]
        return image_id

    input_files = sorted(
        (os.path.join(image_root, f) for f in PathManager.ls(image_root) if f.endswith(image_ext)),
        key=lambda file_path: file2id(image_root, file_path),
    )
    gt_files = sorted(
        (os.path.join(gt_root, f) for f in PathManager.ls(gt_root) if f.endswith(gt_ext)),
        key=lambda file_path: file2id(gt_root, file_path),
    )

    assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root)

    # Use the intersection, so that val2017_100 annotations can run smoothly with val2017 images
    if len(input_files) != len(gt_files):
        logger.warn(
            "Directory {} and {} has {} and {} files, respectively.".format(
                image_root, gt_root, len(input_files), len(gt_files)
            )
        )
        input_basenames = [os.path.basename(f)[: -len(image_ext)] for f in input_files]
        gt_basenames = [os.path.basename(f)[: -len(gt_ext)] for f in gt_files]
        intersect = list(set(input_basenames) & set(gt_basenames))
        # sort, otherwise each worker may obtain a list[dict] in different order
        intersect = sorted(intersect)
        logger.warn("Will use their intersection of {} files.".format(len(intersect)))
        input_files = [os.path.join(image_root, f + image_ext) for f in intersect]
        gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect]

    logger.info(
        "Loaded {} images with semantic segmentation from {}".format(len(input_files), image_root)
    )

    dataset_dicts = []
    for (img_path, gt_path) in zip(input_files, gt_files):
        record = {}
        record["file_name"] = img_path
        record["sem_seg_file_name"] = gt_path
        dataset_dicts.append(record)

    return dataset_dicts
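The pairing above hinges on `file2id` reducing both sides to the same extension-free relative path. A tiny self-contained illustration of that matching rule (directory names invented):

import os

def file2id(folder_path, file_path):
    # same logic as above: relative path from the root, minus the extension
    image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path))
    return os.path.splitext(image_id)[0]

# images/a/0001.jpg and gt/a/0001.png both reduce to "a/0001", so they pair up.
assert file2id("images", "images/a/0001.jpg") == file2id("gt", "gt/a/0001.png")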
def convert_to_coco_dict(dataset_name):
    """
    Convert an instance detection/segmentation or keypoint detection dataset
    in detectron2's standard format into COCO json format.

    Generic dataset description can be found here:
    https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset

    COCO data format description can be found here:
    http://cocodataset.org/#format-data

    Args:
        dataset_name (str):
            name of the source dataset
            Must be registered in DatasetCatalog and in detectron2's standard format.
            Must have corresponding metadata "thing_classes"
    Returns:
        coco_dict: serializable dict in COCO json format
    """

    dataset_dicts = DatasetCatalog.get(dataset_name)
    metadata = MetadataCatalog.get(dataset_name)

    # unmap the category mapping ids for COCO
    if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
        reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()}
        reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[contiguous_id]  # noqa
    else:
        reverse_id_mapper = lambda contiguous_id: contiguous_id  # noqa

    categories = [
        {"id": reverse_id_mapper(id), "name": name}
        for id, name in enumerate(metadata.thing_classes)
    ]

    logger.info("Converting dataset dicts into COCO format")
    coco_images = []
    coco_annotations = []

    for image_id, image_dict in enumerate(dataset_dicts):
        coco_image = {
            "id": image_dict.get("image_id", image_id),
            "width": image_dict["width"],
            "height": image_dict["height"],
            "file_name": image_dict["file_name"],
        }
        coco_images.append(coco_image)

        anns_per_image = image_dict["annotations"]
        for annotation in anns_per_image:
            # create a new dict with only COCO fields
            coco_annotation = {}

            # COCO requirement: XYWH box format
            bbox = annotation["bbox"]
            bbox_mode = annotation["bbox_mode"]
            bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS)

            # COCO requirement: instance area
            if "segmentation" in annotation:
                # Computing areas for instances by counting the pixels
                segmentation = annotation["segmentation"]
                # TODO: check segmentation type: RLE, BinaryMask or Polygon
                if isinstance(segmentation, list):
                    polygons = PolygonMasks([segmentation])
                    area = polygons.area()[0].item()
                elif isinstance(segmentation, dict):  # RLE
                    area = mask_util.area(segmentation).item()
                else:
                    raise TypeError(f"Unknown segmentation type {type(segmentation)}!")
            else:
                # Computing areas using bounding boxes
                bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
                area = Boxes([bbox_xy]).area()[0].item()

            if "keypoints" in annotation:
                keypoints = annotation["keypoints"]  # list[int]
                for idx, v in enumerate(keypoints):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # For COCO format consistency we subtract 0.5
                        # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163
                        keypoints[idx] = v - 0.5
                if "num_keypoints" in annotation:
                    num_keypoints = annotation["num_keypoints"]
                else:
                    num_keypoints = sum(kp > 0 for kp in keypoints[2::3])

            # COCO requirement:
            #   linking annotations to images
            #   "id" field must start with 1
            coco_annotation["id"] = len(coco_annotations) + 1
            coco_annotation["image_id"] = coco_image["id"]
            coco_annotation["bbox"] = [round(float(x), 3) for x in bbox]
            coco_annotation["area"] = float(area)
            coco_annotation["iscrowd"] = annotation.get("iscrowd", 0)
            coco_annotation["category_id"] = reverse_id_mapper(annotation["category_id"])

            # Add optional fields
            if "keypoints" in annotation:
                coco_annotation["keypoints"] = keypoints
                coco_annotation["num_keypoints"] = num_keypoints

            if "segmentation" in annotation:
                coco_annotation["segmentation"] = annotation["segmentation"]
                if isinstance(coco_annotation["segmentation"], dict):  # RLE
                    coco_annotation["segmentation"]["counts"] = coco_annotation["segmentation"][
                        "counts"
                    ].decode("ascii")

            coco_annotations.append(coco_annotation)

    logger.info(
        "Conversion finished, "
        f"#images: {len(coco_images)}, #annotations: {len(coco_annotations)}"
    )

    info = {
        "date_created": str(datetime.datetime.now()),
        "description": "Automatically generated COCO json file for Detectron2.",
    }
    coco_dict = {
        "info": info,
        "images": coco_images,
        "annotations": coco_annotations,
        "categories": categories,
        "licenses": None,
    }
    return coco_dict
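Note how the `-0.5` keypoint shift in this converter undoes the `+0.5` applied by `load_coco_json`: loading maps an integer pixel index to a pixel-center coordinate, and converting back recovers the original integer. A quick arithmetic check (plain Python, values invented):

raw = [192, 224, 2]  # x, y, visibility triplet as stored in a COCO json
loaded = [v + 0.5 if i % 3 != 2 else v for i, v in enumerate(raw)]
dumped = [v - 0.5 if i % 3 != 2 else v for i, v in enumerate(loaded)]
assert dumped == raw  # the round-trip is lossless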
def convert_to_coco_json(dataset_name, output_file, allow_cached=True):
    """
    Converts dataset into COCO format and saves it to a json file.
    dataset_name must be registered in DatasetCatalog and in detectron2's standard format.

    Args:
        dataset_name:
            reference from the config file to the catalogs
            must be registered in DatasetCatalog and in detectron2's standard format
        output_file: path of json file that will be saved to
        allow_cached: if json file is already present then skip conversion
    """

    # TODO: The dataset or the conversion script *may* change,
    # a checksum would be useful for validating the cached data
    PathManager.mkdirs(os.path.dirname(output_file))
    with file_lock(output_file):
        if PathManager.exists(output_file) and allow_cached:
            logger.warning(
                f"Using previously cached COCO format annotations at '{output_file}'. "
                "You need to clear the cache file if your dataset has been modified."
            )
        else:
            logger.info(f"Converting annotations of dataset '{dataset_name}' to COCO format ...")
            coco_dict = convert_to_coco_dict(dataset_name)

            logger.info(f"Caching COCO format annotations at '{output_file}' ...")
            with PathManager.open(output_file, "w") as f:
                json.dump(coco_dict, f)
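A hedged usage sketch; the dataset name and output path below are placeholders, and the named dataset must already be registered with "thing_classes" metadata:

from detectron2.data.datasets.coco import convert_to_coco_json

# Writes (and caches) a COCO-format json for a registered dataset.
convert_to_coco_json("my_dataset_train", "output/my_dataset_train_coco.json")
# Pass allow_cached=False to force re-conversion after the dataset changes.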
if __name__ == "__main__":
    """
    Test the COCO json dataset loader.

    Usage:
        python -m detectron2.data.datasets.coco \
            path/to/json path/to/image_root dataset_name

        "dataset_name" can be "coco_2014_minival_100", or other
        pre-registered ones
    """
    from detectron2.utils.logger import setup_logger
    from detectron2.utils.visualizer import Visualizer
    import detectron2.data.datasets  # noqa # add pre-defined metadata
    import sys

    logger = setup_logger(name=__name__)
    assert sys.argv[3] in DatasetCatalog.list()
    meta = MetadataCatalog.get(sys.argv[3])

    dicts = load_coco_json(sys.argv[1], sys.argv[2], sys.argv[3])
    logger.info("Done loading {} samples.".format(len(dicts)))

    dirname = "coco-data-vis"
    os.makedirs(dirname, exist_ok=True)
    for d in dicts:
        img = np.array(Image.open(d["file_name"]))
        visualizer = Visualizer(img, metadata=meta)
        vis = visualizer.draw_dataset_dict(d)
        fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
        vis.save(fpath)
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/lvis.py
0 → 100644
View file @ c50c08d9
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import os
from fvcore.common.file_io import PathManager
from fvcore.common.timer import Timer

from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode

from .builtin_meta import _get_coco_instances_meta
from .lvis_v0_5_categories import LVIS_CATEGORIES

"""
This file contains functions to parse LVIS-format annotations into dicts in the
"Detectron2 format".
"""

logger = logging.getLogger(__name__)

__all__ = ["load_lvis_json", "register_lvis_instances", "get_lvis_instances_meta"]

def register_lvis_instances(name, metadata, json_file, image_root):
    """
    Register a dataset in LVIS's json annotation format for instance detection and segmentation.

    Args:
        name (str): a name that identifies the dataset, e.g. "lvis_v0.5_train".
        metadata (dict): extra metadata associated with this dataset. It can be an empty dict.
        json_file (str): path to the json instance annotation file.
        image_root (str or path-like): directory which contains all the images.
    """
    DatasetCatalog.register(name, lambda: load_lvis_json(json_file, image_root, name))
    MetadataCatalog.get(name).set(
        json_file=json_file, image_root=image_root, evaluator_type="lvis", **metadata
    )
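A hedged registration sketch; the paths below are placeholders (LVIS v0.5 reuses the COCO 2017 train images):

from detectron2.data.datasets.lvis import register_lvis_instances

register_lvis_instances(
    "lvis_v0.5_train",            # name used later in configs
    {},                           # extra metadata, may be empty
    "lvis/lvis_v0.5_train.json",  # hypothetical annotation path
    "coco/train2017",             # hypothetical image root
)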
def load_lvis_json(json_file, image_root, dataset_name=None):
    """
    Load a json file in LVIS's annotation format.

    Args:
        json_file (str): full path to the LVIS json annotation file.
        image_root (str): the directory where the images in this json file exists.
        dataset_name (str): the name of the dataset (e.g., "lvis_v0.5_train").
            If provided, this function will put "thing_classes" into the metadata
            associated with this dataset.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets </tutorials/data.html>`_ )

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from lvis import LVIS

    json_file = PathManager.get_local_path(json_file)

    timer = Timer()
    lvis_api = LVIS(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))

    if dataset_name is not None:
        meta = get_lvis_instances_meta(dataset_name)
        MetadataCatalog.get(dataset_name).set(**meta)

    # sort indices for reproducible results
    img_ids = sorted(lvis_api.imgs.keys())
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = lvis_api.load_imgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986},
    #  ...]
    anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids]

    # Sanity check that each annotation has a unique id
    ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
    assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique".format(
        json_file
    )

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in the LVIS format from {}".format(len(imgs_anns), json_file))

    dataset_dicts = []

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        file_name = img_dict["file_name"]
        if img_dict["file_name"].startswith("COCO"):
            # Convert from the COCO 2014 file naming convention of
            # COCO_[train/val/test]2014_000000000000.jpg to the 2017 naming convention of
            # 000000000000.jpg (LVIS v1 will fix this naming issue)
            file_name = file_name[-16:]
        record["file_name"] = os.path.join(image_root, file_name)
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        record["not_exhaustive_category_ids"] = img_dict.get("not_exhaustive_category_ids", [])
        record["neg_category_ids"] = img_dict.get("neg_category_ids", [])
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.
            assert anno["image_id"] == image_id
            obj = {"bbox": anno["bbox"], "bbox_mode": BoxMode.XYWH_ABS}
            obj["category_id"] = anno["category_id"] - 1  # Convert 1-indexed to 0-indexed
            segm = anno["segmentation"]  # list[list[float]]
            # filter out invalid polygons (< 3 points)
            valid_segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
            assert len(segm) == len(valid_segm), "Annotation contains an invalid polygon with < 3 points"
            assert len(segm) > 0
            obj["segmentation"] = segm
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    return dataset_dicts
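Unlike the COCO loader above, no id map is built here: LVIS v0.5 category ids are already contiguous in [1, 1230], so subtracting 1 yields the 0-indexed training labels directly. For example:

anno = {"category_id": 16}               # as stored in the LVIS json (1-indexed)
contiguous_id = anno["category_id"] - 1  # 15, as consumed by detectron2 models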
def get_lvis_instances_meta(dataset_name):
    """
    Load LVIS metadata.

    Args:
        dataset_name (str): LVIS dataset name without the split name (e.g., "lvis_v0.5").

    Returns:
        dict: LVIS metadata with keys: thing_classes
    """
    if "cocofied" in dataset_name:
        return _get_coco_instances_meta()
    if "v0.5" in dataset_name:
        return _get_lvis_instances_meta_v0_5()
    # There will be a v1 in the future
    # elif dataset_name == "lvis_v1":
    #   return get_lvis_instances_meta_v1()
    raise ValueError("No built-in metadata for dataset {}".format(dataset_name))


def _get_lvis_instances_meta_v0_5():
    assert len(LVIS_CATEGORIES) == 1230
    cat_ids = [k["id"] for k in LVIS_CATEGORIES]
    assert min(cat_ids) == 1 and max(cat_ids) == len(
        cat_ids
    ), "Category ids are not in [1, #categories], as expected"
    # Ensure that the category list is sorted by id
    lvis_categories = sorted(LVIS_CATEGORIES, key=lambda x: x["id"])
    thing_classes = [k["synonyms"][0] for k in lvis_categories]
    meta = {"thing_classes": thing_classes}
    return meta

if __name__ == "__main__":
    """
    Test the LVIS json dataset loader.

    Usage:
        python -m detectron2.data.datasets.lvis \
            path/to/json path/to/image_root dataset_name vis_limit
    """
    import sys
    import numpy as np
    from detectron2.utils.logger import setup_logger
    from PIL import Image
    import detectron2.data.datasets  # noqa # add pre-defined metadata
    from detectron2.utils.visualizer import Visualizer

    logger = setup_logger(name=__name__)
    meta = MetadataCatalog.get(sys.argv[3])

    dicts = load_lvis_json(sys.argv[1], sys.argv[2], sys.argv[3])
    logger.info("Done loading {} samples.".format(len(dicts)))

    dirname = "lvis-data-vis"
    os.makedirs(dirname, exist_ok=True)
    for d in dicts[: int(sys.argv[4])]:
        img = np.array(Image.open(d["file_name"]))
        visualizer = Visualizer(img, metadata=meta)
        vis = visualizer.draw_dataset_dict(d)
        fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
        vis.save(fpath)
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/lvis_v0_5_categories.py
0 → 100644
View file @ c50c08d9
This source diff could not be displayed because it is too large. You can view the blob instead.
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/pascal_voc.py
0 → 100644
View file @ c50c08d9
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

import numpy as np
import os
import xml.etree.ElementTree as ET
from fvcore.common.file_io import PathManager

from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode

__all__ = ["register_pascal_voc"]


# fmt: off
CLASS_NAMES = [
    "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
    "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
]
# fmt: on

def load_voc_instances(dirname: str, split: str):
    """
    Load Pascal VOC detection annotations to Detectron2 format.

    Args:
        dirname: Contain "Annotations", "ImageSets", "JPEGImages"
        split (str): one of "train", "test", "val", "trainval"
    """
    with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f:
        fileids = np.loadtxt(f, dtype=np.str)

    # Needs to read many small annotation files. Makes sense at local
    annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "Annotations/"))
    dicts = []
    for fileid in fileids:
        anno_file = os.path.join(annotation_dirname, fileid + ".xml")
        jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg")

        with PathManager.open(anno_file) as f:
            tree = ET.parse(f)

        r = {
            "file_name": jpeg_file,
            "image_id": fileid,
            "height": int(tree.findall("./size/height")[0].text),
            "width": int(tree.findall("./size/width")[0].text),
        }
        instances = []

        for obj in tree.findall("object"):
            cls = obj.find("name").text
            # We include "difficult" samples in training.
            # Based on limited experiments, they don't hurt accuracy.
            # difficult = int(obj.find("difficult").text)
            # if difficult == 1:
            #     continue
            bbox = obj.find("bndbox")
            bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]]
            # Original annotations are integers in the range [1, W or H]
            # Assuming they mean 1-based pixel indices (inclusive),
            # a box with annotation (xmin=1, xmax=W) covers the whole image.
            # In coordinate space this is represented by (xmin=0, xmax=W)
            bbox[0] -= 1.0
            bbox[1] -= 1.0
            instances.append(
                {"category_id": CLASS_NAMES.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS}
            )
        r["annotations"] = instances
        dicts.append(r)
    return dicts

def register_pascal_voc(name, dirname, split, year):
    DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split))
    MetadataCatalog.get(name).set(
        thing_classes=CLASS_NAMES, dirname=dirname, year=year, split=split
    )
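A hedged usage sketch; the directory below is a placeholder and must contain the standard Annotations/, ImageSets/ and JPEGImages/ subfolders:

from detectron2.data.datasets.pascal_voc import register_pascal_voc

register_pascal_voc("voc_2012_trainval", "datasets/VOC2012", "trainval", 2012)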
preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/register_coco.py
0 → 100644
View file @ c50c08d9
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import copy
import os

from detectron2.data import DatasetCatalog, MetadataCatalog

from .coco import load_coco_json, load_sem_seg

"""
This file contains functions to register a COCO-format dataset to the DatasetCatalog.
"""

__all__ = ["register_coco_instances", "register_coco_panoptic_separated"]

def register_coco_instances(name, metadata, json_file, image_root):
    """
    Register a dataset in COCO's json annotation format for
    instance detection, instance segmentation and keypoint detection.
    (i.e., Type 1 and 2 in http://cocodataset.org/#format-data.
    `instances*.json` and `person_keypoints*.json` in the dataset).

    This is an example of how to register a new dataset.
    You can do something similar to this function, to register new data.

    Args:
        name (str): the name that identifies a dataset, e.g. "coco_2014_train".
        metadata (dict): extra metadata associated with this dataset. You can
            leave it as an empty dict.
        json_file (str): path to the json instance annotation file.
        image_root (str or path-like): directory which contains all the images.
    """
    assert isinstance(name, str), name
    assert isinstance(json_file, (str, os.PathLike)), json_file
    assert isinstance(image_root, (str, os.PathLike)), image_root
    # 1. register a function which returns dicts
    DatasetCatalog.register(name, lambda: load_coco_json(json_file, image_root, name))

    # 2. Optionally, add metadata about this dataset,
    # since they might be useful in evaluation, visualization or logging
    MetadataCatalog.get(name).set(
        json_file=json_file, image_root=image_root, evaluator_type="coco", **metadata
    )
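Taking up the docstring's invitation, a hedged sketch of registering a custom COCO-format dataset (the name and paths are invented):

from detectron2.data.datasets.register_coco import register_coco_instances
from detectron2.data import DatasetCatalog

register_coco_instances(
    "my_tryon_train", {}, "datasets/my/annotations.json", "datasets/my/images"
)
# Loading is lazy; the json is only parsed when the dicts are requested:
dicts = DatasetCatalog.get("my_tryon_train")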
def register_coco_panoptic_separated(
    name, metadata, image_root, panoptic_root, panoptic_json, sem_seg_root, instances_json
):
    """
    Register a COCO panoptic segmentation dataset named `name`.
    The annotations in this registered dataset will contain both instance annotations and
    semantic annotations, each with its own contiguous ids. Hence it's called "separated".

    It follows the setting used by the PanopticFPN paper:

    1. The instance annotations directly come from polygons in the COCO
       instances annotation task, rather than from the masks in the COCO panoptic annotations.

       The two formats have small differences:
       Polygons in the instance annotations may have overlaps.
       The mask annotations are produced by labeling the overlapped polygons
       with depth ordering.

    2. The semantic annotations are converted from panoptic annotations, where
       all "things" are assigned a semantic id of 0.
       All semantic categories will therefore have ids in contiguous
       range [1, #stuff_categories].

    This function will also register a pure semantic segmentation dataset
    named ``name + '_stuffonly'``.

    Args:
        name (str): the name that identifies a dataset,
            e.g. "coco_2017_train_panoptic"
        metadata (dict): extra metadata associated with this dataset.
        image_root (str): directory which contains all the images
        panoptic_root (str): directory which contains panoptic annotation images
        panoptic_json (str): path to the json panoptic annotation file
        sem_seg_root (str): directory which contains all the ground truth segmentation annotations.
        instances_json (str): path to the json instance annotation file
    """
    panoptic_name = name + "_separated"
    DatasetCatalog.register(
        panoptic_name,
        lambda: merge_to_panoptic(
            load_coco_json(instances_json, image_root, panoptic_name),
            load_sem_seg(sem_seg_root, image_root),
        ),
    )
    MetadataCatalog.get(panoptic_name).set(
        panoptic_root=panoptic_root,
        image_root=image_root,
        panoptic_json=panoptic_json,
        sem_seg_root=sem_seg_root,
        json_file=instances_json,  # TODO rename
        evaluator_type="coco_panoptic_seg",
        **metadata
    )

    semantic_name = name + "_stuffonly"
    DatasetCatalog.register(semantic_name, lambda: load_sem_seg(sem_seg_root, image_root))
    MetadataCatalog.get(semantic_name).set(
        sem_seg_root=sem_seg_root, image_root=image_root, evaluator_type="sem_seg", **metadata
    )

def merge_to_panoptic(detection_dicts, sem_seg_dicts):
    """
    Create dataset dicts for panoptic segmentation, by
    merging two dicts using "file_name" field to match their entries.

    Args:
        detection_dicts (list[dict]): lists of dicts for object detection or instance segmentation.
        sem_seg_dicts (list[dict]): lists of dicts for semantic segmentation.

    Returns:
        list[dict] (one per input image): Each dict contains all (key, value) pairs from dicts in
            both detection_dicts and sem_seg_dicts that correspond to the same image.
            The function assumes that the same key in different dicts has the same value.
    """
    results = []
    sem_seg_file_to_entry = {x["file_name"]: x for x in sem_seg_dicts}
    assert len(sem_seg_file_to_entry) > 0

    for det_dict in detection_dicts:
        dic = copy.copy(det_dict)
        dic.update(sem_seg_file_to_entry[dic["file_name"]])
        results.append(dic)
    return results
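A toy illustration of the merge, assuming `merge_to_panoptic` from above is in scope (the dicts are invented; in practice they come from `load_coco_json` and `load_sem_seg`):

det = [{"file_name": "img1.jpg", "annotations": []}]
sem = [{"file_name": "img1.jpg", "sem_seg_file_name": "gt/img1.png"}]
merged = merge_to_panoptic(det, sem)
# merged[0] carries both "annotations" and "sem_seg_file_name"
assert "sem_seg_file_name" in merged[0] and "annotations" in merged[0]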
preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/__init__.py
0 → 100644
View file @ c50c08d9
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

from . import builtin  # ensure the builtin data are registered

__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")]
preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/builtin.py
0 → 100644
View file @ c50c08d9
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .coco import BASE_DATASETS as BASE_COCO_DATASETS
from .coco import DATASETS as COCO_DATASETS
from .coco import register_datasets as register_coco_datasets

DEFAULT_DATASETS_ROOT = "data"


register_coco_datasets(COCO_DATASETS, DEFAULT_DATASETS_ROOT)
register_coco_datasets(BASE_COCO_DATASETS, DEFAULT_DATASETS_ROOT)
preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/coco.py
0 → 100644
View file @ c50c08d9
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import contextlib
import io
import logging
import os
from dataclasses import dataclass
from typing import Any, Dict, Iterable, List, Optional
from fvcore.common.file_io import PathManager
from fvcore.common.timer import Timer

from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode

DENSEPOSE_MASK_KEY = "dp_masks"
DENSEPOSE_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_I", "dp_U", "dp_V"]
DENSEPOSE_KEYS = DENSEPOSE_KEYS_WITHOUT_MASK + [DENSEPOSE_MASK_KEY]
DENSEPOSE_METADATA_URL_PREFIX = "https://dl.fbaipublicfiles.com/densepose/data/"

@dataclass
class CocoDatasetInfo:
    name: str
    images_root: str
    annotations_fpath: str


DATASETS = [
    CocoDatasetInfo(
        name="densepose_coco_2014_train",
        images_root="coco/train2014",
        annotations_fpath="coco/annotations/densepose_train2014.json",
    ),
    CocoDatasetInfo(
        name="densepose_coco_2014_minival",
        images_root="coco/val2014",
        annotations_fpath="coco/annotations/densepose_minival2014.json",
    ),
    CocoDatasetInfo(
        name="densepose_coco_2014_minival_100",
        images_root="coco/val2014",
        annotations_fpath="coco/annotations/densepose_minival2014_100.json",
    ),
    CocoDatasetInfo(
        name="densepose_coco_2014_valminusminival",
        images_root="coco/val2014",
        annotations_fpath="coco/annotations/densepose_valminusminival2014.json",
    ),
    CocoDatasetInfo(
        name="densepose_chimps",
        images_root="densepose_evolution/densepose_chimps",
        annotations_fpath="densepose_evolution/annotations/densepose_chimps_densepose.json",
    ),
]

BASE_DATASETS = [
    CocoDatasetInfo(
        name="base_coco_2017_train",
        images_root="coco/train2017",
        annotations_fpath="coco/annotations/instances_train2017.json",
    ),
    CocoDatasetInfo(
        name="base_coco_2017_val",
        images_root="coco/val2017",
        annotations_fpath="coco/annotations/instances_val2017.json",
    ),
    CocoDatasetInfo(
        name="base_coco_2017_val_100",
        images_root="coco/val2017",
        annotations_fpath="coco/annotations/instances_val2017_100.json",
    ),
]

def _is_relative_local_path(path: os.PathLike):
    path_str = os.fsdecode(path)
    return ("://" not in path_str) and not os.path.isabs(path)


def _maybe_prepend_base_path(base_path: Optional[os.PathLike], path: os.PathLike):
    """
    Prepends the provided path with a base path prefix if:
    1) base path is not None;
    2) path is a local path
    """
    if base_path is None:
        return path
    if _is_relative_local_path(path):
        return os.path.join(base_path, path)
    return path
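The intended behavior, spelled out with invented paths and assuming `_maybe_prepend_base_path` from above is in scope: relative local paths get the prefix, while absolute paths and URLs pass through untouched.

import os

assert _maybe_prepend_base_path("data", "coco/train2014") == os.path.join("data", "coco/train2014")
assert _maybe_prepend_base_path("data", "/abs/coco") == "/abs/coco"
assert _maybe_prepend_base_path("data", "https://host/x.json") == "https://host/x.json"
assert _maybe_prepend_base_path(None, "coco/train2014") == "coco/train2014"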
def get_metadata(base_path: Optional[os.PathLike]) -> Dict[str, Any]:
    """
    Returns metadata associated with COCO DensePose data

    Args:
        base_path: Optional[os.PathLike]
            Base path used to load metadata from

    Returns:
        Dict[str, Any]
            Metadata in the form of a dictionary
    """
    meta = {
        "densepose_transform_src": _maybe_prepend_base_path(
            base_path, "UV_symmetry_transforms.mat"
        ),
        "densepose_smpl_subdiv": _maybe_prepend_base_path(base_path, "SMPL_subdiv.mat"),
        "densepose_smpl_subdiv_transform": _maybe_prepend_base_path(
            base_path, "SMPL_SUBDIV_TRANSFORM.mat"
        ),
    }
    return meta

def _load_coco_annotations(json_file: str):
    """
    Load COCO annotations from a JSON file

    Args:
        json_file: str
            Path to the file to load annotations from
    Returns:
        Instance of `pycocotools.coco.COCO` that provides access to annotations
        data
    """
    from pycocotools.coco import COCO

    logger = logging.getLogger(__name__)
    timer = Timer()
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
    return coco_api

def _add_categories_metadata(dataset_name: str, categories: Dict[str, Any]):
    meta = MetadataCatalog.get(dataset_name)
    meta.categories = {c["id"]: c["name"] for c in categories}
    logger = logging.getLogger(__name__)
    logger.info("Dataset {} categories: {}".format(dataset_name, categories))


def _verify_annotations_have_unique_ids(json_file: str, anns: List[List[Dict[str, Any]]]):
    if "minival" in json_file:
        # Skip validation on COCO2014 valminusminival and minival annotations
        # The ratio of buggy annotations there is tiny and does not affect accuracy
        # Therefore we explicitly white-list them
        return
    ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
    assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
        json_file
    )


def _maybe_add_bbox(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
    if "bbox" not in ann_dict:
        return
    obj["bbox"] = ann_dict["bbox"]
    obj["bbox_mode"] = BoxMode.XYWH_ABS


def _maybe_add_segm(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
    if "segmentation" not in ann_dict:
        return
    segm = ann_dict["segmentation"]
    if not isinstance(segm, dict):
        # filter out invalid polygons (< 3 points)
        segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
        if len(segm) == 0:
            return
    obj["segmentation"] = segm


def _maybe_add_keypoints(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
    if "keypoints" not in ann_dict:
        return
    keypts = ann_dict["keypoints"]  # list[int]
    for idx, v in enumerate(keypts):
        if idx % 3 != 2:
            # COCO's segmentation coordinates are floating points in [0, H or W],
            # but keypoint coordinates are integers in [0, H-1 or W-1]
            # Therefore we assume the coordinates are "pixel indices" and
            # add 0.5 to convert to floating point coordinates.
            keypts[idx] = v + 0.5
    obj["keypoints"] = keypts


def _maybe_add_densepose(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
    for key in DENSEPOSE_KEYS:
        if key in ann_dict:
            obj[key] = ann_dict[key]

def _combine_images_with_annotations(
    dataset_name: str,
    image_root: str,
    img_datas: Iterable[Dict[str, Any]],
    ann_datas: Iterable[Iterable[Dict[str, Any]]],
):

    ann_keys = ["iscrowd", "category_id"]
    dataset_dicts = []
    for img_dict, ann_dicts in zip(img_datas, ann_datas):
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        record["image_id"] = img_dict["id"]
        record["dataset"] = dataset_name
        objs = []
        for ann_dict in ann_dicts:
            assert ann_dict["image_id"] == record["image_id"]
            assert ann_dict.get("ignore", 0) == 0
            obj = {key: ann_dict[key] for key in ann_keys if key in ann_dict}
            _maybe_add_bbox(obj, ann_dict)
            _maybe_add_segm(obj, ann_dict)
            _maybe_add_keypoints(obj, ann_dict)
            _maybe_add_densepose(obj, ann_dict)
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)
    return dataset_dicts

def load_coco_json(annotations_json_file: str, image_root: str, dataset_name: str):
    """
    Loads a JSON file with annotations in COCO instances format.
    Replaces `detectron2.data.datasets.coco.load_coco_json` to handle metadata
    in a more flexible way. Postpones category mapping to a later stage to be
    able to combine several data with different (but coherent) sets of
    categories.

    Args:
        annotations_json_file: str
            Path to the JSON file with annotations in COCO instances format.
        image_root: str
            directory that contains all the images
        dataset_name: str
            the name that identifies a dataset, e.g. "densepose_coco_2014_train"
    """
    coco_api = _load_coco_annotations(PathManager.get_local_path(annotations_json_file))
    _add_categories_metadata(dataset_name, coco_api.loadCats(coco_api.getCatIds()))
    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = coco_api.loadImgs(img_ids)
    logger = logging.getLogger(__name__)
    logger.info("Loaded {} images in COCO format from {}".format(len(imgs), annotations_json_file))
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images.
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
    _verify_annotations_have_unique_ids(annotations_json_file, anns)
    dataset_records = _combine_images_with_annotations(dataset_name, image_root, imgs, anns)
    return dataset_records

def register_dataset(dataset_data: CocoDatasetInfo, datasets_root: Optional[os.PathLike] = None):
    """
    Registers provided COCO DensePose dataset

    Args:
        dataset_data: CocoDatasetInfo
            Dataset data
        datasets_root: Optional[os.PathLike]
            Datasets root folder (default: None)
    """
    annotations_fpath = _maybe_prepend_base_path(datasets_root, dataset_data.annotations_fpath)
    images_root = _maybe_prepend_base_path(datasets_root, dataset_data.images_root)

    def load_annotations():
        return load_coco_json(
            annotations_json_file=annotations_fpath,
            image_root=images_root,
            dataset_name=dataset_data.name,
        )

    DatasetCatalog.register(dataset_data.name, load_annotations)
    MetadataCatalog.get(dataset_data.name).set(
        json_file=annotations_fpath,
        image_root=images_root,
        **get_metadata(DENSEPOSE_METADATA_URL_PREFIX)
    )

def register_datasets(
    datasets_data: Iterable[CocoDatasetInfo], datasets_root: Optional[os.PathLike] = None
):
    """
    Registers provided COCO DensePose data

    Args:
        datasets_data: Iterable[CocoDatasetInfo]
            An iterable of dataset infos
        datasets_root: Optional[os.PathLike]
            Datasets root folder (default: None)
    """
    for dataset_data in datasets_data:
        register_dataset(dataset_data, datasets_root)
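A hedged sketch of registering the DensePose datasets under a custom root (builtin.py above does the same with the default root "data"; the root path here is a placeholder):

from densepose.data.datasets.coco import DATASETS, register_datasets

# Expects e.g. <root>/coco/train2014 and <root>/coco/annotations/... to exist.
register_datasets(DATASETS, "/path/to/datasets")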
train/main.py
View file @ c50c08d9
...
@@ -17,7 +17,7 @@ def get_args():
     # model-related
     parser.add_argument("--vae_path", type=str, default="/home/modelzoo/OOTDiffusion/checkpoints/ootd")
-    parser.add_argument("--unet_path", type=str, default="/home/modelzoo/OOTDiffusion/checkpoints/ootd/ootd_dc/checkpoint-36000")
+    parser.add_argument("--unet_path", type=str, default="/home/modelzoo/OOTDiffusion/checkpoints/sd15")
     parser.add_argument("--model_path", type=str, default="/home/modelzoo/OOTDiffusion/checkpoints/ootd")
...
@@ -59,14 +59,15 @@ def main():
         args.lr_scheduler
     )
     trainer = L.Trainer(
-        max_epochs=10,
+        max_epochs=50,
         accelerator='auto',
         log_every_n_steps=1,
         callbacks=[ModelCheckpoint(every_n_train_steps=6000, save_top_k=-1, save_last=True)],
-        precision="16-mixed"
+        precision="16-mixed",
+        accumulate_grad_batches=32,
     )
-    trainer.fit(model, dm)
+    trainer.fit(model, dm, ckpt_path="lightning_logs/version_6/checkpoints/last.ckpt")

 if __name__ == "__main__":
...
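Reading both hunks together: the commit swaps the UNet initialization to the sd15 checkpoint, raises max_epochs from 10 to 50, adds gradient accumulation (accumulate_grad_batches=32, so each optimizer step averages gradients over 32 batches for a 32x larger effective batch), and resumes from a previous run's last checkpoint. Roughly, the resulting Trainer setup looks like the sketch below (import names assumed; `model` and `dm` are built earlier in the script):

import lightning as L
from lightning.pytorch.callbacks import ModelCheckpoint

trainer = L.Trainer(
    max_epochs=50,
    accelerator="auto",
    log_every_n_steps=1,
    callbacks=[ModelCheckpoint(every_n_train_steps=6000, save_top_k=-1, save_last=True)],
    precision="16-mixed",
    accumulate_grad_batches=32,  # gradients averaged over 32 batches per optimizer step
)
# Resuming from the last checkpoint of an earlier run, as in the diff:
# trainer.fit(model, dm, ckpt_path="lightning_logs/version_6/checkpoints/last.ckpt")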