ModelZoo / InstructBLIP_pytorch · Commits

Commit c04f261a, authored Aug 22, 2024 by dongchy920
Commit message: InstruceBLIP
Pipeline #1594 canceled with stages · Changes: 421 · Pipelines: 1
Showing 20 changed files with 2777 additions and 0 deletions (+2777, -0).
lavis/common/annotator/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py (+76, -0)
lavis/common/annotator/uniformer/mmseg/core/utils/__init__.py (+3, -0)
lavis/common/annotator/uniformer/mmseg/core/utils/misc.py (+17, -0)
lavis/common/annotator/uniformer/mmseg/datasets/__init__.py (+19, -0)
lavis/common/annotator/uniformer/mmseg/datasets/ade.py (+84, -0)
lavis/common/annotator/uniformer/mmseg/datasets/builder.py (+170, -0)
lavis/common/annotator/uniformer/mmseg/datasets/chase_db1.py (+27, -0)
lavis/common/annotator/uniformer/mmseg/datasets/cityscapes.py (+217, -0)
lavis/common/annotator/uniformer/mmseg/datasets/custom.py (+400, -0)
lavis/common/annotator/uniformer/mmseg/datasets/dataset_wrappers.py (+50, -0)
lavis/common/annotator/uniformer/mmseg/datasets/drive.py (+27, -0)
lavis/common/annotator/uniformer/mmseg/datasets/hrf.py (+27, -0)
lavis/common/annotator/uniformer/mmseg/datasets/pascal_context.py (+103, -0)
lavis/common/annotator/uniformer/mmseg/datasets/pipelines/__init__.py (+16, -0)
lavis/common/annotator/uniformer/mmseg/datasets/pipelines/compose.py (+51, -0)
lavis/common/annotator/uniformer/mmseg/datasets/pipelines/formating.py (+288, -0)
lavis/common/annotator/uniformer/mmseg/datasets/pipelines/loading.py (+153, -0)
lavis/common/annotator/uniformer/mmseg/datasets/pipelines/test_time_aug.py (+133, -0)
lavis/common/annotator/uniformer/mmseg/datasets/pipelines/transforms.py (+889, -0)
lavis/common/annotator/uniformer/mmseg/datasets/stare.py (+27, -0)
lavis/common/annotator/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py (new file, mode 100644)
import torch
import torch.nn.functional as F

from ..builder import PIXEL_SAMPLERS
from .base_pixel_sampler import BasePixelSampler


@PIXEL_SAMPLERS.register_module()
class OHEMPixelSampler(BasePixelSampler):
    """Online Hard Example Mining Sampler for segmentation.

    Args:
        context (nn.Module): The context of sampler, subclass of
            :obj:`BaseDecodeHead`.
        thresh (float, optional): The threshold for hard example selection.
            Predictions below this threshold are treated as low confidence.
            If not specified, the hard examples will be the pixels of top
            ``min_kept`` loss. Default: None.
        min_kept (int, optional): The minimum number of predictions to keep.
            Default: 100000.
    """

    def __init__(self, context, thresh=None, min_kept=100000):
        super(OHEMPixelSampler, self).__init__()
        self.context = context
        assert min_kept > 1
        self.thresh = thresh
        self.min_kept = min_kept

    def sample(self, seg_logit, seg_label):
        """Sample pixels that have high loss or low prediction confidence.

        Args:
            seg_logit (torch.Tensor): segmentation logits, shape (N, C, H, W)
            seg_label (torch.Tensor): segmentation label, shape (N, 1, H, W)

        Returns:
            torch.Tensor: segmentation weight, shape (N, H, W)
        """
        with torch.no_grad():
            assert seg_logit.shape[2:] == seg_label.shape[2:]
            assert seg_label.shape[1] == 1
            seg_label = seg_label.squeeze(1).long()
            batch_kept = self.min_kept * seg_label.size(0)
            valid_mask = seg_label != self.context.ignore_index
            seg_weight = seg_logit.new_zeros(size=seg_label.size())
            valid_seg_weight = seg_weight[valid_mask]
            if self.thresh is not None:
                seg_prob = F.softmax(seg_logit, dim=1)

                tmp_seg_label = seg_label.clone().unsqueeze(1)
                tmp_seg_label[tmp_seg_label == self.context.ignore_index] = 0
                seg_prob = seg_prob.gather(1, tmp_seg_label).squeeze(1)
                sort_prob, sort_indices = seg_prob[valid_mask].sort()

                if sort_prob.numel() > 0:
                    min_threshold = sort_prob[min(batch_kept,
                                                  sort_prob.numel() - 1)]
                else:
                    min_threshold = 0.0
                threshold = max(min_threshold, self.thresh)
                valid_seg_weight[seg_prob[valid_mask] < threshold] = 1.
            else:
                losses = self.context.loss_decode(
                    seg_logit,
                    seg_label,
                    weight=None,
                    ignore_index=self.context.ignore_index,
                    reduction_override='none')
                # faster than topk according to
                # https://github.com/pytorch/pytorch/issues/22812  # noqa
                _, sort_indices = losses[valid_mask].sort(descending=True)
                valid_seg_weight[sort_indices[:batch_kept]] = 1.

            seg_weight[valid_mask] = valid_seg_weight
            return seg_weight
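
For orientation, this sampler is normally not instantiated directly; in mmseg-style configs it is attached to a decode head by its registry name, and the head passes itself in as ``context``. A minimal sketch of such a config fragment; the surrounding head options are assumptions, not part of this commit:

# Hypothetical decode-head fragment: weight only hard pixels, i.e. those
# whose predicted probability for the ground-truth class falls below 0.7,
# while always keeping at least 100000 pixels per image.
decode_head = dict(
    # ... other head settings omitted ...
    sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=100000))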
lavis/common/annotator/uniformer/mmseg/core/utils/__init__.py (new file, mode 100644)
from .misc import add_prefix

__all__ = ['add_prefix']
lavis/common/annotator/uniformer/mmseg/core/utils/misc.py (new file, mode 100644)
def add_prefix(inputs, prefix):
    """Add prefix for dict.

    Args:
        inputs (dict): The input dict with str keys.
        prefix (str): The prefix to add.

    Returns:
        dict: The dict with keys updated with ``prefix``.
    """

    outputs = dict()
    for name, value in inputs.items():
        outputs[f'{prefix}.{name}'] = value

    return outputs
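
A quick usage sketch (the values are illustrative only):

# add_prefix({'loss_ce': 0.4, 'acc': 0.9}, 'decode')
# -> {'decode.loss_ce': 0.4, 'decode.acc': 0.9}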
lavis/common/annotator/uniformer/mmseg/datasets/__init__.py (new file, mode 100644)
from .ade import ADE20KDataset
from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
from .chase_db1 import ChaseDB1Dataset
from .cityscapes import CityscapesDataset
from .custom import CustomDataset
from .dataset_wrappers import ConcatDataset, RepeatDataset
from .drive import DRIVEDataset
from .hrf import HRFDataset
from .pascal_context import PascalContextDataset, PascalContextDataset59
from .stare import STAREDataset
from .voc import PascalVOCDataset

__all__ = [
    'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset',
    'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset',
    'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset',
    'PascalContextDataset59', 'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset',
    'STAREDataset'
]
lavis/common/annotator/uniformer/mmseg/datasets/ade.py (new file, mode 100644)
from .builder import DATASETS
from .custom import CustomDataset


@DATASETS.register_module()
class ADE20KDataset(CustomDataset):
    """ADE20K dataset.

    In segmentation map annotation for ADE20K, 0 stands for background, which
    is not included in 150 categories. ``reduce_zero_label`` is fixed to True.
    The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is fixed to
    '.png'.
    """

    CLASSES = (
        'wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', 'bed ',
        'windowpane', 'grass', 'cabinet', 'sidewalk', 'person', 'earth',
        'door', 'table', 'mountain', 'plant', 'curtain', 'chair', 'car',
        'water', 'painting', 'sofa', 'shelf', 'house', 'sea', 'mirror', 'rug',
        'field', 'armchair', 'seat', 'fence', 'desk', 'rock', 'wardrobe',
        'lamp', 'bathtub', 'railing', 'cushion', 'base', 'box', 'column',
        'signboard', 'chest of drawers', 'counter', 'sand', 'sink',
        'skyscraper', 'fireplace', 'refrigerator', 'grandstand', 'path',
        'stairs', 'runway', 'case', 'pool table', 'pillow', 'screen door',
        'stairway', 'river', 'bridge', 'bookcase', 'blind', 'coffee table',
        'toilet', 'flower', 'book', 'hill', 'bench', 'countertop', 'stove',
        'palm', 'kitchen island', 'computer', 'swivel chair', 'boat', 'bar',
        'arcade machine', 'hovel', 'bus', 'towel', 'light', 'truck', 'tower',
        'chandelier', 'awning', 'streetlight', 'booth', 'television receiver',
        'airplane', 'dirt track', 'apparel', 'pole', 'land', 'bannister',
        'escalator', 'ottoman', 'bottle', 'buffet', 'poster', 'stage', 'van',
        'ship', 'fountain', 'conveyer belt', 'canopy', 'washer', 'plaything',
        'swimming pool', 'stool', 'barrel', 'basket', 'waterfall', 'tent',
        'bag', 'minibike', 'cradle', 'oven', 'ball', 'food', 'step', 'tank',
        'trade name', 'microwave', 'pot', 'animal', 'bicycle', 'lake',
        'dishwasher', 'screen', 'blanket', 'sculpture', 'hood', 'sconce',
        'vase', 'traffic light', 'tray', 'ashcan', 'fan', 'pier', 'crt screen',
        'plate', 'monitor', 'bulletin board', 'shower', 'radiator', 'glass',
        'clock', 'flag')

    PALETTE = [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50],
               [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255],
               [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7],
               [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82],
               [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3],
               [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255],
               [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220],
               [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224],
               [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255],
               [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7],
               [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153],
               [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255],
               [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0],
               [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255],
               [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255],
               [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255],
               [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0],
               [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0],
               [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255],
               [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255],
               [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20],
               [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255],
               [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255],
               [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255],
               [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0],
               [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0],
               [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255],
               [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112],
               [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160],
               [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163],
               [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0],
               [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0],
               [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255],
               [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204],
               [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255],
               [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255],
               [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194],
               [102, 255, 0], [92, 0, 255]]

    def __init__(self, **kwargs):
        super(ADE20KDataset, self).__init__(
            img_suffix='.jpg',
            seg_map_suffix='.png',
            reduce_zero_label=True,
            **kwargs)
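
Since ``reduce_zero_label`` is fixed to True here, label 0 (the unannotated background) is ignored rather than trained on. The remapping itself happens in mmseg's LoadAnnotations at load time, past the point where this capture truncates; the following is a sketch of that convention for reference, not a quote of this commit:

# reduce_zero_label remapping as conventionally done in mmseg (sketch):
# gt[gt == 0] = 255     # background becomes the ignore index
# gt = gt - 1           # classes 1..150 shift down to 0..149
# gt[gt == 254] = 255   # keep the ignore index at 255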
lavis/common/annotator/uniformer/mmseg/datasets/builder.py (new file, mode 100644)
import copy
import platform
import random
from functools import partial

import numpy as np
from annotator.uniformer.mmcv.parallel import collate
from annotator.uniformer.mmcv.runner import get_dist_info
from annotator.uniformer.mmcv.utils import Registry, build_from_cfg
from annotator.uniformer.mmcv.utils.parrots_wrapper import (DataLoader,
                                                            PoolDataLoader)
from torch.utils.data import DistributedSampler

if platform.system() != 'Windows':
    # https://github.com/pytorch/pytorch/issues/973
    import resource
    rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
    hard_limit = rlimit[1]
    soft_limit = min(4096, hard_limit)
    resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))

DATASETS = Registry('dataset')
PIPELINES = Registry('pipeline')


def _concat_dataset(cfg, default_args=None):
    """Build :obj:`ConcatDataset`."""
    from .dataset_wrappers import ConcatDataset
    img_dir = cfg['img_dir']
    ann_dir = cfg.get('ann_dir', None)
    split = cfg.get('split', None)
    num_img_dir = len(img_dir) if isinstance(img_dir, (list, tuple)) else 1
    if ann_dir is not None:
        num_ann_dir = len(ann_dir) if isinstance(ann_dir, (list, tuple)) else 1
    else:
        num_ann_dir = 0
    if split is not None:
        num_split = len(split) if isinstance(split, (list, tuple)) else 1
    else:
        num_split = 0
    if num_img_dir > 1:
        assert num_img_dir == num_ann_dir or num_ann_dir == 0
        assert num_img_dir == num_split or num_split == 0
    else:
        assert num_split == num_ann_dir or num_ann_dir <= 1
    num_dset = max(num_split, num_img_dir)

    datasets = []
    for i in range(num_dset):
        data_cfg = copy.deepcopy(cfg)
        if isinstance(img_dir, (list, tuple)):
            data_cfg['img_dir'] = img_dir[i]
        if isinstance(ann_dir, (list, tuple)):
            data_cfg['ann_dir'] = ann_dir[i]
        if isinstance(split, (list, tuple)):
            data_cfg['split'] = split[i]
        datasets.append(build_dataset(data_cfg, default_args))

    return ConcatDataset(datasets)


def build_dataset(cfg, default_args=None):
    """Build datasets."""
    from .dataset_wrappers import ConcatDataset, RepeatDataset
    if isinstance(cfg, (list, tuple)):
        dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])
    elif cfg['type'] == 'RepeatDataset':
        dataset = RepeatDataset(
            build_dataset(cfg['dataset'], default_args), cfg['times'])
    elif isinstance(cfg.get('img_dir'), (list, tuple)) or isinstance(
            cfg.get('split', None), (list, tuple)):
        dataset = _concat_dataset(cfg, default_args)
    else:
        dataset = build_from_cfg(cfg, DATASETS, default_args)

    return dataset


def build_dataloader(dataset,
                     samples_per_gpu,
                     workers_per_gpu,
                     num_gpus=1,
                     dist=True,
                     shuffle=True,
                     seed=None,
                     drop_last=False,
                     pin_memory=True,
                     dataloader_type='PoolDataLoader',
                     **kwargs):
    """Build PyTorch DataLoader.

    In distributed training, each GPU/process has a dataloader.
    In non-distributed training, there is only one dataloader for all GPUs.

    Args:
        dataset (Dataset): A PyTorch dataset.
        samples_per_gpu (int): Number of training samples on each GPU, i.e.,
            batch size of each GPU.
        workers_per_gpu (int): How many subprocesses to use for data loading
            for each GPU.
        num_gpus (int): Number of GPUs. Only used in non-distributed training.
        dist (bool): Distributed training/test or not. Default: True.
        shuffle (bool): Whether to shuffle the data at every epoch.
            Default: True.
        seed (int | None): Seed to be used. Default: None.
        drop_last (bool): Whether to drop the last incomplete batch in epoch.
            Default: False
        pin_memory (bool): Whether to use pin_memory in DataLoader.
            Default: True
        dataloader_type (str): Type of dataloader. Default: 'PoolDataLoader'
        kwargs: any keyword argument to be used to initialize DataLoader

    Returns:
        DataLoader: A PyTorch dataloader.
    """
    rank, world_size = get_dist_info()
    if dist:
        sampler = DistributedSampler(
            dataset, world_size, rank, shuffle=shuffle)
        shuffle = False
        batch_size = samples_per_gpu
        num_workers = workers_per_gpu
    else:
        sampler = None
        batch_size = num_gpus * samples_per_gpu
        num_workers = num_gpus * workers_per_gpu

    init_fn = partial(
        worker_init_fn, num_workers=num_workers, rank=rank,
        seed=seed) if seed is not None else None

    assert dataloader_type in (
        'DataLoader',
        'PoolDataLoader'), f'unsupported dataloader {dataloader_type}'

    if dataloader_type == 'PoolDataLoader':
        dataloader = PoolDataLoader
    elif dataloader_type == 'DataLoader':
        dataloader = DataLoader

    # debug output of the effective shuffle flag
    print("shuffle")
    print(shuffle)
    data_loader = dataloader(
        dataset,
        batch_size=batch_size,
        sampler=sampler,
        num_workers=num_workers,
        collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
        pin_memory=pin_memory,
        shuffle=shuffle,
        worker_init_fn=init_fn,
        drop_last=drop_last,
        **kwargs)

    return data_loader


def worker_init_fn(worker_id, num_workers, rank, seed):
    """Worker init func for dataloader.

    The seed of each worker equals to num_worker * rank + worker_id + user_seed

    Args:
        worker_id (int): Worker id.
        num_workers (int): Number of workers.
        rank (int): The rank of current process.
        seed (int): The random seed to use.
    """

    worker_seed = num_workers * rank + worker_id + seed
    np.random.seed(worker_seed)
    random.seed(worker_seed)
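
The registry pattern above means a dataset is declared purely as config and looked up by name. A minimal sketch of how these two builders compose; the paths and pipeline are illustrative assumptions, not taken from this commit:

# Hypothetical usage: build an ADE20K dataset and a plain DataLoader.
cfg = dict(
    type='ADE20KDataset',
    img_dir='data/ade/images/training',        # hypothetical path
    ann_dir='data/ade/annotations/training',   # hypothetical path
    pipeline=[dict(type='LoadImageFromFile'), dict(type='LoadAnnotations')])
dataset = build_dataset(cfg)
loader = build_dataloader(
    dataset, samples_per_gpu=2, workers_per_gpu=2,
    dist=False, dataloader_type='DataLoader')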
lavis/common/annotator/uniformer/mmseg/datasets/chase_db1.py (new file, mode 100644)
import os.path as osp

from .builder import DATASETS
from .custom import CustomDataset


@DATASETS.register_module()
class ChaseDB1Dataset(CustomDataset):
    """Chase_db1 dataset.

    In segmentation map annotation for Chase_db1, 0 stands for background,
    which is included in 2 categories. ``reduce_zero_label`` is fixed to False.
    The ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to
    '_1stHO.png'.
    """

    CLASSES = ('background', 'vessel')

    PALETTE = [[120, 120, 120], [6, 230, 230]]

    def __init__(self, **kwargs):
        super(ChaseDB1Dataset, self).__init__(
            img_suffix='.png',
            seg_map_suffix='_1stHO.png',
            reduce_zero_label=False,
            **kwargs)
        assert osp.exists(self.img_dir)
lavis/common/annotator/uniformer/mmseg/datasets/cityscapes.py (new file, mode 100644)
import os.path as osp
import tempfile

import annotator.uniformer.mmcv as mmcv
import numpy as np
from annotator.uniformer.mmcv.utils import print_log
from PIL import Image

from .builder import DATASETS
from .custom import CustomDataset


@DATASETS.register_module()
class CityscapesDataset(CustomDataset):
    """Cityscapes dataset.

    The ``img_suffix`` is fixed to '_leftImg8bit.png' and ``seg_map_suffix``
    is fixed to '_gtFine_labelTrainIds.png' for Cityscapes dataset.
    """

    CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
               'traffic light', 'traffic sign', 'vegetation', 'terrain',
               'sky', 'person', 'rider', 'car', 'truck', 'bus', 'train',
               'motorcycle', 'bicycle')

    PALETTE = [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156],
               [190, 153, 153], [153, 153, 153], [250, 170, 30],
               [220, 220, 0], [107, 142, 35], [152, 251, 152], [70, 130, 180],
               [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70],
               [0, 60, 100], [0, 80, 100], [0, 0, 230], [119, 11, 32]]

    def __init__(self, **kwargs):
        super(CityscapesDataset, self).__init__(
            img_suffix='_leftImg8bit.png',
            seg_map_suffix='_gtFine_labelTrainIds.png',
            **kwargs)

    @staticmethod
    def _convert_to_label_id(result):
        """Convert trainId to id for cityscapes."""
        if isinstance(result, str):
            result = np.load(result)
        import cityscapesscripts.helpers.labels as CSLabels
        result_copy = result.copy()
        for trainId, label in CSLabels.trainId2label.items():
            result_copy[result == trainId] = label.id

        return result_copy

    def results2img(self, results, imgfile_prefix, to_label_id):
        """Write the segmentation results to images.

        Args:
            results (list[list | tuple | ndarray]): Testing results of the
                dataset.
            imgfile_prefix (str): The filename prefix of the png files.
                If the prefix is "somepath/xxx",
                the png files will be named "somepath/xxx.png".
            to_label_id (bool): whether to convert output to label_id for
                submission

        Returns:
            list[str: str]: result txt files which contain corresponding
            semantic segmentation images.
        """
        mmcv.mkdir_or_exist(imgfile_prefix)
        result_files = []
        prog_bar = mmcv.ProgressBar(len(self))
        for idx in range(len(self)):
            result = results[idx]
            if to_label_id:
                result = self._convert_to_label_id(result)
            filename = self.img_infos[idx]['filename']
            basename = osp.splitext(osp.basename(filename))[0]

            png_filename = osp.join(imgfile_prefix, f'{basename}.png')

            output = Image.fromarray(result.astype(np.uint8)).convert('P')
            import cityscapesscripts.helpers.labels as CSLabels
            palette = np.zeros((len(CSLabels.id2label), 3), dtype=np.uint8)
            for label_id, label in CSLabels.id2label.items():
                palette[label_id] = label.color

            output.putpalette(palette)
            output.save(png_filename)
            result_files.append(png_filename)
            prog_bar.update()

        return result_files

    def format_results(self, results, imgfile_prefix=None, to_label_id=True):
        """Format the results into dir (standard format for Cityscapes
        evaluation).

        Args:
            results (list): Testing results of the dataset.
            imgfile_prefix (str | None): The prefix of images files. It
                includes the file path and the prefix of filename, e.g.,
                "a/b/prefix". If not specified, a temp file will be created.
                Default: None.
            to_label_id (bool): whether to convert output to label_id for
                submission. Default: True

        Returns:
            tuple: (result_files, tmp_dir), result_files is a list containing
                the image paths, tmp_dir is the temporary directory created
                for saving json/png files when img_prefix is not specified.
        """
        assert isinstance(results, list), 'results must be a list'
        assert len(results) == len(self), (
            'The length of results is not equal to the dataset len: '
            f'{len(results)} != {len(self)}')

        if imgfile_prefix is None:
            tmp_dir = tempfile.TemporaryDirectory()
            imgfile_prefix = tmp_dir.name
        else:
            tmp_dir = None
        result_files = self.results2img(results, imgfile_prefix, to_label_id)

        return result_files, tmp_dir

    def evaluate(self,
                 results,
                 metric='mIoU',
                 logger=None,
                 imgfile_prefix=None,
                 efficient_test=False):
        """Evaluation in Cityscapes/default protocol.

        Args:
            results (list): Testing results of the dataset.
            metric (str | list[str]): Metrics to be evaluated.
            logger (logging.Logger | None | str): Logger used for printing
                related information during evaluation. Default: None.
            imgfile_prefix (str | None): The prefix of output image file,
                for cityscapes evaluation only. It includes the file path and
                the prefix of filename, e.g., "a/b/prefix".
                If results are evaluated with cityscapes protocol, it would be
                the prefix of output png files. The output files would be
                png images under folder "a/b/prefix/xxx.png", where "xxx" is
                the image name of cityscapes. If not specified, a temp file
                will be created for evaluation.
                Default: None.

        Returns:
            dict[str, float]: Cityscapes/default metrics.
        """

        eval_results = dict()
        metrics = metric.copy() if isinstance(metric, list) else [metric]
        if 'cityscapes' in metrics:
            eval_results.update(
                self._evaluate_cityscapes(results, logger, imgfile_prefix))
            metrics.remove('cityscapes')
        if len(metrics) > 0:
            eval_results.update(
                super(CityscapesDataset,
                      self).evaluate(results, metrics, logger,
                                     efficient_test))

        return eval_results

    def _evaluate_cityscapes(self, results, logger, imgfile_prefix):
        """Evaluation in Cityscapes protocol.

        Args:
            results (list): Testing results of the dataset.
            logger (logging.Logger | str | None): Logger used for printing
                related information during evaluation. Default: None.
            imgfile_prefix (str | None): The prefix of output image file

        Returns:
            dict[str: float]: Cityscapes evaluation results.
        """
        try:
            import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as CSEval  # noqa
        except ImportError:
            raise ImportError('Please run "pip install cityscapesscripts" to '
                              'install cityscapesscripts first.')
        msg = 'Evaluating in Cityscapes style'
        if logger is None:
            msg = '\n' + msg
        print_log(msg, logger=logger)

        result_files, tmp_dir = self.format_results(results, imgfile_prefix)

        if tmp_dir is None:
            result_dir = imgfile_prefix
        else:
            result_dir = tmp_dir.name

        eval_results = dict()
        print_log(f'Evaluating results under {result_dir} ...', logger=logger)

        CSEval.args.evalInstLevelScore = True
        CSEval.args.predictionPath = osp.abspath(result_dir)
        CSEval.args.evalPixelAccuracy = True
        CSEval.args.JSONOutput = False

        seg_map_list = []
        pred_list = []

        # when evaluating with official cityscapesscripts,
        # **_gtFine_labelIds.png is used
        for seg_map in mmcv.scandir(
                self.ann_dir, 'gtFine_labelIds.png', recursive=True):
            seg_map_list.append(osp.join(self.ann_dir, seg_map))
            pred_list.append(CSEval.getPrediction(CSEval.args, seg_map))

        eval_results.update(
            CSEval.evaluateImgLists(pred_list, seg_map_list, CSEval.args))

        if tmp_dir is not None:
            tmp_dir.cleanup()

        return eval_results
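
A usage sketch for the evaluation entry point above; ``preds`` is a hypothetical list of per-image results produced by a test loop, not something defined in this commit:

# Default protocol only:
# dataset.evaluate(preds, metric='mIoU')
# Mixing in the official cityscapesscripts protocol (requires
# `pip install cityscapesscripts`, as the ImportError above notes):
# dataset.evaluate(preds, metric=['mIoU', 'cityscapes'])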
lavis/common/annotator/uniformer/mmseg/datasets/custom.py (new file, mode 100644)
import os
import os.path as osp
from collections import OrderedDict
from functools import reduce

import annotator.uniformer.mmcv as mmcv
import numpy as np
from annotator.uniformer.mmcv.utils import print_log
from prettytable import PrettyTable
from torch.utils.data import Dataset

from annotator.uniformer.mmseg.core import eval_metrics
from annotator.uniformer.mmseg.utils import get_root_logger
from .builder import DATASETS
from .pipelines import Compose


@DATASETS.register_module()
class CustomDataset(Dataset):
    """Custom dataset for semantic segmentation. An example of file structure
    is as follows.

    .. code-block:: none

        ├── data
        │   ├── my_dataset
        │   │   ├── img_dir
        │   │   │   ├── train
        │   │   │   │   ├── xxx{img_suffix}
        │   │   │   │   ├── yyy{img_suffix}
        │   │   │   │   ├── zzz{img_suffix}
        │   │   │   ├── val
        │   │   ├── ann_dir
        │   │   │   ├── train
        │   │   │   │   ├── xxx{seg_map_suffix}
        │   │   │   │   ├── yyy{seg_map_suffix}
        │   │   │   │   ├── zzz{seg_map_suffix}
        │   │   │   ├── val

    The img/gt_semantic_seg pair of CustomDataset should be the same except
    for the suffix. A valid img/gt_semantic_seg filename pair should be like
    ``xxx{img_suffix}`` and ``xxx{seg_map_suffix}`` (extension is also
    included in the suffix). If split is given, then ``xxx`` is specified in
    the txt file. Otherwise, all files in ``img_dir/`` and ``ann_dir`` will be
    loaded. Please refer to ``docs/tutorials/new_dataset.md`` for more
    details.

    Args:
        pipeline (list[dict]): Processing pipeline
        img_dir (str): Path to image directory
        img_suffix (str): Suffix of images. Default: '.jpg'
        ann_dir (str, optional): Path to annotation directory. Default: None
        seg_map_suffix (str): Suffix of segmentation maps. Default: '.png'
        split (str, optional): Split txt file. If split is specified, only
            file with suffix in the splits will be loaded. Otherwise, all
            images in img_dir/ann_dir will be loaded. Default: None
        data_root (str, optional): Data root for img_dir/ann_dir. Default:
            None.
        test_mode (bool): If test_mode=True, gt wouldn't be loaded.
        ignore_index (int): The label index to be ignored. Default: 255
        reduce_zero_label (bool): Whether to mark label zero as ignored.
            Default: False
        classes (str | Sequence[str], optional): Specify classes to load.
            If is None, ``cls.CLASSES`` will be used. Default: None.
        palette (Sequence[Sequence[int]]] | np.ndarray | None):
            The palette of segmentation map. If None is given, and
            self.PALETTE is None, random palette will be generated.
            Default: None
    """

    CLASSES = None

    PALETTE = None

    def __init__(self,
                 pipeline,
                 img_dir,
                 img_suffix='.jpg',
                 ann_dir=None,
                 seg_map_suffix='.png',
                 split=None,
                 data_root=None,
                 test_mode=False,
                 ignore_index=255,
                 reduce_zero_label=False,
                 classes=None,
                 palette=None):
        self.pipeline = Compose(pipeline)
        self.img_dir = img_dir
        self.img_suffix = img_suffix
        self.ann_dir = ann_dir
        self.seg_map_suffix = seg_map_suffix
        self.split = split
        self.data_root = data_root
        self.test_mode = test_mode
        self.ignore_index = ignore_index
        self.reduce_zero_label = reduce_zero_label
        self.label_map = None
        self.CLASSES, self.PALETTE = self.get_classes_and_palette(
            classes, palette)

        # join paths if data_root is specified
        if self.data_root is not None:
            if not osp.isabs(self.img_dir):
                self.img_dir = osp.join(self.data_root, self.img_dir)
            if not (self.ann_dir is None or osp.isabs(self.ann_dir)):
                self.ann_dir = osp.join(self.data_root, self.ann_dir)
            if not (self.split is None or osp.isabs(self.split)):
                self.split = osp.join(self.data_root, self.split)

        # load annotations
        self.img_infos = self.load_annotations(self.img_dir, self.img_suffix,
                                               self.ann_dir,
                                               self.seg_map_suffix, self.split)

    def __len__(self):
        """Total number of samples of data."""
        return len(self.img_infos)

    def load_annotations(self, img_dir, img_suffix, ann_dir, seg_map_suffix,
                         split):
        """Load annotation from directory.

        Args:
            img_dir (str): Path to image directory
            img_suffix (str): Suffix of images.
            ann_dir (str|None): Path to annotation directory.
            seg_map_suffix (str|None): Suffix of segmentation maps.
            split (str|None): Split txt file. If split is specified, only file
                with suffix in the splits will be loaded. Otherwise, all
                images in img_dir/ann_dir will be loaded. Default: None

        Returns:
            list[dict]: All image info of dataset.
        """

        img_infos = []
        if split is not None:
            with open(split) as f:
                for line in f:
                    img_name = line.strip()
                    img_info = dict(filename=img_name + img_suffix)
                    if ann_dir is not None:
                        seg_map = img_name + seg_map_suffix
                        img_info['ann'] = dict(seg_map=seg_map)
                    img_infos.append(img_info)
        else:
            for img in mmcv.scandir(img_dir, img_suffix, recursive=True):
                img_info = dict(filename=img)
                if ann_dir is not None:
                    seg_map = img.replace(img_suffix, seg_map_suffix)
                    img_info['ann'] = dict(seg_map=seg_map)
                img_infos.append(img_info)

        print_log(f'Loaded {len(img_infos)} images', logger=get_root_logger())
        return img_infos

    def get_ann_info(self, idx):
        """Get annotation by index.

        Args:
            idx (int): Index of data.

        Returns:
            dict: Annotation info of specified index.
        """

        return self.img_infos[idx]['ann']

    def pre_pipeline(self, results):
        """Prepare results dict for pipeline."""
        results['seg_fields'] = []
        results['img_prefix'] = self.img_dir
        results['seg_prefix'] = self.ann_dir
        if self.custom_classes:
            results['label_map'] = self.label_map

    def __getitem__(self, idx):
        """Get training/test data after pipeline.

        Args:
            idx (int): Index of data.

        Returns:
            dict: Training/test data (with annotation if `test_mode` is set
                False).
        """

        if self.test_mode:
            return self.prepare_test_img(idx)
        else:
            return self.prepare_train_img(idx)

    def prepare_train_img(self, idx):
        """Get training data and annotations after pipeline.

        Args:
            idx (int): Index of data.

        Returns:
            dict: Training data and annotation after pipeline with new keys
                introduced by pipeline.
        """

        img_info = self.img_infos[idx]
        ann_info = self.get_ann_info(idx)
        results = dict(img_info=img_info, ann_info=ann_info)
        self.pre_pipeline(results)
        return self.pipeline(results)

    def prepare_test_img(self, idx):
        """Get testing data after pipeline.

        Args:
            idx (int): Index of data.

        Returns:
            dict: Testing data after pipeline with new keys introduced by
                pipeline.
        """

        img_info = self.img_infos[idx]
        results = dict(img_info=img_info)
        self.pre_pipeline(results)
        return self.pipeline(results)

    def format_results(self, results, **kwargs):
        """Place holder to format result to dataset specific output."""

    def get_gt_seg_maps(self, efficient_test=False):
        """Get ground truth segmentation maps for evaluation."""
        gt_seg_maps = []
        for img_info in self.img_infos:
            seg_map = osp.join(self.ann_dir, img_info['ann']['seg_map'])
            if efficient_test:
                gt_seg_map = seg_map
            else:
                gt_seg_map = mmcv.imread(
                    seg_map, flag='unchanged', backend='pillow')
            gt_seg_maps.append(gt_seg_map)
        return gt_seg_maps

    def get_classes_and_palette(self, classes=None, palette=None):
        """Get class names of current dataset.

        Args:
            classes (Sequence[str] | str | None): If classes is None, use
                default CLASSES defined by builtin dataset. If classes is a
                string, take it as a file name. The file contains the name of
                classes where each line contains one class name. If classes is
                a tuple or list, override the CLASSES defined by the dataset.
            palette (Sequence[Sequence[int]]] | np.ndarray | None):
                The palette of segmentation map. If None is given, random
                palette will be generated. Default: None
        """
        if classes is None:
            self.custom_classes = False
            return self.CLASSES, self.PALETTE

        self.custom_classes = True
        if isinstance(classes, str):
            # take it as a file path
            class_names = mmcv.list_from_file(classes)
        elif isinstance(classes, (tuple, list)):
            class_names = classes
        else:
            raise ValueError(f'Unsupported type {type(classes)} of classes.')

        if self.CLASSES:
            if not set(classes).issubset(self.CLASSES):
                raise ValueError('classes is not a subset of CLASSES.')

            # dictionary, its keys are the old label ids and its values
            # are the new label ids.
            # used for changing pixel labels in load_annotations.
            self.label_map = {}
            for i, c in enumerate(self.CLASSES):
                if c not in class_names:
                    self.label_map[i] = -1
                else:
                    self.label_map[i] = classes.index(c)

        palette = self.get_palette_for_custom_classes(class_names, palette)

        return class_names, palette

    def get_palette_for_custom_classes(self, class_names, palette=None):

        if self.label_map is not None:
            # return subset of palette
            palette = []
            for old_id, new_id in sorted(
                    self.label_map.items(), key=lambda x: x[1]):
                if new_id != -1:
                    palette.append(self.PALETTE[old_id])
            palette = type(self.PALETTE)(palette)

        elif palette is None:
            if self.PALETTE is None:
                palette = np.random.randint(0, 255, size=(len(class_names), 3))
            else:
                palette = self.PALETTE

        return palette

    def evaluate(self,
                 results,
                 metric='mIoU',
                 logger=None,
                 efficient_test=False,
                 **kwargs):
        """Evaluate the dataset.

        Args:
            results (list): Testing results of the dataset.
            metric (str | list[str]): Metrics to be evaluated. 'mIoU',
                'mDice' and 'mFscore' are supported.
            logger (logging.Logger | None | str): Logger used for printing
                related information during evaluation. Default: None.

        Returns:
            dict[str, float]: Default metrics.
        """

        if isinstance(metric, str):
            metric = [metric]
        allowed_metrics = ['mIoU', 'mDice', 'mFscore']
        if not set(metric).issubset(set(allowed_metrics)):
            raise KeyError('metric {} is not supported'.format(metric))
        eval_results = {}
        gt_seg_maps = self.get_gt_seg_maps(efficient_test)
        if self.CLASSES is None:
            num_classes = len(
                reduce(np.union1d, [np.unique(_) for _ in gt_seg_maps]))
        else:
            num_classes = len(self.CLASSES)
        ret_metrics = eval_metrics(
            results,
            gt_seg_maps,
            num_classes,
            self.ignore_index,
            metric,
            label_map=self.label_map,
            reduce_zero_label=self.reduce_zero_label)

        if self.CLASSES is None:
            class_names = tuple(range(num_classes))
        else:
            class_names = self.CLASSES

        # summary table
        ret_metrics_summary = OrderedDict({
            ret_metric: np.round(np.nanmean(ret_metric_value) * 100, 2)
            for ret_metric, ret_metric_value in ret_metrics.items()
        })

        # each class table
        ret_metrics.pop('aAcc', None)
        ret_metrics_class = OrderedDict({
            ret_metric: np.round(ret_metric_value * 100, 2)
            for ret_metric, ret_metric_value in ret_metrics.items()
        })
        ret_metrics_class.update({'Class': class_names})
        ret_metrics_class.move_to_end('Class', last=False)

        # for logger
        class_table_data = PrettyTable()
        for key, val in ret_metrics_class.items():
            class_table_data.add_column(key, val)

        summary_table_data = PrettyTable()
        for key, val in ret_metrics_summary.items():
            if key == 'aAcc':
                summary_table_data.add_column(key, [val])
            else:
                summary_table_data.add_column('m' + key, [val])

        print_log('per class results:', logger)
        print_log('\n' + class_table_data.get_string(), logger=logger)
        print_log('Summary:', logger)
        print_log('\n' + summary_table_data.get_string(), logger=logger)

        # each metric dict
        for key, value in ret_metrics_summary.items():
            if key == 'aAcc':
                eval_results[key] = value / 100.0
            else:
                eval_results['m' + key] = value / 100.0

        ret_metrics_class.pop('Class', None)
        for key, value in ret_metrics_class.items():
            eval_results.update({
                key + '.' + str(name): value[idx] / 100.0
                for idx, name in enumerate(class_names)
            })

        if mmcv.is_list_of(results, str):
            for file_name in results:
                os.remove(file_name)
        return eval_results
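
Putting the pieces together, a CustomDataset matching the directory layout in the docstring could be declared as below. The paths are illustrative assumptions; the two pipeline stages are the ones registered later in this diff:

# Hypothetical instantiation for the docstring's layout:
dataset = CustomDataset(
    pipeline=[
        dict(type='LoadImageFromFile'),
        dict(type='LoadAnnotations'),
    ],
    img_dir='data/my_dataset/img_dir/train',   # hypothetical path
    ann_dir='data/my_dataset/ann_dir/train',   # hypothetical path
    img_suffix='.jpg',
    seg_map_suffix='.png')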
lavis/common/annotator/uniformer/mmseg/datasets/dataset_wrappers.py (new file, mode 100644)
from torch.utils.data.dataset import ConcatDataset as _ConcatDataset

from .builder import DATASETS


@DATASETS.register_module()
class ConcatDataset(_ConcatDataset):
    """A wrapper of concatenated dataset.

    Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but
    concat the group flag for image aspect ratio.

    Args:
        datasets (list[:obj:`Dataset`]): A list of datasets.
    """

    def __init__(self, datasets):
        super(ConcatDataset, self).__init__(datasets)
        self.CLASSES = datasets[0].CLASSES
        self.PALETTE = datasets[0].PALETTE


@DATASETS.register_module()
class RepeatDataset(object):
    """A wrapper of repeated dataset.

    The length of repeated dataset will be `times` larger than the original
    dataset. This is useful when the data loading time is long but the dataset
    is small. Using RepeatDataset can reduce the data loading time between
    epochs.

    Args:
        dataset (:obj:`Dataset`): The dataset to be repeated.
        times (int): Repeat times.
    """

    def __init__(self, dataset, times):
        self.dataset = dataset
        self.times = times
        self.CLASSES = dataset.CLASSES
        self.PALETTE = dataset.PALETTE
        self._ori_len = len(self.dataset)

    def __getitem__(self, idx):
        """Get item from original dataset."""
        return self.dataset[idx % self._ori_len]

    def __len__(self):
        """The length is multiplied by ``times``"""
        return self.times * self._ori_len
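
As a sketch of how the wrapper is typically used, ``build_dataset`` (in builder.py above) dispatches on ``type == 'RepeatDataset'`` and builds the inner dataset recursively. The dataset config below is a hypothetical example, not from this commit:

# Hypothetical config: repeat a small dataset 10x so epoch-based runners
# spend less time restarting dataloaders between epochs.
cfg = dict(
    type='RepeatDataset',
    times=10,
    dataset=dict(
        type='ADE20KDataset',
        img_dir='data/ade/images/training',        # hypothetical path
        ann_dir='data/ade/annotations/training',   # hypothetical path
        pipeline=[dict(type='LoadImageFromFile'),
                  dict(type='LoadAnnotations')]))
# dataset = build_dataset(cfg)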
lavis/common/annotator/uniformer/mmseg/datasets/drive.py (new file, mode 100644)
import os.path as osp

from .builder import DATASETS
from .custom import CustomDataset


@DATASETS.register_module()
class DRIVEDataset(CustomDataset):
    """DRIVE dataset.

    In segmentation map annotation for DRIVE, 0 stands for background, which
    is included in 2 categories. ``reduce_zero_label`` is fixed to False. The
    ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to
    '_manual1.png'.
    """

    CLASSES = ('background', 'vessel')

    PALETTE = [[120, 120, 120], [6, 230, 230]]

    def __init__(self, **kwargs):
        super(DRIVEDataset, self).__init__(
            img_suffix='.png',
            seg_map_suffix='_manual1.png',
            reduce_zero_label=False,
            **kwargs)
        assert osp.exists(self.img_dir)
lavis/common/annotator/uniformer/mmseg/datasets/hrf.py (new file, mode 100644)
import os.path as osp

from .builder import DATASETS
from .custom import CustomDataset


@DATASETS.register_module()
class HRFDataset(CustomDataset):
    """HRF dataset.

    In segmentation map annotation for HRF, 0 stands for background, which is
    included in 2 categories. ``reduce_zero_label`` is fixed to False. The
    ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to
    '.png'.
    """

    CLASSES = ('background', 'vessel')

    PALETTE = [[120, 120, 120], [6, 230, 230]]

    def __init__(self, **kwargs):
        super(HRFDataset, self).__init__(
            img_suffix='.png',
            seg_map_suffix='.png',
            reduce_zero_label=False,
            **kwargs)
        assert osp.exists(self.img_dir)
lavis/common/annotator/uniformer/mmseg/datasets/pascal_context.py (new file, mode 100644)
import os.path as osp

from .builder import DATASETS
from .custom import CustomDataset


@DATASETS.register_module()
class PascalContextDataset(CustomDataset):
    """PascalContext dataset.

    In segmentation map annotation for PascalContext, 0 stands for background,
    which is included in 60 categories. ``reduce_zero_label`` is fixed to
    False. The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is
    fixed to '.png'.

    Args:
        split (str): Split txt file for PascalContext.
    """

    CLASSES = ('background', 'aeroplane', 'bag', 'bed', 'bedclothes', 'bench',
               'bicycle', 'bird', 'boat', 'book', 'bottle', 'building', 'bus',
               'cabinet', 'car', 'cat', 'ceiling', 'chair', 'cloth',
               'computer', 'cow', 'cup', 'curtain', 'dog', 'door', 'fence',
               'floor', 'flower', 'food', 'grass', 'ground', 'horse',
               'keyboard', 'light', 'motorbike', 'mountain', 'mouse',
               'person', 'plate', 'platform', 'pottedplant', 'road', 'rock',
               'sheep', 'shelves', 'sidewalk', 'sign', 'sky', 'snow', 'sofa',
               'table', 'track', 'train', 'tree', 'truck', 'tvmonitor',
               'wall', 'water', 'window', 'wood')

    PALETTE = [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50],
               [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255],
               [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7],
               [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82],
               [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3],
               [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255],
               [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220],
               [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224],
               [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255],
               [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7],
               [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153],
               [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255],
               [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0],
               [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255],
               [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255]]

    def __init__(self, split, **kwargs):
        super(PascalContextDataset, self).__init__(
            img_suffix='.jpg',
            seg_map_suffix='.png',
            split=split,
            reduce_zero_label=False,
            **kwargs)
        assert osp.exists(self.img_dir) and self.split is not None


@DATASETS.register_module()
class PascalContextDataset59(CustomDataset):
    """PascalContext dataset.

    In segmentation map annotation for PascalContext, 0 stands for background,
    which is included in 60 categories. ``reduce_zero_label`` is fixed to
    True. The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is
    fixed to '.png'.

    Args:
        split (str): Split txt file for PascalContext.
    """

    CLASSES = ('aeroplane', 'bag', 'bed', 'bedclothes', 'bench', 'bicycle',
               'bird', 'boat', 'book', 'bottle', 'building', 'bus', 'cabinet',
               'car', 'cat', 'ceiling', 'chair', 'cloth', 'computer', 'cow',
               'cup', 'curtain', 'dog', 'door', 'fence', 'floor', 'flower',
               'food', 'grass', 'ground', 'horse', 'keyboard', 'light',
               'motorbike', 'mountain', 'mouse', 'person', 'plate',
               'platform', 'pottedplant', 'road', 'rock', 'sheep', 'shelves',
               'sidewalk', 'sign', 'sky', 'snow', 'sofa', 'table', 'track',
               'train', 'tree', 'truck', 'tvmonitor', 'wall', 'water',
               'window', 'wood')

    PALETTE = [[180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3],
               [120, 120, 80], [140, 140, 140], [204, 5, 255],
               [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7],
               [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82],
               [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3],
               [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255],
               [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220],
               [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224],
               [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255],
               [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7],
               [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153],
               [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255],
               [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0],
               [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255],
               [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255]]

    def __init__(self, split, **kwargs):
        super(PascalContextDataset59, self).__init__(
            img_suffix='.jpg',
            seg_map_suffix='.png',
            split=split,
            reduce_zero_label=True,
            **kwargs)
        assert osp.exists(self.img_dir) and self.split is not None
lavis/common/annotator/uniformer/mmseg/datasets/pipelines/__init__.py (new file, mode 100644)
from .compose import Compose
from .formating import (Collect, ImageToTensor, ToDataContainer, ToTensor,
                        Transpose, to_tensor)
from .loading import LoadAnnotations, LoadImageFromFile
from .test_time_aug import MultiScaleFlipAug
from .transforms import (CLAHE, AdjustGamma, Normalize, Pad,
                         PhotoMetricDistortion, RandomCrop, RandomFlip,
                         RandomRotate, Rerange, Resize, RGB2Gray, SegRescale)

__all__ = [
    'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer',
    'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile',
    'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad', 'RandomCrop',
    'Normalize', 'SegRescale', 'PhotoMetricDistortion', 'RandomRotate',
    'AdjustGamma', 'CLAHE', 'Rerange', 'RGB2Gray'
]
lavis/common/annotator/uniformer/mmseg/datasets/pipelines/compose.py (new file, mode 100644)
import collections

from annotator.uniformer.mmcv.utils import build_from_cfg

from ..builder import PIPELINES


@PIPELINES.register_module()
class Compose(object):
    """Compose multiple transforms sequentially.

    Args:
        transforms (Sequence[dict | callable]): Sequence of transform object
            or config dict to be composed.
    """

    def __init__(self, transforms):
        assert isinstance(transforms, collections.abc.Sequence)
        self.transforms = []
        for transform in transforms:
            if isinstance(transform, dict):
                transform = build_from_cfg(transform, PIPELINES)
                self.transforms.append(transform)
            elif callable(transform):
                self.transforms.append(transform)
            else:
                raise TypeError('transform must be callable or a dict')

    def __call__(self, data):
        """Call function to apply transforms sequentially.

        Args:
            data (dict): A result dict contains the data to transform.

        Returns:
            dict: Transformed data.
        """

        for t in self.transforms:
            data = t(data)
            if data is None:
                return None
        return data

    def __repr__(self):
        format_string = self.__class__.__name__ + '('
        for t in self.transforms:
            format_string += '\n'
            format_string += f'    {t}'
        format_string += '\n)'
        return format_string
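
A brief sketch of the two accepted transform styles; note the short-circuit to None in __call__ above:

# Config dicts are built through the PIPELINES registry; plain callables
# are accepted as-is:
pipeline = Compose([
    dict(type='LoadImageFromFile'),   # registered earlier in this diff
    lambda results: results,          # any callable passes through unchanged
])
# pipeline(data) applies each transform in order and returns None as soon
# as any transform returns None.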
lavis/common/annotator/uniformer/mmseg/datasets/pipelines/formating.py (new file, mode 100644)
from collections.abc import Sequence

import annotator.uniformer.mmcv as mmcv
import numpy as np
import torch
from annotator.uniformer.mmcv.parallel import DataContainer as DC

from ..builder import PIPELINES


def to_tensor(data):
    """Convert objects of various python types to :obj:`torch.Tensor`.

    Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
    :class:`Sequence`, :class:`int` and :class:`float`.

    Args:
        data (torch.Tensor | numpy.ndarray | Sequence | int | float): Data to
            be converted.
    """

    if isinstance(data, torch.Tensor):
        return data
    elif isinstance(data, np.ndarray):
        return torch.from_numpy(data)
    elif isinstance(data, Sequence) and not mmcv.is_str(data):
        return torch.tensor(data)
    elif isinstance(data, int):
        return torch.LongTensor([data])
    elif isinstance(data, float):
        return torch.FloatTensor([data])
    else:
        raise TypeError(f'type {type(data)} cannot be converted to tensor.')


@PIPELINES.register_module()
class ToTensor(object):
    """Convert some results to :obj:`torch.Tensor` by given keys.

    Args:
        keys (Sequence[str]): Keys that need to be converted to Tensor.
    """

    def __init__(self, keys):
        self.keys = keys

    def __call__(self, results):
        """Call function to convert data in results to :obj:`torch.Tensor`.

        Args:
            results (dict): Result dict contains the data to convert.

        Returns:
            dict: The result dict contains the data converted
                to :obj:`torch.Tensor`.
        """

        for key in self.keys:
            results[key] = to_tensor(results[key])
        return results

    def __repr__(self):
        return self.__class__.__name__ + f'(keys={self.keys})'


@PIPELINES.register_module()
class ImageToTensor(object):
    """Convert image to :obj:`torch.Tensor` by given keys.

    The dimension order of input image is (H, W, C). The pipeline will convert
    it to (C, H, W). If only 2 dimension (H, W) is given, the output would be
    (1, H, W).

    Args:
        keys (Sequence[str]): Key of images to be converted to Tensor.
    """

    def __init__(self, keys):
        self.keys = keys

    def __call__(self, results):
        """Call function to convert image in results to :obj:`torch.Tensor`
        and transpose the channel order.

        Args:
            results (dict): Result dict contains the image data to convert.

        Returns:
            dict: The result dict contains the image converted
                to :obj:`torch.Tensor` and transposed to (C, H, W) order.
        """

        for key in self.keys:
            img = results[key]
            if len(img.shape) < 3:
                img = np.expand_dims(img, -1)
            results[key] = to_tensor(img.transpose(2, 0, 1))
        return results

    def __repr__(self):
        return self.__class__.__name__ + f'(keys={self.keys})'


@PIPELINES.register_module()
class Transpose(object):
    """Transpose some results by given keys.

    Args:
        keys (Sequence[str]): Keys of results to be transposed.
        order (Sequence[int]): Order of transpose.
    """

    def __init__(self, keys, order):
        self.keys = keys
        self.order = order

    def __call__(self, results):
        """Call function to transpose the data in results by the given order.

        Args:
            results (dict): Result dict contains the data to transpose.

        Returns:
            dict: The result dict contains the data transposed to
                ``self.order``.
        """

        for key in self.keys:
            results[key] = results[key].transpose(self.order)
        return results

    def __repr__(self):
        return self.__class__.__name__ + \
            f'(keys={self.keys}, order={self.order})'


@PIPELINES.register_module()
class ToDataContainer(object):
    """Convert results to :obj:`mmcv.DataContainer` by given fields.

    Args:
        fields (Sequence[dict]): Each field is a dict like
            ``dict(key='xxx', **kwargs)``. The ``key`` in result will
            be converted to :obj:`mmcv.DataContainer` with ``**kwargs``.
            Default: ``(dict(key='img', stack=True),
            dict(key='gt_semantic_seg'))``.
    """

    def __init__(self,
                 fields=(dict(key='img',
                              stack=True), dict(key='gt_semantic_seg'))):
        self.fields = fields

    def __call__(self, results):
        """Call function to convert data in results to
        :obj:`mmcv.DataContainer`.

        Args:
            results (dict): Result dict contains the data to convert.

        Returns:
            dict: The result dict contains the data converted to
                :obj:`mmcv.DataContainer`.
        """

        for field in self.fields:
            field = field.copy()
            key = field.pop('key')
            results[key] = DC(results[key], **field)
        return results

    def __repr__(self):
        return self.__class__.__name__ + f'(fields={self.fields})'


@PIPELINES.register_module()
class DefaultFormatBundle(object):
    """Default formatting bundle.

    It simplifies the pipeline of formatting common fields, including "img"
    and "gt_semantic_seg". These fields are formatted as follows.

    - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True)
    - gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor,
        (3)to DataContainer (stack=True)
    """

    def __call__(self, results):
        """Call function to transform and format common fields in results.

        Args:
            results (dict): Result dict contains the data to convert.

        Returns:
            dict: The result dict contains the data that is formatted with
                default bundle.
        """

        if 'img' in results:
            img = results['img']
            if len(img.shape) < 3:
                img = np.expand_dims(img, -1)
            img = np.ascontiguousarray(img.transpose(2, 0, 1))
            results['img'] = DC(to_tensor(img), stack=True)
        if 'gt_semantic_seg' in results:
            # convert to long
            results['gt_semantic_seg'] = DC(
                to_tensor(results['gt_semantic_seg'][None,
                                                     ...].astype(np.int64)),
                stack=True)
        return results

    def __repr__(self):
        return self.__class__.__name__


@PIPELINES.register_module()
class Collect(object):
    """Collect data from the loader relevant to the specific task.

    This is usually the last stage of the data loader pipeline. Typically keys
    is set to some subset of "img", "gt_semantic_seg".

    The "img_meta" item is always populated. The contents of the "img_meta"
    dictionary depends on "meta_keys". By default this includes:

    - "img_shape": shape of the image input to the network as a tuple
        (h, w, c). Note that images may be zero padded on the bottom/right
        if the batch tensor is larger than this shape.
    - "scale_factor": a float indicating the preprocessing scale
    - "flip": a boolean indicating if image flip transform was used
    - "filename": path to the image file
    - "ori_shape": original shape of the image as a tuple (h, w, c)
    - "pad_shape": image shape after padding
    - "img_norm_cfg": a dict of normalization information:
        - mean - per channel mean subtraction
        - std - per channel std divisor
        - to_rgb - bool indicating if bgr was converted to rgb

    Args:
        keys (Sequence[str]): Keys of results to be collected in ``data``.
        meta_keys (Sequence[str], optional): Meta keys to be converted to
            ``mmcv.DataContainer`` and collected in ``data[img_metas]``.
            Default: ``('filename', 'ori_filename', 'ori_shape', 'img_shape',
            'pad_shape', 'scale_factor', 'flip', 'flip_direction',
            'img_norm_cfg')``
    """

    def __init__(self,
                 keys,
                 meta_keys=('filename', 'ori_filename', 'ori_shape',
                            'img_shape', 'pad_shape', 'scale_factor', 'flip',
                            'flip_direction', 'img_norm_cfg')):
        self.keys = keys
        self.meta_keys = meta_keys

    def __call__(self, results):
        """Call function to collect keys in results. The keys in ``meta_keys``
        will be converted to :obj:mmcv.DataContainer.

        Args:
            results (dict): Result dict contains the data to collect.

        Returns:
            dict: The result dict contains the following keys

                - keys in ``self.keys``
                - ``img_metas``
        """

        data = {}
        img_meta = {}
        for key in self.meta_keys:
            img_meta[key] = results[key]
        data['img_metas'] = DC(img_meta, cpu_only=True)
        for key in self.keys:
            data[key] = results[key]
        return data

    def __repr__(self):
        return self.__class__.__name__ + \
            f'(keys={self.keys}, meta_keys={self.meta_keys})'
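
A quick sketch of the dispatch in ``to_tensor`` above (results shown as comments; they follow directly from the branches in the function):

# to_tensor(np.zeros((2, 2)))   -> torch.Tensor of shape (2, 2), via from_numpy
# to_tensor([1, 2, 3])          -> tensor([1, 2, 3]), via torch.tensor
# to_tensor(3)                  -> tensor([3]), a LongTensor
# to_tensor(0.5)                -> tensor([0.5000]), a FloatTensor
# to_tensor('abc')              -> raises TypeError (str is a Sequence but
#                                  is explicitly excluded by mmcv.is_str)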
lavis/common/annotator/uniformer/mmseg/datasets/pipelines/loading.py (new file, mode 100644)
import os.path as osp

import annotator.uniformer.mmcv as mmcv
import numpy as np

from ..builder import PIPELINES


@PIPELINES.register_module()
class LoadImageFromFile(object):
    """Load an image from file.

    Required keys are "img_prefix" and "img_info" (a dict that must contain
    the key "filename"). Added or updated keys are "filename", "img",
    "img_shape", "ori_shape" (same as `img_shape`), "pad_shape" (same as
    `img_shape`), "scale_factor" (1.0) and "img_norm_cfg" (means=0 and
    stds=1).

    Args:
        to_float32 (bool): Whether to convert the loaded image to a float32
            numpy array. If set to False, the loaded image is a uint8 array.
            Defaults to False.
        color_type (str): The flag argument for :func:`mmcv.imfrombytes`.
            Defaults to 'color'.
        file_client_args (dict): Arguments to instantiate a FileClient.
            See :class:`mmcv.fileio.FileClient` for details.
            Defaults to ``dict(backend='disk')``.
        imdecode_backend (str): Backend for :func:`mmcv.imdecode`. Default:
            'cv2'
    """

    def __init__(self,
                 to_float32=False,
                 color_type='color',
                 file_client_args=dict(backend='disk'),
                 imdecode_backend='cv2'):
        self.to_float32 = to_float32
        self.color_type = color_type
        self.file_client_args = file_client_args.copy()
        self.file_client = None
        self.imdecode_backend = imdecode_backend

    def __call__(self, results):
        """Call functions to load image and get image meta information.

        Args:
            results (dict): Result dict from :obj:`mmseg.CustomDataset`.

        Returns:
            dict: The dict contains loaded image and meta information.
        """
        if self.file_client is None:
            self.file_client = mmcv.FileClient(**self.file_client_args)

        if results.get('img_prefix') is not None:
            filename = osp.join(results['img_prefix'],
                                results['img_info']['filename'])
        else:
            filename = results['img_info']['filename']
        img_bytes = self.file_client.get(filename)
        img = mmcv.imfrombytes(
            img_bytes, flag=self.color_type, backend=self.imdecode_backend)
        if self.to_float32:
            img = img.astype(np.float32)

        results['filename'] = filename
        results['ori_filename'] = results['img_info']['filename']
        results['img'] = img
        results['img_shape'] = img.shape
        results['ori_shape'] = img.shape
        # Set initial values for default meta_keys
        results['pad_shape'] = img.shape
        results['scale_factor'] = 1.0
        num_channels = 1 if len(img.shape) < 3 else img.shape[2]
        results['img_norm_cfg'] = dict(
            mean=np.zeros(num_channels, dtype=np.float32),
            std=np.ones(num_channels, dtype=np.float32),
            to_rgb=False)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(to_float32={self.to_float32}, '
        repr_str += f"color_type='{self.color_type}', "
        repr_str += f"imdecode_backend='{self.imdecode_backend}')"
        return repr_str
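For orientation, here is a minimal, hedged usage sketch of LoadImageFromFile on a hypothetical results dict; the 'data/images' prefix and 'demo.png' filename are illustrative only.

# Hypothetical input dict; the prefix and filename are made up for
# illustration.
loader = LoadImageFromFile(to_float32=True)
results = dict(img_prefix='data/images', img_info=dict(filename='demo.png'))
results = loader(results)
# results now holds 'img' (a float32 HWC array) plus 'img_shape',
# 'ori_shape', 'pad_shape', 'scale_factor' and a neutral 'img_norm_cfg'.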
@PIPELINES.register_module()
class LoadAnnotations(object):
    """Load annotations for semantic segmentation.

    Args:
        reduce_zero_label (bool): Whether to reduce all label values by 1.
            Usually used for datasets where 0 is the background label.
            Default: False.
        file_client_args (dict): Arguments to instantiate a FileClient.
            See :class:`mmcv.fileio.FileClient` for details.
            Defaults to ``dict(backend='disk')``.
        imdecode_backend (str): Backend for :func:`mmcv.imdecode`. Default:
            'pillow'
    """

    def __init__(self,
                 reduce_zero_label=False,
                 file_client_args=dict(backend='disk'),
                 imdecode_backend='pillow'):
        self.reduce_zero_label = reduce_zero_label
        self.file_client_args = file_client_args.copy()
        self.file_client = None
        self.imdecode_backend = imdecode_backend

    def __call__(self, results):
        """Call function to load multiple types of annotations.

        Args:
            results (dict): Result dict from :obj:`mmseg.CustomDataset`.

        Returns:
            dict: The dict contains loaded semantic segmentation annotations.
        """
        if self.file_client is None:
            self.file_client = mmcv.FileClient(**self.file_client_args)

        if results.get('seg_prefix', None) is not None:
            filename = osp.join(results['seg_prefix'],
                                results['ann_info']['seg_map'])
        else:
            filename = results['ann_info']['seg_map']
        img_bytes = self.file_client.get(filename)
        gt_semantic_seg = mmcv.imfrombytes(
            img_bytes, flag='unchanged',
            backend=self.imdecode_backend).squeeze().astype(np.uint8)
        # modify if custom classes
        if results.get('label_map', None) is not None:
            for old_id, new_id in results['label_map'].items():
                gt_semantic_seg[gt_semantic_seg == old_id] = new_id
        # reduce zero_label
        if self.reduce_zero_label:
            # avoid using underflow conversion
            gt_semantic_seg[gt_semantic_seg == 0] = 255
            gt_semantic_seg = gt_semantic_seg - 1
            gt_semantic_seg[gt_semantic_seg == 254] = 255
        results['gt_semantic_seg'] = gt_semantic_seg
        results['seg_fields'].append('gt_semantic_seg')
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(reduce_zero_label={self.reduce_zero_label}, '
        repr_str += f"imdecode_backend='{self.imdecode_backend}')"
        return repr_str
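The reduce_zero_label branch above is easy to misread, so here is a hedged worked example of the remapping it performs: 0 becomes the ignore value 255, and every other label shifts down by one. The sample values are made up.

import numpy as np

# Illustrative label array only.
seg = np.array([0, 1, 2, 255], dtype=np.uint8)
seg[seg == 0] = 255          # background -> ignore
seg = seg - 1                # shift remaining labels down by one
seg[seg == 254] = 255        # 255 underflowed to 254; restore ignore value
print(seg)                   # [255   0   1 255]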
lavis/common/annotator/uniformer/mmseg/datasets/pipelines/test_time_aug.py
0 → 100644
View file @
c04f261a
import warnings

import annotator.uniformer.mmcv as mmcv

from ..builder import PIPELINES
from .compose import Compose


@PIPELINES.register_module()
class MultiScaleFlipAug(object):
    """Test-time augmentation with multiple scales and flipping.

    An example configuration is as follows:

    .. code-block::

        img_scale=(2048, 1024),
        img_ratios=[0.5, 1.0],
        flip=True,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ]

    After MultiScaleFlipAug with the above configuration, the results are
    wrapped into lists of the same length, as follows:

    .. code-block::

        dict(
            img=[...],
            img_shape=[...],
            scale=[(1024, 512), (1024, 512), (2048, 1024), (2048, 1024)],
            flip=[False, True, False, True],
            ...
        )

    Args:
        transforms (list[dict]): Transforms to apply in each augmentation.
        img_scale (None | tuple | list[tuple]): Image scales for resizing.
        img_ratios (float | list[float]): Image ratios for resizing.
        flip (bool): Whether to apply flip augmentation. Default: False.
        flip_direction (str | list[str]): Flip augmentation directions,
            options are "horizontal" and "vertical". If flip_direction is a
            list, multiple flip augmentations will be applied.
            It has no effect when flip == False. Default: "horizontal".
    """
    def __init__(self,
                 transforms,
                 img_scale,
                 img_ratios=None,
                 flip=False,
                 flip_direction='horizontal'):
        self.transforms = Compose(transforms)
        if img_ratios is not None:
            img_ratios = img_ratios if isinstance(img_ratios,
                                                  list) else [img_ratios]
            assert mmcv.is_list_of(img_ratios, float)
        if img_scale is None:
            # mode 1: given img_scale=None and a range of image ratio
            self.img_scale = None
            assert mmcv.is_list_of(img_ratios, float)
        elif isinstance(img_scale, tuple) and mmcv.is_list_of(
                img_ratios, float):
            assert len(img_scale) == 2
            # mode 2: given a scale and a range of image ratio
            self.img_scale = [(int(img_scale[0] * ratio),
                               int(img_scale[1] * ratio))
                              for ratio in img_ratios]
        else:
            # mode 3: given multiple scales
            self.img_scale = img_scale if isinstance(img_scale,
                                                     list) else [img_scale]
        assert mmcv.is_list_of(self.img_scale, tuple) or self.img_scale is None
        self.flip = flip
        self.img_ratios = img_ratios
        self.flip_direction = flip_direction if isinstance(
            flip_direction, list) else [flip_direction]
        assert mmcv.is_list_of(self.flip_direction, str)
        if not self.flip and self.flip_direction != ['horizontal']:
            warnings.warn(
                'flip_direction has no effect when flip is set to False')
        if (self.flip
                and not any([t['type'] == 'RandomFlip' for t in transforms])):
            warnings.warn(
                'flip has no effect when RandomFlip is not in transforms')
    def __call__(self, results):
        """Call function to apply test time augment transforms on results.

        Args:
            results (dict): Result dict contains the data to transform.

        Returns:
            dict[str: list]: The augmented data, where each value is wrapped
                into a list.
        """
        aug_data = []
        if self.img_scale is None and mmcv.is_list_of(self.img_ratios, float):
            h, w = results['img'].shape[:2]
            img_scale = [(int(w * ratio), int(h * ratio))
                         for ratio in self.img_ratios]
        else:
            img_scale = self.img_scale
        flip_aug = [False, True] if self.flip else [False]
        for scale in img_scale:
            for flip in flip_aug:
                for direction in self.flip_direction:
                    _results = results.copy()
                    _results['scale'] = scale
                    _results['flip'] = flip
                    _results['flip_direction'] = direction
                    data = self.transforms(_results)
                    aug_data.append(data)
        # list of dict to dict of list
        aug_data_dict = {key: [] for key in aug_data[0]}
        for data in aug_data:
            for key, val in data.items():
                aug_data_dict[key].append(val)
        return aug_data_dict
    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(transforms={self.transforms}, '
        repr_str += f'img_scale={self.img_scale}, flip={self.flip}, '
        repr_str += f'flip_direction={self.flip_direction})'
        return repr_str
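To make the scale modes concrete, here is a hedged sketch of a typical test-time config in mode 2 (one base scale times several ratios); the values mirror the docstring example rather than any setting shipped with this commit.

# Assumed test-time config (mode 2).
tta = dict(
    type='MultiScaleFlipAug',
    img_scale=(2048, 1024),
    img_ratios=[0.5, 1.0],
    flip=True,
    transforms=[
        dict(type='Resize', keep_ratio=True),
        dict(type='RandomFlip'),
    ])
# This yields 2 scales x {no flip, flip} = 4 augmented copies per image.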
lavis/common/annotator/uniformer/mmseg/datasets/pipelines/transforms.py
0 → 100644
View file @
c04f261a
import annotator.uniformer.mmcv as mmcv
import numpy as np
from annotator.uniformer.mmcv.utils import deprecated_api_warning, is_tuple_of
from numpy import random

from ..builder import PIPELINES


@PIPELINES.register_module()
class Resize(object):
    """Resize images & seg.

    This transform resizes the input image to some scale. If the input dict
    contains the key "scale", then the scale in the input dict is used,
    otherwise the specified scale in the init method is used.

    ``img_scale`` can be None, a tuple (single-scale) or a list of tuples
    (multi-scale). There are 4 multiscale modes:

    - ``ratio_range is not None``:

        1. When img_scale is None, img_scale is the shape of the image in
           results (img_scale = results['img'].shape[:2]) and the image is
           resized based on the original size. (mode 1)
        2. When img_scale is a tuple (single-scale), randomly sample a ratio
           from the ratio range and multiply it with the image scale. (mode 2)

    - ``ratio_range is None and multiscale_mode == "range"``: randomly sample
      a scale from a range. (mode 3)
    - ``ratio_range is None and multiscale_mode == "value"``: randomly sample
      a scale from multiple scales. (mode 4)

    Args:
        img_scale (tuple or list[tuple]): Image scales for resizing.
        multiscale_mode (str): Either "range" or "value".
        ratio_range (tuple[float]): (min_ratio, max_ratio)
        keep_ratio (bool): Whether to keep the aspect ratio when resizing
            the image.
    """
    def __init__(self,
                 img_scale=None,
                 multiscale_mode='range',
                 ratio_range=None,
                 keep_ratio=True):
        if img_scale is None:
            self.img_scale = None
        else:
            if isinstance(img_scale, list):
                self.img_scale = img_scale
            else:
                self.img_scale = [img_scale]
            assert mmcv.is_list_of(self.img_scale, tuple)

        if ratio_range is not None:
            # mode 1: given img_scale=None and a range of image ratio
            # mode 2: given a scale and a range of image ratio
            assert self.img_scale is None or len(self.img_scale) == 1
        else:
            # mode 3 and 4: given multiple scales or a range of scales
            assert multiscale_mode in ['value', 'range']

        self.multiscale_mode = multiscale_mode
        self.ratio_range = ratio_range
        self.keep_ratio = keep_ratio
    @staticmethod
    def random_select(img_scales):
        """Randomly select an img_scale from given candidates.

        Args:
            img_scales (list[tuple]): Image scales for selection.

        Returns:
            (tuple, int): Returns a tuple ``(img_scale, scale_idx)``,
                where ``img_scale`` is the selected image scale and
                ``scale_idx`` is the selected index in the given candidates.
        """

        assert mmcv.is_list_of(img_scales, tuple)
        scale_idx = np.random.randint(len(img_scales))
        img_scale = img_scales[scale_idx]
        return img_scale, scale_idx
    @staticmethod
    def random_sample(img_scales):
        """Randomly sample an img_scale when ``multiscale_mode=='range'``.

        Args:
            img_scales (list[tuple]): Image scale range for sampling.
                There must be two tuples in img_scales, which specify the
                lower and upper bound of image scales.

        Returns:
            (tuple, None): Returns a tuple ``(img_scale, None)``, where
                ``img_scale`` is the sampled scale and None is just a
                placeholder to be consistent with :func:`random_select`.
        """

        assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2
        img_scale_long = [max(s) for s in img_scales]
        img_scale_short = [min(s) for s in img_scales]
        long_edge = np.random.randint(
            min(img_scale_long),
            max(img_scale_long) + 1)
        short_edge = np.random.randint(
            min(img_scale_short),
            max(img_scale_short) + 1)
        img_scale = (long_edge, short_edge)
        return img_scale, None
    @staticmethod
    def random_sample_ratio(img_scale, ratio_range):
        """Randomly sample an img_scale when ``ratio_range`` is specified.

        A ratio will be randomly sampled from the range specified by
        ``ratio_range``. It is then multiplied with ``img_scale`` to
        generate the sampled scale.

        Args:
            img_scale (tuple): Image scale base to multiply with the ratio.
            ratio_range (tuple[float]): The minimum and maximum ratio to
                scale the ``img_scale``.

        Returns:
            (tuple, None): Returns a tuple ``(scale, None)``, where
                ``scale`` is the sampled ratio multiplied with ``img_scale``
                and None is just a placeholder to be consistent with
                :func:`random_select`.
        """

        assert isinstance(img_scale, tuple) and len(img_scale) == 2
        min_ratio, max_ratio = ratio_range
        assert min_ratio <= max_ratio
        ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio
        scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio)
        return scale, None
    def _random_scale(self, results):
        """Randomly sample an img_scale according to ``ratio_range`` and
        ``multiscale_mode``.

        If ``ratio_range`` is specified, a ratio will be sampled and
        multiplied with ``img_scale``.
        If multiple scales are specified by ``img_scale``, a scale will be
        sampled according to ``multiscale_mode``.
        Otherwise, a single scale will be used.

        Args:
            results (dict): Result dict from :obj:`dataset`.

        Returns:
            dict: Two new keys ``scale`` and ``scale_idx`` are added into
                ``results``, which would be used by subsequent pipelines.
        """

        if self.ratio_range is not None:
            if self.img_scale is None:
                h, w = results['img'].shape[:2]
                scale, scale_idx = self.random_sample_ratio((w, h),
                                                            self.ratio_range)
            else:
                scale, scale_idx = self.random_sample_ratio(
                    self.img_scale[0], self.ratio_range)
        elif len(self.img_scale) == 1:
            scale, scale_idx = self.img_scale[0], 0
        elif self.multiscale_mode == 'range':
            scale, scale_idx = self.random_sample(self.img_scale)
        elif self.multiscale_mode == 'value':
            scale, scale_idx = self.random_select(self.img_scale)
        else:
            raise NotImplementedError

        results['scale'] = scale
        results['scale_idx'] = scale_idx
    def _resize_img(self, results):
        """Resize images with ``results['scale']``."""
        if self.keep_ratio:
            img, scale_factor = mmcv.imrescale(
                results['img'], results['scale'], return_scale=True)
            # the w_scale and h_scale have a minor difference;
            # a real fix should be done in mmcv.imrescale in the future
            new_h, new_w = img.shape[:2]
            h, w = results['img'].shape[:2]
            w_scale = new_w / w
            h_scale = new_h / h
        else:
            img, w_scale, h_scale = mmcv.imresize(
                results['img'], results['scale'], return_scale=True)
        scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
                                dtype=np.float32)
        results['img'] = img
        results['img_shape'] = img.shape
        results['pad_shape'] = img.shape  # in case that there is no padding
        results['scale_factor'] = scale_factor
        results['keep_ratio'] = self.keep_ratio
    def _resize_seg(self, results):
        """Resize semantic segmentation map with ``results['scale']``."""
        for key in results.get('seg_fields', []):
            if self.keep_ratio:
                gt_seg = mmcv.imrescale(
                    results[key], results['scale'], interpolation='nearest')
            else:
                gt_seg = mmcv.imresize(
                    results[key], results['scale'], interpolation='nearest')
            results[key] = gt_seg
    def __call__(self, results):
        """Call function to resize images and semantic segmentation maps.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Resized results; 'img_shape', 'pad_shape', 'scale_factor'
                and 'keep_ratio' keys are added into the result dict.
        """
        if 'scale' not in results:
            self._random_scale(results)
        self._resize_img(results)
        self._resize_seg(results)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += (f'(img_scale={self.img_scale}, '
                     f'multiscale_mode={self.multiscale_mode}, '
                     f'ratio_range={self.ratio_range}, '
                     f'keep_ratio={self.keep_ratio})')
        return repr_str
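As a quick, hedged illustration of mode 2 above (one base scale plus a ratio range), the sketch below shows what a Resize transform does to the results dict; the concrete numbers are illustrative only.

# Assumed usage, typical of an mmseg train config.
resize = Resize(img_scale=(2048, 512), ratio_range=(0.5, 2.0))
# For a results dict with results['img'] of shape (H, W, 3) and no 'scale'
# key, _random_scale() draws a ratio r in [0.5, 2.0] and sets
# results['scale'] = (int(2048 * r), int(512 * r)) before rescaling.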
@PIPELINES.register_module()
class RandomFlip(object):
    """Flip the image & seg.

    If the input dict contains the key "flip", then the flag will be used,
    otherwise it will be randomly decided by a ratio specified in the init
    method.

    Args:
        prob (float, optional): The flipping probability. Default: None.
        direction (str, optional): The flipping direction. Options are
            'horizontal' and 'vertical'. Default: 'horizontal'.
    """

    @deprecated_api_warning({'flip_ratio': 'prob'}, cls_name='RandomFlip')
    def __init__(self, prob=None, direction='horizontal'):
        self.prob = prob
        self.direction = direction
        if prob is not None:
            assert prob >= 0 and prob <= 1
        assert direction in ['horizontal', 'vertical']
    def __call__(self, results):
        """Call function to flip the image and semantic segmentation maps.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Flipped results; 'flip' and 'flip_direction' keys are
                added into the result dict.
        """
        if 'flip' not in results:
            flip = True if np.random.rand() < self.prob else False
            results['flip'] = flip
        if 'flip_direction' not in results:
            results['flip_direction'] = self.direction
        if results['flip']:
            # flip image
            results['img'] = mmcv.imflip(
                results['img'], direction=results['flip_direction'])

            # flip segs
            for key in results.get('seg_fields', []):
                # use copy() to make numpy stride positive
                results[key] = mmcv.imflip(
                    results[key], direction=results['flip_direction']).copy()
        return results

    def __repr__(self):
        return self.__class__.__name__ + f'(prob={self.prob})'
@PIPELINES.register_module()
class Pad(object):
    """Pad the image & mask.

    There are two padding modes: (1) pad to a fixed size and (2) pad to the
    minimum size that is divisible by some number.
    Added keys are "pad_shape", "pad_fixed_size" and "pad_size_divisor".

    Args:
        size (tuple, optional): Fixed padding size.
        size_divisor (int, optional): The divisor of padded size.
        pad_val (float, optional): Padding value. Default: 0.
        seg_pad_val (float, optional): Padding value of segmentation map.
            Default: 255.
    """

    def __init__(self,
                 size=None,
                 size_divisor=None,
                 pad_val=0,
                 seg_pad_val=255):
        self.size = size
        self.size_divisor = size_divisor
        self.pad_val = pad_val
        self.seg_pad_val = seg_pad_val
        # only one of size and size_divisor should be valid
        assert size is not None or size_divisor is not None
        assert size is None or size_divisor is None
    def _pad_img(self, results):
        """Pad images according to ``self.size``."""
        if self.size is not None:
            padded_img = mmcv.impad(
                results['img'], shape=self.size, pad_val=self.pad_val)
        elif self.size_divisor is not None:
            padded_img = mmcv.impad_to_multiple(
                results['img'], self.size_divisor, pad_val=self.pad_val)
        results['img'] = padded_img
        results['pad_shape'] = padded_img.shape
        results['pad_fixed_size'] = self.size
        results['pad_size_divisor'] = self.size_divisor

    def _pad_seg(self, results):
        """Pad masks according to ``results['pad_shape']``."""
        for key in results.get('seg_fields', []):
            results[key] = mmcv.impad(
                results[key],
                shape=results['pad_shape'][:2],
                pad_val=self.seg_pad_val)
    def __call__(self, results):
        """Call function to pad images, masks, semantic segmentation maps.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Updated result dict.
        """
        self._pad_img(results)
        self._pad_seg(results)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(size={self.size}, size_divisor={self.size_divisor}, ' \
                    f'pad_val={self.pad_val})'
        return repr_str
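A brief, hedged arithmetic example of the size_divisor mode: with size_divisor=32, a 513x769 image is padded up to the next multiples of 32. The numbers are illustrative only.

import math

h, w, divisor = 513, 769, 32
pad_h = int(math.ceil(h / divisor)) * divisor  # 544
pad_w = int(math.ceil(w / divisor)) * divisor  # 800
# mmcv.impad_to_multiple pads with pad_val on the bottom/right, so the
# padded image has shape (544, 800, 3) and 'pad_shape' records it.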
@PIPELINES.register_module()
class Normalize(object):
    """Normalize the image.

    Added key is "img_norm_cfg".

    Args:
        mean (sequence): Mean values of 3 channels.
        std (sequence): Std values of 3 channels.
        to_rgb (bool): Whether to convert the image from BGR to RGB.
            Default: True.
    """

    def __init__(self, mean, std, to_rgb=True):
        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)
        self.to_rgb = to_rgb
    def __call__(self, results):
        """Call function to normalize images.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Normalized results; the 'img_norm_cfg' key is added into
                the result dict.
        """
        results['img'] = mmcv.imnormalize(results['img'], self.mean, self.std,
                                          self.to_rgb)
        results['img_norm_cfg'] = dict(
            mean=self.mean, std=self.std, to_rgb=self.to_rgb)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(mean={self.mean}, std={self.std}, to_rgb=' \
                    f'{self.to_rgb})'
        return repr_str
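For reference, a hedged sketch of how Normalize is usually configured; the ImageNet statistics below are the common mmseg defaults and are an assumption here, not values read from this commit.

# Commonly used ImageNet statistics (assumption; verify against the config).
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
normalize = Normalize(**img_norm_cfg)
# For each pixel: img = (img - mean) / std, after an optional BGR->RGB swap.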
@PIPELINES.register_module()
class Rerange(object):
    """Rerange the image pixel value.

    Args:
        min_value (float or int): Minimum value of the reranged image.
            Default: 0.
        max_value (float or int): Maximum value of the reranged image.
            Default: 255.
    """

    def __init__(self, min_value=0, max_value=255):
        assert isinstance(min_value, (float, int))
        assert isinstance(max_value, (float, int))
        assert min_value < max_value
        self.min_value = min_value
        self.max_value = max_value
    def __call__(self, results):
        """Call function to rerange images.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Reranged results.
        """
        img = results['img']
        img_min_value = np.min(img)
        img_max_value = np.max(img)

        assert img_min_value < img_max_value
        # rerange to [0, 1]
        img = (img - img_min_value) / (img_max_value - img_min_value)
        # rerange to [min_value, max_value]
        img = img * (self.max_value - self.min_value) + self.min_value
        results['img'] = img

        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(min_value={self.min_value}, max_value={self.max_value})'
        return repr_str
@PIPELINES.register_module()
class CLAHE(object):
    """Use the CLAHE method to process the image.

    See `ZUIDERVELD, K. Contrast Limited Adaptive Histogram Equalization[J].
    Graphics Gems, 1994: 474-485.` for more information.

    Args:
        clip_limit (float): Threshold for contrast limiting. Default: 40.0.
        tile_grid_size (tuple[int]): Size of grid for histogram equalization.
            The input image will be divided into equally sized rectangular
            tiles. It defines the number of tiles in row and column.
            Default: (8, 8).
    """

    def __init__(self, clip_limit=40.0, tile_grid_size=(8, 8)):
        assert isinstance(clip_limit, (float, int))
        self.clip_limit = clip_limit
        assert is_tuple_of(tile_grid_size, int)
        assert len(tile_grid_size) == 2
        self.tile_grid_size = tile_grid_size
    def __call__(self, results):
        """Call function to apply CLAHE to each image channel.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Processed results.
        """
        for i in range(results['img'].shape[2]):
            results['img'][:, :, i] = mmcv.clahe(
                np.array(results['img'][:, :, i], dtype=np.uint8),
                self.clip_limit, self.tile_grid_size)

        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(clip_limit={self.clip_limit}, ' \
                    f'tile_grid_size={self.tile_grid_size})'
        return repr_str
@PIPELINES.register_module()
class RandomCrop(object):
    """Random crop the image & seg.

    Args:
        crop_size (tuple): Expected size after cropping, (h, w).
        cat_max_ratio (float): The maximum ratio that a single category
            could occupy.
    """

    def __init__(self, crop_size, cat_max_ratio=1., ignore_index=255):
        assert crop_size[0] > 0 and crop_size[1] > 0
        self.crop_size = crop_size
        self.cat_max_ratio = cat_max_ratio
        self.ignore_index = ignore_index
    def get_crop_bbox(self, img):
        """Randomly get a crop bounding box."""
        margin_h = max(img.shape[0] - self.crop_size[0], 0)
        margin_w = max(img.shape[1] - self.crop_size[1], 0)
        offset_h = np.random.randint(0, margin_h + 1)
        offset_w = np.random.randint(0, margin_w + 1)
        crop_y1, crop_y2 = offset_h, offset_h + self.crop_size[0]
        crop_x1, crop_x2 = offset_w, offset_w + self.crop_size[1]

        return crop_y1, crop_y2, crop_x1, crop_x2

    def crop(self, img, crop_bbox):
        """Crop from ``img``."""
        crop_y1, crop_y2, crop_x1, crop_x2 = crop_bbox
        img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...]
        return img
    def __call__(self, results):
        """Call function to randomly crop images and semantic segmentation
        maps.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Randomly cropped results; the 'img_shape' key in the
                result dict is updated according to the crop size.
        """

        img = results['img']
        crop_bbox = self.get_crop_bbox(img)
        if self.cat_max_ratio < 1.:
            # Repeat 10 times
            for _ in range(10):
                seg_temp = self.crop(results['gt_semantic_seg'], crop_bbox)
                labels, cnt = np.unique(seg_temp, return_counts=True)
                cnt = cnt[labels != self.ignore_index]
                if len(cnt) > 1 and np.max(cnt) / np.sum(
                        cnt) < self.cat_max_ratio:
                    break
                crop_bbox = self.get_crop_bbox(img)

        # crop the image
        img = self.crop(img, crop_bbox)
        img_shape = img.shape
        results['img'] = img
        results['img_shape'] = img_shape

        # crop semantic seg
        for key in results.get('seg_fields', []):
            results[key] = self.crop(results[key], crop_bbox)

        return results

    def __repr__(self):
        return self.__class__.__name__ + f'(crop_size={self.crop_size})'
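The cat_max_ratio loop above is a rejection-sampling step: a candidate crop is kept only if no single category dominates it. Here is a hedged paraphrase of the acceptance test with illustrative numbers.

import numpy as np

# Sketch of the acceptance test (made-up counts, ignore_index=255).
cat_max_ratio = 0.75
labels = np.array([0, 1, 255])
cnt = np.array([600, 300, 100])  # pixels per label in the candidate crop
cnt = cnt[labels != 255]         # drop ignored pixels -> [600, 300]
accept = len(cnt) > 1 and cnt.max() / cnt.sum() < cat_max_ratio
# 600 / 900 = 0.667 < 0.75, so this crop is accepted; otherwise a new
# bbox is drawn, up to 10 attempts.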
@PIPELINES.register_module()
class RandomRotate(object):
    """Rotate the image & seg.

    Args:
        prob (float): The rotation probability.
        degree (float, tuple[float]): Range of degrees to select from. If
            degree is a number instead of a tuple like (min, max), the range
            of degrees will be (``-degree``, ``+degree``).
        pad_val (float, optional): Padding value of the image. Default: 0.
        seg_pad_val (float, optional): Padding value of segmentation map.
            Default: 255.
        center (tuple[float], optional): Center point (w, h) of the rotation
            in the source image. If not specified, the center of the image
            will be used. Default: None.
        auto_bound (bool): Whether to adjust the image size to cover the
            whole rotated image. Default: False.
    """

    def __init__(self,
                 prob,
                 degree,
                 pad_val=0,
                 seg_pad_val=255,
                 center=None,
                 auto_bound=False):
        self.prob = prob
        assert prob >= 0 and prob <= 1
        if isinstance(degree, (float, int)):
            assert degree > 0, f'degree {degree} should be positive'
            self.degree = (-degree, degree)
        else:
            self.degree = degree
        assert len(self.degree) == 2, f'degree {self.degree} should be a ' \
                                      f'tuple of (min, max)'
        self.pad_val = pad_val
        self.seg_pad_val = seg_pad_val
        self.center = center
        self.auto_bound = auto_bound
    def __call__(self, results):
        """Call function to rotate the image and semantic segmentation maps.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Rotated results.
        """
        rotate = True if np.random.rand() < self.prob else False
        degree = np.random.uniform(min(*self.degree), max(*self.degree))
        if rotate:
            # rotate image
            results['img'] = mmcv.imrotate(
                results['img'],
                angle=degree,
                border_value=self.pad_val,
                center=self.center,
                auto_bound=self.auto_bound)

            # rotate segs
            for key in results.get('seg_fields', []):
                results[key] = mmcv.imrotate(
                    results[key],
                    angle=degree,
                    border_value=self.seg_pad_val,
                    center=self.center,
                    auto_bound=self.auto_bound,
                    interpolation='nearest')
        return results
    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(prob={self.prob}, ' \
                    f'degree={self.degree}, ' \
                    f'pad_val={self.pad_val}, ' \
                    f'seg_pad_val={self.seg_pad_val}, ' \
                    f'center={self.center}, ' \
                    f'auto_bound={self.auto_bound})'
        return repr_str
@PIPELINES.register_module()
class RGB2Gray(object):
    """Convert an RGB image to a grayscale image.

    This transform calculates the weighted mean of the input image channels
    with ``weights`` and then expands the channels to ``out_channels``. When
    ``out_channels`` is None, the number of output channels is the same as
    the number of input channels.

    Args:
        out_channels (int): Expected number of output channels after
            transforming. Default: None.
        weights (tuple[float]): The weights to calculate the weighted mean.
            Default: (0.299, 0.587, 0.114).
    """

    def __init__(self, out_channels=None, weights=(0.299, 0.587, 0.114)):
        assert out_channels is None or out_channels > 0
        self.out_channels = out_channels
        assert isinstance(weights, tuple)
        for item in weights:
            assert isinstance(item, (float, int))
        self.weights = weights
    def __call__(self, results):
        """Call function to convert an RGB image to a grayscale image.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Result dict with the grayscale image.
        """
        img = results['img']
        assert len(img.shape) == 3
        assert img.shape[2] == len(self.weights)
        weights = np.array(self.weights).reshape((1, 1, -1))
        img = (img * weights).sum(2, keepdims=True)
        if self.out_channels is None:
            img = img.repeat(weights.shape[2], axis=2)
        else:
            img = img.repeat(self.out_channels, axis=2)

        results['img'] = img
        results['img_shape'] = img.shape

        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(out_channels={self.out_channels}, ' \
                    f'weights={self.weights})'
        return repr_str
@PIPELINES.register_module()
class AdjustGamma(object):
    """Use gamma correction to process the image.

    Args:
        gamma (float or int): Gamma value used in gamma correction.
            Default: 1.0.
    """

    def __init__(self, gamma=1.0):
        assert isinstance(gamma, (float, int))
        assert gamma > 0
        self.gamma = gamma
        inv_gamma = 1.0 / gamma
        self.table = np.array([(i / 255.0)**inv_gamma * 255
                               for i in np.arange(256)]).astype('uint8')
    def __call__(self, results):
        """Call function to process the image with gamma correction.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Processed results.
        """

        results['img'] = mmcv.lut_transform(
            np.array(results['img'], dtype=np.uint8), self.table)

        return results

    def __repr__(self):
        return self.__class__.__name__ + f'(gamma={self.gamma})'
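The lookup table built in __init__ precomputes the gamma curve once per transform, so __call__ is a single LUT pass. A hedged numeric check of a few table entries, with an illustrative gamma value, follows.

import numpy as np

# Illustrative check for gamma = 2.0 (inv_gamma = 0.5).
gamma = 2.0
table = np.array([(i / 255.0)**(1.0 / gamma) * 255
                  for i in np.arange(256)]).astype('uint8')
print(table[0], table[64], table[255])  # 0 127 255 (mid-tones brighten)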
@PIPELINES.register_module()
class SegRescale(object):
    """Rescale semantic segmentation maps.

    Args:
        scale_factor (float): The scale factor of the final output.
    """

    def __init__(self, scale_factor=1):
        self.scale_factor = scale_factor

    def __call__(self, results):
        """Call function to scale the semantic segmentation map.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Result dict with the semantic segmentation map scaled.
        """
        for key in results.get('seg_fields', []):
            if self.scale_factor != 1:
                results[key] = mmcv.imrescale(
                    results[key], self.scale_factor, interpolation='nearest')
        return results

    def __repr__(self):
        return self.__class__.__name__ + f'(scale_factor={self.scale_factor})'
@PIPELINES.register_module()
class PhotoMetricDistortion(object):
    """Apply photometric distortion to an image sequentially; every
    transformation is applied with a probability of 0.5. Random contrast is
    applied either second or second to last:

    1. random brightness
    2. random contrast (mode 0)
    3. convert color from BGR to HSV
    4. random saturation
    5. random hue
    6. convert color from HSV to BGR
    7. random contrast (mode 1)

    Args:
        brightness_delta (int): delta of brightness.
        contrast_range (tuple): range of contrast.
        saturation_range (tuple): range of saturation.
        hue_delta (int): delta of hue.
    """

    def __init__(self,
                 brightness_delta=32,
                 contrast_range=(0.5, 1.5),
                 saturation_range=(0.5, 1.5),
                 hue_delta=18):
        self.brightness_delta = brightness_delta
        self.contrast_lower, self.contrast_upper = contrast_range
        self.saturation_lower, self.saturation_upper = saturation_range
        self.hue_delta = hue_delta
    def convert(self, img, alpha=1, beta=0):
        """Multiply with alpha and add beta, then clip."""
        img = img.astype(np.float32) * alpha + beta
        img = np.clip(img, 0, 255)
        return img.astype(np.uint8)
    def brightness(self, img):
        """Brightness distortion."""
        if random.randint(2):
            return self.convert(
                img,
                beta=random.uniform(-self.brightness_delta,
                                    self.brightness_delta))
        return img

    def contrast(self, img):
        """Contrast distortion."""
        if random.randint(2):
            return self.convert(
                img,
                alpha=random.uniform(self.contrast_lower,
                                     self.contrast_upper))
        return img

    def saturation(self, img):
        """Saturation distortion."""
        if random.randint(2):
            img = mmcv.bgr2hsv(img)
            img[:, :, 1] = self.convert(
                img[:, :, 1],
                alpha=random.uniform(self.saturation_lower,
                                     self.saturation_upper))
            img = mmcv.hsv2bgr(img)
        return img

    def hue(self, img):
        """Hue distortion."""
        if random.randint(2):
            img = mmcv.bgr2hsv(img)
            img[:, :, 0] = (img[:, :, 0].astype(int) + random.randint(
                -self.hue_delta, self.hue_delta)) % 180
            img = mmcv.hsv2bgr(img)
        return img
    def __call__(self, results):
        """Call function to perform photometric distortion on images.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Result dict with distorted images.
        """

        img = results['img']
        # random brightness
        img = self.brightness(img)

        # mode == 0 --> do random contrast first
        # mode == 1 --> do random contrast last
        mode = random.randint(2)
        if mode == 1:
            img = self.contrast(img)

        # random saturation
        img = self.saturation(img)

        # random hue
        img = self.hue(img)

        # random contrast
        if mode == 0:
            img = self.contrast(img)

        results['img'] = img
        return results
    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += (f'(brightness_delta={self.brightness_delta}, '
                     f'contrast_range=({self.contrast_lower}, '
                     f'{self.contrast_upper}), '
                     f'saturation_range=({self.saturation_lower}, '
                     f'{self.saturation_upper}), '
                     f'hue_delta={self.hue_delta})')
        return repr_str
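Taken together, the transforms in this file are usually chained into a training pipeline. The sketch below is a representative configuration, hedged throughout: the crop size, scales, ratios and normalization statistics are typical mmseg values, not settings read from this commit.

# Representative (assumed) mmseg-style train pipeline.
crop_size = (512, 512)
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]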
lavis/common/annotator/uniformer/mmseg/datasets/stare.py
0 → 100644
View file @
c04f261a
import os.path as osp

from .builder import DATASETS
from .custom import CustomDataset


@DATASETS.register_module()
class STAREDataset(CustomDataset):
    """STARE dataset.

    In the segmentation map annotation for STARE, 0 stands for background,
    which is included in the 2 categories. ``reduce_zero_label`` is fixed to
    False. The ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is
    fixed to '.ah.png'.
    """

    CLASSES = ('background', 'vessel')

    PALETTE = [[120, 120, 120], [6, 230, 230]]

    def __init__(self, **kwargs):
        super(STAREDataset, self).__init__(
            img_suffix='.png',
            seg_map_suffix='.ah.png',
            reduce_zero_label=False,
            **kwargs)
        assert osp.exists(self.img_dir)