Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
InstructBLIP_pytorch
Commits
c04f261a
Commit
c04f261a
authored
Aug 22, 2024
by
dongchy920
Browse files
InstruceBLIP
parents
Pipeline
#1594
canceled with stages
Changes
421
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
909 additions
and
0 deletions
+909
-0
lavis/common/annotator/uniformer/__init__.py
lavis/common/annotator/uniformer/__init__.py
+23
-0
lavis/common/annotator/uniformer/configs/_base_/datasets/ade20k.py
...mon/annotator/uniformer/configs/_base_/datasets/ade20k.py
+54
-0
lavis/common/annotator/uniformer/configs/_base_/datasets/chase_db1.py
.../annotator/uniformer/configs/_base_/datasets/chase_db1.py
+59
-0
lavis/common/annotator/uniformer/configs/_base_/datasets/cityscapes.py
...annotator/uniformer/configs/_base_/datasets/cityscapes.py
+54
-0
lavis/common/annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py
...r/uniformer/configs/_base_/datasets/cityscapes_769x769.py
+35
-0
lavis/common/annotator/uniformer/configs/_base_/datasets/drive.py
...mmon/annotator/uniformer/configs/_base_/datasets/drive.py
+59
-0
lavis/common/annotator/uniformer/configs/_base_/datasets/hrf.py
...common/annotator/uniformer/configs/_base_/datasets/hrf.py
+59
-0
lavis/common/annotator/uniformer/configs/_base_/datasets/pascal_context.py
...tator/uniformer/configs/_base_/datasets/pascal_context.py
+60
-0
lavis/common/annotator/uniformer/configs/_base_/datasets/pascal_context_59.py
...or/uniformer/configs/_base_/datasets/pascal_context_59.py
+60
-0
lavis/common/annotator/uniformer/configs/_base_/datasets/pascal_voc12.py
...notator/uniformer/configs/_base_/datasets/pascal_voc12.py
+57
-0
lavis/common/annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py
...tor/uniformer/configs/_base_/datasets/pascal_voc12_aug.py
+9
-0
lavis/common/annotator/uniformer/configs/_base_/datasets/stare.py
...mmon/annotator/uniformer/configs/_base_/datasets/stare.py
+59
-0
lavis/common/annotator/uniformer/configs/_base_/default_runtime.py
...mon/annotator/uniformer/configs/_base_/default_runtime.py
+14
-0
lavis/common/annotator/uniformer/configs/_base_/models/ann_r50-d8.py
...n/annotator/uniformer/configs/_base_/models/ann_r50-d8.py
+46
-0
lavis/common/annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py
...nnotator/uniformer/configs/_base_/models/apcnet_r50-d8.py
+44
-0
lavis/common/annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py
...annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py
+44
-0
lavis/common/annotator/uniformer/configs/_base_/models/cgnet.py
...common/annotator/uniformer/configs/_base_/models/cgnet.py
+35
-0
lavis/common/annotator/uniformer/configs/_base_/models/danet_r50-d8.py
...annotator/uniformer/configs/_base_/models/danet_r50-d8.py
+44
-0
lavis/common/annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py
...tator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py
+44
-0
lavis/common/annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py
.../uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py
+50
-0
No files found.
Too many changes to show.
To preserve performance only
421 of 421+
files are displayed.
Plain diff
Email patch
lavis/common/annotator/uniformer/__init__.py
0 → 100644
View file @
c04f261a
# UniFormer semantic-segmentation annotator (ControlNet-style wrapper).
import os
from annotator.uniformer.mmseg.apis import init_segmentor, inference_segmentor, show_result_pyplot
from annotator.uniformer.mmseg.core.evaluation import get_palette
from annotator.util import annotator_ckpts_path

# Remote location of the pretrained UperNet/UniFormer-Small checkpoint,
# downloaded lazily on first use.
checkpoint_file = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/upernet_global_small.pth"


class UniformerDetector:
    """Callable wrapper around an mmseg UniFormer segmentor.

    On construction, downloads the checkpoint if missing and builds the
    model on CUDA.  Calling the instance with an image returns the
    segmentation rendered as an image.
    """

    def __init__(self):
        # Lazily fetch the checkpoint into the shared annotator cache dir.
        modelpath = os.path.join(annotator_ckpts_path, "upernet_global_small.pth")
        if not os.path.exists(modelpath):
            # Import deferred so basicsr is only required on first download.
            from basicsr.utils.download_util import load_file_from_url
            load_file_from_url(checkpoint_file, model_dir=annotator_ckpts_path)
        # NOTE(review): the config path is resolved relative to the *parent*
        # directory of annotator_ckpts_path — confirm the on-disk layout
        # actually contains uniformer/exp/upernet_global_small/config.py there.
        config_file = os.path.join(os.path.dirname(annotator_ckpts_path), "uniformer", "exp", "upernet_global_small", "config.py")
        # Requires a CUDA device; there is no CPU fallback here.
        self.model = init_segmentor(config_file, modelpath).cuda()

    def __call__(self, img):
        # img: image accepted by mmseg's inference_segmentor
        # (presumably an HxWx3 uint8 array — TODO confirm expected channel order).
        result = inference_segmentor(self.model, img)
        # Render the predicted segmentation with the ADE20K palette,
        # fully opaque (opacity=1), i.e. the raw label colors.
        res_img = show_result_pyplot(self.model, img, result, get_palette('ade'), opacity=1)
        return res_img
lavis/common/annotator/uniformer/configs/_base_/datasets/ade20k.py
0 → 100644
View file @
c04f261a
# dataset settings
# mmseg dataset config: ADE20K (ADEChallengeData2016), 512x512 crops.
dataset_type = 'ADE20KDataset'
data_root = 'data/ade/ADEChallengeData2016'
# ImageNet mean/std normalization.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    # reduce_zero_label: label 0 is "ignore" in ADE20K annotations.
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 512),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/training',
        ann_dir='annotations/training',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
lavis/common/annotator/uniformer/configs/_base_/datasets/chase_db1.py
0 → 100644
View file @
c04f261a
# dataset settings
# mmseg dataset config: CHASE_DB1 retinal-vessel segmentation, 128x128 crops.
dataset_type = 'ChaseDB1Dataset'
data_root = 'data/CHASE_DB1'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (960, 999)
crop_size = (128, 128)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    # RepeatDataset: small dataset repeated so iteration-based schedules work.
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
lavis/common/annotator/uniformer/configs/_base_/datasets/cityscapes.py
0 → 100644
View file @
c04f261a
# dataset settings
# mmseg dataset config: Cityscapes, 512x1024 crops.
dataset_type = 'CityscapesDataset'
data_root = 'data/cityscapes/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 1024)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 1024),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='leftImg8bit/train',
        ann_dir='gtFine/train',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='leftImg8bit/val',
        ann_dir='gtFine/val',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='leftImg8bit/val',
        ann_dir='gtFine/val',
        pipeline=test_pipeline))
lavis/common/annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py
0 → 100644
View file @
c04f261a
# Cityscapes variant with 769x769 crops (inherits from ./cityscapes.py and
# overrides only the pipelines; 2049x1025 scale pairs with align_corners heads).
_base_ = './cityscapes.py'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (769, 769)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2049, 1025),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
lavis/common/annotator/uniformer/configs/_base_/datasets/drive.py
0 → 100644
View file @
c04f261a
# dataset settings
# mmseg dataset config: DRIVE retinal-vessel segmentation, 64x64 crops.
dataset_type = 'DRIVEDataset'
data_root = 'data/DRIVE'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (584, 565)
crop_size = (64, 64)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    # RepeatDataset: small dataset repeated so iteration-based schedules work.
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
lavis/common/annotator/uniformer/configs/_base_/datasets/hrf.py
0 → 100644
View file @
c04f261a
# dataset settings
# mmseg dataset config: HRF retinal-vessel segmentation, 256x256 crops.
dataset_type = 'HRFDataset'
data_root = 'data/HRF'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (2336, 3504)
crop_size = (256, 256)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    # RepeatDataset: small dataset repeated so iteration-based schedules work.
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
lavis/common/annotator/uniformer/configs/_base_/datasets/pascal_context.py
0 → 100644
View file @
c04f261a
# dataset settings
# mmseg dataset config: PASCAL Context (60 classes incl. background), 480x480 crops.
dataset_type = 'PascalContextDataset'
data_root = 'data/VOCdevkit/VOC2010/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (520, 520)
crop_size = (480, 480)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/train.txt',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline))
lavis/common/annotator/uniformer/configs/_base_/datasets/pascal_context_59.py
0 → 100644
View file @
c04f261a
# dataset settings
# mmseg dataset config: PASCAL Context 59-class variant (background folded into
# ignore via reduce_zero_label), 480x480 crops.
dataset_type = 'PascalContextDataset59'
data_root = 'data/VOCdevkit/VOC2010/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (520, 520)
crop_size = (480, 480)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/train.txt',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline))
lavis/common/annotator/uniformer/configs/_base_/datasets/pascal_voc12.py
0 → 100644
View file @
c04f261a
# dataset settings
# mmseg dataset config: PASCAL VOC 2012 segmentation, 512x512 crops.
dataset_type = 'PascalVOCDataset'
data_root = 'data/VOCdevkit/VOC2012'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 512),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClass',
        split='ImageSets/Segmentation/train.txt',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClass',
        split='ImageSets/Segmentation/val.txt',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClass',
        split='ImageSets/Segmentation/val.txt',
        pipeline=test_pipeline))
lavis/common/annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py
0 → 100644
View file @
c04f261a
# PASCAL VOC 2012 with the SBD "aug" extra annotations merged into training.
_base_ = './pascal_voc12.py'
# dataset settings
data = dict(
    train=dict(
        ann_dir=['SegmentationClass', 'SegmentationClassAug'],
        split=[
            'ImageSets/Segmentation/train.txt',
            'ImageSets/Segmentation/aug.txt'
        ]))
lavis/common/annotator/uniformer/configs/_base_/datasets/stare.py
0 → 100644
View file @
c04f261a
# dataset settings
# mmseg dataset config: STARE retinal-vessel segmentation, 128x128 crops.
dataset_type = 'STAREDataset'
data_root = 'data/STARE'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (605, 700)
crop_size = (128, 128)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    # RepeatDataset: small dataset repeated so iteration-based schedules work.
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
lavis/common/annotator/uniformer/configs/_base_/default_runtime.py
0 → 100644
View file @
c04f261a
# Default runtime settings shared by the training configs:
# logging, distributed backend, checkpoint resume, workflow.
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook', by_epoch=False),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
lavis/common/annotator/uniformer/configs/_base_/models/ann_r50-d8.py
0 → 100644
View file @
c04f261a
# model settings
# ANN (Asymmetric Non-local Network) with ResNet-50-D8 backbone.
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='ANNHead',
        in_channels=[1024, 2048],
        in_index=[2, 3],
        channels=512,
        project_channels=256,
        query_scales=(1, ),
        key_pool_scales=(1, 3, 6, 8),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # Auxiliary FCN head on stage-3 features with down-weighted loss.
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
lavis/common/annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py
0 → 100644
View file @
c04f261a
# model settings
# APCNet (Adaptive Pyramid Context Network) with ResNet-50-D8 backbone.
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='APCHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=19,
        # NOTE(review): inline duplicate of norm_cfg (matches upstream mmseg);
        # kept verbatim to avoid diverging from the reference config.
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
lavis/common/annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py
0 → 100644
View file @
c04f261a
# model settings
# CCNet (Criss-Cross attention) with ResNet-50-D8 backbone.
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='CCHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        # Number of criss-cross attention passes.
        recurrence=2,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
lavis/common/annotator/uniformer/configs/_base_/models/cgnet.py
0 → 100644
View file @
c04f261a
# model settings
# CGNet lightweight segmentation model (no ImageNet pretraining).
norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True)
model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='CGNet',
        norm_cfg=norm_cfg,
        in_channels=3,
        num_channels=(32, 64, 128),
        num_blocks=(3, 21),
        dilations=(2, 4),
        reductions=(8, 16)),
    decode_head=dict(
        type='FCNHead',
        in_channels=256,
        in_index=2,
        channels=256,
        num_convs=0,
        concat_input=False,
        dropout_ratio=0,
        num_classes=19,
        norm_cfg=norm_cfg,
        loss_decode=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=1.0,
            # Per-class weights for the 19 Cityscapes classes to counter
            # class imbalance (values from the original CGNet recipe).
            class_weight=[
                2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899,
                9.369352, 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431,
                9.396905, 10.347791, 6.3927646, 10.226669, 10.241062,
                10.280587, 10.396974, 10.055647
            ])),
    # model training and testing settings
    train_cfg=dict(sampler=None),
    test_cfg=dict(mode='whole'))
lavis/common/annotator/uniformer/configs/_base_/models/danet_r50-d8.py
0 → 100644
View file @
c04f261a
# model settings
# DANet (Dual Attention Network) with ResNet-50-D8 backbone.
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='DAHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pam_channels=64,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
lavis/common/annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py
0 → 100644
View file @
c04f261a
# model settings
# DeepLabV3 (ASPP head) with ResNet-50-D8 backbone.
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='ASPPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        # ASPP atrous rates.
        dilations=(1, 12, 24, 36),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
lavis/common/annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py
0 → 100644
View file @
c04f261a
# model settings
# DeepLabV3 ASPP head on a 5-stage U-Net encoder-decoder backbone
# (binary segmentation, sliding-window inference).
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='UNet',
        in_channels=3,
        base_channels=64,
        num_stages=5,
        strides=(1, 1, 1, 1, 1),
        enc_num_convs=(2, 2, 2, 2, 2),
        dec_num_convs=(2, 2, 2, 2),
        downsamples=(True, True, True, True),
        enc_dilations=(1, 1, 1, 1, 1),
        dec_dilations=(1, 1, 1, 1),
        with_cp=False,
        conv_cfg=None,
        norm_cfg=norm_cfg,
        act_cfg=dict(type='ReLU'),
        upsample_cfg=dict(type='InterpConv'),
        norm_eval=False),
    decode_head=dict(
        type='ASPPHead',
        in_channels=64,
        in_index=4,
        channels=16,
        dilations=(1, 12, 24, 36),
        dropout_ratio=0.1,
        # Binary foreground/background segmentation.
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=128,
        in_index=3,
        channels=64,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    # Sliding-window inference: 256-pixel windows with stride 170.
    test_cfg=dict(mode='slide', crop_size=256, stride=170))
Prev
1
…
3
4
5
6
7
8
9
10
11
…
22
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment